def search(self, isAccurate, query, start_time, end_time, min_lat, min_lng, max_lat, max_lng, rows=5000):
    """Search tweets by text inside a time window and a lat/lng bounding box.

    Args:
        isAccurate: When truthy the ``text_accurate`` field is queried,
            otherwise the ``text`` field.
        query: Sequence of term strings, joined with commas into a single
            field query. ``None`` or an empty sequence matches everything
            (``*``).
        start_time: Start of the time window; converted with
            ``TimeFunc.time_func_python_date_to_solr_date``.
        end_time: End of the time window; converted the same way.
        min_lat, min_lng: First corner of the ``geolocation`` range filter.
        max_lat, max_lng: Second corner of the ``geolocation`` range filter.
            Corner values may be strings or numbers.
        rows: Maximum number of documents requested.

    Returns:
        The object returned by ``self.solr.search``.
    """
    start_time_str = TimeFunc.time_func_python_date_to_solr_date(start_time)
    end_time_str = TimeFunc.time_func_python_date_to_solr_date(end_time)

    # No terms supplied: fall back to a wildcard on the selected field.
    terms = query if query else ['*']
    field = 'text_accurate' if isAccurate else 'text'
    q = field + ':' + ','.join(terms)

    # str.format() stringifies its arguments, so numeric coordinates no
    # longer raise TypeError the way "+" concatenation did.
    filter_queries = [
        'created_at:[{0} TO {1}]'.format(start_time_str, end_time_str),
        'geolocation:[{0},{1} TO {2},{3}]'.format(min_lat, min_lng, max_lat, max_lng),
    ]

    return_field = [
        'tweet_id', 'user_id', 'created_at', 'text', 'tokens',
        'token_tags', 'screen_name', 'geolocation',
    ]

    results = self.solr.search(q, fq=filter_queries, fl=return_field, rows=rows)
    print("Saw {0} result(s).".format(len(results)))
    return results
def search(self, isAccurate, query, start_time, end_time, min_lat, min_lng, max_lat, max_lng, rows=5000):
    """Run a bounding-box tweet search against the Solr index.

    Args:
        isAccurate: Truthy selects the ``text_accurate`` field; falsy
            selects ``text``.
        query: Sequence of term strings (comma-joined into one field
            query). ``None`` or empty means "match everything" (``*``).
        start_time: Lower bound of ``created_at``; converted with
            ``TimeFunc.time_func_python_date_to_solr_date``.
        end_time: Upper bound of ``created_at``; converted the same way.
        min_lat, min_lng: Lower corner used in the ``geolocation`` range.
        max_lat, max_lng: Upper corner used in the ``geolocation`` range.
            Strings and numbers are both accepted.
        rows: Maximum number of documents to request.

    Returns:
        Whatever ``self.solr.search`` returns for the built query.
    """
    start_time_str = TimeFunc.time_func_python_date_to_solr_date(start_time)
    end_time_str = TimeFunc.time_func_python_date_to_solr_date(end_time)

    if not query:
        # Nothing to search for: wildcard on the chosen field.
        query = ['*']

    if isAccurate:
        q = 'text_accurate:' + ','.join(query)
    else:
        q = 'text:' + ','.join(query)

    # Coerce every coordinate to str so callers may pass floats/ints;
    # plain "+" concatenation raised TypeError for non-string values.
    lower_corner = str(min_lat) + "," + str(min_lng)
    upper_corner = str(max_lat) + "," + str(max_lng)
    filter_queries = [
        'created_at:[' + str(start_time_str) + ' TO ' + str(end_time_str) + ']',
        'geolocation:[' + lower_corner + ' TO ' + upper_corner + ']',
    ]

    return_field = [
        'tweet_id',
        'user_id',
        'created_at',
        'text',
        'tokens',
        'token_tags',
        'screen_name',
        'geolocation',
    ]

    results = self.solr.search(q, fq=filter_queries, fl=return_field, rows=rows)
    print("Saw {0} result(s).".format(len(results)))
    return results
def query():
    """Handle a topic query request and return the topics as JSON.

    Reads ``query``, ``graphID``, ``radius`` and ``center`` from the request
    query string, asks ``awg_manager.retrieve`` (with ``realtime=False``) for
    topics inside the circular area, and serialises the result with
    ``dumps``. Any failure, including an empty result, is logged to stdout
    and answered with an empty list.

    Returns:
        The ``dumps``-serialised result list (``[]`` on error or when no
        topics were found).
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")

    # A request without a "query" parameter yields None; treat that as
    # "no terms" instead of crashing on None.split().
    term = term.split(' ') if term is not None else []

    print("term, size: ", term, len(term), "; graphID: ", graphID, "; radius: ", radius, "; center: ", center)

    geobounds = {'type': 'circular', 'center': center, 'radius': radius}

    # NOTE: the time window is hard-coded (marked as test values in the
    # original code).
    start_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-21T00:00:00Z')
    end_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-23T00:00:00Z')

    try:
        results = awg_manager.retrieve(term, graphID, start_time, end_time, geobounds, realtime=False)
        if len(results) == 0:
            raise ValueError('No topics formed from retrieved tweets.')
    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Exception (not a bare except) so SystemExit and KeyboardInterrupt
        # still propagate.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
def query():
    """Answer a form-submitted text query with matching tweets as JSON.

    The search window and the bounding box are fixed values. The
    ``queryText`` form field is split on whitespace into search terms.

    Returns:
        ``None`` when ``queryText`` is absent, otherwise the
        ``dumps``-serialised list of documents returned by
        ``search.search``.
    """
    window_start = TimeFunc.time_func_solr_date_to_python_date('2015-05-13T00:00:00Z')
    window_end = TimeFunc.time_func_solr_date_to_python_date('2016-01-21T20:00:00Z')

    raw_text = request.forms.get("queryText")
    if raw_text is None:
        return None

    terms = raw_text.split()
    # Fixed bounding box: (25, -129) to (50, -60), passed as strings.
    hits = search.search(False, terms, window_start, window_end,
                         '25', '-129', '50', '-60')

    # Materialise the result object into a plain list for serialisation.
    docs = [hit for hit in hits]

    response.content_type = "application/json"
    return dumps(docs)
def query():
    """Handle a topic query request and return the topics as JSON.

    Reads ``query``, ``graphID``, ``radius`` and ``center`` from the request
    query string, asks ``awg_manager.retrieve`` for topics inside the
    circular area, and serialises the result with ``dumps``. Any failure,
    including an empty result, is logged to stdout and answered with an
    empty list.

    Returns:
        The ``dumps``-serialised result list (``[]`` on error or when no
        topics were found).
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")

    # Missing "query" parameter means no search terms.
    if term is not None:
        term = term.split(' ')
    else:
        term = []

    print("term, size: ", term, "; graphID: ", graphID, "; radius: ", radius, "; center: ", center)

    geobounds = {'type': 'circular', 'center': center, 'radius': radius}

    # NOTE: the time window is hard-coded (marked as test values in the
    # original code).
    start_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-21T00:00:00Z')
    end_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-23T00:00:00Z')

    try:
        results = awg_manager.retrieve(term, graphID, start_time, end_time, geobounds)
        if len(results) == 0:
            raise ValueError('No topics formed from retrieved tweets.')
    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Exception (not a bare except) so SystemExit and KeyboardInterrupt
        # are not swallowed by the handler.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
def query():
    """Serve the tweets matching the posted ``queryText`` as JSON.

    Uses a fixed time window and a fixed bounding box; the form text is
    split on whitespace to obtain the search terms.

    Returns:
        ``None`` if the form carries no ``queryText``; otherwise the
        ``dumps`` output for the list of documents from ``search.search``.
    """
    time_from = TimeFunc.time_func_solr_date_to_python_date(
        '2015-05-13T00:00:00Z')
    time_to = TimeFunc.time_func_solr_date_to_python_date(
        '2016-01-21T20:00:00Z')

    text = request.forms.get("queryText")
    if text is None:
        return None

    # Bounding-box corners are handed over as strings.
    south, west, north, east = str(25), str(-129), str(50), str(-60)
    found = search.search(False, text.split(), time_from, time_to,
                          south, west, north, east)

    payload = []
    payload.extend(doc for doc in found)

    response.content_type = "application/json"
    return dumps(payload)
def search_radius(self, isAccurate, query, start_time, end_time, center, radius, rows=5000):
    """Search tweets in a time window, optionally limited to a circle.

    Args:
        isAccurate: If equal to ``True`` the ``text_accurate`` field is
            queried, otherwise ``text``.
        query: Sequence of term strings, comma-joined into one field
            query. ``None`` or empty yields the bare query ``*``.
        start_time: Start of the ``created_at`` window; converted with
            ``TimeFunc.time_func_python_date_to_solr_date``.
        end_time: End of the ``created_at`` window; converted the same way.
        center: Passed to Solr as ``pt``; ``None`` disables the geo filter.
        radius: Passed to Solr as ``d``; ``None`` disables the geo filter.
        rows: Maximum number of documents requested.

    Returns:
        The object returned by ``self.solr.search``.
    """
    window_start = TimeFunc.time_func_python_date_to_solr_date(start_time)
    window_end = TimeFunc.time_func_python_date_to_solr_date(end_time)

    if not query or len(query) == 0:
        q = '*'
    elif isAccurate == True:
        q = 'text_accurate:' + ','.join(query)
    else:
        q = 'text:' + ','.join(query)

    fields = [
        'tweet_id', 'user_id', 'created_at', 'text', 'tokens',
        'token_tags', 'chunk', 'phrases', 'screen_name', 'geolocation',
    ]

    params = {
        'fq': ['created_at:[' + window_start + ' TO ' + window_end + ']'],
        'fl': fields,
        'rows': rows,
    }

    # The geo filter is applied only when both center and radius are given.
    if radius is not None and center is not None:
        params['fq'].append("{!geofilt sfield=geolocation}")
        # NOTE(review): the sort field looks like a fixed-seed random
        # ordering - confirm against the Solr schema.
        params.update(spatial=True,
                      pt=center,
                      sfield="geolocation",
                      d=radius,
                      sort="random_3721117253841 desc")

    results = self.solr.search(q, **params)
    print("Saw {0} result(s).".format(len(results)))
    return results
return_field = [ 'tweet_id', 'user_id', 'created_at', 'text', 'tokens', 'token_tags', 'screen_name', 'geolocation' ] results = self.solr.search(q, fq=filter_queries, fl=return_field, rows=rows) print("Saw {0} result(s).".format(len(results))) # Just loop over it to access the results. # for result in results: # print("{1}, {0}".format(result['text'], result['created_at'], result['geolocation'])) # print("{0}".format(result['geolocation'])) return results if __name__ == '__main__': solrSearcher = SolrSearcher( 'http://128.46.137.79:8983/solr/TwitterDB_1401/') start_time = TimeFunc.time_func_solr_date_to_python_date( '2014-01-21T12:00:00Z') end_time = TimeFunc.time_func_solr_date_to_python_date( '2014-01-22T12:00:00Z') term = [] rst = solrSearcher.search(False, term, start_time, end_time, str(38.5), str(-87.5), str(41.5), str(-85), 100000)
def query():
    """Search tweets in a circular area and categorise them into topics.

    Reads ``query``, ``graphID``, ``radius``, ``center``, ``start_time`` and
    ``end_time`` from the request query string, fetches matching tweets
    through ``search.search_radius`` and hands them to
    ``EMTerms().category_tweets_to_topics_vn_pair``.

    Returns:
        On success, the value returned by
        ``EMTerms().category_tweets_to_topics_vn_pair`` (returned as-is; the
        content type is not set on this path). If no tweets are found or any
        error occurs, the problem is printed and ``dumps([])`` is returned
        with the JSON content type.
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")
    start_time = request.query.get("start_time")
    end_time = request.query.get("end_time")

    start_time = TimeFunc.time_func_solr_date_to_python_date(start_time)
    end_time = TimeFunc.time_func_solr_date_to_python_date(end_time)

    # Missing "query" parameter means no search terms.
    term = term.split(' ') if term is not None else []

    print("term, size: ", term, "; graphID: ", graphID, "; radius: ", radius, "; center: ", center)
    print("start_time: ", start_time, "; end_time: ", end_time)

    try:
        rst = search.search_radius(False, term, start_time, end_time, center, radius, rows=500000)

        if len(rst) == 0:
            raise ValueError('No topics formed from retrieved tweets.')

        tweets = []
        for t in rst:
            # "geolocation" is read as a "lat,lon" string; split it once.
            coords = t['geolocation'].split(',')
            tweets.append({
                'tweet_id': str(t['tweet_id']),
                'created_at': t['created_at'],
                'geolocation': {'lon': coords[1], 'lat': coords[0]},
                # The tweet text is taken from the "tokens" field.
                'text': t['tokens'],
                'token_tags': t['token_tags'],
            })
            # NOTE(review): an earlier, disabled variant also built a
            # filtered per-tweet 'tokens' list, described as required by
            # "emterms mode 1" - restore it if that mode is used again.

        print("num of tweets", len(tweets))
        return EMTerms().category_tweets_to_topics_vn_pair(tweets)

    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Exception (not a bare except) so SystemExit and KeyboardInterrupt
        # still propagate.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
if not EMTerms.instance: EMTerms.instance = EMTerms.__EMTerms() return EMTerms.instance @staticmethod def has(term): return EMTerms.instance.has(term) @staticmethod def category_tweets(tweets): return EMTerms.instance.category_tweets(tweets) if __name__ == '__main__': # rst = EMTerms().category_tweets(['this is one shooter', 'a shooting on campus', 'the victims is andrew bolt']) start_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-21T00:00:00Z') end_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-23T00:00:00Z') search = SolrSearcher.SolrSearcher('http://128.46.137.79:8983/solr/TwitterDB_Purdueshooting/') rst = search.search(False, [], start_time, end_time, str(25), str(-129), str(50), str(-60)) tweets = [] for t in rst: tweet = {} tweet['tweet_id'] = str(t['tweet_id']) tweet['created_at'] = t['created_at'] tweet['geolocation'] = {} tweet['geolocation']['lon'] = t['geolocation'].split(',')[1] tweet['geolocation']['lat'] = t['geolocation'].split(',')[0] tweet['text'] = t['text'] tweets.append(tweet)
def query():
    """Fetch tweets around a point and return their topic categorisation.

    Request query-string parameters used: ``query`` (space-separated terms),
    ``graphID`` (only logged), ``radius``, ``center``, ``start_time`` and
    ``end_time`` (both converted with
    ``TimeFunc.time_func_solr_date_to_python_date``).

    Returns:
        The result of ``EMTerms().category_tweets_to_topics_vn_pair`` for
        the retrieved tweets. When nothing is retrieved or an exception is
        raised, the error is printed and ``dumps([])`` is returned with the
        response content type set to JSON.
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")
    start_time = request.query.get("start_time")
    end_time = request.query.get("end_time")

    start_time = TimeFunc.time_func_solr_date_to_python_date(start_time)
    end_time = TimeFunc.time_func_solr_date_to_python_date(end_time)

    # No "query" parameter: search without terms.
    if term is not None:
        term = term.split(' ')
    else:
        term = []

    print("term, size: ", term, "; graphID: ", graphID, "; radius: ", radius, "; center: ", center)
    print("start_time: ", start_time, "; end_time: ", end_time)

    try:
        rst = search.search_radius(False, term, start_time, end_time, center, radius, rows=500000)

        if len(rst) == 0:
            raise ValueError('No topics formed from retrieved tweets.')

        tweets = []
        for t in rst:
            # Split the "lat,lon" string once rather than once per part.
            parts = t['geolocation'].split(',')
            tweet = {
                'tweet_id': str(t['tweet_id']),
                'created_at': t['created_at'],
                'geolocation': {'lon': parts[1], 'lat': parts[0]},
                # The "text" entry carries the tokenised text.
                'text': t['tokens'],
                'token_tags': t['token_tags'],
            }
            # NOTE(review): a disabled variant of this loop also filled
            # tweet['tokens'] (noted as required by "emterms mode 1").
            tweets.append(tweet)

        print("num of tweets", len(tweets))
        return EMTerms().category_tweets_to_topics_vn_pair(tweets)

    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Narrowed from a bare except so interpreter-exit signals are not
        # swallowed by the handler.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
def search_radius(self, isAccurate, query, start_time, end_time, center, radius, rows=5000):
    """Query Solr for tweets in a time range, with an optional radius filter.

    Args:
        isAccurate: If equal to ``True`` the terms are matched against
            ``text_accurate``; otherwise against ``text``.
        query: Sequence of term strings joined with commas. ``None`` or an
            empty sequence produces the bare query ``*``.
        start_time: Window start, converted with
            ``TimeFunc.time_func_python_date_to_solr_date``.
        end_time: Window end, converted the same way.
        center: Centre point sent as ``pt``; the spatial filter is skipped
            when this is ``None``.
        radius: Distance sent as ``d``; the spatial filter is skipped when
            this is ``None``.
        rows: Maximum number of documents requested.

    Returns:
        The object returned by ``self.solr.search``.
    """
    solr_start = TimeFunc.time_func_python_date_to_solr_date(start_time)
    solr_end = TimeFunc.time_func_python_date_to_solr_date(end_time)

    has_terms = not (not query or len(query) == 0)
    if has_terms:
        prefix = 'text_accurate:' if isAccurate == True else 'text:'
        q = prefix + ','.join(query)
    else:
        q = '*'

    # Both values are needed for the geofilt query.
    spatial = not (radius is None or center is None)

    return_field = ['tweet_id', 'user_id', 'created_at', 'text', 'tokens',
                    'token_tags', 'chunk', 'phrases', 'screen_name',
                    'geolocation']

    created_at_filter = 'created_at:[' + solr_start + ' TO ' + solr_end + ']'

    if not spatial:
        results = self.solr.search(q,
                                   fq=[created_at_filter],
                                   fl=return_field,
                                   rows=rows)
    else:
        # Equivalent request: {!geofilt sfield=geolocation}&pt=38,-86&d=1
        results = self.solr.search(
            q,
            fq=[created_at_filter, "{!geofilt sfield=geolocation}"],
            fl=return_field,
            rows=rows,
            spatial=spatial,
            pt=center,
            sfield="geolocation",
            d=radius,
            sort="random_3721117253841 desc")

    print("Saw {0} result(s).".format(len(results)))
    return results