def search(self, isAccurate, query, start_time, end_time, min_lat, min_lng, max_lat, max_lng, rows=5000):
    """Search for tweets inside a time window and a lat/lng range.

    Args:
        isAccurate: When it compares equal to ``True`` the ``text_accurate``
            field is queried, otherwise the ``text`` field.
        query: Sequence of term strings; they are joined with commas.
            ``None`` or an empty sequence falls back to the wildcard ``*``.
        start_time: Lower bound of the ``created_at`` filter, converted with
            ``TimeFunc.time_func_python_date_to_solr_date``.
        end_time: Upper bound of the ``created_at`` filter, converted the
            same way.
        min_lat, min_lng: First corner of the ``geolocation`` range filter.
        max_lat, max_lng: Second corner of the ``geolocation`` range filter.
            The four bounds may be strings or numbers.
        rows: Maximum number of rows requested from ``self.solr.search``.

    Returns:
        The object returned by ``self.solr.search``.
    """
    start_time_str = TimeFunc.time_func_python_date_to_solr_date(start_time)
    end_time_str = TimeFunc.time_func_python_date_to_solr_date(end_time)

    if not query:
        query = ['*']

    # ``== True`` is kept on purpose: a truthy non-boolean such as the string
    # 'false' must keep selecting the default ``text`` field, as before.
    field = 'text_accurate' if isAccurate == True else 'text'  # noqa: E712
    q = field + ':' + ','.join(query)

    # Formatting (rather than ``+`` concatenation) lets callers pass the
    # bounds as numbers as well as pre-stringified values.
    filter_queries = [
        'created_at:[{0} TO {1}]'.format(start_time_str, end_time_str),
        'geolocation:[{0},{1} TO {2},{3}]'.format(min_lat, min_lng, max_lat, max_lng),
    ]

    return_field = ['tweet_id', 'user_id', 'created_at', 'text', 'tokens', 'token_tags', 'screen_name', 'geolocation']

    results = self.solr.search(q, fq=filter_queries, fl=return_field, rows=rows)

    print("Saw {0} result(s).".format(len(results)))

    return results
コード例 #2
0
    def search(self,
               isAccurate,
               query,
               start_time,
               end_time,
               min_lat,
               min_lng,
               max_lat,
               max_lng,
               rows=5000):
        """Search for tweets inside a time window and a lat/lng range.

        Args:
            isAccurate: When it compares equal to ``True`` the
                ``text_accurate`` field is queried, otherwise ``text``.
            query: Sequence of term strings; they are joined with commas.
                ``None`` or an empty sequence falls back to ``*``.
            start_time: Lower bound of the ``created_at`` filter, converted
                with ``TimeFunc.time_func_python_date_to_solr_date``.
            end_time: Upper bound of the ``created_at`` filter, converted
                the same way.
            min_lat, min_lng: First corner of the ``geolocation`` range.
            max_lat, max_lng: Second corner of the ``geolocation`` range.
                The four bounds may be strings or numbers.
            rows: Maximum number of rows requested from ``self.solr.search``.

        Returns:
            The object returned by ``self.solr.search``.
        """
        start_time_str = TimeFunc.time_func_python_date_to_solr_date(
            start_time)
        end_time_str = TimeFunc.time_func_python_date_to_solr_date(end_time)

        if not query:
            query = ['*']

        # ``== True`` is kept on purpose: a truthy non-boolean such as the
        # string 'false' must keep selecting the default ``text`` field.
        field = 'text_accurate' if isAccurate == True else 'text'  # noqa: E712
        q = field + ':' + ','.join(query)

        # Formatting (rather than ``+`` concatenation) lets callers pass the
        # bounds as numbers as well as pre-stringified values.
        filter_queries = [
            'created_at:[{0} TO {1}]'.format(start_time_str, end_time_str),
            'geolocation:[{0},{1} TO {2},{3}]'.format(
                min_lat, min_lng, max_lat, max_lng),
        ]

        return_field = [
            'tweet_id', 'user_id', 'created_at', 'text', 'tokens',
            'token_tags', 'screen_name', 'geolocation'
        ]

        results = self.solr.search(q,
                                   fq=filter_queries,
                                   fl=return_field,
                                   rows=rows)

        print("Saw {0} result(s).".format(len(results)))

        return results
コード例 #3
0
def query():
    """Retrieve topics for the request's search terms and return them as JSON.

    Reads ``query``, ``graphID``, ``radius`` and ``center`` from
    ``request.query`` and forwards them to ``awg_manager.retrieve`` with a
    hard-coded test time window and ``realtime=False``.

    Returns:
        The ``dumps`` serialisation of the retrieved results, or of an empty
        list when retrieval raised or produced nothing.
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")

    # A request without a "query" parameter yields None; treat it as "no
    # terms" instead of crashing on None.split before the try block.
    term = term.split(' ') if term is not None else []

    print("term, size: ", term, len(term), "; graphID: ", graphID,
          "; radius: ", radius, "; center: ", center)

    geobounds = {'type': 'circular', 'center': center, 'radius': radius}

    # Fixed window used for testing.
    start_time = TimeFunc.time_func_solr_date_to_python_date(
        '2014-01-21T00:00:00Z')
    end_time = TimeFunc.time_func_solr_date_to_python_date(
        '2014-01-23T00:00:00Z')

    try:
        results = awg_manager.retrieve(term,
                                       graphID,
                                       start_time,
                                       end_time,
                                       geobounds,
                                       realtime=False)
        if len(results) == 0:
            raise ValueError('No topics formed from retrieved tweets.')
    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Best-effort endpoint: log and answer with an empty list, but let
        # KeyboardInterrupt/SystemExit propagate.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
def query():
    """Run a text search for the submitted ``queryText`` and return JSON.

    Returns ``None`` when the form carries no ``queryText``; otherwise the
    ``dumps`` serialisation of the list of search results.
    """
    window_start = TimeFunc.time_func_solr_date_to_python_date('2015-05-13T00:00:00Z')
    window_end = TimeFunc.time_func_solr_date_to_python_date('2016-01-21T20:00:00Z')

    raw_text = request.forms.get("queryText")
    if raw_text is None:
        return None

    # Fixed bounds, passed as strings: 25,-129 to 50,-60.
    hits = search.search(False, raw_text.split(), window_start, window_end,
                         '25', '-129', '50', '-60')

    # Materialise the results into a plain list before serialising.
    rst = list(hits)

    response.content_type = "application/json"
    return dumps(rst)
def query():
    """Retrieve topics for the request's search terms and return them as JSON.

    Reads ``query``, ``graphID``, ``radius`` and ``center`` from
    ``request.query`` and forwards them to ``awg_manager.retrieve`` with a
    hard-coded test time window.

    Returns:
        The ``dumps`` serialisation of the retrieved results, or of an empty
        list when retrieval raised or produced nothing.
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")

    # A missing "query" parameter means "no terms".
    term = term.split(' ') if term is not None else []

    print("term, size: ", term, "; graphID: ", graphID, "; radius: ", radius, "; center: ", center)

    geobounds = {'type': 'circular', 'center': center, 'radius': radius}

    # Fixed window used for testing.
    start_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-21T00:00:00Z')
    end_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-23T00:00:00Z')

    try:
        results = awg_manager.retrieve(term, graphID, start_time, end_time, geobounds)
        if len(results) == 0:
            raise ValueError('No topics formed from retrieved tweets.')
    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Best-effort endpoint: log and answer with an empty list, but let
        # KeyboardInterrupt/SystemExit propagate.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
コード例 #6
0
def query():
    """Search tweets for the posted ``queryText`` and answer with JSON.

    When the form has no ``queryText`` the handler returns ``None``.
    Otherwise the whitespace-split terms are searched over a fixed time
    window and fixed bounds, and the results are serialised with ``dumps``.
    """
    since = TimeFunc.time_func_solr_date_to_python_date(
        '2015-05-13T00:00:00Z')
    until = TimeFunc.time_func_solr_date_to_python_date(
        '2016-01-21T20:00:00Z')

    submitted = request.forms.get("queryText")
    if submitted is None:
        return None

    words = submitted.split()
    # Bounds are handed over as strings.
    bounds = ('25', '-129', '50', '-60')
    found = search.search(False, words, since, until, *bounds)

    # Copy the results into a plain list before serialising.
    rst = []
    rst.extend(found)

    response.content_type = "application/json"
    return dumps(rst)
コード例 #7
0
    def search_radius(self,
                      isAccurate,
                      query,
                      start_time,
                      end_time,
                      center,
                      radius,
                      rows=5000):
        """Search tweets in a time window, optionally limited to a circle.

        The text query is ``*`` when ``query`` is empty or ``None``;
        otherwise the comma-joined terms are searched in ``text_accurate``
        (when ``isAccurate == True``) or in ``text``. A ``geofilt`` filter on
        ``geolocation`` is added only when both ``center`` and ``radius`` are
        given; in that case a fixed random sort is requested as well.

        Returns the object produced by ``self.solr.search`` and prints the
        number of results.
        """
        start_time_str = TimeFunc.time_func_python_date_to_solr_date(
            start_time)
        end_time_str = TimeFunc.time_func_python_date_to_solr_date(end_time)

        if query and len(query) > 0:
            prefix = 'text_accurate:' if isAccurate == True else 'text:'
            q = prefix + ','.join(query)
        else:
            q = '*'

        wanted_fields = [
            'tweet_id', 'user_id', 'created_at', 'text', 'tokens',
            'token_tags', 'chunk', 'phrases', 'screen_name', 'geolocation'
        ]

        if radius is None or center is None:
            # No usable circle: filter on time only.
            time_filter = ''.join(
                ('created_at:[', start_time_str, ' TO ', end_time_str, ']'))
            results = self.solr.search(q,
                                       fq=[time_filter],
                                       fl=wanted_fields,
                                       rows=rows)
        else:
            time_filter = ''.join(
                ('created_at:[', start_time_str, ' TO ', end_time_str, ']'))
            # Equivalent request form:
            # {!geofilt sfield=geolocation}&pt=<center>&d=<radius>
            results = self.solr.search(
                q,
                fq=[time_filter, "{!geofilt sfield=geolocation}"],
                fl=wanted_fields,
                rows=rows,
                spatial=True,
                pt=center,
                sfield="geolocation",
                d=radius,
                sort="random_3721117253841 desc")

        print("Saw {0} result(s).".format(len(results)))
        return results
コード例 #8
0
        return_field = [
            'tweet_id', 'user_id', 'created_at', 'text', 'tokens',
            'token_tags', 'screen_name', 'geolocation'
        ]

        results = self.solr.search(q,
                                   fq=filter_queries,
                                   fl=return_field,
                                   rows=rows)

        print("Saw {0} result(s).".format(len(results)))

        # Just loop over it to access the results.
        # for result in results:
        # print("{1}, {0}".format(result['text'], result['created_at'], result['geolocation']))
        # print("{0}".format(result['geolocation']))

        return results


if __name__ == '__main__':
    # Manual run against a fixed Solr core: empty term list, one-day window,
    # fixed bounds passed as strings, up to 100000 rows.
    solrSearcher = SolrSearcher(
        'http://128.46.137.79:8983/solr/TwitterDB_1401/')
    start_time = TimeFunc.time_func_solr_date_to_python_date(
        '2014-01-21T12:00:00Z')
    end_time = TimeFunc.time_func_solr_date_to_python_date(
        '2014-01-22T12:00:00Z')
    term = []

    bounds = ('38.5', '-87.5', '41.5', '-85')
    rst = solrSearcher.search(False, term, start_time, end_time, *bounds + (100000,))
def query():
    """Search tweets around a point and categorise them into topics.

    Reads ``query``, ``graphID``, ``radius``, ``center``, ``start_time`` and
    ``end_time`` from ``request.query``, runs ``search.search_radius`` and
    hands the reshaped tweets to
    ``EMTerms().category_tweets_to_topics_vn_pair``.

    Returns:
        On success, whatever ``category_tweets_to_topics_vn_pair`` returns
        (NOTE(review): the JSON content type is only set on the failure
        path; confirm the success value is already a response body).
        On failure or when nothing was found, ``dumps([])``.
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")
    start_time = request.query.get("start_time")
    end_time = request.query.get("end_time")

    start_time = TimeFunc.time_func_solr_date_to_python_date(start_time)
    end_time = TimeFunc.time_func_solr_date_to_python_date(end_time)

    # A missing "query" parameter means "no terms".
    term = term.split(' ') if term is not None else []

    print("term, size: ", term, "; graphID: ", graphID, "; radius: ", radius, "; center: ", center)
    print("start_time: ", start_time, "; end_time: ", end_time)

    try:
        rst = search.search_radius(False, term, start_time, end_time, center, radius, rows=500000)

        if len(rst) == 0:
            raise ValueError('No topics formed from retrieved tweets.')

        tweets = []
        for t in rst:
            # 'geolocation' is a "lat,lon" string; split it once.
            coords = t['geolocation'].split(',')
            tweets.append({
                'tweet_id': str(t['tweet_id']),
                'created_at': t['created_at'],
                'geolocation': {'lon': coords[1], 'lat': coords[0]},
                # The tokenised text is what gets categorised.
                'text': t['tokens'],
                'token_tags': t['token_tags'],
            })

        print("num of tweets", len(tweets))

        return EMTerms().category_tweets_to_topics_vn_pair(tweets)

    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Best-effort endpoint: log and answer with an empty list, but let
        # KeyboardInterrupt/SystemExit propagate.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
        if not EMTerms.instance:
            EMTerms.instance = EMTerms.__EMTerms()
        return EMTerms.instance
    
    @staticmethod
    def has(term):
        """Forward ``term`` to ``has`` on the shared ``EMTerms.instance``."""
        shared = EMTerms.instance
        return shared.has(term)
    
    @staticmethod
    def category_tweets(tweets):
        """Forward ``tweets`` to ``category_tweets`` on ``EMTerms.instance``."""
        shared = EMTerms.instance
        return shared.category_tweets(tweets)
    
if __name__ == '__main__':
    # Manual run: fetch tweets from a fixed Solr core for a fixed window and
    # bounds, then reshape each hit into a plain dict.
    start_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-21T00:00:00Z')
    end_time = TimeFunc.time_func_solr_date_to_python_date('2014-01-23T00:00:00Z')

    search = SolrSearcher.SolrSearcher('http://128.46.137.79:8983/solr/TwitterDB_Purdueshooting/')
    rst = search.search(False, [], start_time, end_time, '25', '-129', '50', '-60')

    tweets = []
    for t in rst:
        tweet = {'tweet_id': str(t['tweet_id']), 'created_at': t['created_at']}
        # 'geolocation' is split on ','; index 0 is stored as lat, 1 as lon.
        parts = t['geolocation'].split(',')
        tweet['geolocation'] = {'lon': parts[1], 'lat': parts[0]}
        tweet['text'] = t['text']
        tweets.append(tweet)
コード例 #11
0
def query():
    """Search tweets around a point and categorise them into topics.

    Reads ``query``, ``graphID``, ``radius``, ``center``, ``start_time`` and
    ``end_time`` from ``request.query``, runs ``search.search_radius`` and
    hands the reshaped tweets to
    ``EMTerms().category_tweets_to_topics_vn_pair``.

    Returns:
        On success, whatever ``category_tweets_to_topics_vn_pair`` returns
        (NOTE(review): the JSON content type is only set on the failure
        path; confirm the success value is already a response body).
        On failure or when nothing was found, ``dumps([])``.
    """
    term = request.query.get("query")
    graphID = request.query.get("graphID")
    radius = request.query.get("radius")
    center = request.query.get("center")
    start_time = request.query.get("start_time")
    end_time = request.query.get("end_time")

    start_time = TimeFunc.time_func_solr_date_to_python_date(start_time)
    end_time = TimeFunc.time_func_solr_date_to_python_date(end_time)

    # A missing "query" parameter means "no terms".
    if term is not None:
        term = term.split(' ')
    else:
        term = []

    print("term, size: ", term, "; graphID: ", graphID, "; radius: ", radius,
          "; center: ", center)
    print("start_time: ", start_time, "; end_time: ", end_time)

    try:
        rst = search.search_radius(False,
                                   term,
                                   start_time,
                                   end_time,
                                   center,
                                   radius,
                                   rows=500000)

        if len(rst) == 0:
            raise ValueError('No topics formed from retrieved tweets.')

        tweets = []
        for t in rst:
            # 'geolocation' is a "lat,lon" string; split it once.
            lat_lon = t['geolocation'].split(',')
            tweets.append({
                'tweet_id': str(t['tweet_id']),
                'created_at': t['created_at'],
                'geolocation': {
                    'lon': lat_lon[1],
                    'lat': lat_lon[0],
                },
                # The tokenised text is what gets categorised.
                'text': t['tokens'],
                'token_tags': t['token_tags'],
            })

        print("num of tweets", len(tweets))

        return EMTerms().category_tweets_to_topics_vn_pair(tweets)

    except ValueError as err:
        results = []
        print(err.args)
        print(traceback.format_exc())
    except Exception:
        # Best-effort endpoint: log and answer with an empty list, but let
        # KeyboardInterrupt/SystemExit propagate.
        print("error retrieving topics")
        results = []
        print(traceback.format_exc())

    response.content_type = 'application/json'
    return dumps(results)
    def search_radius(self, isAccurate, query, start_time, end_time, center, radius, rows=5000):
        """Query tweets by text and time, with an optional circular geo filter.

        ``query`` terms are comma-joined and searched in ``text_accurate``
        when ``isAccurate == True``, otherwise in ``text``; an empty or
        ``None`` query becomes ``*``. The ``geofilt`` filter (and the fixed
        random sort) is used only if neither ``center`` nor ``radius`` is
        ``None``.

        Prints the result count and returns what ``self.solr.search`` gave.
        """
        start_time_str = TimeFunc.time_func_python_date_to_solr_date(start_time)
        end_time_str = TimeFunc.time_func_python_date_to_solr_date(end_time)

        if not query or len(query) == 0:
            q = '*'
        elif isAccurate == True:
            q = 'text_accurate:' + ','.join(query)
        else:
            q = 'text:' + ','.join(query)

        use_geofilt = not (radius is None or center is None)

        return_field = ['tweet_id', 'user_id', 'created_at', 'text', 'tokens', 'token_tags',
                        'chunk', 'phrases', 'screen_name', 'geolocation']

        # The time-range filter is always applied.
        time_span = ' TO '.join((start_time_str, end_time_str))
        filter_queries = ['created_at:[' + time_span + ']']

        if use_geofilt:
            # Equivalent request form:
            # {!geofilt sfield=geolocation}&pt=<center>&d=<radius>
            filter_queries.append("{!geofilt sfield=geolocation}")
            results = self.solr.search(q, fq=filter_queries, fl=return_field, rows=rows,
                                       spatial=True, pt=center, sfield="geolocation", d=radius,
                                       sort="random_3721117253841 desc")
        else:
            results = self.solr.search(q, fq=filter_queries, fl=return_field, rows=rows)

        print("Saw {0} result(s).".format(len(results)))

        return results