Python searchの例、ruman.global_config.weibo_es.search Pythonの例

コード例 #1

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_topics(user):
    results = {'recommend':{},'own':{}}
    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[{'term':{'comput_status':1}}],
                        'must_not':[{'term':{'submit_user':user}}]
                    }
                }
            }
        },
        'sort':{'submit_ts':{'order':'desc'}},
        'size':1000
        
    }
    topics = weibo_es.search(index=topic_index_name,doc_type=topic_index_type,body=query_body)
    if topics:
        topics = topics['hits']['hits']
        for topic in topics:
            try:
                if topic['_source']['en_name']=='sa-de-1500559853':
                    topic['_source']['en_name']='1478787000_1480255800_sa-de-1500559853_1788911247'
                results['recommend'][topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status'],topic['_source']['submit_user']])
            except:
                if topic['_source']['en_name']=='sa-de-1500559853':
                    topic['_source']['en_name']='1478787000_1480255800_sa-de-1500559853_1788911247'
                results['recommend'][topic['_source']['en_name']] = [[topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status'],topic['_source']['submit_user']]]
    query_own = {
        'query':{
            'filtered':{
                'filter':{
                    'term':{'submit_user':user}
                }
            }
        },
        "size": 1000
    }
    own_topics =  weibo_es.search(index=topic_index_name,doc_type=topic_index_type,body=query_own)
    if own_topics:
        topics = own_topics['hits']['hits']
        for topic in topics:
            try:
                if topic['_source']['en_name']=='sa-de-1500559853':
                    topic['_source']['en_name']='1478787000_1480255800_sa-de-1500559853_1788911247'
                results['own'][topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['comput_status'],topic['_source']['end_ts']])
            except:
                if topic['_source']['en_name']=='sa-de-1500559853':
                    topic['_source']['en_name']='1478787000_1480255800_sa-de-1500559853_1788911247'
                results['own'][topic['_source']['en_name']] = [[topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['comput_status'],topic['_source']['end_ts']]]

            #print results
    return json.dumps(results)

コード例 #2

0

ファイルを表示

def search_topics(keyword):
    result = []
    query_body = {
        'query': {
            'bool': {
                'should': [{
                    'wildcard': {
                        'en_name': '*' + keyword + '*'
                    }
                }, {
                    'wildcard': {
                        'name': '*' + keyword + '*'
                    }
                }]
            }
        }
    }
    results = weibo_es.search(index=topic_index_name,
                              doc_type=topic_index_type,
                              body=query_body)
    if results:
        topics = results['hits']['hits']
        for topic in topics:
            result.append(topic['_source'])
            #try:
            #    result[topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']])
            #except:
            #    result[topic['_source']['en_name']] = [[topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']]]
    return json.dumps(result)

コード例 #3

0

ファイルを表示

def submit(topic, start_ts, end_ts, submit_user):
    # print str(topic.decode('utf-8'))
    query_body = {'query': {'filtered': {'filter': {'term': {'name': topic}}}}}
    find_topic = weibo_es.search(index=topic_index_name,
                                 doc_type=topic_index_type,
                                 body=query_body)['hits']['hits']
    if len(find_topic) > 0:
        en_name = find_topic[0]['_source']['en_name']
    else:
        en_name = p.get_pinyin(topic) + '-' + str(int(time.time()))

    submit_id = start_ts + '_' + end_ts + '_' + en_name + '_' + submit_user
    query_body = {
        'name': topic,
        'en_name': en_name,
        'start_ts': start_ts,
        'end_ts': end_ts,
        'submit_user': submit_user,
        'comput_status': 0,
        'submit_ts': int(time.time())
    }
    try:
        # print weibo_es.get(index=topic_index_name, doc_type=topic_index_type, id=submit_id)['_source']
        result = 'already_have'
    except:
        weibo_es.index(index=topic_index_name,
                       doc_type=topic_index_type,
                       id=submit_id,
                       body=query_body)
        result = 'success'
    r.lpush(topic_queue_name, json.dumps(query_body))
    #该push到redis里，然后改status  计算完了再改回来
    return result

コード例 #4

0

ファイルを表示

def get_subopinion(topic, start_ts, end_ts):
    # query_body = {
    #     'query':{
    #         'filtered':{
    #             'filter':{
    #                 'bool':{
    #                     'must':[
    #                         {'match_phrase':{'name':topic}}
    #                     ]
    #                 }
    #             }
    #         }
    #     }
    # }
    query_body = {"query": {"match_phrase": {"name": topic}}}
    # print query_body
    features = weibo_es.search(index=subopinion_index_name,
                               doc_type=subopinion_index_type,
                               body=query_body)['hits']['hits']

    # return features
    if features:
        feature = json.loads(features[0]['_source']['features'])
        return feature.values()
    else:
        return 'no results'

コード例 #5

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_subopinion(topic,start_ts,end_ts):
    # query_body = {
    #     'query':{
    #         'filtered':{
    #             'filter':{
    #                 'bool':{
    #                     'must':[
    #                         {'match_phrase':{'name':topic}}
    #                     ]
    #                 }
    #             }
    #         }
    #     }
    # }
    query_body = {
        "query": {
          "match_phrase": {
            "name": topic
          }
        }
    }
    # print query_body
    features = weibo_es.search(index=subopinion_index_name,doc_type=subopinion_index_type,body=query_body)['hits']['hits']
    
    # return features
    if features:
        feature = json.loads(features[0]['_source']['features'])
        return feature.values()
    else:
        return 'no results'

コード例 #6

0

ファイルを表示

def cul_key_weibo_time_count(topic, news_topics, start_ts, over_ts, during):
    key_weibo_time_count = {}
    time_dict = {}
    during = Day
    for clusterid, keywords in news_topics.iteritems(
    ):  #{u'd2e97cf7-fc43-4982-8405-2d215b3e1fea': [u'\u77e5\u8bc6', u'\u5e7f\u5dde', u'\u9009\u624b']}
        if len(keywords) > 0:
            print len(keywords)
            start_ts = int(start_ts)
            over_ts = int(over_ts)

            over_ts = ts2HourlyTime(over_ts, during)
            interval = (over_ts - start_ts) / during

            for i in range(interval, 0, -1):  #时间段取每900秒的

                begin_ts = over_ts - during * i
                end_ts = begin_ts + during
                must_list = []
                must_list.append(
                    {'range': {
                        'timestamp': {
                            'gte': begin_ts,
                            'lt': end_ts
                        }
                    }})
                temp = []

                for word in keywords:
                    word_list = word.split('_', 1)
                    word1 = word_list[0]
                    sentence1 = {
                        'wildcard': {
                            'keywords_string': '*' + word1 + '*'
                        }
                    }
                    try:
                        word2 = word_list[1]
                        sentence2 = {
                            'wildcard': {
                                'keywords_string': '*' + word2 + '*'
                            }
                        }
                        temp.append(sentence2)
                    except:
                        print "index out of range"
                    temp.append(sentence1)
                must_list.append({'bool': {'should': temp}})

                query_body = {'query': {'bool': {'must': must_list}}}
                key_weibo = weibo_es.search(index=topic,
                                            doc_type=weibo_index_type,
                                            body=query_body)
                key_weibo_count = key_weibo['hits']['total']  #分时间段的类的数量
                print key_weibo_count
                time_dict[ts2datetime(end_ts)] = key_weibo_count

            key_weibo_time_count[clusterid] = sorted(time_dict.items(),
                                                     key=lambda x: x[0])
    return key_weibo_time_count

コード例 #7

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_weibo_by_time(topic,start_ts,end_ts,sort_item='timestamp'):
    print topic,start_ts,end_ts,weibo_es
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'range':{'timestamp':{'lte':int(end_ts),'gte':int(start_ts)}}}
                ]
            }
        },
        'size':200,
        'sort':{sort_item:{'order':'desc'}}
    }
    items = weibo_es.search(index=topic,body=query_body)['hits']['hits']
    #items = db.session.query(PropagateWeibos).filter(PropagateWeibos.topic==topic).all()
    weibo_dict = {}
    if items:
        for item in items:  
            #print item,type(item)
            #mtype = item.mtype
            #weibos = _json_loads(item.weibos)
            weibo = item['_source']
            #print mtype
            weibo_content = {}
            weibo_content['text'] = weibo['text'] 
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = weibo['timestamp']
            #weibo_content['sentiment'] = weibo['sentiment'] 
            try:
                weibo_content['comment'] = weibo['comment']
            except:
                weibo_content['comment'] = 0
            try:
                weibo_content['retweeted'] = weibo['retweeted']
            except:
                weibo_content['retweeted'] = 0
            try:
                user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            #weibo_content['keywords'] = weibo['keywords_dict']
            weibo_content['mid'] = weibo['mid']
            #print weibo_content
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=1)

    else:
        results = []   
        #results = sorted(weibo_dict.items(),key=lambda x:x[1]['retweeted'],reverse=False)
        #for result in results:
            #print result
    return results

コード例 #8

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_symbol_weibo(topic,start_ts,end_ts,unit=MinInterval):  #鱼骨图
    # print topic
    weibos = {}
    # query_body = {
    #     'query':{
    #         'bool':{
    #             'must':[
    #                 {'term':{'name':topic}},
    #                 {'range':{'start_ts':{'lte':start_ts}}},
    #                 {'range':{'end_ts':{'gte':end_ts}}}
    #             ]
    #         }
    #     }
    # }
    query_body = {
        "query": {
          "match_phrase": {
            "name": topic
          }
        }
    }

    try:
        symbol = weibo_es.search(index=topics_river_index_name,doc_type=topics_river_index_type,body=query_body)['hits']['hits'][0]['_source']
    except:
        return 0
    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])
    # print 'dadfhsjdbfsdjdh'
    # print symbol_weibos
    begin_ts = end_ts - unit
    for clusterid,contents in symbol_weibos.iteritems():
        j = 0
        content = set()
        for i in contents:
            ts = full_datetime2ts(i['datetime'])
            # title = re.findall(r'【.*】',i['content'].encode('utf8'))[0]
            title = i['content'][:7]
            if ts >= start_ts and ts <= end_ts and title not in content:  #start_ts应该改成begin_ts，现在近15分钟没数据，所以用所有的
                try:
                    weibos[','.join(features[clusterid][:5])].append(i)
                except:
                    weibos[','.join(features[clusterid][:5])] = [i]
                content.add(title)
                j += 1
            #print content
            if j == 3:
                break
    #print weibos
    return weibos

コード例 #9

0

ファイルを表示

def search_topic_by_topic(topic):
    result = []
    query_body = {'query': {'term': {'name': topic}}}
    results = weibo_es.search(index=topic_index_name,
                              doc_type=topic_index_type,
                              body=query_body)
    if results:
        topics = results['hits']['hits']
        for topic in topics:
            result.append(topic['_source'])
            #try:
            #    result[topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']])
            #except:
    return json.dumps(result)

コード例 #10

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def cul_key_weibo_time_count(topic,news_topics,start_ts,over_ts,during):
    key_weibo_time_count = {}
    time_dict = {}
    during = Day
    for clusterid,keywords in news_topics.iteritems(): #{u'd2e97cf7-fc43-4982-8405-2d215b3e1fea': [u'\u77e5\u8bc6', u'\u5e7f\u5dde', u'\u9009\u624b']}
        if len(keywords)>0:
            print len(keywords)
            start_ts = int(start_ts)
            over_ts = int(over_ts)

            over_ts = ts2HourlyTime(over_ts, during)
            interval = (over_ts - start_ts) / during


            for i in range(interval, 0, -1):    #时间段取每900秒的

                begin_ts = over_ts - during * i
                end_ts = begin_ts + during
                must_list=[]
                must_list.append({'range':{'timestamp':{'gte':begin_ts,'lt':end_ts}}})
                temp = []
                
                for word in keywords:
                    word_list=word.split('_', 1)
                    word1=word_list[0]
                    sentence1 =  {'wildcard':{'keywords_string':'*'+word1+'*'}}
                    try:
                        word2=word_list[1]
                        sentence2 =  {'wildcard':{'keywords_string':'*'+word2+'*'}}
                        temp.append(sentence2)
                    except:
                        print "index out of range"
                    temp.append(sentence1)
                must_list.append({'bool':{'should':temp}})

                query_body = {'query':{
                                'bool':{
                                    'must':must_list
                                }
                            }
                        }
                key_weibo = weibo_es.search(index=topic,doc_type=weibo_index_type,body=query_body)
                key_weibo_count = key_weibo['hits']['total']  #分时间段的类的数量
                print key_weibo_count
                time_dict[ts2datetime(end_ts)] = key_weibo_count

            key_weibo_time_count[clusterid] = sorted(time_dict.items(),key=lambda x:x[0])
    return key_weibo_time_count

コード例 #11

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_weibo_content(topic,start_ts,end_ts,opinion,sort_item='timestamp'): #微博内容
    weibo_dict = {}
    #a = json.dumps(opinion)
    #opinion = '圣保罗_班底_巴西_康熙'
    print 'opinion:::::::::',opinion
    print 'topic:::::::;:::',topic
    print type(start_ts),type(end_ts)
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'wildcard':{'keys':opinion}},
                    {'term':{'name':topic}},
                    {'range':{'start_ts':{'lte':start_ts}}},
                    {'range':{'end_ts':{'gte':end_ts}}}
                ]
            }
        }
    }  #没有查到uid   每次的id不一样   
    print query_body
    weibos = weibo_es.search(index=subopinion_index_name,doc_type=subopinion_index_type,body=query_body)['hits']['hits']
    #print weibo_es,subopinion_index_name,subopinion_index_type,query_body
    print len(weibos)
    if weibos:
        weibos = json.loads(weibos[0]['_source']['cluster_dump_dict'])
        for weibo in weibos.values():#jln0825
            weibo = weibo[0]
            weibo_content = {}
            weibo_content['text'] = weibo['text'] 
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = full_datetime2ts(weibo['datetime'])
            weibo_content['comment'] = weibo['comment']
            weibo_content['retweeted'] = weibo['retweeted']
            weibo_content['mid'] = weibo['id']
            try:
                user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=True)
        #print results
        return results
    else:
        return 'no results'

コード例 #12

0

ファイルを表示

def get_symbol_weibo(topic, start_ts, end_ts, unit=MinInterval):  #鱼骨图
    # print topic
    weibos = {}
    # query_body = {
    #     'query':{
    #         'bool':{
    #             'must':[
    #                 {'term':{'name':topic}},
    #                 {'range':{'start_ts':{'lte':start_ts}}},
    #                 {'range':{'end_ts':{'gte':end_ts}}}
    #             ]
    #         }
    #     }
    # }
    query_body = {"query": {"match_phrase": {"name": topic}}}

    try:
        symbol = weibo_es.search(index=topics_river_index_name,
                                 doc_type=topics_river_index_type,
                                 body=query_body)['hits']['hits'][0]['_source']
    except:
        return 0
    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])
    # print 'dadfhsjdbfsdjdh'
    # print symbol_weibos
    begin_ts = end_ts - unit
    for clusterid, contents in symbol_weibos.iteritems():
        j = 0
        content = set()
        for i in contents:
            ts = full_datetime2ts(i['datetime'])
            # title = re.findall(r'【.*】',i['content'].encode('utf8'))[0]
            title = i['content'][:7]
            if ts >= start_ts and ts <= end_ts and title not in content:  #start_ts应该改成begin_ts，现在近15分钟没数据，所以用所有的
                try:
                    weibos[','.join(features[clusterid][:5])].append(i)
                except:
                    weibos[','.join(features[clusterid][:5])] = [i]
                content.add(title)
                j += 1
            #print content
            if j == 3:
                break
    #print weibos
    return weibos

コード例 #13

0

ファイルを表示

def get_during_keywords(topic, start_ts, end_ts):  #关键词云,unit=MinInterval
    keywords = []
    # if (end_ts-start_ts)>unit:
    #     begin_ts = end_ts-unit
    # else:
    #     begin_ts = start_ts
    # print begin_ts,end_ts
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'range': {
                        'timestamp': {
                            'gte': start_ts,
                            'lt': end_ts
                        }
                    }
                }
            }
        },
        'size': MAX_LANGUAGE_WEIBO
    }
    keywords_dict = {}
    weibo_text = []
    keyword_weibo = weibo_es.search(index=topic,
                                    doc_type=weibo_index_type,
                                    body=query_body)['hits']['hits']
    for key_weibo in keyword_weibo:
        weibo_text.append(key_weibo['_source']['text'].encode('utf-8'))
    keywords_dict = get_weibo(weibo_text, n_gram=2, n_count=100)
    '''
    print keyword_weibo
    for key_weibo in keyword_weibo:
        keywords_dict_list = json.loads(key_weibo['_source']['keywords_dict'])  #
        #print keywords_dict_list,type(keywords_dict_list)
        for k,v in keywords_dict_list.iteritems():
            try:
                keywords_dict[k] += v
            except:
                keywords_dict[k] = v
    '''
    word_results = sorted(keywords_dict.iteritems(),
                          key=lambda x: x[1],
                          reverse=True)[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)

コード例 #14

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def search_topic_by_topic(topic):
    result = []
    query_body = {
        'query': {
            'term': {
                'name':topic
                }
            }
    }
    results = weibo_es.search(index=topic_index_name,doc_type=topic_index_type,body=query_body)
    if results:
        topics = results['hits']['hits']
        for topic in topics:
            result.append(topic['_source'])
            #try:
            #    result[topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']])
            #except:
    return json.dumps(result)

コード例 #15

0

ファイルを表示

def get_topics_river(topic, start_ts, end_ts, unit=MinInterval):  #主题河
    #topic='event'
    # print start_ts,end_ts
    topic_name = topic + '_' + start_ts + '_' + end_ts
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'term': {
                        'name': topic_name
                    }
                }, {
                    'range': {
                        'start_ts': {
                            'lte': start_ts
                        }
                    }
                }, {
                    'range': {
                        'end_ts': {
                            'gte': end_ts
                        }
                    }
                }]
            }
        }
    }
    # print '????',query_body
    news_topics = json.loads(
        weibo_es.search(
            index=topics_river_index_name,
            doc_type=topics_river_index_type,
            body=query_body)['hits']['hits'][0]['_source']['features'])
    zhutihe_results = cul_key_weibo_time_count(topic, news_topics, start_ts,
                                               end_ts, unit)
    results = {}
    for k, v in news_topics.iteritems():
        if len(v) > 0:
            results[v[0]] = zhutihe_results[k]
    # print results
    return results

コード例 #16

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_key_topics(keyword):
    result = {}
    query_body = {
        'query': {
            'bool': {
                'must': [
                        {'term':{'comput_status':1}},
                        {'wildcard':{'name':'*'+keyword+'*'}}
                        ]
                }
            }
    }
    results = weibo_es.search(index=topic_index_name,doc_type=topic_index_type,body=query_body)
    if results:
        topics = results['hits']['hits']
        for topic in topics:
            try:
                result[topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']])
            except:
                result[topic['_source']['en_name']] = [[topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']]]
    return json.dumps(result)

コード例 #17

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def search_topics(keyword):
    result = []
    query_body = {
        'query': {
            'bool': {
                'should': [
                        {'wildcard':{'en_name':'*'+keyword+'*'}},
                        {'wildcard':{'name':'*'+keyword+'*'}}
                        ]
                }
            }
    }
    results = weibo_es.search(index=topic_index_name,doc_type=topic_index_type,body=query_body)
    if results:
        topics = results['hits']['hits']
        for topic in topics:
            result.append(topic['_source'])
            #try:
            #    result[topic['_source']['en_name']].append([topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']])
            #except:
            #    result[topic['_source']['en_name']] = [[topic['_source']['name'],topic['_source']['start_ts'],topic['_source']['end_ts'],topic['_source']['comput_status']]]
    return json.dumps(result)

コード例 #18

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_during_keywords(topic,start_ts,end_ts):  #关键词云,unit=MinInterval
    keywords = []
    # if (end_ts-start_ts)>unit:
    #     begin_ts = end_ts-unit
    # else:
    #     begin_ts = start_ts
    # print begin_ts,end_ts
    query_body = {
        'query':{
            'filtered':{
                'filter':{
                    'range':{
                        'timestamp':{'gte': start_ts, 'lt':end_ts} 
                    }
                }
            }
        },
        'size':MAX_LANGUAGE_WEIBO
    }
    keywords_dict = {}
    weibo_text = []
    keyword_weibo = weibo_es.search(index=topic,doc_type=weibo_index_type,body=query_body)['hits']['hits']   
    for key_weibo in keyword_weibo:
        weibo_text.append(key_weibo['_source']['text'].encode('utf-8'))
    keywords_dict = get_weibo(weibo_text,n_gram=2,n_count=100)
    '''
    print keyword_weibo
    for key_weibo in keyword_weibo:
        keywords_dict_list = json.loads(key_weibo['_source']['keywords_dict'])  #
        #print keywords_dict_list,type(keywords_dict_list)
        for k,v in keywords_dict_list.iteritems():
            try:
                keywords_dict[k] += v
            except:
                keywords_dict[k] = v
    '''
    word_results = sorted(keywords_dict.iteritems(),key=lambda x:x[1],reverse=True)[:MAX_FREQUENT_WORDS]   
    return json.dumps(word_results)

コード例 #19

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def submit(topic,start_ts,end_ts,submit_user):
    # print str(topic.decode('utf-8'))
    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'term':{
                        'name':topic 
                    }
                }
            }
        }
    }
    find_topic = weibo_es.search(index=topic_index_name,doc_type=topic_index_type,body=query_body)['hits']['hits']
    if len(find_topic)>0:
        en_name = find_topic[0]['_source']['en_name']
    else:
        en_name = p.get_pinyin(topic)+'-'+str(int(time.time()))

    submit_id = start_ts+'_'+end_ts+'_'+en_name+'_'+submit_user
    query_body={
        'name':topic,
        'en_name':en_name,
        'start_ts':start_ts,
        'end_ts':end_ts,
        'submit_user':submit_user,
        'comput_status':0,
        'submit_ts':int(time.time())
    }
    try:
        # print weibo_es.get(index=topic_index_name, doc_type=topic_index_type, id=submit_id)['_source']
        result = 'already_have'
    except:
        weibo_es.index(index=topic_index_name,doc_type=topic_index_type,id=submit_id,body=query_body)
        result = 'success'
    r.lpush(topic_queue_name,json.dumps(query_body))
    #该push到redis里，然后改status  计算完了再改回来
    return result

コード例 #20

0

ファイルを表示

def get_key_topics(keyword):
    result = {}
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'term': {
                        'comput_status': 1
                    }
                }, {
                    'wildcard': {
                        'name': '*' + keyword + '*'
                    }
                }]
            }
        }
    }
    results = weibo_es.search(index=topic_index_name,
                              doc_type=topic_index_type,
                              body=query_body)
    if results:
        topics = results['hits']['hits']
        for topic in topics:
            try:
                result[topic['_source']['en_name']].append([
                    topic['_source']['name'], topic['_source']['start_ts'],
                    topic['_source']['end_ts'],
                    topic['_source']['comput_status']
                ])
            except:
                result[topic['_source']['en_name']] = [[
                    topic['_source']['name'], topic['_source']['start_ts'],
                    topic['_source']['end_ts'],
                    topic['_source']['comput_status']
                ]]
    return json.dumps(result)

コード例 #21

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

def get_topics_river(topic,start_ts,end_ts,unit=MinInterval):#主题河
    #topic='event'
    # print start_ts,end_ts
    topic_name = topic+'_'+start_ts+'_'+end_ts
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'term':{'name':topic_name}},
                    {'range':{'start_ts':{'lte':start_ts}}},
                    {'range':{'end_ts':{'gte':end_ts}}}
                ]
            }
        }
    }
    # print '????',query_body
    news_topics = json.loads(weibo_es.search(index=topics_river_index_name,doc_type=topics_river_index_type,body=query_body)['hits']['hits'][0]['_source']['features'])
    zhutihe_results = cul_key_weibo_time_count(topic,news_topics,start_ts,end_ts,unit)
    results = {}
    for k,v in news_topics.iteritems():
        if len(v)>0:
            results[v[0]] = zhutihe_results[k]
    # print results
    return results

コード例 #22

0

ファイルを表示

ファイル: utils.py プロジェクト: SwoJa/ruman

        return;  # 则没有众数  
    else: 
        sort_arr = sorted(arr_appear.iteritems(), key=lambda d:d[1], reverse = True)
        for i in range(0,4):  # 否则，出现次数最大的数字，就是众数  
            mode.append(sort_arr[i][0]);  
    return mode


if __name__ == '__main__':
	#all_weibo_count('aoyunhui',1468166400,1468170900)
    #get_symbol_weibo('aoyunhui',1468944000,1471622400,Day)
    # opinion=['姐姐', '综艺节目', '网络']
    #get_weibo_content('aoyunhui',1468944000,1471622400,opinion)
    # get_during_keywords('aoyunhui',1468944000,1469707652)
    topic='te-lang-pu-ji-xin-ge-1492166854'
    start_ts = 1478736000
    end_ts=1480176000
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'term':{'name':topic}},
                    {'range':{'start_ts':{'lte':start_ts}}},
                    {'range':{'end_ts':{'gte':end_ts}}}
                ]
            }
        }
    }
    
    symbol = weibo_es.search(index=topics_river_index_name,doc_type=topics_river_index_type,body=query_body)['hits']['hits'][0]['_source']
    print symbol

コード例 #23

0

ファイルを表示

def get_weibo_content(topic,
                      start_ts,
                      end_ts,
                      opinion,
                      sort_item='timestamp'):  #微博内容
    weibo_dict = {}
    #a = json.dumps(opinion)
    #opinion = '圣保罗_班底_巴西_康熙'
    print 'opinion:::::::::', opinion
    print 'topic:::::::;:::', topic
    print type(start_ts), type(end_ts)
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'wildcard': {
                        'keys': opinion
                    }
                }, {
                    'term': {
                        'name': topic
                    }
                }, {
                    'range': {
                        'start_ts': {
                            'lte': start_ts
                        }
                    }
                }, {
                    'range': {
                        'end_ts': {
                            'gte': end_ts
                        }
                    }
                }]
            }
        }
    }  #没有查到uid   每次的id不一样
    print query_body
    weibos = weibo_es.search(index=subopinion_index_name,
                             doc_type=subopinion_index_type,
                             body=query_body)['hits']['hits']
    #print weibo_es,subopinion_index_name,subopinion_index_type,query_body
    print len(weibos)
    if weibos:
        weibos = json.loads(weibos[0]['_source']['cluster_dump_dict'])
        for weibo in weibos.values():  #jln0825
            weibo = weibo[0]
            weibo_content = {}
            weibo_content['text'] = weibo['text']
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = full_datetime2ts(weibo['datetime'])
            weibo_content['comment'] = weibo['comment']
            weibo_content['retweeted'] = weibo['retweeted']
            weibo_content['mid'] = weibo['id']
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),
                         key=lambda x: x[1][sort_item],
                         reverse=True)
        #print results
        return results
    else:
        return 'no results'

コード例 #24

0

ファイルを表示

def get_topics(user):
    results = {'recommend': {}, 'own': {}}
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'term': {
                                'comput_status': 1
                            }
                        }],
                        'must_not': [{
                            'term': {
                                'submit_user': user
                            }
                        }]
                    }
                }
            }
        },
        'sort': {
            'submit_ts': {
                'order': 'desc'
            }
        },
        'size': 1000
    }
    topics = weibo_es.search(index=topic_index_name,
                             doc_type=topic_index_type,
                             body=query_body)
    if topics:
        topics = topics['hits']['hits']
        for topic in topics:
            try:
                if topic['_source']['en_name'] == 'sa-de-1500559853':
                    topic['_source'][
                        'en_name'] = '1478787000_1480255800_sa-de-1500559853_1788911247'
                results['recommend'][topic['_source']['en_name']].append([
                    topic['_source']['name'], topic['_source']['start_ts'],
                    topic['_source']['end_ts'],
                    topic['_source']['comput_status'],
                    topic['_source']['submit_user']
                ])
            except:
                if topic['_source']['en_name'] == 'sa-de-1500559853':
                    topic['_source'][
                        'en_name'] = '1478787000_1480255800_sa-de-1500559853_1788911247'
                results['recommend'][topic['_source']['en_name']] = [[
                    topic['_source']['name'], topic['_source']['start_ts'],
                    topic['_source']['end_ts'],
                    topic['_source']['comput_status'],
                    topic['_source']['submit_user']
                ]]
    query_own = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {
                        'submit_user': user
                    }
                }
            }
        },
        "size": 1000
    }
    own_topics = weibo_es.search(index=topic_index_name,
                                 doc_type=topic_index_type,
                                 body=query_own)
    if own_topics:
        topics = own_topics['hits']['hits']
        for topic in topics:
            try:
                if topic['_source']['en_name'] == 'sa-de-1500559853':
                    topic['_source'][
                        'en_name'] = '1478787000_1480255800_sa-de-1500559853_1788911247'
                results['own'][topic['_source']['en_name']].append([
                    topic['_source']['name'], topic['_source']['start_ts'],
                    topic['_source']['comput_status'],
                    topic['_source']['end_ts']
                ])
            except:
                if topic['_source']['en_name'] == 'sa-de-1500559853':
                    topic['_source'][
                        'en_name'] = '1478787000_1480255800_sa-de-1500559853_1788911247'
                results['own'][topic['_source']['en_name']] = [[
                    topic['_source']['name'], topic['_source']['start_ts'],
                    topic['_source']['comput_status'],
                    topic['_source']['end_ts']
                ]]

            #print results
    return json.dumps(results)

コード例 #25

0

ファイルを表示

ファイル: utils.py プロジェクト: lvleilei/screen

def get_weibo_by_time(topic, start_ts, end_ts, sort_item='timestamp'):
    # print topic,start_ts,end_ts,weibo_es
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        'timestamp': {
                            'lte': int(end_ts),
                            'gte': int(start_ts)
                        }
                    }
                }]
            }
        },
        'size': 200,
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        }
    }
    items = weibo_es.search(index=topic, body=query_body)['hits']['hits']
    #items = db.session.query(PropagateWeibos).filter(PropagateWeibos.topic==topic).all()
    weibo_dict = {}
    if items:
        for item in items:
            #print item,type(item)
            #mtype = item.mtype
            #weibos = _json_loads(item.weibos)
            weibo = item['_source']
            #print mtype
            weibo_content = {}
            weibo_content['text'] = weibo['text']
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = weibo['timestamp']
            #weibo_content['sentiment'] = weibo['sentiment']
            try:
                weibo_content['comment'] = weibo['comment']
            except:
                weibo_content['comment'] = 0
            try:
                weibo_content['retweeted'] = weibo['retweeted']
            except:
                weibo_content['retweeted'] = 0
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            #weibo_content['keywords'] = weibo['keywords_dict']
            weibo_content['mid'] = weibo['mid']
            #print weibo_content
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),
                         key=lambda x: x[1][sort_item],
                         reverse=1)

    else:
        results = []
        #results = sorted(weibo_dict.items(),key=lambda x:x[1]['retweeted'],reverse=False)
        #for result in results:
        #print result
    return results

コード例 #26

0

ファイルを表示

    topic = 'te-lang-pu-ji-xin-ge-1492166854'
    start_ts = 1478736000
    end_ts = 1480176000
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'term': {
                        'name': topic
                    }
                }, {
                    'range': {
                        'start_ts': {
                            'lte': start_ts
                        }
                    }
                }, {
                    'range': {
                        'end_ts': {
                            'gte': end_ts
                        }
                    }
                }]
            }
        }
    }

    symbol = weibo_es.search(index=topics_river_index_name,
                             doc_type=topics_river_index_type,
                             body=query_body)['hits']['hits'][0]['_source']
    print symbol