def get_during_keywords(topic, start_ts, end_ts):
    """Return the most frequent keywords of a topic's weibos as a JSON string.

    Used for the keyword cloud. Fetches the weibos of ``topic`` whose
    ``timestamp`` lies in ``[start_ts, end_ts)``, passes their texts to
    ``get_weibo`` and keeps the highest-valued entries.

    NOTE(review): another definition named ``get_during_keywords`` appears
    later in this file and replaces this one at import time -- confirm which
    copy should be kept.

    Args:
        topic: Name of the Elasticsearch index to search.
        start_ts: Inclusive lower bound for the ``timestamp`` field.
        end_ts: Exclusive upper bound for the ``timestamp`` field.

    Returns:
        A JSON array of ``[keyword, value]`` pairs sorted by ``value`` in
        descending order and truncated to ``MAX_FREQUENT_WORDS`` entries.
    """
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'range': {
                        'timestamp': {'gte': start_ts, 'lt': end_ts}
                    }
                }
            }
        },
        # Upper bound on the number of weibos returned by the search.
        'size': MAX_LANGUAGE_WEIBO
    }
    hits = weibo_es.search(
        index=topic,
        doc_type=weibo_index_type,
        body=query_body
    )['hits']['hits']

    # Texts are handed over UTF-8 encoded, exactly as before; presumably
    # get_weibo expects byte strings -- verify against its implementation.
    weibo_text = [hit['_source']['text'].encode('utf-8') for hit in hits]

    # presumably a mapping of keyword -> count/weight (it is sorted by value
    # below) -- TODO confirm against get_weibo.
    keywords_dict = get_weibo(weibo_text, n_gram=2, n_count=100)

    # items() instead of iteritems(): same result on Python 2 and still
    # available on Python 3.
    word_results = sorted(
        keywords_dict.items(),
        key=lambda x: x[1],
        reverse=True
    )[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)
def get_during_keywords(topic, start_ts, end_ts):
    """Build the keyword-cloud data for a topic within a time window.

    Searches the Elasticsearch index named ``topic`` for weibos whose
    ``timestamp`` satisfies ``start_ts <= timestamp < end_ts``, extracts
    keywords from their texts with ``get_weibo`` and returns the top entries.

    NOTE(review): an earlier definition named ``get_during_keywords`` exists
    in this file; this later one is the definition that stays bound --
    confirm the duplicate can be dropped.

    Args:
        topic: Name of the Elasticsearch index to search.
        start_ts: Inclusive lower bound for the ``timestamp`` field.
        end_ts: Exclusive upper bound for the ``timestamp`` field.

    Returns:
        A JSON-encoded list of ``[keyword, value]`` pairs, ordered by
        ``value`` descending, containing at most ``MAX_FREQUENT_WORDS`` pairs.
    """
    time_filter = {
        'range': {
            'timestamp': {'gte': start_ts, 'lt': end_ts}
        }
    }
    query_body = {
        'query': {
            'filtered': {
                'filter': time_filter
            }
        },
        # Caps how many weibos the search returns.
        'size': MAX_LANGUAGE_WEIBO
    }
    response = weibo_es.search(
        index=topic,
        doc_type=weibo_index_type,
        body=query_body
    )

    # The texts are UTF-8 encoded before keyword extraction, as in the
    # original code; presumably get_weibo works on byte strings -- verify.
    weibo_text = [
        hit['_source']['text'].encode('utf-8')
        for hit in response['hits']['hits']
    ]

    # presumably keyword -> count/weight; TODO confirm against get_weibo.
    keywords_dict = get_weibo(weibo_text, n_gram=2, n_count=100)

    # dict.items() behaves the same as iteritems() here on Python 2 and,
    # unlike iteritems(), also exists on Python 3.
    ranked = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)
    word_results = ranked[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)