def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return every sub-opinion cluster stored for ``topic`` with its weibos.

    The document whose id is ``topic`` is fetched from
    ``subopinion_index_name``.  Each field of that document other than
    ``start_ts`` and ``end_ts`` is treated as one sub-opinion entry.

    NOTE(review): a second definition of ``get_subopinion_new`` appears later
    in this file; if both live at module level the later one is the one
    callers actually get.

    Args:
        topic: Id of the sub-opinion document to load.
        start_ts: Not used by this function; kept for interface compatibility.
        end_ts: Not used by this function; kept for interface compatibility.
        sort_item: Key of the per-weibo dict used to sort weibos, descending.

    Returns:
        A list with one dict per sub-opinion:
        ``{'keys': get_mode(<keys split on '_'>),
        'weibos': [(mid, weibo_content), ...]}``.

    Raises:
        Whatever ``weibo_es.get`` raises when the document cannot be fetched,
        and ``KeyError`` when a stored weibo lacks an expected field or
        ``sort_item`` is not a key of the per-weibo dict.
    """
    results = []
    # Debug output kept from the original implementation.
    print(topic)
    weibos = weibo_es.get(index=subopinion_index_name, id=topic,
                          doc_type="text")['_source']
    print(weibos)
    for k, weibo in weibos.items():
        # The time bounds are stored next to the sub-opinion entries.
        if k in ('start_ts', 'end_ts'):
            continue
        result = {'keys': get_mode(weibo['keys'].split('_'))}
        weibo_dict = {}
        weibos_texts = json.loads(weibo['cluster_dump_dict'])
        for cluster in weibos_texts.values():
            if not cluster:
                # An empty cluster has no first weibo to show; skip it
                # instead of failing the whole request with IndexError.
                continue
            weibo_text = cluster[0]  # only the first weibo of a cluster is used
            weibo_content = {
                'text': weibo_text['text'],
                'uid': weibo_text['uid'],
                # presumably converts the datetime string to a timestamp
                'timestamp': full_datetime2ts(weibo_text['datetime']),
                'comment': weibo_text['comment'],
                'retweeted': weibo_text['retweeted'],
                'mid': weibo_text['id'],
            }
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except Exception:
                # Best effort: any lookup failure (missing profile, missing
                # field, ...) falls back to placeholders.  ``Exception``
                # rather than a bare ``except`` so that KeyboardInterrupt and
                # SystemExit still propagate.
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        result['weibos'] = sorted(weibo_dict.items(),
                                  key=lambda x: x[1][sort_item],
                                  reverse=True)
        results.append(result)
    return results
def get_symbol_weibo(topic, start_ts, end_ts, unit=MinInterval):
    """Pick up to three representative weibos per cluster (fishbone diagram).

    The first document matching ``topic`` in ``topics_river_index_name`` is
    loaded.  For every cluster in its ``cluster_dump_dict`` the weibos are
    scanned in order and a weibo is kept when its timestamp lies in
    ``[start_ts, end_ts]`` and no weibo with the same 7-character content
    prefix was already kept for that cluster.  At most three are kept per
    cluster.

    NOTE(review): a second definition of ``get_symbol_weibo`` appears later
    in this file; if both live at module level the later one is the one
    callers actually get.

    Args:
        topic: Topic name matched with ``match_phrase`` on the ``name`` field.
        start_ts: Lower bound (inclusive) of the accepted timestamps.
        end_ts: Upper bound (inclusive) of the accepted timestamps.
        unit: Currently unused.  The original author intended the window to
            start at ``end_ts - unit`` but used ``start_ts`` because the most
            recent 15 minutes had no data.

    Returns:
        A dict mapping the comma-joined first five features of a cluster to
        the list of weibos kept for it, or ``0`` when the search fails or
        returns no hit.
    """
    query_body = {"query": {"match_phrase": {"name": topic}}}
    try:
        symbol = weibo_es.search(index=topics_river_index_name,
                                 doc_type=topics_river_index_type,
                                 body=query_body)['hits']['hits'][0]['_source']
    except Exception:
        # Callers get 0 when the search fails or has no hit.  ``Exception``
        # rather than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return 0

    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])

    weibos = {}
    for clusterid, contents in symbol_weibos.items():
        seen_titles = set()
        for item in contents:
            ts = full_datetime2ts(item['datetime'])
            # The first 7 characters of the content stand in for a title and
            # are used to drop near-duplicates within a cluster.
            title = item['content'][:7]
            if start_ts <= ts <= end_ts and title not in seen_titles:
                label = ','.join(features[clusterid][:5])
                weibos.setdefault(label, []).append(item)
                seen_titles.add(title)
                if len(seen_titles) == 3:
                    # Three distinct weibos per cluster are enough.
                    break
    return weibos
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Return the weibos of the sub-opinion matching ``opinion`` for ``topic``.

    Searches ``subopinion_index_name`` for a document whose ``keys`` match
    ``opinion`` (wildcard), whose ``name`` equals ``topic`` and whose stored
    ``start_ts``/``end_ts`` range encloses ``[start_ts, end_ts]``.  Only the
    first hit is used.

    NOTE(review): a second definition of ``get_weibo_content`` appears later
    in this file; if both live at module level the later one is the one
    callers actually get.

    Args:
        topic: Topic name (``term`` query on ``name``).
        start_ts: Start of the requested window; the document's ``start_ts``
            must be less than or equal to it.
        end_ts: End of the requested window; the document's ``end_ts`` must be
            greater than or equal to it.
        opinion: Wildcard pattern matched against the ``keys`` field.
        sort_item: Key of the per-weibo dict used to sort weibos, descending.

    Returns:
        A list of ``(mid, weibo_content)`` tuples sorted descending by
        ``weibo_content[sort_item]``, or the string ``'no results'`` when the
        search returns no hit.
    """
    weibo_dict = {}
    # Debug output kept from the original implementation; written so the
    # emitted text is the same under Python 2 and Python 3.
    print('opinion::::::::: %s' % (opinion,))
    print('topic:::::::;::: %s' % (topic,))
    print('%s %s' % (type(start_ts), type(end_ts)))
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'wildcard': {'keys': opinion}},
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}},
                ]
            }
        }
    }
    # Original author's note: uid could not be found; the id differs on
    # every run.
    print(query_body)
    weibos = weibo_es.search(index=subopinion_index_name,
                             doc_type=subopinion_index_type,
                             body=query_body)['hits']['hits']
    print(len(weibos))
    if not weibos:
        return 'no results'

    clusters = json.loads(weibos[0]['_source']['cluster_dump_dict'])
    for cluster in clusters.values():
        if not cluster:
            # An empty cluster has no first weibo to show; skip it instead
            # of failing the whole request with IndexError.
            continue
        weibo = cluster[0]  # only the first weibo of a cluster is used
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            # presumably converts the datetime string to a timestamp
            'timestamp': full_datetime2ts(weibo['datetime']),
            'comment': weibo['comment'],
            'retweeted': weibo['retweeted'],
            'mid': weibo['id'],
        }
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # Best effort: any lookup failure falls back to placeholders.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content
    return sorted(weibo_dict.items(), key=lambda x: x[1][sort_item],
                  reverse=True)
def get_symbol_weibo(topic, start_ts, end_ts, unit=MinInterval):
    """Pick up to three representative weibos per cluster (fishbone diagram).

    The first document matching ``topic`` in ``topics_river_index_name`` is
    loaded.  For every cluster in its ``cluster_dump_dict`` the weibos are
    scanned in order and a weibo is kept when its timestamp lies in
    ``[start_ts, end_ts]`` and no weibo with the same 7-character content
    prefix was already kept for that cluster.  At most three are kept per
    cluster.

    NOTE(review): an earlier definition of ``get_symbol_weibo`` exists in
    this file; this later one replaces it when both live at module level.

    Args:
        topic: Topic name matched with ``match_phrase`` on the ``name`` field.
        start_ts: Lower bound (inclusive) of the accepted timestamps.
        end_ts: Upper bound (inclusive) of the accepted timestamps.
        unit: Currently unused.  The original author intended the window to
            start at ``end_ts - unit`` but used ``start_ts`` because the most
            recent 15 minutes had no data.

    Returns:
        A dict mapping the comma-joined first five features of a cluster to
        the list of weibos kept for it, or ``0`` when the search fails or
        returns no hit.
    """
    query_body = {"query": {"match_phrase": {"name": topic}}}
    try:
        hits = weibo_es.search(index=topics_river_index_name,
                               doc_type=topics_river_index_type,
                               body=query_body)['hits']['hits']
        symbol = hits[0]['_source']
    except Exception:
        # Callers get 0 when the search fails or has no hit.  ``Exception``
        # rather than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return 0

    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])

    weibos = {}
    for clusterid, contents in symbol_weibos.items():
        kept_titles = set()
        for entry in contents:
            ts = full_datetime2ts(entry['datetime'])
            # The first 7 characters of the content stand in for a title and
            # are used to drop near-duplicates within a cluster.
            title = entry['content'][:7]
            if start_ts <= ts <= end_ts and title not in kept_titles:
                label = ','.join(features[clusterid][:5])
                weibos.setdefault(label, []).append(entry)
                kept_titles.add(title)
                if len(kept_titles) == 3:
                    # Three distinct weibos per cluster are enough.
                    break
    return weibos
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Return the weibos of the sub-opinion matching ``opinion`` for ``topic``.

    Searches ``subopinion_index_name`` for a document whose ``keys`` match
    ``opinion`` (wildcard), whose ``name`` equals ``topic`` and whose stored
    ``start_ts``/``end_ts`` range encloses ``[start_ts, end_ts]``.  Only the
    first hit is used.

    NOTE(review): an earlier definition of ``get_weibo_content`` exists in
    this file; this later one replaces it when both live at module level.

    Args:
        topic: Topic name (``term`` query on ``name``).
        start_ts: Start of the requested window; the document's ``start_ts``
            must be less than or equal to it.
        end_ts: End of the requested window; the document's ``end_ts`` must be
            greater than or equal to it.
        opinion: Wildcard pattern matched against the ``keys`` field.
        sort_item: Key of the per-weibo dict used to sort weibos, descending.

    Returns:
        A list of ``(mid, weibo_content)`` tuples sorted descending by
        ``weibo_content[sort_item]``, or the string ``'no results'`` when the
        search returns no hit.
    """
    # Debug output kept from the original implementation; written so the
    # emitted text is the same under Python 2 and Python 3.
    print('opinion::::::::: %s' % (opinion,))
    print('topic:::::::;::: %s' % (topic,))
    print('%s %s' % (type(start_ts), type(end_ts)))
    must_clauses = [
        {'wildcard': {'keys': opinion}},
        {'term': {'name': topic}},
        {'range': {'start_ts': {'lte': start_ts}}},
        {'range': {'end_ts': {'gte': end_ts}}},
    ]
    query_body = {'query': {'bool': {'must': must_clauses}}}
    # Original author's note: uid could not be found; the id differs on
    # every run.
    print(query_body)
    hits = weibo_es.search(index=subopinion_index_name,
                           doc_type=subopinion_index_type,
                           body=query_body)['hits']['hits']
    print(len(hits))
    if not hits:
        return 'no results'

    weibo_dict = {}
    clusters = json.loads(hits[0]['_source']['cluster_dump_dict'])
    for cluster in clusters.values():
        if not cluster:
            # An empty cluster has no first weibo to show; skip it instead
            # of failing the whole request with IndexError.
            continue
        first = cluster[0]  # only the first weibo of a cluster is used
        weibo_content = {
            'text': first['text'],
            'uid': first['uid'],
            # presumably converts the datetime string to a timestamp
            'timestamp': full_datetime2ts(first['datetime']),
            'comment': first['comment'],
            'retweeted': first['retweeted'],
            'mid': first['id'],
        }
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # Best effort: any lookup failure falls back to placeholders.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content
    return sorted(weibo_dict.items(), key=lambda x: x[1][sort_item],
                  reverse=True)
def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return every sub-opinion cluster stored for ``topic`` with its weibos.

    The document whose id is ``topic`` is fetched from
    ``subopinion_index_name``.  Each field of that document other than
    ``start_ts`` and ``end_ts`` is treated as one sub-opinion entry.

    NOTE(review): an earlier definition of ``get_subopinion_new`` exists in
    this file; this later one replaces it when both live at module level.

    Args:
        topic: Id of the sub-opinion document to load.
        start_ts: Not used by this function; kept for interface compatibility.
        end_ts: Not used by this function; kept for interface compatibility.
        sort_item: Key of the per-weibo dict used to sort weibos, descending.

    Returns:
        A list with one dict per sub-opinion:
        ``{'keys': get_mode(<keys split on '_'>),
        'weibos': [(mid, weibo_content), ...]}``.

    Raises:
        Whatever ``weibo_es.get`` raises when the document cannot be fetched,
        and ``KeyError`` when a stored weibo lacks an expected field or
        ``sort_item`` is not a key of the per-weibo dict.
    """
    # Debug output kept from the original implementation.
    print(topic)
    source = weibo_es.get(index=subopinion_index_name, id=topic,
                          doc_type="text")['_source']
    print(source)

    results = []
    for field, subopinion in source.items():
        # The time bounds are stored next to the sub-opinion entries.
        if field in ('start_ts', 'end_ts'):
            continue
        raw_keys = subopinion['keys'].split('_')
        result = {'keys': get_mode(raw_keys)}
        weibo_dict = {}
        clusters = json.loads(subopinion['cluster_dump_dict'])
        for cluster in clusters.values():
            if not cluster:
                # An empty cluster has no first weibo to show; skip it
                # instead of failing the whole request with IndexError.
                continue
            first = cluster[0]  # only the first weibo of a cluster is used
            weibo_content = {
                'text': first['text'],
                'uid': first['uid'],
                # presumably converts the datetime string to a timestamp
                'timestamp': full_datetime2ts(first['datetime']),
                'comment': first['comment'],
                'retweeted': first['retweeted'],
                'mid': first['id'],
            }
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except Exception:
                # Best effort: any lookup failure (missing profile, missing
                # field, ...) falls back to placeholders.  ``Exception``
                # rather than a bare ``except`` so that KeyboardInterrupt and
                # SystemExit still propagate.
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        result['weibos'] = sorted(weibo_dict.items(),
                                  key=lambda x: x[1][sort_item],
                                  reverse=True)
        results.append(result)
    return results