def _max_field_value(client, index, doc_type, field):
    """Return the largest `field` value in an index, or None if it has no hits.

    Runs a match_all search sorted by `field` descending with size 1 and
    reads `field` from the `_source` of the top hit.
    """
    hits = client.search(
        index=index,
        doc_type=doc_type,
        body={
            'query': {'match_all': {}},
            'sort': {field: {'order': 'desc'}},
            'size': 1,
        })['hits']['hits']
    if not hits:
        return None
    return hits[0]['_source'][field]


def _safe_ratio(value, maximum):
    """Return value / maximum as a float, or 0.0 if maximum is None or zero."""
    if maximum is None:
        return 0.0
    maximum = float(maximum)
    if maximum == 0:
        return 0.0
    return float(value) / maximum


def get_person_value(uid):
    """Compute a weighted score for user `uid` from three Elasticsearch documents.

    The score adds up, then multiplies by 30:
      * 0.1 x the verification score found in `verified_value` under the
        profile's `verified_type` (0 when there is no such entry),
      * 0.05 x the account age in years, rounded up, measured from the
        profile's `create_at` (used as epoch seconds),
      * log10(1 + 9 * fans / max_fans), i.e. 0 for no fans and 1 for the
        user holding the index-wide maximum,
      * 1.2 x influence / max_influence.

    Args:
        uid: document id used in the history, portrait and profile indices.

    Returns:
        The float score, or the string 'no' when any of the three documents
        cannot be fetched or the profile document is flagged as not found.
    """
    try:
        history_doc = es_bci_history.get(index=bci_history_index_name,
                                         doc_type=bci_history_index_type,
                                         id=uid)
        portrait_doc = es_user_portrait.get(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            id=uid)
        profile_doc = es_user_profile.get(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          id=uid)
    except Exception:
        # Any lookup failure means the user cannot be scored.
        return 'no'

    # Check this before running the index-wide searches, which are wasted
    # work for a user that will not be scored anyway.
    if not profile_doc['found']:
        return 'no'
    profile = profile_doc['_source']

    fans_max = _max_field_value(es_bci_history, bci_history_index_name,
                                bci_history_index_type, 'user_fansnum')
    print('max: %s' % (fans_max,))

    # Verification type
    try:
        ver_calue = verified_value[profile['verified_type']]
    except (KeyError, IndexError, TypeError):
        ver_calue = 0

    # Account creation time -> age in years, rounded up.
    seconds_per_year = 31536000  # 365 * 24 * 3600
    times = math.ceil(
        (time.time() - int(profile['create_at'])) / seconds_per_year)

    # Number of fans, normalised against the maximum so the log argument
    # lies in [1, 10]. A zero/missing maximum yields a ratio of 0 instead of
    # raising ZeroDivisionError.
    fans_ratio = _safe_ratio(history_doc['_source']['user_fansnum'], fans_max)
    fans_value = math.log(fans_ratio * 9 + 1, 10)

    influence_max = _max_field_value(es_user_portrait, portrait_index_name,
                                     portrait_index_type, 'influence')
    influence_value = _safe_ratio(portrait_doc['_source']['influence'],
                                  influence_max)

    final = (ver_calue * 0.1 + times * 0.05 + fans_value +
             influence_value * 1.2) * 30
    print('%s %s %s %s' % (ver_calue, times, fans_value, influence_value))
    return final
def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return the sub-opinion clusters stored for `topic`, each with its weibos.

    The document whose id is `topic` is read from `subopinion_index_name`.
    Every entry of its `_source` other than 'start_ts' and 'end_ts' is
    treated as one cluster carrying an underscore-joined 'keys' string and a
    JSON-encoded 'cluster_dump_dict'. Only the first element of each
    `cluster_dump_dict` value is used.

    Args:
        topic: id of the sub-opinion document.
        start_ts: not used by this function; kept for existing callers.
        end_ts: not used by this function; kept for existing callers.
        sort_item: key of the weibo dict used to order each cluster's weibos,
            descending.

    Returns:
        A list with one dict per cluster: 'keys' holds `get_mode` applied to
        the split key list, 'weibos' holds a list of (mid, weibo_dict) pairs.
    """
    print(topic)
    clusters = weibo_es.get(index=subopinion_index_name, id=topic,
                            doc_type="text")['_source']
    print(clusters)

    results = []
    for name, cluster in clusters.items():
        if name in ('start_ts', 'end_ts'):
            continue

        result = {'keys': get_mode(cluster['keys'].split('_'))}
        weibo_dict = {}
        for members in json.loads(cluster['cluster_dump_dict']).values():  # jln0825
            if not members:
                # An empty member list has no representative weibo.
                continue
            source = members[0]
            weibo_content = {
                'text': source['text'],
                'uid': source['uid'],
                'timestamp': full_datetime2ts(source['datetime']),
                'comment': source['comment'],
                'retweeted': source['retweeted'],
                'mid': source['id'],
            }
            # Profile enrichment is best-effort: any lookup failure or
            # missing field falls back to 'unknown' for both values.
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                uname, photo_url = user['nick_name'], user['photo_url']
            except Exception:
                uname = photo_url = 'unknown'
            weibo_content['uname'] = uname
            weibo_content['photo_url'] = photo_url
            weibo_dict[weibo_content['mid']] = weibo_content

        result['weibos'] = sorted(weibo_dict.items(),
                                  key=lambda pair: pair[1][sort_item],
                                  reverse=True)
        results.append(result)
    return results
def get_weibo_by_time(topic, start_ts, end_ts, sort_item='timestamp'):
    """Fetch up to 200 weibos of `topic` whose timestamp lies in a time range.

    Args:
        topic: name of the Elasticsearch index to search.
        start_ts: inclusive lower timestamp bound (converted with int()).
        end_ts: inclusive upper timestamp bound (converted with int()).
        sort_item: field used for the descending sort, both in the search
            request and when ordering the returned pairs; it must therefore
            also be a key of the returned weibo dicts.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending,
        or an empty list when nothing matches. Weibos sharing a mid are
        collapsed into one entry.
    """
    print('%s %s %s %s' % (topic, start_ts, end_ts, weibo_es))
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'range': {'timestamp': {'lte': int(end_ts),
                                             'gte': int(start_ts)}}},
                ]
            }
        },
        'size': 200,
        'sort': {sort_item: {'order': 'desc'}},
    }
    hits = weibo_es.search(index=topic, body=query_body)['hits']['hits']

    weibo_dict = {}
    for hit in hits:
        weibo = hit['_source']
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': weibo['timestamp'],
            # Counters may be absent from a document; treat that as zero.
            'comment': weibo.get('comment', 0),
            'retweeted': weibo.get('retweeted', 0),
        }
        # Profile enrichment is best-effort: any lookup failure or missing
        # field falls back to 'unknown' for both values.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            uname, photo_url = user['nick_name'], user['photo_url']
        except Exception:
            uname = photo_url = 'unknown'
        weibo_content['uname'] = uname
        weibo_content['photo_url'] = photo_url
        weibo_content['mid'] = weibo['mid']
        weibo_dict[weibo_content['mid']] = weibo_content

    # Sorting an empty dict yields [], so no separate empty case is needed.
    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Return the weibos of the sub-opinion cluster matching `opinion`.

    Searches `subopinion_index_name` for documents whose 'keys' match the
    wildcard pattern `opinion`, whose 'name' equals `topic`, and whose
    [start_ts, end_ts] window covers the requested one. Only the first hit is
    used, and only the first element of each value in its JSON-encoded
    'cluster_dump_dict'.

    Args:
        topic: value matched against the 'name' field.
        start_ts: the document's 'start_ts' must be <= this value.
        end_ts: the document's 'end_ts' must be >= this value.
        opinion: wildcard pattern matched against the 'keys' field.
        sort_item: key of the weibo dict used for the descending sort.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending,
        or the string 'no results' when the search has no hits.
    """
    print('opinion::::::::: %s' % (opinion,))
    print('topic:::::::;::: %s' % (topic,))
    print('%s %s' % (type(start_ts), type(end_ts)))
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'wildcard': {'keys': opinion}},
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}},
                ]
            }
        }
    }
    # Original note: the document cannot be fetched by id because the id is
    # different every time, hence the search.
    print(query_body)
    hits = weibo_es.search(index=subopinion_index_name,
                           doc_type=subopinion_index_type,
                           body=query_body)['hits']['hits']
    print(len(hits))
    if not hits:
        return 'no results'

    weibo_dict = {}
    clusters = json.loads(hits[0]['_source']['cluster_dump_dict'])
    for members in clusters.values():  # jln0825
        if not members:
            # An empty member list has no representative weibo.
            continue
        weibo = members[0]
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': full_datetime2ts(weibo['datetime']),
            'comment': weibo['comment'],
            'retweeted': weibo['retweeted'],
            'mid': weibo['id'],
        }
        # Profile enrichment is best-effort: any lookup failure or missing
        # field falls back to 'unknown' for both values.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            uname, photo_url = user['nick_name'], user['photo_url']
        except Exception:
            uname = photo_url = 'unknown'
        weibo_content['uname'] = uname
        weibo_content['photo_url'] = photo_url
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
def get_weibo_content(topic, start_ts, end_ts, sort_item='timestamp', sen=0):
    """Return the stored weibos of one sentiment for `topic`, sorted.

    Reads the `SentimentWeibos` rows with start_ts < end <= end_ts,
    query == topic and sentiment == sen, decodes each row's `weibos` with
    `_json_loads`, and collects the contained weibos keyed by mid. Within a
    single row, weibos repeating an already seen text are skipped; entries
    without a 'text' are ignored.

    Args:
        topic: value compared with `SentimentWeibos.query`.
        start_ts: exclusive lower bound for `SentimentWeibos.end`.
        end_ts: inclusive upper bound for `SentimentWeibos.end`.
        sort_item: key of the weibo dict used for the descending sort
            (e.g. 'timestamp', 'retweeted', 'sentiment').
        sen: sentiment value to select.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending.
    """
    try:
        items = db.session.query(SentimentWeibos).filter(
            SentimentWeibos.end > start_ts,
            SentimentWeibos.end <= end_ts,
            SentimentWeibos.query == topic,
            SentimentWeibos.sentiment == sen).all()

        weibo_dict = {}
        for item in items:
            seen_texts = set()  # de-duplication is per row, not global
            for weibo in _json_loads(item.weibos):
                try:
                    text = weibo['text']
                except (KeyError, IndexError, TypeError):
                    # Entry has no usable text; skip it.
                    continue
                if text in seen_texts:
                    continue
                seen_texts.add(text)

                weibo_content = {
                    'text': text,
                    'uid': weibo['uid'],
                    'timestamp': weibo['timestamp'],
                    'sentiment': weibo['sentiment'],
                    'comment': weibo['comment'],
                    'retweeted': weibo['retweeted'],
                    'keywords': weibo['keywords_dict'],
                    'mid': weibo['mid'],
                }
                # Profile enrichment is best-effort: any lookup failure or
                # missing field falls back to 'unknown' for both values.
                try:
                    user = es_user_profile.get(
                        index=profile_index_name,
                        doc_type=profile_index_type,
                        id=weibo_content['uid'])['_source']
                    uname, photo_url = user['nick_name'], user['photo_url']
                except Exception:
                    uname = photo_url = 'unknown'
                weibo_content['uname'] = uname
                weibo_content['photo_url'] = photo_url
                weibo_dict[weibo_content['mid']] = weibo_content

        return sorted(weibo_dict.items(),
                      key=lambda pair: pair[1][sort_item],
                      reverse=True)
    finally:
        # Close the session on the error path too, not only on success.
        db.session.close()
def get_weibo_content(topic, start_ts, end_ts, province, sort_item='timestamp', unit=Fifteenminutes):
    """Return the stored weibos of `topic` for one province, sorted.

    `ProvinceWeibos` rows are selected by their `end` value, which is
    compared against bounds aligned to multiples of `unit`:
      * when the requested range is shorter than `unit`, only rows whose
        `end` equals `end_ts` rounded up to a multiple of `unit`;
      * otherwise rows with lowbound < end <= upbound, where lowbound is
        `start_ts` rounded down and upbound is `end_ts` rounded up.

    Each row's `weibos` is decoded with `_json_loads` and read as a single
    dict holding the weibo under '_source'.

    Args:
        topic: value compared with `ProvinceWeibos.topic`.
        start_ts: start of the requested range.
        end_ts: end of the requested range.
        province: value compared with `ProvinceWeibos.province`.
        sort_item: key of the weibo dict used for the descending sort.
        unit: bucket size used to align the bounds.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending.
    """
    # Both branches need the same upper bound, so compute it once.
    upbound = int(math.ceil(end_ts / (unit * 1.0)) * unit)

    query = db.session.query(ProvinceWeibos)
    if end_ts - start_ts < unit:
        query = query.filter(ProvinceWeibos.end == upbound,
                             ProvinceWeibos.province == province,
                             ProvinceWeibos.topic == topic)
    else:
        # Floor division keeps the bound aligned to `unit` for float
        # timestamps as well; for ints it equals the previous `/`.
        lowbound = int((start_ts // unit) * unit)
        query = query.filter(ProvinceWeibos.end > lowbound,
                             ProvinceWeibos.end <= upbound,
                             ProvinceWeibos.province == province,
                             ProvinceWeibos.topic == topic)

    weibo_dict = {}
    for item in query.all():
        source = _json_loads(item.weibos)['_source']
        weibo_content = {
            'text': source['text'],
            'uid': source['uid'],
            'timestamp': source['timestamp'],
            'sentiment': source['sentiment'],
            'comment': source['comment'],
            'retweeted': source['retweeted'],
            'keywords': source['keywords_dict'],
            'mid': source['mid'],
        }
        # Profile enrichment is best-effort: any lookup failure or missing
        # field falls back to 'unknown' for both values.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            uname, photo_url = user['nick_name'], user['photo_url']
        except Exception:
            uname = photo_url = 'unknown'
        weibo_content['uname'] = uname
        weibo_content['photo_url'] = photo_url
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
def get_weibo_content(topic, start_ts, end_ts, sort_item='timestamp', sen=0):
    """Collect the stored weibos of one sentiment for `topic` and sort them.

    Selects the `SentimentWeibos` rows with start_ts < end <= end_ts,
    query == topic and sentiment == sen. Each row's `weibos` is decoded with
    `_json_loads`; entries lacking a 'text' are ignored and, within one row,
    an entry whose text was already seen is skipped. Results are keyed by
    mid, so a later weibo with the same mid replaces an earlier one.

    Args:
        topic: value compared with `SentimentWeibos.query`.
        start_ts: exclusive lower bound for `SentimentWeibos.end`.
        end_ts: inclusive upper bound for `SentimentWeibos.end`.
        sort_item: key of the weibo dict used for the descending sort
            (e.g. 'timestamp', 'retweeted', 'sentiment').
        sen: sentiment value to select.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending.
    """
    rows = db.session.query(SentimentWeibos).filter(
        SentimentWeibos.end > start_ts,
        SentimentWeibos.end <= end_ts,
        SentimentWeibos.query == topic,
        SentimentWeibos.sentiment == sen).all()

    copied_fields = ('uid', 'timestamp', 'sentiment', 'comment', 'retweeted')
    weibo_dict = {}
    for row in rows:
        ori_text = set()  # texts already emitted for this row
        for weibo in _json_loads(row.weibos):
            try:
                text = weibo['text']
            except (KeyError, IndexError, TypeError):
                # Entry has no usable text; skip it.
                continue
            if text in ori_text:
                continue
            ori_text.add(text)

            weibo_content = {'text': text}
            for field in copied_fields:
                weibo_content[field] = weibo[field]
            weibo_content['keywords'] = weibo['keywords_dict']
            weibo_content['mid'] = weibo['mid']

            # Profile enrichment is best-effort: any lookup failure or
            # missing field falls back to 'unknown' for both values.
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                uname, photo_url = user['nick_name'], user['photo_url']
            except Exception:
                uname = photo_url = 'unknown'
            weibo_content['uname'] = uname
            weibo_content['photo_url'] = photo_url
            weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
def get_person_value(uid):
    """Score user `uid` from their profile, fan count and influence.

    Three documents with id `uid` are fetched (fan history, portrait,
    profile). The result is

        (0.1 * verified + 0.05 * years + fans + 1.2 * influence) * 30

    where `verified` comes from `verified_value[verified_type]` (0 if there
    is no entry), `years` is the account age since `create_at` (used as
    epoch seconds) rounded up to whole years, `fans` is
    log10(1 + 9 * user_fansnum / max user_fansnum), and `influence` is the
    user's influence divided by the maximum influence in the index.

    Args:
        uid: document id used in all three indices.

    Returns:
        The float score, or the string 'no' if a document cannot be fetched
        or the profile document is flagged as not found.
    """

    def top_value(client, index, doc_type, field):
        # Largest `field` in the index (one hit, sorted descending), or None
        # when the search has no hits.
        found = client.search(
            index=index,
            doc_type=doc_type,
            body={
                'query': {'match_all': {}},
                'sort': {field: {'order': 'desc'}},
                'size': 1,
            })['hits']['hits']
        return found[0]['_source'][field] if found else None

    def share_of(value, maximum):
        # value / maximum, with 0.0 instead of ZeroDivisionError when the
        # maximum is missing or zero.
        if maximum is None or float(maximum) == 0:
            return 0.0
        return float(value) / float(maximum)

    try:
        value_static = es_bci_history.get(index=bci_history_index_name,
                                          doc_type=bci_history_index_type,
                                          id=uid)
        value_inf = es_user_portrait.get(index=portrait_index_name,
                                         doc_type=portrait_index_type,
                                         id=uid)
        static = es_user_profile.get(index=profile_index_name,
                                     doc_type=profile_index_type,
                                     id=uid)
    except Exception:
        # Any lookup failure means the user cannot be scored.
        return 'no'

    # Bail out before the index-wide searches when there is no profile.
    if not static['found']:
        return 'no'
    static = static['_source']

    fans_max = top_value(es_bci_history, bci_history_index_name,
                         bci_history_index_type, 'user_fansnum')
    print('max: %s' % (fans_max,))

    # Verification type
    try:
        ver_calue = verified_value[static['verified_type']]
    except (KeyError, IndexError, TypeError):
        ver_calue = 0

    # Account creation time: age in years (31536000 s = 365 days), rounded up.
    times = math.ceil((time.time() - int(static['create_at'])) / 31536000)

    # Number of fans, scaled so the logarithm's argument lies in [1, 10].
    fans_value = math.log(
        share_of(value_static['_source']['user_fansnum'], fans_max) * 9 + 1,
        10)

    influence_max = top_value(es_user_portrait, portrait_index_name,
                              portrait_index_type, 'influence')
    influence_value = share_of(value_inf['_source']['influence'],
                               influence_max)

    final = (ver_calue * 0.1 + times * 0.05 + fans_value +
             influence_value * 1.2) * 30
    print('%s %s %s %s' % (ver_calue, times, fans_value, influence_value))
    return final
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Look up one sub-opinion cluster and return its weibos, sorted.

    The search on `subopinion_index_name` requires: 'keys' matching the
    wildcard pattern `opinion`, 'name' equal to `topic`, 'start_ts' <=
    `start_ts` and 'end_ts' >= `end_ts`. The first hit's JSON-encoded
    'cluster_dump_dict' is decoded and the first element of each of its
    values becomes one weibo in the result.

    Args:
        topic: value matched against the 'name' field.
        start_ts: upper bound for the document's 'start_ts'.
        end_ts: lower bound for the document's 'end_ts'.
        opinion: wildcard pattern matched against the 'keys' field.
        sort_item: key of the weibo dict used for the descending sort.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending,
        or the string 'no results' when the search has no hits.
    """
    print('opinion::::::::: %s' % (opinion,))
    print('topic:::::::;::: %s' % (topic,))
    print('%s %s' % (type(start_ts), type(end_ts)))

    must_clauses = [
        {'wildcard': {'keys': opinion}},
        {'term': {'name': topic}},
        {'range': {'start_ts': {'lte': start_ts}}},
        {'range': {'end_ts': {'gte': end_ts}}},
    ]
    query_body = {'query': {'bool': {'must': must_clauses}}}
    # Original note: the document cannot be fetched by id because the id is
    # different every time, hence the search.
    print(query_body)

    found = weibo_es.search(index=subopinion_index_name,
                            doc_type=subopinion_index_type,
                            body=query_body)['hits']['hits']
    print(len(found))
    if not found:
        return 'no results'

    weibo_dict = {}
    dumped = json.loads(found[0]['_source']['cluster_dump_dict'])
    for group in dumped.values():  # jln0825
        if not group:
            # Nothing to take a representative weibo from.
            continue
        head = group[0]
        entry = {
            'text': head['text'],
            'uid': head['uid'],
            'timestamp': full_datetime2ts(head['datetime']),
            'comment': head['comment'],
            'retweeted': head['retweeted'],
            'mid': head['id'],
        }
        # Profile enrichment is best-effort: any lookup failure or missing
        # field falls back to 'unknown' for both values.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=entry['uid'])['_source']
            entry['uname'], entry['photo_url'] = (user['nick_name'],
                                                  user['photo_url'])
        except Exception:
            entry['uname'] = 'unknown'
            entry['photo_url'] = 'unknown'
        weibo_dict[entry['mid']] = entry

    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    """Load every sub-opinion cluster of `topic` together with its weibos.

    Fetches the document with id `topic` from `subopinion_index_name`. Apart
    from the 'start_ts' and 'end_ts' entries, each entry of its `_source` is
    one cluster with an underscore-joined 'keys' string and a JSON-encoded
    'cluster_dump_dict'; the first element of each value in that dict
    becomes one weibo.

    Args:
        topic: id of the sub-opinion document.
        start_ts: not used by this function; kept for existing callers.
        end_ts: not used by this function; kept for existing callers.
        sort_item: key of the weibo dict used to order each cluster's weibos,
            descending.

    Returns:
        A list with one dict per cluster: 'keys' is the result of `get_mode`
        on the split key list, 'weibos' is a list of (mid, weibo_dict) pairs.
    """
    print(topic)
    source = weibo_es.get(index=subopinion_index_name,
                          id=topic,
                          doc_type="text")['_source']
    print(source)

    skipped_entries = ('start_ts', 'end_ts')
    results = []
    for entry_name, cluster in source.items():
        if entry_name in skipped_entries:
            continue

        keys = get_mode(cluster['keys'].split('_'))
        by_mid = {}
        for group in json.loads(cluster['cluster_dump_dict']).values():  # jln0825
            if not group:
                # Nothing to take a representative weibo from.
                continue
            head = group[0]
            entry = {
                'text': head['text'],
                'uid': head['uid'],
                'timestamp': full_datetime2ts(head['datetime']),
                'comment': head['comment'],
                'retweeted': head['retweeted'],
                'mid': head['id'],
            }
            # Profile enrichment is best-effort: any lookup failure or
            # missing field falls back to 'unknown' for both values.
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=entry['uid'])['_source']
                entry['uname'], entry['photo_url'] = (user['nick_name'],
                                                      user['photo_url'])
            except Exception:
                entry['uname'] = 'unknown'
                entry['photo_url'] = 'unknown'
            by_mid[entry['mid']] = entry

        ordered = sorted(by_mid.items(),
                         key=lambda pair: pair[1][sort_item],
                         reverse=True)
        results.append({'keys': keys, 'weibos': ordered})
    return results
def get_weibo_by_time(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return up to 200 weibos from index `topic` inside a timestamp range.

    Args:
        topic: name of the Elasticsearch index to search.
        start_ts: inclusive lower timestamp bound (converted with int()).
        end_ts: inclusive upper timestamp bound (converted with int()).
        sort_item: field used for the descending sort, both in the search
            request and when ordering the returned pairs; it must therefore
            also be a key of the returned weibo dicts.

    Returns:
        A list of (mid, weibo_dict) pairs sorted by `sort_item` descending;
        empty when nothing matches. Weibos sharing a mid are collapsed into
        one entry.
    """
    time_range = {'timestamp': {'lte': int(end_ts), 'gte': int(start_ts)}}
    query_body = {
        'query': {'bool': {'must': [{'range': time_range}]}},
        'size': 200,
        'sort': {sort_item: {'order': 'desc'}},
    }
    found = weibo_es.search(index=topic, body=query_body)['hits']['hits']

    by_mid = {}
    for hit in found:
        weibo = hit['_source']
        entry = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': weibo['timestamp'],
            # Counters may be absent from a document; treat that as zero.
            'comment': weibo.get('comment', 0),
            'retweeted': weibo.get('retweeted', 0),
        }
        # Profile enrichment is best-effort: any lookup failure or missing
        # field falls back to 'unknown' for both values.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=entry['uid'])['_source']
            entry['uname'], entry['photo_url'] = (user['nick_name'],
                                                  user['photo_url'])
        except Exception:
            entry['uname'] = 'unknown'
            entry['photo_url'] = 'unknown'
        entry['mid'] = weibo['mid']
        by_mid[entry['mid']] = entry

    # Sorting an empty dict yields [], so no separate empty case is needed.
    return sorted(by_mid.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)