def ajax_revise_task():
    task_name = request.args.get('task_name', '')   # must
    finish = request.args.get('finish', '10')
    stop_time = request.args.get('stop_time', '')   # timestamp
    user = request.args.get('user', '')
    #now_ts = datetime2ts("2013-09-06")
    _id = user + '-' + task_name
    now_ts = time.time()
    # stop_time arrives as a string; compare as numbers
    if stop_time and int(stop_time) < now_ts:
        return json.dumps([])
    if task_name and user:
        task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
        if stop_time:
            task_detail['stop_time'] = stop_time
        if int(finish) == 0:
            task_detail['finish'] = finish
            task_detail['processing_status'] = "1"  # on restart, set processing status back to running
        if stop_time or int(finish) == 0:
            es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)
        return json.dumps(['1'])
    return json.dumps([])
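# The ajax_* functions in this module read request.args, so they are assumed to
# be Flask view functions whose @app.route decorators are not shown in this
# section. A request is assumed to look like (hypothetical route name):
#   GET /revise_task?task_name=demo&user=admin&finish=0&stop_time=1450000000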
def get_group_user_track(uid):
    results = []
    #step1: get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, \
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iterate over the dates to get the monthly track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x: x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY
    return results
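# ts2datetime/datetime2ts (and ts2date/ts2date_min used below) come from the
# project's time utilities and are not defined in this section. A minimal
# sketch of what the two core helpers are assumed to do:
import time

def ts2datetime(ts):
    # timestamp -> 'YYYY-MM-DD' string
    return time.strftime('%Y-%m-%d', time.localtime(ts))

def datetime2ts(date):
    # 'YYYY-MM-DD' string -> timestamp at local midnight
    return int(time.mktime(time.strptime(date, '%Y-%m-%d')))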
def get_text_detail(task_name, ts, text_type, user, order, size=100):
    results = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    social_sensors = json.loads(task_detail["social_sensors"])
    if int(text_type) == 0:     # hot original weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 1)
    elif int(text_type) == 1:   # hot retweeted weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 2)
    elif int(text_type) == 2:   # ordinary retweets
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 3)
    elif int(text_type) == 3:   # ordinary comments
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 2)
    elif int(text_type) == 4:   # positive weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "1")
    elif int(text_type) == 5:   # neutral weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "0")
    elif int(text_type) == 6:   # negative weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", ["2", "3", "4", "5", "6"])
    elif int(text_type) == 7:   # sensitive weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 3)
    else:
        print "error: unknown text_type %s" % text_type
    return results
def search_identify_uid(uid):
    # return 1 if the uid exists in user_portrait, else 0
    try:
        es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)
        result = 1
    except:
        result = 0
    return result
def ajax_stop_task():
    task_name = request.args.get('task_name', '')   # must
    user = request.args.get('user', '')
    if task_name and user:
        _id = user + "-" + task_name
        task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
        #task_detail["finish"] = finish_signal
        task_detail['processing_status'] = '0'
        es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)
        return json.dumps(['1'])
    else:
        return json.dumps([])
def ajax_get_group_detail():
    task_name = request.args.get('task_name', '')   # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                    body={"ids": uid_list}, fields=SOCIAL_SENSOR_INFO)['docs']
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["fields"][iter_item][0].split('&'))
                        elif iter_item == "activeness":
                            temp.append(math.log(item['fields']['activeness'][0] / float(top_activeness) * 9 + 1, 10) * 100)
                        elif iter_item == "importance":
                            temp.append(math.log(item['fields']['importance'][0] / float(top_importance) * 9 + 1, 10) * 100)
                        elif iter_item == "influence":
                            temp.append(math.log(item['fields']['influence'][0] / float(top_influence) * 9 + 1, 10) * 100)
                        else:
                            temp.append(item["fields"][iter_item][0])
                    portrait_detail.append(temp)
    return json.dumps(portrait_detail)
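# get_top_influence is used here and below but not defined in this section. It
# is assumed to return the maximum value of the given field across the
# user_portrait index, so math.log(value/top * 9 + 1, 10) * 100 can normalize
# scores into [0, 100] (value == top gives log10(10) * 100 = 100). A minimal
# sketch under that assumption:
def get_top_influence(field):
    query_body = {
        'query': {'match_all': {}},
        'sort': {field: {'order': 'desc'}},
        'size': 1
    }
    hits = es.search(index=portrait_index_name, doc_type=portrait_index_type,
                     body=query_body)['hits']['hits']
    return hits[0]['_source'][field] if hits else 1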
def ajax_get_task_detail_info():
    task_name = request.args.get('task_name', '')   # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    #task_detail['keywords'] = json.loads(task_detail['keywords'])
    #task_detail["sensitive_words"] = json.loads(task_detail["sensitive_words"])
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        task_detail['history_status'] = sorted(history_status, reverse=True)
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []
    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                body={"ids": task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
    if portrait_detail:
        portrait_detail = sorted(portrait_detail, key=lambda x: x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail
    return json.dumps(task_detail)
def new_get_sensitive_words(uid):
    try:
        user_portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, \
                id=uid)['_source']
    except:
        user_portrait_result = {}
    if user_portrait_result:
        try:
            sensitive_dict = json.loads(user_portrait_result['sensitive_dict'])
        except:
            sensitive_dict = {}
    else:
        sensitive_dict = {}
    sort_sensitive_dict = sorted(sensitive_dict.items(), key=lambda x: x[1], reverse=True)
    return sort_sensitive_dict
def conclusion_on_activeness(uid):
    # test
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    try:
        influ_result = es.get(index=index_name, doc_type=index_type, id=uid)['_source']
    except:
        return activeness_conclusion_dict['0']
    # generate the time-series keys
    now_ts = time.time()
    now_ts = datetime2ts('2013-09-12')  # fixed test date overrides the current time
    activeness_set = set()
    for i in range(N):
        ts = ts2datetime(now_ts - i * 3600 * 24)
        activeness_set.add(pre_activeness + ts)
    # separate the activeness keys from the influence keys
    keys_set = set(influ_result.keys())
    activeness_keys = keys_set & activeness_set
    if activeness_keys:
        activeness_value = [influ_result[key] for key in activeness_keys]
        mean, std_var = level(activeness_value)
        if mean < activeness_level[0]:
            result = activeness_conclusion_dict['1']
        elif mean < activeness_level[1]:
            result = activeness_conclusion_dict['2']
        elif mean < activeness_level[2]:
            result = activeness_conclusion_dict['3']
        elif mean < activeness_level[3]:
            result = activeness_conclusion_dict['4']
        else:
            result = activeness_conclusion_dict['5']
    else:
        result = activeness_conclusion_dict['0']
    return result
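# level() summarizes the activeness series above but is not defined in this
# section; it is assumed to return the mean and standard deviation of the
# values. A minimal sketch under that assumption:
import math

def level(value_list):
    n = float(len(value_list))
    mean = sum(value_list) / n
    std_var = math.sqrt(sum((v - mean) ** 2 for v in value_list) / n)
    return mean, std_var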
def ajax_get_clustering_topic():
    task_name = request.args.get('task_name', '')   # task_name
    user = request.args.get('user', '')
    ts = int(request.args.get('ts', ''))            # timestamp: 123456789
    topic_list = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    #burst_reason = task_detail['burst_reason']
    burst_reason = 1  # burst-reason check is currently disabled
    filter_list = []
    if burst_reason:
        topic_list = task_detail.get("clustering_topic", [])
        if topic_list:
            topic_list = json.loads(topic_list)
            for item in topic_list:
                tmp = [word for word in item if len(word) > 1]  # drop single-character words
                filter_list.append(tmp)
    return json.dumps(filter_list[:5])
def get_person_value(uid):
    # combine verification type, account age, fan count and influence into one score
    try:
        value_static = es_bci_history.get(index=bci_history_index_name, doc_type=bci_history_index_type, id=uid)
        value_inf = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)
        static = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type, id=uid)
    except:
        return 'no'
    fans_max = es_bci_history.search(index=bci_history_index_name, doc_type=bci_history_index_type,
            body={'query': {'match_all': {}}, 'sort': {'user_fansnum': {'order': 'desc'}},
                  'size': 1})['hits']['hits'][0]['_source']['user_fansnum']
    if not static['found']:
        return 'no'
    static = static['_source']
    # verification type
    try:
        ver_value = verified_value[static['verified_type']]
    except:
        ver_value = 0
    # account age in years
    times = math.ceil((time.time() - int(static['create_at'])) / 31536000)
    # fan count, log-normalized against the global maximum
    fans_value = math.log(float(value_static['_source']['user_fansnum']) / float(fans_max) * 9 + 1, 10)
    influence_max = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
            body={'query': {'match_all': {}}, 'sort': {'influence': {'order': 'desc'}},
                  'size': 1})['hits']['hits'][0]['_source']['influence']
    influence_value = float(value_inf['_source']['influence']) / float(influence_max)
    final = (ver_value * 0.1 + times * 0.05 + fans_value + influence_value * 1.2) * 30
    return final
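# Worked example of the weighting above, with hypothetical numbers: a verified
# user with ver_value = 7, a 3-year-old account, user_fansnum at the global max
# (fans_value = log10(9 + 1) = 1.0) and influence at 50% of the max gives
#   final = (7*0.1 + 3*0.05 + 1.0 + 0.5*1.2) * 30
#         = (0.7 + 0.15 + 1.0 + 0.6) * 30 = 73.5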
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"range": {"timestamp": {"gte": ts - time_interval, "lt": ts}}},
                            {"terms": {"root_mid": mid_list}}
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        "size": 100
    }
    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append({"term": {text_type: type_value}})
    if text_type == "sentiment":
        # a single sentiment label uses a term filter, a list of labels uses terms
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append({"term": {text_type: type_value}})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append({"terms": {text_type: type_value}})
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    # 1. query the weibo text index
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    # 2. attach user profile information to each weibo
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                    body={"ids": uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for i in range(len(uid_list)):
                item = search_results[i]['_source']
                temp = []
                # uid, nick_name, photo_url, text, sentiment, timestamp, geo, message_type
                temp.append(item['uid'])
                if portrait_result[i]['found']:
                    temp.append(portrait_result[i]["fields"]["nick_name"][0])
                    temp.append(portrait_result[i]["fields"]["photo_url"][0])
                else:
                    temp.append(item['uid'])
                    temp.append("")
                temp.append(item["text"])
                temp.append(item["sentiment"])
                temp.append(ts2date(item['timestamp']))
                temp.append(item['geo'])
                temp.append(item["message_type"])
                results.append(temp)
    return results
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    # invert duplicate_dict: representative mid -> list of duplicate mids
    tmp_duplicate_dict = dict()
    for k, v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]
    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])      # total count, keyed by the mid itself
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {"mid": mid_list}
                }
            }
        },
        "size": 1000,
        "sort": {"timestamp": {"order": "desc"}}
    }
    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    if es_text.indices.exists(index_name):
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    if es_text.indices.exists(index_name_1):
        index_list.append(index_name_1)
    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    uid_list = []
    text_dict = dict()      # text information
    portrait_dict = dict()  # profile information
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                    body={"ids": uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0],
                                                  "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url": ""}
        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)[:10]
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)[:10]
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)[:10]
        else:
            sorted_list = weibo_detail_list
        count_n = 0
        mid_index_dict = dict()
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, keywords_string, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                if message_type == 1:
                    temp.append(1)
                elif message_type == 2:
                    temp.append(3)
                else:
                    temp.append(iter_text['message_type'])
                # extract keywords from the text
                f_key = get_weibo_single(iter_text['text'])
                temp.append(sorted(f_key.iteritems(), key=lambda x: x[1], reverse=True))
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                temp.append(iter_text['timestamp'])
                temp.append(mid_value[mid])
                temp.append(mid)
                results.append(temp)
            count_n += 1
        # sort by sensitive flag, topic value, retweet count
        results = sorted(results, key=operator.itemgetter(-4, -2, -6), reverse=True)
        sort_results = []
        count = 0
        for item in results:
            sort_results.append([item])
            mid_index_dict[item[-1]] = count
            count += 1
        # fold each group of near-duplicate weibo into the earliest-ranked row
        if tmp_duplicate_dict:
            remove_list = []
            value_list = tmp_duplicate_dict.values()  # [[mid, mid], ...]
            for item in value_list:
                tmp = []
                for mid in item:
                    if mid in mid_index_dict:  # index 0 is a valid position
                        tmp.append(mid_index_dict[mid])
                if len(tmp) > 1:
                    tmp_min = min(tmp)
                else:
                    continue
                tmp.remove(tmp_min)
                for iter_count in tmp:
                    sort_results[tmp_min].extend(sort_results[iter_count])
                    remove_list.append(sort_results[iter_count])
            if remove_list:
                for item in remove_list:
                    sort_results.remove(item)
    return sort_results
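# The duplicate_dict inversion at the top of get_origin_weibo_detail groups
# near-duplicate weibo under a representative mid. For example, with
# hypothetical mids (dict iteration order may vary):
#   duplicate_dict     = {'m2': 'm1', 'm3': 'm1'}   # m2, m3 duplicate m1
#   tmp_duplicate_dict = {'m1': ['m2', 'm1', 'm3']}
# which is what the merge step above uses to fold duplicate rows together.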
def get_task_detail_2(task_name, ts, user):
    results = dict()
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0  # counter
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    if social_sensors:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                body={"ids": social_sensors}, fields=SOCIAL_SENSOR_INFO)['docs']
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    elif iter_item == "activeness":
                        temp.append(math.log(item['fields']['activeness'][0] / float(top_activeness) * 9 + 1, 10) * 100)
                    elif iter_item == "importance":
                        temp.append(math.log(item['fields']['importance'][0] / float(top_importance) * 9 + 1, 10) * 100)
                    elif iter_item == "influence":
                        temp.append(math.log(item['fields']['influence'][0] / float(top_influence) * 9 + 1, 10) * 100)
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    portrait_detail = sorted(portrait_detail, key=lambda x: x[5], reverse=True)

    all_weibo_list = []
    origin_weibo_list = []      # per-window weibo counts
    retweeted_weibo_list = []
    important_user_set = set()  # important users
    out_portrait_users = set()  # users not in user_portrait
    ts = int(ts)
    time_series = history_status  # all window timestamps so far
    # get per-window detail from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            origin_weibo_list.append(item["origin_weibo_number"])         # real
            retweeted_weibo_list.append(item['retweeted_weibo_number'])   # real
            all_weibo_list.append(item["origin_weibo_number"] + item['retweeted_weibo_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list)  # not in user_portrait
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)
            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    # burst-reason analysis (weibo-count / sentiment / sensitive variation
    # statistics and the warning_conclusion result) is currently disabled

    # personal information for the important users
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    social_sensor_set = set(social_sensors)
    user_detail_info = []
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                body={"ids": important_uid_list},
                fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                if not uname or uname == "未知":  # "unknown"
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                temp.append(math.log(item['fields']['importance'][0] / float(top_importance) * 9 + 1, 10) * 100)
                temp.append(math.log(item['fields']['influence'][0] / float(top_influence) * 9 + 1, 10) * 100)
                temp.append(math.log(item['fields']['activeness'][0] / float(top_activeness) * 9 + 1, 10) * 100)
                if item['fields']['uid'][0] in social_sensor_set:
                    temp.append(1)
                else:
                    temp.append(0)
                user_detail_info.append(temp)
    # sort by influence
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x: x[6], reverse=True)
    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                body={"ids": out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts - DAY).replace('-', '')
        influence_results = es.mget(index=bci_index, doc_type="bci",
                body={"ids": out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci",
                body={"ids": out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index / float(top_influence) * 9 + 1, 10) * 100)
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)
    if len(out_user_detail_info):
        out_user_detail_info = sorted(out_user_detail_info, key=lambda x: x[4], reverse=True)
    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    results['time_series'] = revise_time_series
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    results['social_sensors_detail'] = portrait_detail
    return results
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {"mid": mid_list}
                }
            }
        }
    }
    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    if es_text.indices.exists(index_name):
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    if es_text.indices.exists(index_name_1):
        index_list.append(index_name_1)
    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    uid_list = []
    text_dict = dict()      # text information
    portrait_dict = dict()  # profile information
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                    body={"ids": uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0],
                                                  "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url": ""}
        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list
        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)
    if results and order == "ts":
        results = sorted(results, key=lambda x: x[5], reverse=True)
    if results and order == "sensitive":
        results = sorted(results, key=lambda x: x[-1], reverse=True)
    return results
def get_sort(uid, fe):
    result = {}
    u_bci = 0
    try:
        u_bci = es.get(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, id=uid,
                       fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        result['in_score'] = u_bci
    except:
        result['in_score'] = ""  # u_bci stays 0 so the ranking queries below still run
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'range': {'bci_week_ave': {'gte': u_bci}}},
                            {'term': {'topic_string': fe}}
                        ]
                    }
                }
            }
        }
    }
    result['in_top'] = es.search(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, body=query_body)['hits']['total']
    try:
        u_bci = es.get(index='bci_history', doc_type='bci', id=uid,
                       fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        bci_max = get_max_value(es_user_profile, "bci_history", "bci")
        result['all_score'] = math.log(u_bci / float(bci_max) * 9 + 1, 10) * 100
    except:
        result['all_score'] = ""
        result['all_top'] = ""
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'range': {'bci_week_ave': {'gte': u_bci}}}
                        ]
                    }
                }
            }
        }
    }
    result['all_top'] = es.search(index='bci_history', doc_type='bci', body=query_body)['hits']['total']
    return json.dumps(result)
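# get_max_value(es_user_profile, "bci_history", "bci") is called above but not
# defined in this section; it is assumed to return the maximum bci_week_ave in
# the given index so all_score can be normalized to [0, 100]. A minimal sketch
# under that assumption (the field default is ours):
def get_max_value(es_client, index, doc_type, field='bci_week_ave'):
    query_body = {
        'query': {'match_all': {}},
        'sort': {field: {'order': 'desc'}},
        'size': 1
    }
    hits = es_client.search(index=index, doc_type=doc_type, body=query_body)['hits']['hits']
    return hits[0]['_source'][field] if hits else 1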
def new_get_user_portrait(uid, admin_user):
    results = {}
    try:
        user_portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, \
                id=uid)['_source']
    except:
        user_portrait_result = {}
    if not user_portrait_result:
        results['tag_remark'] = {}
        results['attention_information'] = {}
        results['tendency'] = {}
        results['group_tag'] = []
    else:
        #step1: get attention_information
        #sensitive words
        try:
            sensitive_words_dict = json.loads(user_portrait_result['sensitive_dict'])
        except:
            sensitive_words_dict = {}
        sort_sensitive_words = sorted(sensitive_words_dict.items(), key=lambda x: x[1], reverse=True)
        results['attention_information'] = {'sensitive_dict': sort_sensitive_words}
        #keywords
        try:
            keywords_list = json.loads(user_portrait_result['keywords'])
        except:
            keywords_list = []  # list of [word, weight] pairs
        keywords_dict = dict()
        for item in keywords_list:
            keywords_dict[item[0]] = item[1]
        filter_word_dict = keyword_filter(keywords_dict)
        sort_keywords = sorted(filter_word_dict.items(), key=lambda x: x[1], reverse=True)
        results['attention_information']['keywords'] = sort_keywords
        #hashtag
        try:
            hashtag_dict = json.loads(user_portrait_result['hashtag_dict'])
        except:
            hashtag_dict = {}
        sort_hashtag = sorted(hashtag_dict.items(), key=lambda x: x[1], reverse=True)
        results['attention_information']['hashtag'] = sort_hashtag
        #step2: get tendency information
        results['tendency'] = {'domain': user_portrait_result['domain']}
        results['tendency']['topic'] = user_portrait_result['topic_string'].split('&')[0]
        #add school information
        results['tendency']['is_school'] = user_portrait_result['is_school']
        results['tendency']['school'] = user_portrait_result['school_string']
        results['tendency']['character_sentiment'] = user_portrait_result['character_sentiment']
        results['tendency']['character_text'] = user_portrait_result['character_text']
        #step3: get tag information
        #tag
        try:
            admin_tag = user_portrait_result[admin_user + '-tag']
        except:
            admin_tag = {}
        if not admin_tag:
            results['tag_remark'] = {'tag': []}
        else:
            tag_list = admin_tag.split('&')
            results['tag_remark'] = {'tag': tag_list}
        #remark
        try:
            remark = user_portrait_result['remark']
        except:
            remark = ''
        results['tag_remark']['remark'] = remark
        #step4: get group_tag information
        results['group_tag'] = []
        try:
            group_tag = user_portrait_result['group']
        except:
            group_tag = ''
        if group_tag:
            group_tag_list = group_tag.split('&')
            for group_tag in group_tag_list:
                group_tag_item_list = group_tag.split('-')
                if group_tag_item_list[0] == admin_user:
                    results['group_tag'].append(group_tag_item_list[1])
    return results
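# keyword_filter is applied to the keyword dict above but not defined in this
# section; it is assumed to drop stop words and single-character noise words.
# A minimal sketch under that assumption (STOP_WORDS is hypothetical and would
# come from the project's stop-word list):
STOP_WORDS = set()

def keyword_filter(keywords_dict):
    return dict((word, weight) for word, weight in keywords_dict.iteritems()
                if word not in STOP_WORDS and len(word) > 1)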
def conclusion_on_influence(uid):
    # test
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    total_number = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type)["count"]
    try:
        influ_result = es.get(index=index_name, doc_type=index_type, id=uid)['_source']
    except:
        # aver_activeness, rank, aver_influence, rank, aver_importance, rank, total
        return [0, 0, 0, 0, 0, 0, total_number]
    aver_activeness = influ_result.get("aver_activeness", 0)
    aver_influence = influ_result.get("aver_influence", 0)
    aver_importance = influ_result.get('aver_importance', 0)
    # top value of each metric (sort desc, size 1)
    influence_query_body = {
        "query": {"match_all": {}},
        "sort": {"aver_influence": {"order": "desc"}},
        "size": 1
    }
    top_influence = es.search(index=copy_portrait_index_name, doc_type=copy_portrait_index_type,
            body=influence_query_body)['hits']['hits'][0]['sort'][0]
    importance_query_body = {
        "query": {"match_all": {}},
        "sort": {"aver_importance": {"order": "desc"}},
        "size": 1
    }
    top_importance = es.search(index=copy_portrait_index_name, doc_type=copy_portrait_index_type,
            body=importance_query_body)['hits']['hits'][0]['sort'][0]
    activeness_query_body = {
        "query": {"match_all": {}},
        "sort": {"aver_activeness": {"order": "desc"}},
        "size": 1
    }
    top_activeness = es.search(index=copy_portrait_index_name, doc_type=copy_portrait_index_type,
            body=activeness_query_body)['hits']['hits'][0]['sort'][0]
    # how many users rank above this one on each metric
    influence_query_body = {
        "query": {"filtered": {"filter": {"range": {"aver_influence": {"gt": aver_influence}}}}}
    }
    activeness_query_body = {
        "query": {"filtered": {"filter": {"range": {"aver_activeness": {"gt": aver_activeness}}}}}
    }
    importance_query_body = {
        "query": {"filtered": {"filter": {"range": {"aver_importance": {"gt": aver_importance}}}}}
    }
    influence_count = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type,
            body=influence_query_body)['count']
    activeness_count = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type,
            body=activeness_query_body)['count']
    importance_count = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type,
            body=importance_query_body)['count']
    result = [int(aver_activeness * 100.0 / top_activeness), activeness_count,
              int(aver_influence * 100.0 / top_influence), influence_count,
              int(aver_importance * 100.0 / top_importance), importance_count,
              total_number]
    return result