def get_group_list(task_name, submit_user):
    """Return one row per group member: [uid, uname, gender, location,
    normalized importance, normalized influence].

    The group is looked up by (task_name, submit_user); members whose
    portrait attributes are incomplete yield a row of empty strings.
    """
    must_terms = [
        {"term": {"task_name": task_name}},
        {"term": {"submit_user": submit_user}},
    ]
    # First matching group document holds the member uid list.
    group_source = es_group_result.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={"query": {"bool": {"must": must_terms}}},
    )["hits"]["hits"][0]["_source"]
    member_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={"ids": group_source["uid_list"]},
    )["docs"]
    max_values = get_evaluate_max()
    rows = []
    for doc in member_docs:
        uid = doc["_id"]
        try:
            src = doc["_source"]
            # Map raw scores onto a 0-100 log scale relative to the maximum.
            scaled_importance = math.log(
                src["importance"] / max_values["importance"] * 9 + 1, 10) * 100
            scaled_influence = math.log(
                src["influence"] / max_values["influence"] * 9 + 1, 10) * 100
            rows.append([uid, src["uname"], src["gender"], src["location"],
                         scaled_importance, scaled_influence])
        except:
            # Missing portrait fields: emit a blank row for this uid.
            rows.append([uid, "", "", "", "", ""])
    return rows
def ajax_get_group_detail(): task_name = request.args.get('task_name','') # task_name user = request.args.get('user', '') _id = user + '-' + task_name portrait_detail = [] top_activeness = get_top_influence("activeness") top_influence = get_top_influence("influence") top_importance = get_top_influence("importance") search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {}) if search_result: try: uid_list = json.loads(search_result['uid_list']) except: uid_list = search_result['uid_list'] if uid_list: search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list}, fields=SOCIAL_SENSOR_INFO)['docs'] for item in search_results: temp = [] if item['found']: for iter_item in SOCIAL_SENSOR_INFO: if iter_item == "topic_string": temp.append(item["fields"][iter_item][0].split('&')) temp.append(item["fields"][iter_item][0].split('&')) elif iter_item == "activeness": temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100) elif iter_item == "importance": temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100) elif iter_item == "influence": temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100) else: temp.append(item["fields"][iter_item][0]) portrait_detail.append(temp) return json.dumps(portrait_detail)
def get_activity_weibo(task_name, submit_user, start_ts, time_segment=FOUR_HOUR):
    """Return the weibo posted by the group's members inside
    [start_ts, start_ts + time_segment), or an error string when the
    task or its uid list cannot be resolved.
    """
    # Resolve the group document; its id is submit_user + task_name.
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id, _source=False,
                                           fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    # Build a uid -> uname map from the portrait index.
    # NOTE: the map is populated but not read below (kept for parity with
    # the original behavior and its ES side effect).
    uid2uname = {}
    try:
        portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                              doc_type=portrait_index_type,
                                              body={'ids': uid_list},
                                              _source=False,
                                              fields=['uname'])['docs']
    except:
        portrait_docs = []
    for doc in portrait_docs:
        if doc['found'] == True:
            uid2uname[doc['_id']] = doc['fields']['uname'][0]
    # Search the day's flow-text index for the members' weibo in range.
    end_ts = start_ts + time_segment
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    must_query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        hits = es_flow_text.search(index=flow_text_index_name,
                                   doc_type=flow_text_index_type,
                                   body={'query': {'bool': {'must': must_query}},
                                         'sort': 'timestamp',
                                         'size': MAX_VALUE})['hits']['hits']
    except:
        hits = []
    results = []
    for hit in hits:
        source = hit['_source']
        results.append({
            'timestamp': ts2date(source['timestamp']),
            'ip': source['ip'],
            'text': source['text'],
            'geo': '\t'.join(source['geo']) if source['geo'] else '',
        })
    return results
def ajax_get_group_detail(): task_name = request.args.get('task_name', '') # task_name user = request.args.get('user', '') _id = user + '-' + task_name portrait_detail = [] top_activeness = get_top_influence("activeness") top_influence = get_top_influence("influence") top_importance = get_top_influence("importance") search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {}) if search_result: try: uid_list = json.loads(search_result['uid_list']) except: uid_list = search_result['uid_list'] if uid_list: search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": uid_list}, fields=SOCIAL_SENSOR_INFO)['docs'] for item in search_results: temp = [] if item['found']: for iter_item in SOCIAL_SENSOR_INFO: if iter_item == "topic_string": temp.append( item["fields"][iter_item][0].split('&')) temp.append( item["fields"][iter_item][0].split('&')) elif iter_item == "activeness": temp.append( math.log( item['fields']['activeness'][0] / float(top_activeness) * 9 + 1, 10) * 100) elif iter_item == "importance": temp.append( math.log( item['fields']['importance'][0] / float(top_importance) * 9 + 1, 10) * 100) elif iter_item == "influence": temp.append( math.log( item['fields']['influence'][0] / float(top_influence) * 9 + 1, 10) * 100) else: temp.append(item["fields"][iter_item][0]) portrait_detail.append(temp) return json.dumps(portrait_detail)
def identify_user_out(input_uid_list):
    """Split input uids into those present in user_portrait and those not.

    Returns (in_user_list, sort_out_user_result) where the second element
    is [[uid, uname, fansnum, statusnum, friendsnum], ...] for the
    out-of-portrait users, sorted by fansnum descending.
    """
    out_user_list = []
    in_user_list = []
    input_len = len(input_uid_list)
    iter_count = 0
    print 'identify user out'
    #get user list who is out user_portrait
    # Batch mget in chunks of DETECT_ITER_COUNT to bound request size.
    while iter_count < input_len:
        iter_user_list = input_uid_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':iter_user_list}, _source=False)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] != True:
                out_user_list.append(uid)
            else:
                in_user_list.append(uid)
        iter_count += DETECT_ITER_COUNT
    print 'get out user portrait information'
    #get user profile information for out user_portrait
    iter_count = 0
    out_user_count = len(out_user_list)
    out_user_result = []
    while iter_count < out_user_count:
        iter_user_list = out_user_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':iter_user_list}, _source=True)['docs']
        except:
            profile_result = []
        for item in profile_result:
            uid = item['_id']
            if item['found']==True:
                source = item['_source']
                uname = source['nick_name']
                fansnum = source['fansnum']
                statusnum = source['statusnum']
                friendsnum = source['friendsnum']
            else:
                # Placeholder for users with no profile document
                # (the literal means "unknown").
                uname = u'未知'
                fansnum = u'未知'
                statusnum = u'未知'
                friendsnum = u'未知'
            out_user_result.append([uid, uname, fansnum, statusnum, friendsnum])
        iter_count += DETECT_ITER_COUNT
    # Sort by fansnum descending; unicode placeholders sort after ints in Py2.
    sort_out_user_result = sorted(out_user_result, key=lambda x:x[2], reverse=True)
    return in_user_list, sort_out_user_result
def show_detect_result(task_id):
    """Return [[uid, uname, activeness, importance, influence], ...] for a
    detect task's uid list, sorted by influence descending, with scores
    log-normalized to 0-100. Returns an error string when the task id
    does not exist.
    """
    user_result = []
    #step1:identify the task name id exist
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    #step2:get uid list
    uid_list = json.loads(task_exist_result['uid_list'])
    #step3:get user evaluation information---uid/uname/activeness/importance/influence
    # FIX: get_evaluate_max() is loop-invariant — it was previously called
    # once per found user; hoist it so the lookup happens exactly once.
    evaluate_max = get_evaluate_max()
    iter_count = 0
    uid_count = len(uid_list)
    # Batch the portrait lookups in chunks of DETECT_ITER_COUNT.
    while iter_count < uid_count:
        iter_user_list = uid_list[iter_count:iter_count + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                    doc_type=portrait_index_type,
                                                    body={'ids': iter_user_list},
                                                    _source=True)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                uname = source['uname']
                activeness = math.log(
                    source['activeness'] / evaluate_max['activeness'] * 9 + 1,
                    10) * 100
                importance = math.log(
                    source['importance'] / evaluate_max['importance'] * 9 + 1,
                    10) * 100
                influence = math.log(
                    source['influence'] / evaluate_max['influence'] * 9 + 1,
                    10) * 100
            else:
                # Placeholder for users with no portrait (means "unknown").
                uname = u'未知'
                activeness = u'未知'
                importance = u'未知'
                influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
        iter_count += DETECT_ITER_COUNT
    # Sort by influence (index 4) descending.
    sort_user_result = sorted(user_result, key=lambda x: x[4], reverse=True)
    return sort_user_result
def get_group_list(task_name, submit_user):
    """Return [uid, uname, gender, location, normal_importance,
    normal_influence] rows for every member of the group matching
    (task_name, submit_user); incomplete portraits yield blank rows.
    """
    results = []
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "task_name": task_name
                    }
                }, {
                    "term": {
                        "submit_user": submit_user
                    }
                }]
            }
        }
    }
    # First matching group document holds the member uid list.
    es_results = es_group_result.search(index=group_index_name, doc_type=group_index_type, \
            body=query_body)["hits"]["hits"][0]["_source"]
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['uname']
            gender = source['gender']
            location = source['location']
            importance = source['importance']
            # Log-normalize scores to 0-100 relative to the global maxima.
            normal_importance = math.log(
                importance / evaluate_max['importance'] * 9 + 1, 10) * 100
            influence = source['influence']
            normal_influence = math.log(
                influence / evaluate_max['influence'] * 9 + 1, 10) * 100
            results.append([
                uid, uname, gender, location, normal_importance,
                normal_influence
            ])
        except:
            # Missing portrait fields: emit a blank row for this uid.
            results.append([uid, '', '', '', '', ''])
    return results
def ajax_get_task_detail_info():
    """Flask view: return the full sensing-task detail document as JSON,
    with history_status sorted newest-first and each social sensor's
    portrait row (SOCIAL_SENSOR_INFO order) attached under
    'social_sensors_portrait', sorted by the 6th field descending.
    """
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type, id=_id)['_source']
    # social_sensors is stored as a JSON string inside the document.
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        # Newest first. CLEANUP: dropped the dead commented-out reordering
        # code and the redundant identity key (key=lambda x: x) — plain
        # sorted(..., reverse=True) is identical.
        task_detail['history_status'] = sorted(history_status, reverse=True)
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []
    if task_detail["social_sensors"]:
        search_results = es.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={"ids": task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                if item['found']:
                    temp = []
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            # topic_string is '&'-joined; expose it as a list.
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
    if portrait_detail:
        portrait_detail = sorted(portrait_detail,
                                 key=lambda x: x[5], reverse=True)
        task_detail['social_sensors_portrait'] = portrait_detail
    return json.dumps(task_detail)
def show_detect_result(task_id):
    """Return [[uid, uname, activeness, importance, influence], ...] for a
    detect task's uid list, sorted by influence descending; scores are
    log-normalized to 0-100. Returns an error string for an unknown task.
    """
    user_result = []
    #step1:identify the task name id exist
    try:
        task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)['_source']
    except:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    #step2:get uid list
    uid_list = json.loads(task_exist_result['uid_list'])
    #step3:get user evaluation information---uid/uname/activeness/importance/influence
    # FIX: get_evaluate_max() is loop-invariant — it was called once per
    # found user inside the loop; hoist it so it runs exactly once.
    evaluate_max = get_evaluate_max()
    iter_count = 0
    uid_count = len(uid_list)
    while iter_count < uid_count:
        iter_user_list = uid_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                    body={'ids':iter_user_list}, _source=True)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found']==True:
                source = item['_source']
                uname = source['uname']
                # Log-normalize to 0-100 relative to the global maxima.
                activeness = math.log(source['activeness']/evaluate_max['activeness'] * 9 + 1 ,10)*100
                importance = math.log(source['importance']/evaluate_max['importance'] * 9 + 1 ,10)*100
                influence = math.log(source['influence']/evaluate_max['influence'] * 9 + 1 ,10)*100
            else:
                # Placeholder for users with no portrait (means "unknown").
                uname = u'未知'
                activeness = u'未知'
                importance = u'未知'
                influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
        iter_count += DETECT_ITER_COUNT
    # Sort by influence (index 4) descending.
    sort_user_result = sorted(user_result, key=lambda x:x[4], reverse=True)
    return sort_user_result
def get_group_member_name(task_name, submit_user):
    """Return {uid: uname} for every member of the group whose document id
    is submit_user + task_name. Users absent from the portrait index map
    to the placeholder "unknown"; lookup failures return {}.
    """
    results = {}
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id
        )["_source"]
    except:
        return results
    uid_list = group_result["uid_list"]
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type, body={"ids": uid_list}
        )["docs"]
    except:
        return results
    for item in user_portrait_result:
        uid = item["_id"]
        if item["found"] == True:
            source = item["_source"]
            uname = source["uname"]
        else:
            # FIX: placeholder was misspelled "unkown"; spelled correctly now,
            # consistent with get_social_inter_content's "unknown".
            uname = "unknown"
        results[uid] = uname
    return results
def get_group_member_name(task_name, submit_user):
    """Return {uid: uname} for every member of the group whose document id
    is submit_user + task_name. Users absent from the portrait index map
    to the placeholder 'unknown'; lookup failures return {}.
    """
    results = {}
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return results
    uid_list = group_result['uid_list']
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                body={'ids':uid_list})['docs']
    except:
        return results
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            source = item['_source']
            uname = source['uname']
        else:
            # FIX: placeholder was misspelled 'unkown'; spelled correctly now,
            # consistent with get_social_inter_content's 'unknown'.
            uname = 'unknown'
        results[uid] = uname
    return results
def get_temporal_rank(task_type, sort="retweeted", number=100):
    """Return the top `number` users from the redis influence ranking.

    task_type 0 selects the cumulative zset "influence_<sort>", 1-4 select
    the per-segment zsets "influence_<sort>_<n>". Each result row is
    [uid, nick_name, weibo_count, location, fansnum, retweeted_count,
    comment_count, in_portrait_flag("1"/"0")].
    """
    number = int(number) - 1  # zrange end index is inclusive
    if int(task_type) == 0:  # cumulative ranking so far
        sort_list = r.zrange("influence_%s" % sort, 0, number, withscores=True, desc=True)
    elif int(task_type) == 1:
        sort_list = r.zrange("influence_%s_1" % sort, 0, number, withscores=True, desc=True)
    elif int(task_type) == 2:
        sort_list = r.zrange("influence_%s_2" % sort, 0, number, withscores=True, desc=True)
    elif int(task_type) == 3:
        sort_list = r.zrange("influence_%s_3" % sort, 0, number, withscores=True, desc=True)
    else:
        sort_list = r.zrange("influence_%s_4" % sort, 0, number, withscores=True, desc=True)
    uid_list = [pair[0] for pair in sort_list]
    # The complementary counter (comment vs retweeted) is fetched per uid.
    if sort == "retweeted":
        other = "comment"
    else:
        other = "retweeted"
    results = []
    # Attach profile background information.
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list})["docs"]
        bci_result = es_user_profile.mget(index="bci_history", doc_type="bci", body={"ids": uid_list}, fields=['user_fansnum', "weibo_month_sum"])["docs"]
        # FIX: use enumerate instead of profile_result.index(item) — the
        # latter is O(n^2) and returns the wrong position on duplicates.
        for index, item in enumerate(profile_result):
            _id = item['_id']
            tmp = [_id]
            if item['found']:
                source = item['_source']
                tmp.append(source['nick_name'])
                tmp.append(source['statusnum'])
                tmp.append(source['user_location'])
                tmp.append(source['fansnum'])
            else:
                tmp.extend(['', 0, '', 0])
            # FIX: the key was misspelled 'fileds', so these overrides always
            # raised KeyError and were silently dropped by the bare except.
            # mget `fields` values come back as single-element lists, hence [0]
            # (same access pattern as the portrait lookups elsewhere) —
            # confirm against the bci_history mapping.
            try:
                tmp[4] = bci_result[index]['fields']['user_fansnum'][0]
            except:
                pass
            try:
                tmp[2] = bci_result[index]['fields']["weibo_month_sum"][0]
            except:
                pass
            count_1 = int(sort_list[index][1])
            # Score from the complementary ranking for the same segment.
            if int(task_type) == 0:
                tmp_count = r.zscore("influence_%s" % other, _id)
            else:
                tmp_count = r.zscore("influence_%s_%s" % (other, task_type), _id)
            if tmp_count:
                count_2 = int(tmp_count)
            else:
                count_2 = 0
            # Keep column order fixed: retweeted count before comment count.
            if sort == "retweeted":
                tmp.append(count_1)
                tmp.append(count_2)
            else:
                tmp.append(count_2)
                tmp.append(count_1)
            results.append(tmp)
    # Flag whether each user exists in the portrait index.
    if uid_list:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": uid_list})["docs"]
        for count, item in enumerate(portrait_result):
            if item['found']:
                results[count].append("1")
            else:
                results[count].append("0")
    return results
def show_important_users(task_name):
    """Collect the important users accumulated over a sensing task's history
    and return them with normalized scores, alongside the task's social
    sensor details, keywords and remark.
    """
    return_results = dict()  # dict returned to the caller
    top_influence = get_top_influence()
    task_detail = es.get(index=index_manage_social_task, doc_type=task_doc_type, id=task_name)["_source"]
    portrait_detail = []
    important_user_set = set()  # accumulated important users
    history_status = json.loads(task_detail['history_status'])
    start_time = int(task_detail['create_at'])
    stop_time = int(task_detail['stop_time'])
    time_series = []
    keywords_list = json.loads(task_detail['keywords'])
    return_results['keywords'] = keywords_list
    return_results['remark'] = task_detail['remark']
    social_sensors = json.loads(task_detail['social_sensors'])
    # history_status entries carry a timestamp in position 0.
    for item in history_status:
        time_series.append(item[0])
    # return social sensors details
    if social_sensors:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":social_sensors},fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])["docs"]
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    return_results['social_sensors_detail'] = portrait_detail
    # Pull each history snapshot and union its important_users set.
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=task_name, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            temp_user_list = json.loads(item['important_users'])
            important_user_set = important_user_set | set(temp_user_list)
    # Maxima used to normalize the three evaluation scores.
    top_importance = get_top_influence('importance')
    top_activeness = get_top_influence('activeness')
    top_influence = get_top_influence('influence')
    important_uid_list = list(important_user_set)
    user_detail_info = []
    # Fetch portrait details for every collected important user.
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list},fields=['uid', 'uname', 'domain', 'topic_string',
                               "photo_url", 'importance', 'influence','activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                # Skip users below the importance threshold.
                if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time, keywords_list)
                temp.append(hot_count)
                # Log-normalize scores to 0-100; only importance is formatted
                # to two decimals here.
                importance = math.log(item['fields']['importance'][0]/top_importance*9+1, 10)*100
                temp.append("%.2f" %importance)
                temp.append(math.log(item['fields']['influence'][0]/top_influence*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/top_activeness*9+1, 10)*100)
                user_detail_info.append(temp)
    return_results['group_list'] = user_detail_info
    return return_results
def search_group_sentiment_weibo(task_name, submit_user, start_ts, sentiment):
    """Return the group's weibo for one day filtered by sentiment.

    sentiment "2" selects the SENTIMENT_SECOND set of sentiment labels;
    any other value matches that single label. Returns an error string
    when the task or its uid list cannot be resolved.
    """
    weibo_list = []
    # step1:get task_name uid
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id, _source=False, fields=["uid_list"]
        )
    except:
        group_result = {}
    if group_result == {}:
        return "task name invalid"
    try:
        uid_list = group_result["fields"]["uid_list"]
    except:
        uid_list = []
    if uid_list == []:
        return "task uid list null"
    # step3: get ui2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": uid_list},
            _source=False,
            fields=["uname"],
        )["docs"]
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item["_id"]
        if item["found"] == True:
            uname = item["fields"]["uname"][0]
            uid2uname[uid] = uname
    # step4:iter date to search weibo
    weibo_list = []
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    # step4: get query_body
    if sentiment != "2":
        query_body = [
            {"terms": {"uid": uid_list}},
            {"term": {"sentiment": sentiment}},
            {"range": {"timestamp": {"gte": start_ts, "lt": start_ts + DAY}}},
        ]
    else:
        # "2" stands for the whole secondary sentiment label group.
        query_body = [
            {"terms": {"uid": uid_list}},
            {"terms": {"sentiment": SENTIMENT_SECOND}},
            {"range": {"timestamp": {"gte": start_ts, "lt": start_ts + DAY}}},
        ]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body={
                "query": {"bool": {"must": query_body}},
                "sort": [{"timestamp": {"order": "asc"}}],
                "size": MAX_VALUE,
            },
        )["hits"]["hits"]
    except:
        flow_text_result = []
    for flow_text_item in flow_text_result:
        source = flow_text_item["_source"]
        weibo = {}
        weibo["uid"] = source["uid"]
        # NOTE(review): raises KeyError if a posting uid has no portrait
        # entry — uid2uname is only filled for found portraits; confirm
        # every group member is guaranteed to be in the portrait index.
        weibo["uname"] = uid2uname[weibo["uid"]]
        weibo["ip"] = source["ip"]
        try:
            weibo["geo"] = "\t".join(source["geo"].split("&"))
        except:
            weibo["geo"] = ""
        weibo["text"] = source["text"]
        weibo["timestamp"] = source["timestamp"]
        weibo["sentiment"] = source["sentiment"]
        weibo_list.append(weibo)
    return weibo_list
def get_social_inter_content(uid1, uid2, type_mark): weibo_list = [] # get two type relation about uid1 and uid2 # search weibo list now_ts = int(time.time()) # run_type if RUN_TYPE == 1: now_date_ts = datetime2ts(ts2datetime(now_ts)) else: now_date_ts = datetime2ts(RUN_TEST_TIME) # uid2uname uid2uname = {} try: portrait_result = es_user_portrait.mget( index=portrait_index_name, doc_type=portrait_index_type, body={"ids": [uid1, uid2]}, _source=False, fields=["uid", "uname"], )["docs"] except: portrait_result = [] for item in portrait_result: uid = item["_id"] if item["found"] == True: uname = item["fields"]["uname"][0] uid2uname[uid] = uname else: uid2uname[uid] = "unknown" # iter date to search weibo list for i in range(7, 0, -1): iter_date_ts = now_date_ts - i * DAY iter_date = ts2datetime(iter_date_ts) flow_text_index_name = flow_text_index_name_pre + str(iter_date) query = [] query.append({"bool": {"must": [{"term": {"uid": uid1}}, {"term": {"directed_uid": int(uid2)}}]}}) if type_mark == "out": query.append({"bool": {"must": [{"term": {"uid": uid2}}, {"term": {"directed_uid": int(uid1)}}]}}) try: flow_text_result = es_flow_text.search( index=flow_text_index_name, doc_type=flow_text_index_type, body={ "query": {"bool": {"should": query}}, "sort": [{"timestamp": {"order": "asc"}}], "size": MAX_VALUE, }, )["hits"]["hits"] except: flow_text_result = [] for flow_text in flow_text_result: source = flow_text["_source"] weibo = {} weibo["timestamp"] = source["timestamp"] weibo["ip"] = source["ip"] weibo["geo"] = source["geo"] weibo["text"] = "\t".join(source["text"].split("&")) weibo["uid"] = source["uid"] weibo["uname"] = uid2uname[weibo["uid"]] weibo["directed_uid"] = str(source["directed_uid"]) weibo["directed_uname"] = uid2uname[str(source["directed_uid"])] weibo_list.append(weibo) return weibo_list
def get_activity_weibo(task_name, submit_user, start_ts, time_segment=FOUR_HOUR): results = [] # step1: get task_name uid task_id = submit_user + task_name try: group_result = es_group_result.get( index=group_index_name, doc_type=group_index_type, id=task_id, _source=False, fields=["uid_list"] ) except: group_result = {} if group_result == {}: return "task name invalid" try: uid_list = group_result["fields"]["uid_list"] except: uid_list = [] if uid_list == []: return "task uid list null" # step2: get uid2uname uid2uname = {} try: user_portrait_result = es_user_portrait.mget( index=portrait_index_name, doc_type=portrait_index_type, body={"ids": uid_list}, _source=False, fields=["uname"], )["docs"] except: user_portrait_result = [] for item in user_portrait_result: uid = item["_id"] if item["found"] == True: uname = item["fields"]["uname"][0] uid2uname[uid] = uname # step3: search time_segment weibo end_ts = start_ts + time_segment time_date = ts2datetime(start_ts) flow_text_index_name = flow_text_index_name_pre + time_date query = [] query.append({"terms": {"uid": uid_list}}) query.append({"range": {"timestamp": {"gte": start_ts, "lt": end_ts}}}) try: flow_text_es_result = es_flow_text.search( index=flow_text_index_name, doc_type=flow_text_index_type, body={"query": {"bool": {"must": query}}, "sort": "timestamp", "size": MAX_VALUE}, )["hits"]["hits"] except: flow_text_es_result = [] for item in flow_text_es_result: weibo = {} source = item["_source"] weibo["timestamp"] = ts2date(source["timestamp"]) weibo["ip"] = source["ip"] weibo["text"] = source["text"] if source["geo"]: weibo["geo"] = "\t".join(source["geo"]) else: weibo["geo"] = "" results.append(weibo) return results
def get_social_inter_content(uid1, uid2, type_mark): weibo_list = [] #get two type relation about uid1 and uid2 #search weibo list now_ts = int(time.time()) #run_type if RUN_TYPE == 1: now_date_ts = datetime2ts(ts2datetime(now_ts)) else: now_date_ts = datetime2ts(RUN_TEST_TIME) #uid2uname uid2uname = {} try: portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\ body={'ids': [uid1, uid2]}, _source=False, fields=['uid', 'uname'])['docs'] except: portrait_result = [] for item in portrait_result: uid = item['_id'] if item['found'] == True: uname = item['fields']['uname'][0] uid2uname[uid] = uname else: uid2uname[uid] = 'unknown' #iter date to search weibo list for i in range(7, 0, -1): iter_date_ts = now_date_ts - i * DAY iter_date = ts2datetime(iter_date_ts) flow_text_index_name = flow_text_index_name_pre + str(iter_date) query = [] query.append({ 'bool': { 'must': [{ 'term': { 'uid': uid1 } }, { 'term': { 'directed_uid': int(uid2) } }] } }) if type_mark == 'out': query.append({ 'bool': { 'must': [{ 'term': { 'uid': uid2 } }, { 'term': { 'directed_uid': int(uid1) } }] } }) try: flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\ body={'query': {'bool':{'should': query}}, 'sort':[{'timestamp':{'order': 'asc'}}], 'size':MAX_VALUE})['hits']['hits'] except: flow_text_result = [] for flow_text in flow_text_result: source = flow_text['_source'] weibo = {} weibo['timestamp'] = source['timestamp'] weibo['ip'] = source['ip'] weibo['geo'] = source['geo'] weibo['text'] = '\t'.join(source['text'].split('&')) weibo['uid'] = source['uid'] weibo['uname'] = uid2uname[weibo['uid']] weibo['directed_uid'] = str(source['directed_uid']) weibo['directed_uname'] = uid2uname[str(source['directed_uid'])] weibo_list.append(weibo) return weibo_list
def show_important_users(task_name):
    """Collect the important users accumulated over a sensing task's history
    and return them with normalized scores, alongside the task's social
    sensor details, keywords and remark.
    """
    return_results = dict()  # dict returned to the caller
    top_influence = get_top_influence()
    task_detail = es.get(index=index_manage_social_task, doc_type=task_doc_type, id=task_name)["_source"]
    portrait_detail = []
    important_user_set = set()  # accumulated important users
    history_status = json.loads(task_detail['history_status'])
    start_time = int(task_detail['create_at'])
    stop_time = int(task_detail['stop_time'])
    time_series = []
    keywords_list = json.loads(task_detail['keywords'])
    return_results['keywords'] = keywords_list
    return_results['remark'] = task_detail['remark']
    social_sensors = json.loads(task_detail['social_sensors'])
    # history_status entries carry a timestamp in position 0.
    for item in history_status:
        time_series.append(item[0])
    # return social sensors details
    if social_sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": social_sensors},
                                 fields=[
                                     'uid', 'uname', 'domain', 'topic_string',
                                     "photo_url", 'importance', 'influence',
                                     'activeness'
                                 ])["docs"]
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    return_results['social_sensors_detail'] = portrait_detail
    # Pull each history snapshot and union its important_users set.
    if time_series:
        flow_detail = es.mget(index=index_sensing_task,
                              doc_type=task_name,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            temp_user_list = json.loads(item['important_users'])
            important_user_set = important_user_set | set(temp_user_list)
    # Maxima used to normalize the three evaluation scores.
    top_importance = get_top_influence('importance')
    top_activeness = get_top_influence('activeness')
    top_influence = get_top_influence('influence')
    important_uid_list = list(important_user_set)
    user_detail_info = []
    # Fetch portrait details for every collected important user.
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name,
                               doc_type=portrait_index_type,
                               body={"ids": important_uid_list},
                               fields=[
                                   'uid', 'uname', 'domain', 'topic_string',
                                   "photo_url", 'importance', 'influence',
                                   'activeness'
                               ])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                # Skip users below the importance threshold.
                if int(item['fields']['importance']
                       [0]) < IMPORTANT_USER_THRESHOULD:
                    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                hot_count = count_hot_uid(item['fields']['uid'][0], start_time,
                                          stop_time, keywords_list)
                temp.append(hot_count)
                # Log-normalize scores to 0-100; only importance is
                # formatted to two decimals here.
                importance = math.log(
                    item['fields']['importance'][0] / top_importance * 9 + 1,
                    10) * 100
                temp.append("%.2f" % importance)
                temp.append(
                    math.log(
                        item['fields']['influence'][0] / top_influence * 9 + 1,
                        10) * 100)
                temp.append(
                    math.log(
                        item['fields']['activeness'][0] / top_activeness * 9 +
                        1, 10) * 100)
                user_detail_info.append(temp)
    return_results['group_list'] = user_detail_info
    return return_results
def identify_user_out(input_uid_list):
    """Split input uids into those present in user_portrait and those not.

    Returns (in_user_list, sort_out_user_result) where the second element
    is [[uid, uname, fansnum, statusnum, friendsnum], ...] for the
    out-of-portrait users, sorted by fansnum descending.
    """
    out_user_list = []
    in_user_list = []
    input_len = len(input_uid_list)
    iter_count = 0
    print 'identify user out'
    #get user list who is out user_portrait
    # Batch mget in chunks of DETECT_ITER_COUNT to bound request size.
    while iter_count < input_len:
        iter_user_list = input_uid_list[iter_count:iter_count +
                                        DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name,
                doc_type=portrait_index_type,
                body={'ids': iter_user_list},
                _source=False)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] != True:
                out_user_list.append(uid)
            else:
                in_user_list.append(uid)
        iter_count += DETECT_ITER_COUNT
    print 'get out user portrait information'
    #get user profile information for out user_portrait
    iter_count = 0
    out_user_count = len(out_user_list)
    out_user_result = []
    while iter_count < out_user_count:
        iter_user_list = out_user_list[iter_count:iter_count +
                                       DETECT_ITER_COUNT]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name,
                                                  doc_type=profile_index_type,
                                                  body={'ids': iter_user_list},
                                                  _source=True)['docs']
        except:
            profile_result = []
        for item in profile_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                uname = source['nick_name']
                fansnum = source['fansnum']
                statusnum = source['statusnum']
                friendsnum = source['friendsnum']
            else:
                # Placeholder for users with no profile document
                # (the literal means "unknown").
                uname = u'未知'
                fansnum = u'未知'
                statusnum = u'未知'
                friendsnum = u'未知'
            out_user_result.append(
                [uid, uname, fansnum, statusnum, friendsnum])
        iter_count += DETECT_ITER_COUNT
    # Sort by fansnum descending; unicode placeholders sort after ints in Py2.
    sort_out_user_result = sorted(out_user_result,
                                  key=lambda x: x[2],
                                  reverse=True)
    return in_user_list, sort_out_user_result
def search_group_sentiment_weibo(task_name, submit_user, start_ts, sentiment):
    """Fetch one day of a group's weibo, filtered by sentiment.

    Parameters:
      task_name, submit_user -- identify the group task
      start_ts   -- unix timestamp; the query covers [start_ts, start_ts+DAY)
      sentiment  -- sentiment code as string; '2' expands to SENTIMENT_SECOND

    Returns a list of weibo dicts, or an error string ('task name invalid' /
    'task uid list null') when the task cannot be resolved.

    BUGFIX: uname lookup now uses dict.get -- uid2uname only contains uids
    that were *found* in user_portrait, while the flow-text query matches all
    group uids, so a plain [] lookup could raise KeyError.
    """
    weibo_list = []
    # step1: resolve task -> member uid list
    # NOTE(review): other task ids in this file join with '-'; this one
    # concatenates directly -- confirm against the writer of group_index_name.
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    # step2: build uid -> uname map from user_portrait (found users only)
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                body={'ids':uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uid2uname[uid] = item['fields']['uname'][0]
    # step3: query the day's flow-text index
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    if sentiment != '2':
        query_body = [{'terms': {'uid': uid_list}}, {'term':{'sentiment': sentiment}}, \
                {'range':{'timestamp':{'gte':start_ts, 'lt': start_ts+DAY}}}]
    else:
        # '2' is a meta-code covering the SENTIMENT_SECOND set
        query_body = [{'terms':{'uid':uid_list}}, {'terms':{'sentiment': SENTIMENT_SECOND}},\
                {'range':{'timestamp':{'gte':start_ts, 'lt':start_ts+DAY}}}]
    try:
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                body={'query':{'bool':{'must': query_body}}, 'sort': [{'timestamp':{'order':'asc'}}], 'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_result = []
    # step4: shape each hit into the weibo dict the caller expects
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # fall back to '' for users absent from user_portrait (see docstring)
        weibo['uname'] = uid2uname.get(weibo['uid'], '')
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except:
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)
    return weibo_list
def get_task_detail_2(task_name, ts, user):
    """Assemble the detail view of a sensing task up to timestamp *ts*.

    Reads the task record from index_manage_sensing_task, walks its history
    snapshots in index_sensing_task, and returns a dict with time series of
    weibo counts plus per-user detail for important (in-portrait) and
    out-of-portrait users.  Large sections of earlier burst-analysis logic
    remain disabled inside triple-quoted strings below.
    """
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0  # counter
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    time_series = []  # timestamps
    #positive_sentiment_list = [] # sentiment lists
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = []  # weibo count lists
    retweeted_weibo_list = []
    #retweeted_weibo_count = [] # times others retweeted him
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = [] # burst-time list
    important_user_set = set()  # important users
    out_portrait_users = set()  # users not in the portrait DB
    ts = int(ts)
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0]) # all timestamps up to now
    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        # one snapshot document per timestamp in time_series
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"]) # real
            retweeted_weibo_list.append(item['retweeted_weibo_number']) # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            # unfiltered minus important = users not in the portrait DB
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list)
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)
            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1
    ####################################################################################
    # burst-reason statistics and conclusions (disabled)
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])
    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])
    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])
    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])
    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])
    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution
    """
    # fetch personal information for the important users
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    user_detail_info = []
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                # fall back to uid when the stored name is empty or "unknown"
                if not uname or uname == "未知":
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                # normalize importance/influence/activeness onto a 0-100 log scale
                temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                user_detail_info.append(temp)
    # sort by activeness (column 6), highest first
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)
    else:
        user_detail_info = []
    if out_portrait_users_list:
        # out-of-portrait users: profile + yesterday's bci influence
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci", body={"ids":out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    uid = item['_source']['uid']
                    temp = get_user_profile([uid], ['nick_name', 'user_location', 'statusnum', 'fansnum'])[0]
                else:
                    # placeholder row when the profile document is missing
                    temp = [item['_id'], item['_id'], '', '', '']
                """
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    temp.append(item['_source']['statusnum'])
                    temp.append(item['_source']['friendsnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                    temp.append('--')
                    temp.append('--')
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index/float(top_influence)*9+1, 10)*100)
                else:
                    temp.append(0)
                """
                count += 1
                out_user_detail_info.append(temp)
    # render timestamps as human-readable dates for the response
    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))
    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list # burst times and reasons
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #esults['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count
    return results
def full_text_search(keywords, uid, start_time, end_time, size): results = [] uid_list = [] user_profile_list = [] query_body = { "query": { "bool": { "must": [] } }, "size":size, "sort":{"timestamp":{"order": 'desc'}} } if RUN_TYPE: query_body["sort"] = {"user_fansnum":{"order": 'desc'}} if uid: query_body["query"]["bool"]["must"].append({"term":{"uid":uid}}) if keywords: keywords_list = keywords.split(',') for word in keywords_list: query_body["query"]["bool"]["must"].append({'wildcard':{'text':{'wildcard':'*'+word+'*'}}}) index_list = [] exist_bool = es_flow_text.indices.exists(index="flow_text_"+end_time) if start_time: start_ts = datetime2ts(start_time) end_ts = datetime2ts(end_time) ts = end_ts while 1: index_name = "flow_text_"+ts2datetime(ts) exist_bool = es_flow_text.indices.exists(index=index_name) if exist_bool: index_list.append(index_name) if ts == start_ts: break else: ts -= 3600*24 print index_list # 没有可行的es if not index_list: return [[], []] search_results = es_flow_text.search(index=index_list, doc_type="text", body=query_body)["hits"]["hits"] for item in search_results: uid_list.append(item['_source']['uid']) user_info = [] if uid_list: history_max = get_history_max() personal_field = ["nick_name", "fansnum", "statusnum","user_location"] user_info = get_user_profile(uid_list, personal_field) bci_results = ES_CLUSTER_FLOW1.mget(index="bci_history", doc_type="bci", body={"ids":uid_list}, _source=False, fields=["bci_day_last"])["docs"] in_portrait = es_user_portrait.mget(index="sensitive_user_portrait", doc_type="user", body={"ids":uid_list}, _source=False)["docs"] sensitive_results = es_sensitive_history.mget(index="sensitive_history", doc_type="sensitive", body={"ids":uid_list}, _source=False, fields=["last_value"])["docs"] print "len search: ", len(search_results) count = 0 # uid uname text date geo sensitive_words retweeted comment for item in search_results: item = item['_source'] uid_list.append(item['uid']) iter_item = [] 
iter_item.append(item['uid']) iter_item.append(user_info[count][1]) iter_item.append(item['text']) iter_item.append(ts2date(item['timestamp'])) iter_item.append(item['geo']) if item.get("sensitive_words_string", ''): iter_item.append(item['sensitive_words_string'].split('&')) else: iter_item.append([]) iter_item.append(item.get('retweeted', 0)) iter_item.append(item.get('comment', 0)) count += 1 results.append(iter_item) user_set = set() count = 0 # uid "nick_name", "fansnum", "statusnum","user_location", bci, sensitive for item in user_info: if item[0] in user_set: continue else: user_set.add(item[0]) if bci_results[count]["found"]: if bci_results[count].has_key("fields"): bci_value = bci_results[count]["fields"]["bci_day_last"][0] else: bci_value = 0 item.append(normalize_index(bci_value, history_max["max_bci"])) else: item.append(0) if sensitive_results[count]["found"]: if sensitive_results[count].has_key("fields"): sensitive_value = sensitive_results[count]['fields']['last_value'][0] else: sensitive_value = 0 item.append(normalize_index(sensitive_value, history_max["max_sensitive"])) else: item.append(0) if in_portrait[count]["found"]: item.append("1") else: item.append("0") user_profile_list.append(item) return results, user_profile_list