def statistics_influence_people(uid, date, style):
    """Describe the users who retweeted or commented on ``uid``'s weibos.

    The user's influence record for ``date`` is fetched from ``es_cluster``.
    Depending on ``style`` either its ``*_retweeted_detail`` or its
    ``*_comment_detail`` fields are decoded, reduced with ``filter_mid`` and
    handed to ``influenced_user_detail``.

    Args:
        uid: document id of the user in the influence index.
        date: day to inspect. Dashes are stripped to build the influence
            index name; the flow-text index name uses it unchanged.
        style: ``int(style) == 0`` selects retweet statistics, any other
            value selects comment statistics.

    Returns:
        Whatever ``influenced_user_detail`` returns (it must support item
        assignment) with an added ``"total_number"`` entry, or an empty dict
        when the influence record cannot be fetched.
    """
    influence_index = pre_index + str(date).replace("-", "")
    flow_text_index = pre_text_index + str(date)

    try:
        bci_result = es_cluster.get(
            index=influence_index, doc_type=influence_doctype, id=uid
        )["_source"]
    except Exception:
        # Best effort: a missing record or an unreachable cluster yields an
        # empty result instead of an error.
        return {}

    if int(style) == 0:  # retweeted
        origin_key = "origin_weibo_retweeted_detail"
        retweeted_key = "retweeted_weibo_retweeted_detail"
        detail_code = 3
    else:  # comment
        origin_key = "origin_weibo_comment_detail"
        retweeted_key = "retweeted_weibo_comment_detail"
        detail_code = 2

    # Only the two fields that belong to the requested style are decoded.
    origin_detail = json.loads(bci_result[origin_key])
    retweeted_detail = json.loads(bci_result[retweeted_key])
    total_number = sum(origin_detail.values()) + sum(retweeted_detail.values())

    origin_mid = filter_mid(origin_detail) if origin_detail else []
    retweeted_mid = filter_mid(retweeted_detail) if retweeted_detail else []

    # Map every retweeted weibo back to the root weibo it was derived from.
    root_mids = []
    if retweeted_mid:
        docs = es.mget(
            index=flow_text_index,
            doc_type=flow_text_index_type,
            body={"ids": retweeted_mid},
        )["docs"]
        # The mget payload lives under "_source"; reading "source" made every
        # lookup fall back to "0".
        root_mids = [doc.get("_source", {}).get("root_mid", "0") for doc in docs]

    results = influenced_user_detail(uid, date, origin_mid, root_mids, detail_code)
    results["total_number"] = total_number
    return results
def get_text(top_list, date, style):
    """Attach weibo details to every entry of ``top_list``.

    Args:
        top_list: sequence of entries whose first element is a weibo mid,
            e.g. ``[[mid1, n1], [mid2, n2]]``.
        date: suffix appended to ``pre_text_index`` to name the flow-text
            index that is queried.
        style: not used by this function; kept so existing callers work.

    Returns:
        A list with one row per entry of ``top_list``. Each row is a copy of
        the entry followed by eight values: text, geo, formatted timestamp,
        sentiment, weibo url, user url, uid and nickname (``"unknown"`` when
        the profile lookup fails). When the weibo is not found the eight
        values are empty strings.
    """
    results = []
    index_flow_text = pre_text_index + date
    if not top_list:
        return results

    mid_list = [item[0] for item in top_list]
    search_result = es.mget(
        index=index_flow_text, doc_type=flow_text_index_type, body={"ids": mid_list}
    )["docs"]

    # mget answers with one doc per requested id, in request order.
    for item, doc in zip(top_list, search_result):
        row = list(item)
        # .get(): per-document error entries carry no "found" key.
        if doc.get("found"):
            source = doc["_source"]
            row.append(source["text"])
            row.append(source["geo"])
            row.append(ts2date(source["timestamp"]))
            row.append(source["sentiment"])
            row.append(weiboinfo2url(source["uid"], source["mid"]))
            row.append(uid_url + source["uid"])
            row.append(source["uid"])
            try:
                uname = es_profile.get(
                    index=profile_index_name,
                    doc_type=profile_index_type,
                    id=source["uid"],
                )["_source"]["nick_name"]
            except Exception:
                # Best effort: a missing profile must not drop the row.
                uname = "unknown"
            row.append(uname)
        else:
            row.extend([""] * 8)
        results.append(row)
    return results
def get_text(top_list, date, user_info, style):
    """Build display rows (mid, retweeted, comment, details) for top weibos.

    Args:
        top_list: sequence of ``[mid, count]`` entries.
        date: suffix appended to ``pre_text_index`` to name the flow-text
            index that is queried.
        user_info: mapping holding the JSON-encoded ``*_detail`` fields; the
            one matching ``style`` is decoded to look up the second count.
        style: decides which count the ``top_list`` value is and which
            detail field supplies the other one:
            ``0`` -> count is column 2, ``origin_weibo_comment_detail``;
            ``1`` -> count is column 3, ``origin_weibo_retweeted_detail``;
            ``2`` -> count is column 2, ``retweeted_weibo_comment_detail``;
            otherwise -> count is column 3,
            ``retweeted_weibo_retweeted_detail``.

    Returns:
        A list with one row per entry of ``top_list``: mid, retweeted count,
        comment count, then text, geo, formatted timestamp, sentiment, weibo
        url, user url, uid and nickname (``"unknown"`` when the profile
        lookup fails). When the weibo is not found the last eight values are
        empty strings.
    """
    results = []
    index_flow_text = pre_text_index + date
    if not top_list:
        return results

    mid_list = [item[0] for item in top_list]
    search_result = es.mget(
        index=index_flow_text, doc_type=flow_text_index_type, body={"ids": mid_list}
    )["docs"]

    style_code = int(style)
    if style_code == 0:
        other_key = "origin_weibo_comment_detail"
    elif style_code == 1:
        other_key = "origin_weibo_retweeted_detail"
    elif style_code == 2:
        other_key = "retweeted_weibo_comment_detail"
    else:
        other_key = "retweeted_weibo_retweeted_detail"
    # Styles 0 and 2 put the top_list count before the looked-up count.
    count_first = style_code in (0, 2)
    # Decode once instead of once per row.
    other_counts = json.loads(user_info[other_key])

    # mget answers with one doc per requested id, in request order.
    for item, doc in zip(top_list, search_result):
        mid = item[0]
        count = item[1]
        other = other_counts.get(mid, 0)
        row = [mid, count, other] if count_first else [mid, other, count]

        # .get(): per-document error entries carry no "found" key.
        if doc.get("found"):
            source = doc["_source"]
            row.append(source["text"])
            row.append(source["geo"])
            row.append(ts2date(source["timestamp"]))
            row.append(source["sentiment"])
            row.append(weiboinfo2url(source["uid"], source["mid"]))
            row.append(uid_url + source["uid"])
            row.append(source["uid"])
            try:
                uname = es_profile.get(
                    index=profile_index_name,
                    doc_type=profile_index_type,
                    id=source["uid"],
                )["_source"]["nick_name"]
            except Exception:
                # Best effort: a missing profile must not drop the row.
                uname = "unknown"
            row.append(uname)
        else:
            row.extend([""] * 8)
        results.append(row)
    return results
def get_text(top_list, date, user_info, style):
    """Build display rows (mid, retweeted, comment, details) for top weibos.

    Args:
        top_list: sequence of ``[mid, count]`` entries.
        date: suffix appended to ``pre_text_index`` to name the flow-text
            index that is queried.
        user_info: mapping holding the JSON-encoded ``*_detail`` fields; the
            one matching ``style`` is decoded to look up the second count.
        style: decides which count the ``top_list`` value is and which
            detail field supplies the other one:
            ``0`` -> count is column 2, ``origin_weibo_comment_detail``;
            ``1`` -> count is column 3, ``origin_weibo_retweeted_detail``;
            ``2`` -> count is column 2, ``retweeted_weibo_comment_detail``;
            otherwise -> count is column 3,
            ``retweeted_weibo_retweeted_detail``.

    Returns:
        A list with one row per entry of ``top_list``: mid, retweeted count,
        comment count, then text, geo, formatted timestamp, sentiment, weibo
        url, user url, uid and nickname (``"unknown"`` when the profile
        lookup fails). When the weibo is not found the last eight values are
        empty strings.
    """
    results = []
    index_flow_text = pre_text_index + date
    if not top_list:
        return results

    mid_list = [item[0] for item in top_list]
    search_result = es.mget(
        index=index_flow_text, doc_type=flow_text_index_type, body={"ids": mid_list}
    )["docs"]

    style_code = int(style)
    if style_code == 0:
        other_key = "origin_weibo_comment_detail"
    elif style_code == 1:
        other_key = "origin_weibo_retweeted_detail"
    elif style_code == 2:
        other_key = "retweeted_weibo_comment_detail"
    else:
        other_key = "retweeted_weibo_retweeted_detail"
    # Styles 0 and 2 put the top_list count before the looked-up count.
    count_first = style_code in (0, 2)
    # Decode once instead of once per row.
    other_counts = json.loads(user_info[other_key])

    # mget answers with one doc per requested id, in request order.
    for item, doc in zip(top_list, search_result):
        mid = item[0]
        count = item[1]
        other = other_counts.get(mid, 0)
        row = [mid, count, other] if count_first else [mid, other, count]

        # .get(): per-document error entries carry no "found" key.
        if doc.get("found"):
            source = doc["_source"]
            row.append(source["text"])
            row.append(source["geo"])
            row.append(ts2date(source["timestamp"]))
            row.append(source["sentiment"])
            row.append(weiboinfo2url(source["uid"], source["mid"]))
            row.append(uid_url + source["uid"])
            row.append(source["uid"])
            try:
                uname = es_profile.get(
                    index=profile_index_name,
                    doc_type=profile_index_type,
                    id=source["uid"],
                )["_source"]["nick_name"]
            except Exception:
                # Best effort: a missing profile must not drop the row.
                uname = "unknown"
            row.append(uname)
        else:
            row.extend([""] * 8)
        results.append(row)
    return results
def statistics_influence_people(uid, date, style):
    """Describe the users who retweeted or commented on ``uid``'s weibos.

    The user's influence record for ``date`` is fetched from ``es_cluster``.
    Depending on ``style`` either its ``*_retweeted_detail`` or its
    ``*_comment_detail`` fields are decoded, reduced with ``filter_mid`` and
    handed to ``influenced_user_detail``.

    Args:
        uid: document id of the user in the influence index.
        date: day to inspect. Dashes are stripped to build the influence
            index name; the flow-text index name uses it unchanged.
        style: ``int(style) == 0`` selects retweet statistics, any other
            value selects comment statistics.

    Returns:
        Whatever ``influenced_user_detail`` returns (it must support item
        assignment) with an added ``"total_number"`` entry, or an empty dict
        when the influence record cannot be fetched.
    """
    influence_index = pre_index + str(date).replace("-", "")
    flow_text_index = pre_text_index + str(date)

    try:
        bci_result = es_cluster.get(
            index=influence_index, doc_type=influence_doctype, id=uid
        )["_source"]
    except Exception:
        # Best effort: a missing record or an unreachable cluster yields an
        # empty result instead of an error.
        return {}

    if int(style) == 0:  # retweeted
        origin_key = "origin_weibo_retweeted_detail"
        retweeted_key = "retweeted_weibo_retweeted_detail"
        detail_code = 3
    else:  # comment
        origin_key = "origin_weibo_comment_detail"
        retweeted_key = "retweeted_weibo_comment_detail"
        detail_code = 2

    # Only the two fields that belong to the requested style are decoded.
    origin_detail = json.loads(bci_result[origin_key])
    retweeted_detail = json.loads(bci_result[retweeted_key])
    total_number = sum(origin_detail.values()) + sum(retweeted_detail.values())

    origin_mid = filter_mid(origin_detail) if origin_detail else []
    retweeted_mid = filter_mid(retweeted_detail) if retweeted_detail else []

    # Map every retweeted weibo back to the root weibo it was derived from.
    root_mids = []
    if retweeted_mid:
        docs = es.mget(
            index=flow_text_index,
            doc_type=flow_text_index_type,
            body={"ids": retweeted_mid},
        )["docs"]
        # The mget payload lives under "_source"; reading "source" made every
        # lookup fall back to "0".
        root_mids = [doc.get("_source", {}).get("root_mid", "0") for doc in docs]

    results = influenced_user_detail(uid, date, origin_mid, root_mids, detail_code)
    results["total_number"] = total_number
    return results