def query_brust(index_name, field_name, range_1=0, range_2=50000, count=0):
    """Query "bci" docs whose `field_name` lies in [range_1, range_2).

    When count == 1, return the number of matching documents; otherwise
    return the ids of up to 1000 matching documents.
    """
    range_filter = {"range": {field_name: {"gte": range_1, "lt": range_2}}}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": range_filter
            }
        }
    }
    if count == 1:
        return es.count(index=index_name, doc_type="bci", body=query_body)['count']
    query_body['size'] = 1000
    hits = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
    return [hit['_id'] for hit in hits]
def search_portrait_history_active_info(uid, date, index_name="copy_user_portrait", doctype="user"):
    # date.formate: 20130901
    """For each date field in the series around `date`, find the maximum
    value of that field across the whole portrait index.

    Returns "NotFound" when `uid` has no portrait document, None on any
    other lookup error, otherwise {date_str: max_value}.
    """
    date_list = time_series(date)
    try:
        # existence check only: uid must have a portrait document
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except:
        return None
    date_max = {}
    for date_str in date_list:
        # one hit sorted descending yields the index-wide maximum
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{date_str: {'order': 'desc'}}]
        }
        try:
            max_item = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
        except Exception as e:
            raise e
        date_max[date_str] = max_item[0]['_source'][date_str]
    # BUG FIX: the dict was built but the function ended without returning it,
    # so the success path always yielded None
    return date_max
def get_user_influence(uid, date):
    """Return the user's BCI fields plus rank information for `date`.

    result["order_count"] is the 1-based rank by user_index;
    result["total_count"] is the number of docs in that day's index.
    Missing influence docs yield zeros for every BCI field.
    """
    index_name = pre_index + str(date).replace("-", "")
    try:
        bci_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        bci_info = {}
    result = {}
    for field in BCI_LIST:
        result[field] = bci_info.get(field, 0)
    user_index = result["user_index"]
    # count users strictly above this one's user_index
    rank_body = {
        "query": {
            "filtered": {
                "filter": {
                    "range": {"user_index": {"gt": user_index}}
                }
            }
        }
    }
    result["total_count"] = es_cluster.count(index=index_name, doc_type=influence_doctype)['count']
    higher = es_cluster.count(index=index_name, doc_type=influence_doctype, body=rank_body)['count']
    result["order_count"] = higher + 1
    return result
def get_user_influence(uid, date): date1 = str(date).replace("-","") index_name = pre_index + date1 result = bci_detail(date, uid) user_index = result["user_index"] query_body = { "query":{ "filtered":{ "filter":{ "range":{ "user_index":{ "gt": user_index } } } } } } total_count = es_cluster.count(index=index_name, doc_type=influence_doctype)['count'] order_count = es_cluster.count(index=index_name, doc_type=influence_doctype, body=query_body)['count'] result["total_count"] = total_count result["order_count"] = order_count + 1 return result
def query_brust(index_name, field_name, range_1=0, range_2=50000, count=0):
    """Duplicate of query_brust: count (count == 1) or list the ids of
    up to 1000 "bci" docs with `field_name` in [range_1, range_2)."""
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": {
                    "range": {field_name: {"gte": range_1, "lt": range_2}}
                }
            }
        }
    }
    if count == 1:
        counted = es.count(index=index_name, doc_type="bci", body=query_body)
        return counted['count']
    else:
        query_body['size'] = 1000
        hits = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
        profile_list = []
        for hit in hits:
            profile_list.append(hit['_id'])
        return profile_list
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Rank users in `index_name` by `sort_order` (descending).

    When `top` is True, return only the single highest value of the sort
    field. Otherwise return per-user rows:
      [rank, photo_url, uid, nick_name, metric, (weibo_url,) in_portrait]
    where weibo_url is added only for the *_top_number sort orders.
    """
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}]
    }
    if top:
        # only the largest value of the sort field is wanted
        result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits'][0]['_source'][sort_order]
    else:
        search_result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        uid_list = []
        for item in search_result:
            uid_list.append(item['_id'])
        # batch-fetch profile and portrait docs for every ranked uid
        profile_result = es_profile.mget(index="weibo_user",doc_type="user", body={"ids":uid_list}, _source=True)['docs']
        portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids":uid_list}, _source=True)['docs']
        result = []
        rank = 1
        for i in range(len(search_result)):
            info = ['','','','']
            info[0] = rank
            if profile_result[i]['found']:
                info[1] = profile_result[i]['_source'].get('photo_url','')
                info[3] = profile_result[i]['_source'].get('nick_name','')
            info[2] = search_result[i].get('_id','')
            if sort_order in ["user_index","origin_weibo_retweeted_brust_average","origin_weibo_comment_brust_average"]:
                # plain metric orders: metric value + in-portrait flag
                info.append(search_result[i]['_source'][sort_order])
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_retweeted_top_number":
                # top-retweet order: also link to the top-retweeted weibo
                info.append(search_result[i]['_source']['origin_weibo_retweeted_top_number'])
                mid = search_result[i]['_source']['origin_weibo_top_retweeted_id']
                info.append(weiboinfo2url(info[2],mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_comment_top_number":
                # top-comment order: same row shape as the retweet branch
                info.append(search_result[i]['_source']['origin_weibo_comment_top_number'])
                mid = search_result[i]['_source']['origin_weibo_top_comment_id']
                info.append(weiboinfo2url(info[2],mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            rank += 1
            result.append(info)
    return result
def update_record_index(uid_list):
    """Bulk-reset low_number to 0 for every uid in `uid_list`."""
    bulk_action = []
    for each_uid in uid_list:
        payload = {}
        payload['uid'] = str(each_uid)
        payload['low_number'] = 0
        xdata = expand_update_action(payload)
        # expand_update_action yields an (action, doc) pair for the bulk API
        bulk_action.append(xdata[0])
        bulk_action.append(xdata[1])
    es.bulk(bulk_action, index=index_destination, doc_type=index_destination_doctype, timeout=30)
def count_es(es, index_name, doctype, sort_order="user_index", range_1=0, range_2=3000):
    """Count docs in `index_name`/`doctype` whose `sort_order` field falls
    in the half-open interval [range_1, range_2)."""
    range_filter = {sort_order: {"gte": range_1, "lt": range_2}}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": {"range": range_filter}
            }
        }
    }
    return es.count(index=index_name, doc_type=doctype, body=query_body)['count']
def search_influence_detail(uid_list, index_name, doctype):
    """Fetch influence documents for `uid_list` via mget.

    NOTE(review): although a whole uid list is requested, only the FIRST
    document's _source is returned -- presumably callers pass a
    single-element list; confirm before using with multiple uids.
    Raises KeyError if the first doc was not found (no '_source').
    """
    result = es.mget(index=index_name, doc_type=doctype, body={"ids": uid_list}, _source=True)["docs"]
    return result[0]['_source']
def influenced_detail(uid, date, style):
    """Return get_text() output for one of the user's four "top weibo"
    lists on `date`, chosen by `style`:
      0 -> origin/retweeted, 1 -> origin/comment,
      2 -> retweeted/retweeted, other -> retweeted/comment.
    Returns {} when the user has no influence doc for that day."""
    index_name = pre_index + str(date).replace("-", "")
    style = int(style)
    try:
        user_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        return {}
    # the four top lists are stored as JSON strings on the influence doc
    top_lists = (
        json.loads(user_info["origin_weibo_retweeted_top"]),
        json.loads(user_info["origin_weibo_comment_top"]),
        json.loads(user_info["retweeted_weibo_retweeted_top"]),
        json.loads(user_info["retweeted_weibo_comment_top"]),
    )
    selected = top_lists[style] if style in (0, 1, 2) else top_lists[3]
    return get_text(selected, date, user_info, style)
def tag_vector(uid, date):
    """Classify the user's influence on `date` into one tag.

    Tags: '3' high retweet + high comment, '1' high retweet only,
    '2' high comment only, '4' neither, '0' no influence doc.
    Returns a single-element list containing the tag.
    """
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    result = []
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        result.append(influence_tag["0"])
        return result
    origin_retweeted = json.loads(bci_result["origin_weibo_retweeted_detail"])
    retweeted_retweeted = json.loads(bci_result["retweeted_weibo_retweeted_detail"])
    origin_comment = json.loads(bci_result["origin_weibo_comment_detail"])
    retweeted_comment = json.loads(bci_result["retweeted_weibo_comment_detail"])
    # BUG FIX: the totals previously paired origin_retweeted with
    # origin_comment (and the two retweeted_* dicts together), mixing
    # retweet and comment counts; pair retweet-with-retweet and
    # comment-with-comment, as statistics_influence_people does.
    sum_retweeted = sum(origin_retweeted.values()) + sum(retweeted_retweeted.values())
    sum_comment = sum(origin_comment.values()) + sum(retweeted_comment.values())
    if sum_retweeted >= retweeted_threshold:
        if sum_comment >= comment_threshold:
            tag = influence_tag['3']
        else:
            tag = influence_tag['1']
    else:
        if sum_comment >= comment_threshold:
            tag = influence_tag['2']
        else:
            tag = influence_tag['4']
    result.append(tag)
    return result
def influenced_detail(uid, date, style):
    """Duplicate of influenced_detail: return get_text() output for one of
    the user's four "top weibo" lists, chosen by `style`:
      0 -> origin/retweeted, 1 -> origin/comment,
      2 -> retweeted/retweeted, other -> retweeted/comment.
    Returns {} when the user has no influence doc for `date`."""
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    #detail_text = {}
    style = int(style)
    try:
        user_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        result = {}
        return result
    # the four top lists are stored as JSON strings on the influence doc
    origin_retweetd = json.loads(user_info["origin_weibo_retweeted_top"])
    origin_comment = json.loads(user_info['origin_weibo_comment_top'])
    retweeted_retweeted = json.loads( user_info["retweeted_weibo_retweeted_top"])
    retweeted_comment = json.loads(user_info["retweeted_weibo_comment_top"])
    if style == 0:
        detail_text = get_text(origin_retweetd, date, user_info, style)
    elif style == 1:
        detail_text = get_text(origin_comment, date, user_info, style)
    elif style == 2:
        detail_text = get_text(retweeted_retweeted, date, user_info, style)
    else:
        detail_text = get_text(retweeted_comment, date, user_info, style)
    #detail_text["origin_retweeted"] = get_text(origin_retweetd, date)
    #detail_text["origin_comment"] = get_text(origin_comment, date)
    #detail_text["retweeted_retweeted"] = get_text(retweeted_retweeted, date)
    #detail_text["retweeted_comment"] = get_text(retweeted_comment, date)
    return detail_text
def get_user_detail(date, input_result, status):
    """Assemble display rows for a list of users.

    `status` selects both the input shape and the row layout:
      'show_in'         -> input_result is a uid list
      'show_compute'    -> input_result is {uid: json '[in_date, status]'}
      'show_in_history' -> input_result is {uid: in_status}
    Every row starts [uid, uname, location, fansnum, statusnum, influence].
    """
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    # date 'all' means "use today's bci index"
    if date!='all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            # log-normalize against the day's maximum onto a 0-100 scale
            influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            results.append([uid, uname, location, fansnum, statusnum, influence])
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            # compute status '1' is displayed as '3'
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results
def statistics_influence_people(uid, date, style):
    # output: different retweeted and comment, uids' domain distribution, topic distribution, registeration geo distribution
    """Build retweet (style 0) or comment (otherwise) statistics from the
    influence doc's per-weibo detail dicts via influenced_user_detail.

    Returns {} when the user has no influence document for the day.
    """
    results = {}
    # retwweted weibo people and comment weibo people
    date1 = str(date).replace("-", "")
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        return results
    origin_retweeted_mid = []  # origin weibo mid
    retweeted_retweeted_mid = []  # retweeted weibo mid
    origin_comment_mid = []
    retweeted_comment_mid = []
    origin_retweeted = json.loads(bci_result["origin_weibo_retweeted_detail"])
    retweeted_retweeted = json.loads(bci_result["retweeted_weibo_retweeted_detail"])
    origin_comment = json.loads(bci_result["origin_weibo_comment_detail"])
    retweeted_comment = json.loads(bci_result["retweeted_weibo_comment_detail"])
    retweeted_total_number = sum(origin_retweeted.values()) + sum(retweeted_retweeted.values())
    comment_total_number = sum(origin_comment.values()) + sum(retweeted_comment.values())
    if origin_retweeted:
        origin_retweeted_mid = filter_mid(origin_retweeted)
    if retweeted_retweeted:
        retweeted_retweeted_mid = filter_mid(retweeted_retweeted)
    if origin_comment:
        origin_comment_mid = filter_mid(origin_comment)
    if retweeted_comment:
        retweeted_comment_mid = filter_mid(retweeted_comment)
    if int(style) == 0:  # retweeted
        retweeted_origin = []
        if retweeted_retweeted_mid:
            text_result = es.mget(index=index_flow_text, doc_type=flow_text_index_type, body={"ids": retweeted_retweeted_mid})["docs"]
            for item in text_result:
                # BUG FIX: mget docs expose the document under "_source",
                # not "source" -- the old key never matched, so every mid
                # silently fell back to "0"
                mid = item.get("_source", {}).get("root_mid", "0")
                retweeted_origin.append(mid)
        retweeted_results = influenced_user_detail(uid, date, origin_retweeted_mid, retweeted_origin, 3)
        retweeted_results["total_number"] = retweeted_total_number
        results = retweeted_results
    else:
        retweeted_origin = []
        if retweeted_comment_mid:
            text_result = es.mget(index=index_flow_text, doc_type=flow_text_index_type, body={"ids": retweeted_comment_mid})["docs"]
            for item in text_result:
                # BUG FIX: same "_source" key correction as above
                mid = item.get("_source", {}).get("root_mid", "0")
                retweeted_origin.append(mid)
        comment_results = influenced_user_detail(uid, date, origin_comment_mid, retweeted_origin, 2)
        comment_results["total_number"] = comment_total_number
        results = comment_results
    return results
def get_recommentation(submit_user):
    """Collect the last 7 days of this submitter's recommended uids and
    decorate each with profile info, normalized influence and an
    in-portrait flag.

    Returns rows: [date, uid, uname, location, fansnum, statusnum,
    influence, in_portrait].
    """
    if RUN_TYPE:
        now_ts = time.time()
    else:
        # test mode pins "now" to a fixed date
        now_ts = datetime2ts(RUN_TEST_TIME)
    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i*DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(iter_date)
        # the bci index lags the recommendation day by one day
        bci_date = ts2datetime(iter_ts - DAY)
        # NOTE(review): variable reused -- first the redis hash name, then
        # the list of uids stored under that hash
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci", body={'ids':submit_user_recomment}, _source=True)['docs']
            user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':submit_user_recomment}, _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            # NOTE(review): inner loop reuses `i`; harmless because the outer
            # `for` rebinds it each iteration, but easy to misread
            for i in range(len(submit_user_recomment)):
                uid = submit_user_recomment[i]
                bci_dict = user_bci_result[i]
                profile_dict = user_profile_result[i]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    # log-normalize against the day's maximum onto 0-100
                    influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                # NOTE(review): recomment_day is assigned but never used
                recomment_day = iter_date
                result.append([iter_date, uid, uname, location, fansnum, statusnum, influence, in_portrait])
    return result
def statistics_influence_people(uid, date, style):
    # output: different retweeted and comment, uids' domain distribution, topic distribution, registeration geo distribution
    """Gather the ids of the user's origin weibo (message_type 1) and the
    root mids of his retweets (message_type 3) on `date`, then delegate to
    influenced_user_detail (3 = retweet view, 2 = comment view).

    Returns {} when the user has no influence document for the day.
    """
    results = {}
    # retwweted weibo people and comment weibo people
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        return results
    origin_mid = []  # origin weibo mid
    retweeted_mid = []  # retweeted weibo mid
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": []
                    }
                }
            }
        },
        "size": 1000
    }
    body_1 = copy.deepcopy(query_body)
    body_2 = copy.deepcopy(query_body)
    # origin weibo: message_type 1 posted by this uid
    body_1["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"message_type": 1}}, {"term": {"uid": uid}}])
    result_1 = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=body_1)["hits"]["hits"]
    if result_1:
        for item in result_1:
            origin_mid.append(item['_id'])
    # BUG FIX: the retweet filter used to be appended to body_1 (already
    # consumed above) while the search ran with the never-filtered body_2,
    # returning arbitrary docs; populate body_2 instead.
    body_2["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"message_type": 3}}, {"term": {"uid": uid}}])
    result_2 = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=body_2)["hits"]["hits"]
    if result_2:
        for item in result_2:
            if item['_source'].get('root_mid', ''):
                retweeted_mid.append(item['_source']['root_mid'])
    if int(style) == 0:  # retweeted
        results = influenced_user_detail(uid, date, origin_mid, retweeted_mid, 3)
    else:
        results = influenced_user_detail(uid, date, origin_mid, retweeted_mid, 2)
    return results
def comment_on_influence(uid, date):
    """Produce textual conclusions about a user's influence on `date`.

    Returns [result, underline]: result[0] is the overall conclusion chosen
    by thresholding user_index into six levels; then, for each of 4 metrics,
    a (total conclusion, burst conclusion) pair -- empty strings when the
    metric does not exceed its threshold -- with matching underline entries.
    """
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    result = []
    underline = []
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        # no influence doc for that day: lowest-level conclusion only
        description = CURRENT_INFLUENCE_CONCLUSION['0']
        result.append(description)
        return ([result, underline])
    user_index = bci_result['user_index']
    # map user_index onto one of six conclusion levels via the thresholds
    if user_index < CURRNET_INFLUENCE_THRESHOULD[0]:
        description = CURRENT_INFLUENCE_CONCLUSION['0']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[0] and user_index < CURRNET_INFLUENCE_THRESHOULD[1]:
        description = CURRENT_INFLUENCE_CONCLUSION['1']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[1] and user_index < CURRNET_INFLUENCE_THRESHOULD[2]:
        description = CURRENT_INFLUENCE_CONCLUSION['2']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[2] and user_index < CURRNET_INFLUENCE_THRESHOULD[3]:
        description = CURRENT_INFLUENCE_CONCLUSION['3']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[3] and user_index < CURRNET_INFLUENCE_THRESHOULD[4]:
        description = CURRENT_INFLUENCE_CONCLUSION['4']
    else:
        description = CURRENT_INFLUENCE_CONCLUSION['5']
    result.append(description)
    for i in range(4):
        if bci_result[INFLUENCE_TOTAL_LIST[i]] > INFLUENCE_TOTAL_THRESHOULD[i]:
            result.append(INFLUENCE_TOTAL_CONCLUSION[i])
            # burst conclusion only if the total threshold was passed too
            if bci_result[INFLUENCE_BRUST_LIST[i]] > INFLUENCE_BRUST_THRESHOULD[i]:
                result.append(INFLUENCE_BRUST_CONCLUSION[i])
                underline.append(UNDERLINE_CONCLUSION[i])
            else:
                result.append('')
                underline.append('')
        else:
            result.extend(['', ''])
            underline.append('')
    return [result, underline]
def search_portrait_history_active_info(uid, date, index_name=copy_portrait_index_name, doctype=copy_portrait_index_name):
    # date.formate: 20130901
    """For each date field in the series around `date`, find the maximum
    value of that field across the portrait index.

    Returns "NotFound" when `uid` has no portrait doc, None on other
    lookup errors.
    NOTE(review): `date_max` is built but the function ends without
    returning it, so the success path yields None -- looks like a missing
    `return date_max`; confirm against callers.
    NOTE(review): `doctype` defaults to the *index*-name constant -- verify
    this is intentional.
    """
    date_list = time_series(date)
    try:
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except:
        return None
    date_max = {}
    for date_str in date_list:
        # one hit sorted descending gives the index-wide maximum
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{
                date_str: {
                    'order': 'desc'
                }
            }]
        }
        try:
            max_item = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
        except Exception, e:
            raise e
        date_max[date_str] = max_item[0]['_source'][date_str]
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Page through `index_name` sorted by `field` descending, returning
    the _source of up to `size` hits beginning at offset `start`."""
    query_body = {
        "query": {"match_all": {}},
        "size": size,
        "from": start,
        "sort": [{field: {"order": "desc"}}]
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    return [hit['_source'] for hit in hits]
def get_evaluate_max(index_name):
    """Return {metric: index-wide maximum} for each influence metric
    (currently only 'user_index').

    Callers divide by the returned dict's 'user_index' entry to normalize
    influence scores, so the dict must be returned.
    """
    max_result = {}
    index_type = 'bci'
    evaluate_index = ['user_index']
    for evaluate in evaluate_index:
        # one hit sorted descending yields the maximum of the field
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        try:
            result = es_cluster.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        except Exception as e:
            raise e
        max_evaluate = result[0]['_source'][evaluate]
        max_result[evaluate] = max_evaluate
    # BUG FIX: the dict was built but never returned, so every caller doing
    # max_evaluate_influ['user_index'] would crash on None
    return max_result
def comment_on_influence(uid, date):
    """Build textual conclusions about a user's influence for `date`.

    Returns [result, underline]: result[0] is the overall level conclusion,
    followed by (total, burst) conclusion pairs for four metrics; underline
    carries one marker per metric.
    """
    index_name = pre_index + str(date).replace("-", "")
    index_flow_text = pre_text_index + date
    result = []
    underline = []
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        result.append(CURRENT_INFLUENCE_CONCLUSION["0"])
        return [result, underline]
    user_index = bci_result["user_index"]
    # first threshold the score falls below determines the level; 5 if none
    level = 5
    for idx in range(5):
        if user_index < CURRNET_INFLUENCE_THRESHOULD[idx]:
            level = idx
            break
    result.append(CURRENT_INFLUENCE_CONCLUSION[str(level)])
    for i in range(4):
        if bci_result[INFLUENCE_TOTAL_LIST[i]] > INFLUENCE_TOTAL_THRESHOULD[i]:
            result.append(INFLUENCE_TOTAL_CONCLUSION[i])
            # burst conclusion only when the total threshold was passed too
            if bci_result[INFLUENCE_BRUST_LIST[i]] > INFLUENCE_BRUST_THRESHOULD[i]:
                result.append(INFLUENCE_BRUST_CONCLUSION[i])
                underline.append(UNDERLINE_CONCLUSION[i])
            else:
                result.append("")
                underline.append("")
        else:
            result.extend(["", ""])
            underline.append("")
    return [result, underline]
def comment_on_influence(uid, date):
    """Duplicate of comment_on_influence: textual influence conclusions.

    Returns [result, underline] -- result[0] is the overall conclusion from
    thresholding user_index into six levels, followed by (total, burst)
    conclusion pairs per metric; underline has one entry per metric.
    """
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    result = []
    underline = []
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except:
        # no influence doc: lowest-level conclusion only
        description = CURRENT_INFLUENCE_CONCLUSION['0']
        result.append(description)
        return ([result, underline])
    user_index = bci_result['user_index']
    # map user_index onto one of six conclusion levels via the thresholds
    if user_index < CURRNET_INFLUENCE_THRESHOULD[0]:
        description = CURRENT_INFLUENCE_CONCLUSION['0']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[0] and user_index < CURRNET_INFLUENCE_THRESHOULD[1]:
        description = CURRENT_INFLUENCE_CONCLUSION['1']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[1] and user_index < CURRNET_INFLUENCE_THRESHOULD[2]:
        description = CURRENT_INFLUENCE_CONCLUSION['2']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[2] and user_index < CURRNET_INFLUENCE_THRESHOULD[3]:
        description = CURRENT_INFLUENCE_CONCLUSION['3']
    elif user_index >= CURRNET_INFLUENCE_THRESHOULD[3] and user_index < CURRNET_INFLUENCE_THRESHOULD[4]:
        description = CURRENT_INFLUENCE_CONCLUSION['4']
    else:
        description = CURRENT_INFLUENCE_CONCLUSION['5']
    result.append(description)
    for i in range(4):
        if bci_result[INFLUENCE_TOTAL_LIST[i]] > INFLUENCE_TOTAL_THRESHOULD[i]:
            result.append(INFLUENCE_TOTAL_CONCLUSION[i])
            # burst conclusion only if the total threshold was passed too
            if bci_result[INFLUENCE_BRUST_LIST[i]] > INFLUENCE_BRUST_THRESHOULD[i]:
                result.append(INFLUENCE_BRUST_CONCLUSION[i])
                underline.append(UNDERLINE_CONCLUSION[i])
            else:
                result.append('')
                underline.append('')
        else:
            result.extend(['',''])
            underline.append('')
    return [result, underline]
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Duplicate of search_k: return the _source dicts of up to `size`
    docs sorted by `field` descending, starting at offset `start`."""
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": size,
        "from": start,
        "sort": [{
            field: {
                "order": "desc"
            }
        }]
    }
    result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    search_list = []
    for item in result:
        search_list.append(item['_source'])
    return search_list
def count_es(es, index_name, doctype, sort_order="user_index", range_1=0, range_2=3000):
    """Duplicate of count_es: number of docs whose `sort_order` field is
    in [range_1, range_2)."""
    query_body = {"query": {"filtered": {
        "query": {"match_all": {}},
        "filter": {"range": {sort_order: {"gte": range_1, "lt": range_2}}},
    }}}
    response = es.count(index=index_name, doc_type=doctype, body=query_body)
    return response['count']
def search_portrait_history_active_info(uid, date, index_name="copy_user_portrait", doctype="user"):
    # date.formate: 20130901
    """Classify a user's 7-day influence series as stable/volatile and
    high/medium/low.

    Returns [series, description] on success, "NotFound" when the uid has
    no portrait doc, None on other lookup errors.
    """
    date_list = time_series(date)
    try:
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except:
        return None
    # chronological series of daily influence values (0 when absent)
    in_list = [result.get(day, 0) for day in sorted(date_list)]
    #print 'in_list:', in_list
    max_influence = max(in_list)
    min_influence = min(in_list)
    ave_influence = sum(in_list) / float(7)
    spread = max_influence - min_influence
    # spread <= 400 counts as stable; average buckets: >=900 high,
    # >=500 medium, otherwise low
    if ave_influence >= 900:
        mark = u'平稳高影响力' if spread <= 400 else u'波动高影响力'
    elif ave_influence >= 500:
        mark = u'平稳一般影响力' if spread <= 400 else u'波动一般影响力'
    else:
        mark = u'平稳低影响力' if spread <= 400 else u'波动低影响力'
    description = [u'该用户为', mark]
    return [in_list, description]
sensitive_string = "sensitive_score_" + tmp_ts query_sensitive_body = { "query":{ "match_all":{} }, "size":1, "sort":{sensitive_string:{"order":"desc"}} } try: top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits'] top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0] except Exception, reason: print Exception, reason top_sensitive = 400 index_type = 'bci' user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs'] user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs'] bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs'] sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs'] max_evaluate_influ = get_evaluate_max(index_name) for i in range(0, len(uid_list)): uid = uid_list[i] bci_dict = user_bci_result[i] profile_dict = user_profile_result[i] bci_history_dict = bci_history_result[i] sensitive_history_dict = sensitive_history_result[i] #print sensitive_history_dict try: bci_source = bci_dict['_source'] except: bci_source = None
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Assemble display rows for a list of users.

    `status` selects the input shape and the row layout:
      'show_in'         -> input_result is a uid list
      'show_compute'    -> input_result is {uid: json '[in_date, status]'}
      'show_in_history' -> input_result is {uid: in_status}
    user_type "sensitive" appends the user's sensitive words; a truthy
    `auth` appends admin recommendation info to 'show_in' rows.
    """
    results = []
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    # date 'all' means "use today's bci index"
    if date != 'all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            # log-normalize against the day's maximum onto a 0-100 scale
            influence = math.log(influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    # BUG FIX: was misspelled "senstive_words", leaving
                    # sensitive_words unbound (NameError on the next line)
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                # NOTE(review): assumes the submit hash has an entry for
                # every uid -- r.hget returning None would crash json.loads
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = (tmp_data['operation']).split('&')
                admin_list = []
                admin_list.append(tmp_data['system'])
                admin_list.append(list(set(recommend_list)))
                admin_list.append(len(recommend_list))
                results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            # compute status '1' is displayed as '3'
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                # BUG FIX: initialize so a missing redis entry cannot leave
                # sensitive_words unbound (or stale from a prior iteration)
                sensitive_words = []
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Duplicate of search_top_index: rank users by `sort_order` desc.

    When `top` is True, return only the largest value of the sort field.
    Otherwise return per-user rows:
      [rank, photo_url, uid, nick_name, metric, (weibo_url,) in_portrait]
    where weibo_url is added only for the *_top_number sort orders.
    """
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{
            sort_order: {
                "order": "desc"
            }
        }]
    }
    if top:
        # only the single largest value of the sort field is wanted
        result = es.search( index=index_name, doc_type=index_type, body=query_body)['hits']['hits'][0]['_source'][sort_order]
    else:
        search_result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        uid_list = []
        for item in search_result:
            uid_list.append(item['_id'])
        # batch-fetch profile and portrait docs for every ranked uid
        profile_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list}, _source=True)['docs']
        portrait_result = es_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": uid_list}, _source=True)['docs']
        result = []
        rank = 1
        for i in range(len(search_result)):
            info = ['', '', '', '']
            info[0] = rank
            if profile_result[i]['found']:
                info[1] = profile_result[i]['_source'].get('photo_url', '')
                info[3] = profile_result[i]['_source'].get('nick_name', '')
            info[2] = search_result[i].get('_id', '')
            if sort_order in [ "user_index", "origin_weibo_retweeted_brust_average", "origin_weibo_comment_brust_average" ]:
                # plain metric orders: metric value + in-portrait flag
                info.append(search_result[i]['_source'][sort_order])
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_retweeted_top_number":
                # top-retweet order: also link to the top-retweeted weibo
                info.append(search_result[i]['_source'] ['origin_weibo_retweeted_top_number'])
                mid = search_result[i]['_source'][ 'origin_weibo_top_retweeted_id']
                info.append(weiboinfo2url(info[2], mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_comment_top_number":
                # top-comment order: same row shape as the retweet branch
                info.append(search_result[i]['_source'] ['origin_weibo_comment_top_number'])
                mid = search_result[i]['_source'][ 'origin_weibo_top_comment_id']
                info.append(weiboinfo2url(info[2], mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            rank += 1
            result.append(info)
    return result
def influenced_detail(uid, date, style):
    """Return the influence detail texts of one user for one day.

    Loads the user's per-day BCI document, keeps only the weibo ids that
    still exist in that day's flow-text index, and renders the top 20
    entries of the category selected by ``style``:
      0 -> retweets of the user's original weibo
      1 -> comments on the user's original weibo
      2 -> retweets of the user's retweeted weibo
      other -> comments on the user's retweeted weibo
    Returns {} when the user has no BCI document for that day.
    """
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_text = "flow_text_" + date
    style = int(style)
    try:
        user_info = es_cluster.get(index=index_name,
                                   doc_type=influence_doctype,
                                   id=uid)["_source"]
    except:
        return {}
    # mid -> count mappings stored as json strings in the BCI document
    origin_retweetd_dict = json.loads(user_info["origin_weibo_retweeted_detail"])
    origin_comment_dict = json.loads(user_info['origin_weibo_comment_detail'])
    retweeted_retweeted_dict = json.loads(user_info["retweeted_weibo_retweeted_detail"])
    retweeted_comment_dict = json.loads(user_info["retweeted_weibo_comment_detail"])
    origin_retweetd = sorted(origin_retweetd_dict.items(), key=lambda x: x[1], reverse=True)
    origin_comment = sorted(origin_comment_dict.items(), key=lambda x: x[1], reverse=True)
    retweeted_retweeted = sorted(retweeted_retweeted_dict.items(), key=lambda x: x[1], reverse=True)
    retweeted_comment = sorted(retweeted_comment_dict.items(), key=lambda x: x[1], reverse=True)

    def _existing_mids(message_type):
        # mids of this user with the given message type that still exist
        # in the day's flow-text index
        query_body = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {"term": {"message_type": message_type}},
                                {"term": {"uid": uid}}
                            ]
                        }
                    }
                }
            },
            "size": 10000
        }
        hits = es.search(index=index_text, doc_type="text",
                         body=query_body)['hits']['hits']
        return set(item['_id'] for item in hits)

    origin_set = _existing_mids(1)     # original weibo
    retweeted_set = _existing_mids(3)  # retweeted weibo
    # BUG FIX: the original iterated the (still empty) retweeted_set instead
    # of the search hits, so retweeted_set was always empty; it also removed
    # items from the lists while iterating them, which skips elements.
    # Filtering into new lists fixes both problems.
    origin_retweetd = [item for item in origin_retweetd if item[0] in origin_set]
    origin_comment = [item for item in origin_comment if item[0] in origin_set]
    retweeted_retweeted = [item for item in retweeted_retweeted if item[0] in retweeted_set]
    retweeted_comment = [item for item in retweeted_comment if item[0] in retweeted_set]
    if style == 0:
        detail_text = get_text(origin_retweetd[:20], date, user_info, style)
    elif style == 1:
        detail_text = get_text(origin_comment[:20], date, user_info, style)
    elif style == 2:
        detail_text = get_text(retweeted_retweeted[:20], date, user_info, style)
    else:
        detail_text = get_text(retweeted_comment[:20], date, user_info, style)
    return detail_text
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Build display rows for a batch of users on one day.

    Args:
        date: 'yyyy-mm-dd', or 'all' to use the current day's BCI index.
        input_result: uid list (status 'show_in') or a uid-keyed dict
            (status 'show_compute' / 'show_in_history').
        status: 'show_in' | 'show_compute' | 'show_in_history' - selects
            the row layout appended to the result.
        user_type: "sensitive" adds the user's sensitive words to rows.
        auth: when truthy, 'show_in' rows get admin recommendation columns.

    Returns:
        list of per-user rows (layout depends on ``status``).
    """
    # influence is read from the previous day's BCI index
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    if date != 'all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type,
                                      body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user',
                                               body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            # normalize influence to 0-100 on a log scale relative to the
            # day's maximum user_index
            influence = bci_source['user_index']
            influence = math.log(influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                # BUG FIX: r.hget returns None for uids never submitted;
                # guard instead of crashing inside json.loads.
                submit_data = r.hget(hashname_submit, uid)
                if submit_data:
                    tmp_data = json.loads(submit_data)
                    recommend_list = (tmp_data['operation']).split('&')
                    admin_list = []
                    admin_list.append(tmp_data['system'])
                    admin_list.append(list(set(recommend_list)))
                    admin_list.append(len(recommend_list))
                    results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                # BUG FIX: sensitive_words could be unbound (or stale from a
                # previous iteration) when no sensitive data exists; default
                # to [] so a row is always produced.
                sensitive_words = []
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results
def get_recommentation(submit_user):
    """List the users this admin recommended over the last 7 days.

    For each day, reads the admin's recommendation hash, looks up each
    uid's BCI influence (previous day's index) and profile, and marks
    whether the uid is already in the "compute" portrait set.

    Returns rows of
    [date, uid, uname, location, fansnum, statusnum, influence, in_portrait].
    """
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    in_portrait_set = set(r.hkeys("compute"))
    result = []
    # FIX: the original reused loop variable `i` for both the day loop and
    # the per-uid loop (confusing shadowing) and kept an unused local
    # `recomment_day`; cleaned up with identical behavior.
    for day_offset in range(7):
        iter_ts = now_ts - day_offset * DAY
        iter_date = ts2datetime(iter_ts)
        recomment_hash = "recomment_" + submit_user + "_" + str(iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        recomment_uid_list = r.hkeys(recomment_hash)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        if not es_cluster.indices.exists(index=bci_index_name):
            continue
        if not recomment_uid_list:
            continue
        user_bci_result = es_cluster.mget(
            index=bci_index_name,
            doc_type="bci",
            body={'ids': recomment_uid_list},
            _source=True)['docs']
        user_profile_result = es_user_profile.mget(
            index='weibo_user',
            doc_type='user',
            body={'ids': recomment_uid_list},
            _source=True)['docs']
        max_evaluate_influ = get_evaluate_max(bci_index_name)
        for j in range(len(recomment_uid_list)):
            uid = recomment_uid_list[j]
            try:
                bci_source = user_bci_result[j]['_source']
            except:
                bci_source = None
            if bci_source:
                # normalize influence to 0-100 on a log scale
                influence = bci_source['user_index']
                influence = math.log(
                    influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
                influence = influence * 100
            else:
                influence = ''
            try:
                profile_source = user_profile_result[j]['_source']
            except:
                profile_source = None
            if profile_source:
                uname = profile_source['nick_name']
                location = profile_source['user_location']
                fansnum = profile_source['fansnum']
                statusnum = profile_source['statusnum']
            else:
                uname = ''
                location = ''
                fansnum = ''
                statusnum = ''
            in_portrait = "1" if uid in in_portrait_set else "0"
            result.append([
                iter_date, uid, uname, location, fansnum, statusnum,
                influence, in_portrait
            ])
    return result
def statistics_influence_people(uid, date, style):
    """Statistics of the people influenced by ``uid`` on one day.

    Collects the user's original-weibo mids and the root mids of weibo
    the user retweeted, then delegates to influenced_user_detail:
    style 0 -> users who retweeted; otherwise users who commented.
    Returns {} when the user has no BCI document for that day.
    """
    results = {}
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    try:
        es_cluster.get(index=index_name, doc_type=influence_doctype,
                       id=uid)["_source"]
    except:
        # no BCI document for this user/day
        return results
    origin_mid = []     # mids of the user's original weibo
    retweeted_mid = []  # root mids of the weibo the user retweeted
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": []
                    }
                }
            }
        },
        "size": 1000
    }
    body_1 = copy.deepcopy(query_body)
    body_2 = copy.deepcopy(query_body)
    body_1["query"]["filtered"]["filter"]["bool"]["must"].extend(
        [{"term": {"message_type": 1}}, {"term": {"uid": uid}}])
    result_1 = es.search(index=index_flow_text, doc_type=flow_text_index_type,
                         body=body_1)["hits"]["hits"]
    for item in result_1:
        origin_mid.append(item['_id'])
    # BUG FIX: the retweet filters were appended to body_1 instead of body_2,
    # so the second search ran completely unfiltered.
    body_2["query"]["filtered"]["filter"]["bool"]["must"].extend(
        [{"term": {"message_type": 3}}, {"term": {"uid": uid}}])
    result_2 = es.search(index=index_flow_text, doc_type=flow_text_index_type,
                         body=body_2)["hits"]["hits"]
    for item in result_2:
        if item['_source'].get('root_mid', ''):
            retweeted_mid.append(item['_source']['root_mid'])
    # (dead code removed: unused json.loads of the four *_detail fields and
    # a large triple-quoted block of commented-out code)
    if int(style) == 0:  # retweeted
        results = influenced_user_detail(uid, date, origin_mid, retweeted_mid, 3)
    else:                # comment
        results = influenced_user_detail(uid, date, origin_mid, retweeted_mid, 2)
    return results
# NOTE(review): this redefines statistics_influence_people and shadows the
# earlier definition of the same name in this module - confirm which one is
# meant to win before removing either.
def statistics_influence_people(uid, date, style):
    """Statistics of the people influenced by ``uid`` on one day.

    Reads the retweet/comment detail maps from the user's BCI document,
    filters the mids, resolves root mids of retweeted weibo from the
    flow-text index, then delegates to influenced_user_detail:
    style 0 -> users who retweeted; otherwise users who commented.
    Returns {} when the user has no BCI document for that day.
    """
    results = {}
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    try:
        bci_result = es_cluster.get(index=index_name,
                                    doc_type=influence_doctype,
                                    id=uid)["_source"]
    except:
        return results
    origin_retweeted_mid = []     # mids of original weibo that got retweets
    retweeted_retweeted_mid = []  # mids of retweeted weibo that got retweets
    origin_comment_mid = []
    retweeted_comment_mid = []
    origin_retweeted = json.loads(bci_result["origin_weibo_retweeted_detail"])
    retweeted_retweeted = json.loads(
        bci_result["retweeted_weibo_retweeted_detail"])
    origin_comment = json.loads(bci_result["origin_weibo_comment_detail"])
    retweeted_comment = json.loads(
        bci_result["retweeted_weibo_comment_detail"])
    retweeted_total_number = sum(origin_retweeted.values()) + sum(
        retweeted_retweeted.values())
    comment_total_number = sum(origin_comment.values()) + sum(
        retweeted_comment.values())
    if origin_retweeted:
        origin_retweeted_mid = filter_mid(origin_retweeted)
    if retweeted_retweeted:
        retweeted_retweeted_mid = filter_mid(retweeted_retweeted)
    if origin_comment:
        origin_comment_mid = filter_mid(origin_comment)
    if retweeted_comment:
        retweeted_comment_mid = filter_mid(retweeted_comment)
    # (dead code removed: an unused query_body template was built here)
    if int(style) == 0:  # retweeted
        retweeted_origin = []
        if retweeted_retweeted_mid:
            text_result = es.mget(index=index_flow_text,
                                  doc_type=flow_text_index_type,
                                  body={"ids": retweeted_retweeted_mid})["docs"]
            for item in text_result:
                # BUG FIX: mget documents carry their fields under "_source",
                # not "source", so the original always fell back to '0'.
                mid = item.get("_source", {}).get("root_mid", '0')
                retweeted_origin.append(mid)
        retweeted_results = influenced_user_detail(uid, date,
                                                   origin_retweeted_mid,
                                                   retweeted_origin, 3)
        retweeted_results["total_number"] = retweeted_total_number
        results = retweeted_results
    else:
        retweeted_origin = []
        if retweeted_comment_mid:
            text_result = es.mget(index=index_flow_text,
                                  doc_type=flow_text_index_type,
                                  body={"ids": retweeted_comment_mid})["docs"]
            for item in text_result:
                mid = item.get("_source", {}).get("root_mid", '0')
                retweeted_origin.append(mid)
        comment_results = influenced_user_detail(uid, date,
                                                 origin_comment_mid,
                                                 retweeted_origin, 2)
        comment_results["total_number"] = comment_total_number
        results = comment_results
    return results
def influenced_user_detail(uid, date, origin_retweeted_mid, retweeted_retweeted_mid, message_type, default_number=20):
    # Aggregate the users influenced by `uid` on `date`: everyone who reacted
    # (with the given `message_type`) to the supplied weibo mids. Returns a
    # dict with domain/topic/geo distributions, average influence, and
    # in-portrait / out-of-portrait user lists capped at `default_number`.
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": []
                    }
                }
            }
        },
        "size": 100000,
        "sort": {"user_fansnum": {"order": "desc"}}
    }
    # the influenced users, in detail
    date1 = str(date).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    origin_retweeted_uid = []  # influenced user uid_list
    retweeted_retweeted_uid = []
    origin_comment_uid = []
    retweeted_comment_uid = []
    query_origin = copy.deepcopy(query_body)
    query_retweeted = copy.deepcopy(query_body)
    if origin_retweeted_mid:
        # all users who reacted to these original weibo (root_uid == uid)
        query_origin["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"root_mid": origin_retweeted_mid}})
        query_origin["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"message_type": message_type}}, {"term": {"root_uid": uid}}])
        origin_retweeted_result = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_origin, fields=["uid"])["hits"]["hits"]
        if origin_retweeted_result:
            for item in origin_retweeted_result:
                origin_retweeted_uid.append(item["fields"]["uid"][0])
    if retweeted_retweeted_mid:
        # all users who reacted to these retweeted weibo (directed at uid)
        query_retweeted["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"root_mid": retweeted_retweeted_mid}})
        query_retweeted["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"message_type": message_type}}, {"term": {"directed_uid": uid}}])
        retweeted_retweeted_result = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_retweeted, fields=["uid"])["hits"]["hits"]
        if retweeted_retweeted_result:
            for item in retweeted_retweeted_result:
                retweeted_retweeted_uid.append(item["fields"]["uid"][0])
    retweeted_uid_list = []  # all retweeted user list
    retweeted_results = {}   # statistics of all retweeted uid information
    retweeted_domain = {}
    retweeted_topic = {}
    retweeted_geo = {}
    bci_results = {}
    in_portrait = []   # [uid, importance] pairs for users found in portrait
    out_portrait = []  # uids not found in the portrait index
    average_influence = 0
    total_influence = 0
    count = 0
    # NOTE(review): all_uid_set is computed but never used.
    all_uid_set = set(origin_retweeted_uid) | set(retweeted_retweeted_uid)
    retweeted_uid_list.extend(origin_retweeted_uid)
    retweeted_uid_list.extend(retweeted_retweeted_uid)
    # dedupe and drop the user himself
    retweeted_uid_list = list(set(retweeted_uid_list) - set([uid]))  # filter uids
    if retweeted_uid_list:
        user_portrait_result = es_user_portrait.mget(index=user_portrait, doc_type=portrait_index_type, body={"ids": retweeted_uid_list}, fields=["domain", "topic_string", "activity_geo_dict", "importance", "influence"])["docs"]
        bci_index = "bci_" + date.replace('-', '')
        bci_results = es_cluster.mget(index=bci_index, doc_type="bci", body={"ids": retweeted_uid_list}, fields=['user_index'])["docs"]
        for item in user_portrait_result:
            if item["found"]:
                temp = []
                count += 1
                temp.append(item['_id'])
                temp.append(item["fields"]["importance"][0])
                in_portrait.append(temp)
                # domain/topic are '&'-joined strings in the portrait index
                temp_domain = item["fields"]["domain"][0].split('&')
                temp_topic = item["fields"]["topic_string"][0].split('&')
                # presumably activity_geo_dict is a json list of per-period
                # {geo: count} dicts and [-1] is the most recent - TODO confirm
                temp_geo = json.loads(item["fields"]["activity_geo_dict"][0])[-1].keys()
                #total_influence += item["fields"]["influence"][0]
                retweeted_domain = aggregation(temp_domain, retweeted_domain)
                retweeted_topic = aggregation(temp_topic, retweeted_topic)
                retweeted_geo = aggregation(temp_geo, retweeted_geo)
            else:
                out_portrait.append(item['_id'])
        retweeted_domain = proportion(retweeted_domain)
        retweeted_topic = proportion(retweeted_topic)
        retweeted_geo = proportion(retweeted_geo)
    if bci_results:
        total_influence = 0
        for item in bci_results:
            if item['found']:
                total_influence += item['fields']['user_index'][0]
        try:
            average_influence = total_influence/len(retweeted_uid_list)
        except:
            average_influence = 0
    sorted_retweeted_domain = sorted(retweeted_domain.items(), key=lambda x: x[1], reverse=True)
    sorted_retweeted_topic = sorted(retweeted_topic.items(), key=lambda x: x[1], reverse=True)
    sorted_retweeted_geo = sorted(retweeted_geo.items(), key=lambda x: x[1], reverse=True)
    # NOTE(review): "domian" is misspelled, but downstream consumers may rely
    # on this exact key - do not rename without checking callers.
    retweeted_results["domian"] = sorted_retweeted_domain[:5]
    retweeted_results["topic"] = sorted_retweeted_topic[:5]
    retweeted_results["geo"] = sorted_retweeted_geo[:5]
    retweeted_results["influence"] = average_influence
    # rank in-portrait users by importance, keep uids only
    in_portrait = sorted(in_portrait, key=lambda x: x[1], reverse=True)
    temp_list = []
    for item in in_portrait:
        temp_list.append(item[0])
    retweeted_results['in_portrait_number'] = len(temp_list)
    retweeted_results['out_portrait_number'] = len(out_portrait)
    in_portrait_url = get_user_url(temp_list[:default_number])
    out_portrait_url = get_user_url(out_portrait[:default_number])
    retweeted_results["in_portrait"] = in_portrait_url
    retweeted_results["out_portrait"] = out_portrait_url
    retweeted_results["total_number"] = len(temp_list) + len(out_portrait)
    return retweeted_results