def _first_int_field(fields, name):
    """Return the first value stored under ``name`` if it is an int, else 0.

    ``fields`` is the ``fields`` mapping of one mget hit, where each requested
    field maps to a list of values.  A missing or empty entry yields 0.
    """
    values = fields.get(name)
    if not values:
        return 0
    value = values[0]
    return value if isinstance(value, int) else 0


def update_day_profile(uid_list):
    """Look up post / fan / friend counts for every uid in ``uid_list``.

    The counts are read from the bci-history index with one ``mget`` call
    requesting ``weibo_month_sum``, ``user_fansnum`` and ``user_friendsnum``.

    Args:
        uid_list: ids to look up; the i-th returned document is matched to
            the i-th uid.

    Returns:
        dict mapping each uid to
        ``{'statusnum': ..., 'fansnum': ..., 'friendsnum': ...}``.
        A count is 0 when the lookup failed, the document was not found,
        the field is absent, or the stored value is not an int.
    """
    try:
        docs = es_bci_history.mget(
            index=bci_history_index_name,
            doc_type=bci_history_index_type,
            body={'ids': uid_list},
            fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'],
        )['docs']
    except Exception:
        # Best effort: if the lookup fails every user falls back to zeros.
        docs = []

    result_dict = {}
    for position, uid in enumerate(uid_list):
        item = docs[position] if position < len(docs) else {}
        # Hits without 'found' (e.g. per-document errors) and found documents
        # that carry none of the requested fields are treated as "no data"
        # instead of raising KeyError and aborting the whole batch.
        if item and item.get('found'):
            fields = item.get('fields') or {}
        else:
            fields = {}
        result_dict[uid] = {
            'statusnum': _first_int_field(fields, 'weibo_month_sum'),
            'fansnum': _first_int_field(fields, 'user_fansnum'),
            'friendsnum': _first_int_field(fields, 'user_friendsnum'),
        }
    return result_dict
def get_influence(uid_list):
    """Fetch the previous day's ``bci`` documents for ``uid_list``.

    The index name is ``'bci_'`` followed by the date of the day before the
    reference time, with the dashes removed.

    NOTE(review): the fetched documents are not used and nothing is returned
    in the code visible here -- this definition looks truncated; confirm.
    """
    result = {}
    # run_type: 1 uses the current time, anything else uses RUN_TEST_TIME
    if RUN_TYPE == 1:
        reference_ts = time.time()
    else:
        reference_ts = datetime2ts(RUN_TEST_TIME)
    previous_day = ts2datetime(reference_ts - DAY)
    index_name = 'bci_' + previous_day.replace('-', '')
    try:
        response = es.mget(index=index_name, doc_type='bci', body={'ids': uid_list})
        es_result = response['docs']
    except Exception as e:
        raise e
def get_influence(uid_list):
    """Fetch the ``user_index`` field of the previous day's ``bci`` documents.

    The index name is ``'bci_'`` followed by the date of the day before the
    reference time, with the dashes removed.  Only the ``user_index`` field
    is requested; ``_source`` is switched off.

    NOTE(review): the fetched documents are not used and nothing is returned
    in the code visible here -- this definition looks truncated; confirm.
    """
    result = {}
    # run_type: 1 uses the current time, anything else uses RUN_TEST_TIME
    if RUN_TYPE == 1:
        reference_ts = time.time()
    else:
        reference_ts = datetime2ts(RUN_TEST_TIME)
    previous_day = ts2datetime(reference_ts - DAY)
    index_name = 'bci_' + previous_day.replace('-', '')
    try:
        response = es.mget(
            index=index_name,
            doc_type='bci',
            body={'ids': uid_list},
            _source=False,
            fields=['user_index'],
        )
        es_result = response['docs']
    except Exception as e:
        raise e
# (key in the per-user result, count field, weibo-id field) for the four
# "top weibo" series tracked per user.
_BCI_TOP_FIELDS = (
    ('origin_weibo_retweeted_top',
     'origin_weibo_retweeted_top_number', 'origin_weibo_top_retweeted_id'),
    ('origin_weibo_comment_top',
     'origin_weibo_comment_top_number', 'origin_weibo_top_comment_id'),
    ('retweeted_weibo_retweeted_top',
     'retweeted_weibo_retweeted_top_number', 'retweeted_weibo_top_retweeted_id'),
    ('retweeted_weibo_comment_top',
     'retweeted_weibo_comment_top_number', 'retweeted_weibo_top_comment_id'),
)

# Numeric bci fields that are summed over every user and every day.
_BCI_SUMMED_FIELDS = (
    'origin_weibo_number',
    'retweeted_weibo_number',
    'origin_weibo_retweeted_top_number',
    'origin_weibo_comment_top_number',
    'retweeted_weibo_retweeted_top_number',
    'retweeted_weibo_comment_top_number',
    'origin_weibo_retweeted_total_number',
    'origin_weibo_comment_total_number',
    'retweeted_weibo_retweeted_total_number',
    'retweeted_weibo_comment_total_number',
)


def _safe_average(total, count, days):
    """Return ``total / count / days``, or 0 when ``count`` is 0.

    The plain ``/`` operator is kept, so integer operands still follow the
    interpreter's division rules.
    """
    if not count:
        return 0
    return total / count / days


def get_attr_bci(uid_list):
    """Aggregate seven days of ``bci`` statistics for a group of users.

    For each of the seven days ending at ``ts`` the day's index (named by the
    date without dashes) is queried for ``uid_list``.  Counts are summed over
    all users and days, and for every user the daily "top" weibo entries are
    collected.  The users' ``user_portrait`` documents then supply the name
    and the importance / activeness / influence scores, each normalised
    against ``get_evaluate_max()`` as ``log10(value / max * 9 + 1) * 100``.

    Args:
        uid_list: user ids to aggregate.

    Returns:
        A 2-tuple ``(summary, influence_dict)``:

        * ``summary`` -- ``{'user_influence_list': <json string>,
          'total_weibo_number': <int>}``.  The JSON encodes one row per user:
          ``[uid, uname, activeness, importance, influence,
          origin_retweeted_top, origin_comment_top,
          retweeted_retweeted_top, retweeted_comment_top]`` where each top
          entry is the ``[count, weibo_id]`` pair with the largest count.
          The name and the three scores are ``''`` when the portrait is
          missing or cannot be normalised.
        * ``influence_dict`` -- group averages, ``fans_number`` and
          ``total_weibo_number``.  An average is 0 when its denominator is 0.

    Users that have no ``bci`` row in the seven-day window are left out of
    ``user_influence_list``.
    """
    results = []
    now_ts = time.time()
    now_date = ts2datetime(now_ts - 24*3600)
    ts = datetime2ts(now_date)
    #test
    # NOTE(review): this hard-coded date overrides the timestamp computed
    # above -- it looks like a leftover test setting; confirm before release.
    ts = datetime2ts('2013-09-07')

    user_results = {}  # {uid: {top_key: [[count, weibo_id], ...], ...}}
    sums = dict.fromkeys(_BCI_SUMMED_FIELDS, 0)
    fans_number = 0

    for i in range(0, 7):
        timestamp = ts - i*24*3600
        date = ts2datetime(timestamp)
        hash_key = ''.join(date.split('-'))
        es_user_results = es_cluster.mget(index=hash_key, doc_type='bci',
                                          body={'ids': uid_list})['docs']
        for user_dict in es_user_results:
            try:
                user_item = user_dict['_source']
            except (KeyError, TypeError):
                # No document for this user on this day: skip the row rather
                # than re-using the previous user's data.
                continue
            uid = user_item['user']
            for field in _BCI_SUMMED_FIELDS:
                sums[field] += user_item[field]
            # Every series is created on first sight and appended to
            # afterwards, so all seven days are kept for each of the four keys.
            user_tops = user_results.setdefault(uid, {})
            for top_key, number_field, id_field in _BCI_TOP_FIELDS:
                user_tops.setdefault(top_key, []).append(
                    [user_item[number_field], user_item[id_field]])
            fans_number += int(user_item['user_fansnum'])

    total_weibo_number = sums['origin_weibo_number'] + sums['retweeted_weibo_number']

    user_portrait_result = es.mget(index='user_portrait', doc_type='user',
                                   body={'ids': uid_list})['docs']
    # get activeness max & importance max & influence max to normalize
    evaluate_max_result = get_evaluate_max()
    for user_portrait in user_portrait_result:
        user_portrait_dict = user_portrait.get('_source') or {}
        try:
            uname = user_portrait_dict['uname']
            importance = user_portrait_dict['importance']
            normal_importance = math.log((importance / evaluate_max_result['importance']) * 9 + 1, 10) * 100
            activeness = user_portrait_dict['activeness']
            normal_activeness = math.log(activeness / evaluate_max_result['activeness'] * 9 + 1, 10) * 100
            influence = user_portrait_dict['influence']
            normal_influence = math.log(influence / evaluate_max_result['influence'] * 9 + 1, 10) * 100
        except Exception:
            # Missing portrait, missing key or un-normalisable value:
            # report blanks for this user.
            uname = ''
            normal_importance = ''
            normal_activeness = ''
            normal_influence = ''
        # Take the uid from this hit, never from a previous iteration.
        # Assumes the document _id matches the 'user' value of the bci rows
        # when the portrait has no 'uid' -- TODO confirm.
        if 'uid' in user_portrait_dict:
            uid = user_portrait_dict['uid']
        else:
            uid = user_portrait.get('_id')
        user_item_dict = user_results.get(uid)
        if user_item_dict is None:
            # No bci data in the window for this user: nothing to rank.
            continue
        # max() returns the first entry with the largest count, which is the
        # same entry a stable descending sort would put first.
        top_items = [max(user_item_dict[top_key], key=lambda pair: pair[0])
                     for top_key, _, _ in _BCI_TOP_FIELDS]
        results.append([uid, uname, normal_activeness, normal_importance,
                        normal_influence] + top_items)

    #yuankun need
    origin_weibo_number = sums['origin_weibo_number']
    retweeted_weibo_number = sums['retweeted_weibo_number']
    user_count = len(uid_list)
    influence_dict = {
        'origin_weibo_retweeted_average_number': _safe_average(
            sums['origin_weibo_retweeted_total_number'], origin_weibo_number, 7),
        'origin_weibo_comment_average_number': _safe_average(
            sums['origin_weibo_comment_total_number'], origin_weibo_number, 7),
        'retweeted_weibo_retweeted_average_number': _safe_average(
            sums['retweeted_weibo_retweeted_total_number'], retweeted_weibo_number, 7),
        'retweeted_weibo_comment_average_number': _safe_average(
            sums['retweeted_weibo_comment_total_number'], retweeted_weibo_number, 7),
        'origin_weibo_retweeted_top_number': _safe_average(
            sums['origin_weibo_retweeted_top_number'], user_count, 7),
        'origin_weibo_comment_top_number': _safe_average(
            sums['origin_weibo_comment_top_number'], user_count, 7),
        'retweeted_weibo_retweeted_top_number': _safe_average(
            sums['retweeted_weibo_retweeted_top_number'], user_count, 7),
        'retweeted_weibo_comment_top_number': _safe_average(
            sums['retweeted_weibo_comment_top_number'], user_count, 7),
        'fans_number': fans_number,
        'total_weibo_number': total_weibo_number,
    }
    return {'user_influence_list': json.dumps(results),
            'total_weibo_number': total_weibo_number}, influence_dict
#write in version:15-12-08
#input: uid_list
#output: {uid:influence, ...}
def get_influence(uid_list):
    """Return the previous day's influence value for every uid.

    The values are read from the ``bci`` index of the day before the
    reference time (index name ``'bci_'`` plus the date without dashes).

    Args:
        uid_list: ids to look up.

    Returns:
        dict mapping each returned document ``_id`` to the ``user_index``
        value of its ``_source``, or to 0 when the document was not found.

    Raises:
        Any exception raised by the ``mget`` call propagates unchanged.
    """
    result = {}
    #run_type: 1 uses the current time, anything else uses RUN_TEST_TIME
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time() - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    index_time = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    es_result = es.mget(index=index_time, doc_type=index_type,
                        body={'ids': uid_list})['docs']
    for es_item in es_result:
        uid = es_item['_id']
        # .get(): entries without a 'found' key count as "not found"
        # instead of raising KeyError.
        if es_item.get('found'):
            result[uid] = es_item['_source']['user_index']
        else:
            result[uid] = 0
    return result

#use to get user importance
#write in version:15-12-08
#input: domain, topic, user_fansnum, fansnum_max for one user
sensitive_string = "sensitive_score_" + tmp_ts query_sensitive_body = { "query":{ "match_all":{} }, "size":1, "sort":{sensitive_string:{"order":"desc"}} } try: top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits'] top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0] except Exception, reason: print Exception, reason top_sensitive = 400 index_type = 'bci' user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs'] user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs'] bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs'] sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs'] max_evaluate_influ = get_evaluate_max(index_name) for i in range(0, len(uid_list)): uid = uid_list[i] bci_dict = user_bci_result[i] profile_dict = user_profile_result[i] bci_history_dict = bci_history_result[i] sensitive_history_dict = sensitive_history_result[i] #print sensitive_history_dict try: bci_source = bci_dict['_source'] except: bci_source = None