コード例 #1
0
def update_day_profile(uid_list):
    result_dict = dict()
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids':uid_list}, fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
    except:
        bci_history_result = []
    iter_count = 0
    for uid in uid_list:
        try:
            bci_history_item = bci_history_result[iter_count]
        except:
            bci_history_item = {}
        if bci_history_item and bci_history_item['found']==True:
            if isinstance(bci_history_item['fields']['weibo_month_sum'][0], int):
                statusnum = bci_history_item['fields']['weibo_month_sum'][0]
            else:
                statusnum = 0
            if isinstance(bci_history_item['fields']['user_fansnum'][0], int):
                fansnum = bci_history_item['fields']['user_fansnum'][0]
            else:
                fansnum = 0
            if isinstance(bci_history_item['fields']['user_friendsnum'][0], int):
                friendsnum = bci_history_item['fields']['user_friendsnum'][0]
            else:
                friendsnum = 0
        else:
            statusnum = 0
            fansnum = 0
            friendsnum = 0

        result_dict[uid] = {'statusnum': statusnum, 'fansnum':fansnum, 'friendsnum': friendsnum}
        iter_count += 1

    return result_dict
コード例 #2
0
def get_influence(uid_list):
    result = {}
    now_ts = time.time()
    #run_type
    if RUN_TYPE == 1:
        now_date = ts2datetime(now_ts - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)

    index_time = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    try:
        es_result = es.mget(index=index_time, doc_type=index_type, body={'ids': uid_list})['docs']
    except Exception, e:
        raise e
コード例 #3
0
def get_influence(uid_list):
    result = {}
    now_ts = time.time()
    #run_type
    if RUN_TYPE == 1:
        now_date = ts2datetime(now_ts - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)

    index_time = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    try:
        es_result = es.mget(index=index_time, doc_type=index_type, body={'ids': uid_list}, _source=False, fields=['user_index'])['docs']
    except Exception, e:
        raise e
コード例 #4
0
ファイル: cron_group.py プロジェクト: taozhiiq/user_portrait
def get_attr_bci(uid_list):
    results = []
    now_ts = time.time()
    now_date = ts2datetime(now_ts - 24*3600)
    ts = datetime2ts(now_date)
    #test
    ts = datetime2ts('2013-09-07')
    user_results = {} # {'uid':{'origin_max..':[], ''}}
    total_weibo_number = 0
    
    fans_number = 0
    origin_weibo_number = 0
    retweeted_weibo_number = 0
    origin_weibo_retweeted_total_number = 0
    origin_weibo_comment_total_number = 0
    retweeted_weibo_retweeted_total_number = 0
    retweeted_weibo_comment_total_number = 0

    origin_weibo_retweeted_top = 0
    origin_weibo_comment_top = 0
    retweeted_weibo_retweeted_top = 0
    retweeted_weibo_comment_top = 0
    influence_dict = {}

    for i in range(0, 7):
        timestamp = ts - i*24*3600
        date = ts2datetime(timestamp)
        hash_key = ''.join(date.split('-'))
        es_user_results = es_cluster.mget(index=hash_key, doc_type='bci', body={'ids':uid_list})['docs']
        for user_dict in es_user_results:
            try:
                user_item = user_dict['_source']
            except:
                next
            uid = user_item['user']
            total_weibo_number += user_item['origin_weibo_number']
            total_weibo_number += user_item['retweeted_weibo_number']
            
            # yuankun revise
            origin_weibo_number += user_item['origin_weibo_number']
            retweeted_weibo_number += user_item['retweeted_weibo_number']
            origin_weibo_retweeted_top += user_item['origin_weibo_retweeted_top_number']
            origin_weibo_comment_top += user_item['origin_weibo_comment_top_number']
            retweeted_weibo_retweeted_top += user_item['retweeted_weibo_retweeted_top_number']
            retweeted_weibo_comment_top += user_item['retweeted_weibo_comment_top_number']
            #print 'user_item:', user_item
            if uid in user_results:
                try:
                    user_results[uid]['origin_weibo_retweeted_top'].append([user_item['origin_weibo_retweeted_top_number'], user_item['origin_weibo_top_retweeted_id']])
                    user_results[uid]['origin_weibo_comment_top'].append([user_item['origin_weibo_comment_top_number'], user_item['origin_weibo_top_comment_id']])
                    user_results[uid]['retweeted_weibo_retweeted_top'].append([user_item['retweeted_weibo_retweeted_top_number'], user_item['retweeted_weibo_top_retweeted_id']])
                    user_results[uid]['retweeted_weibo_comment_top'].append([user_item['retweeted_weibo_comment_top_number'], user_item['retweeted_weibo_top_comment_id']])
                except:
                    user_results[uid]['origin_weibo_retweeted_top'] = [[user_item['origin_weibo_retweeted_top_number'], user_item['origin_weibo_top_retweeted_id']]]
                    user_results[uid]['origin_weibo_comment_top'] = [[user_item['origin_weibo_comment_top_number'], user_item['origin_weibo_top_comment_id']]]
                    user_results[uid]['retweeted_weibo_retweeted_top'] = [[user_item['retweeted_weibo_retweeted_top_number'], user_item['retweeted_weibo_top_retweeted_id']]]
                    user_results[uid]['retweeted_weibo_comment_top'] = [[user_item['retweeted_weibo_comment_top_number'], user_item['retweeted_weibo_top_comment_id']]]
            else:
                #print 'user_item:', [[user_item['origin_weibo_retweeted_top_number'], user_item['origin_weibo_top_retweeted_id']]]
                user_results[uid] = {'origin_weibo_retweeted_top':[[user_item['origin_weibo_retweeted_top_number'], user_item['origin_weibo_top_retweeted_id']]]}
                user_results[uid] = {'origin_weibo_comment_top': [[user_item['origin_weibo_comment_top_number'], user_item['origin_weibo_top_comment_id']]]}
                user_results[uid] = {'retweeted_weibo_retweeted_top': [[user_item['retweeted_weibo_retweeted_top_number'], user_item['retweeted_weibo_top_retweeted_id']]]}
                user_results[uid] = {'retweeted_weibo_comment_top': [[user_item['retweeted_weibo_comment_top_number'], user_item['retweeted_weibo_top_comment_id']]]}
            
            # yuankun need
            #print 'fan_num:', user_item['user_fansnum'], type(user_item['user_fansnum']), type(fans_number)
            fans_number += int(user_item['user_fansnum'])
            origin_weibo_retweeted_total_number += user_item['origin_weibo_retweeted_total_number']
            origin_weibo_comment_total_number += user_item['origin_weibo_comment_total_number']
            retweeted_weibo_retweeted_total_number += user_item['retweeted_weibo_retweeted_total_number']
            retweeted_weibo_comment_total_number += user_item['retweeted_weibo_comment_total_number']

    user_portrait_result = es.mget(index='user_portrait', doc_type='user', body={'ids':uid_list})['docs']
    #print 'user_portrait_result:', user_portrait_result[0]

    # get activeness max & importance max & influence max to normalize
    evaluate_max_result = get_evaluate_max()

    for user_portrait in user_portrait_result:
        #print 'user_portrait:', user_portrait
        try:
            user_portrait_dict = user_portrait['_source']
            #print 'user_portrait_dict:', user_portrait_dict
            uname = user_portrait_dict['uname']
            importance = user_portrait_dict['importance']
            normal_importance = math.log((importance / evaluate_max_result['importance']) * 9 + 1, 10) * 100
            activeness = user_portrait_dict['activeness']
            normal_activeness = math.log(activeness / evaluate_max_result['activeness'] * 9 + 1, 10) * 100
            influence = user_portrait_dict['influence']
            normal_influence = math.log(influence / evaluate_max_result['influence'] * 9 + 1, 10) * 100
        except:
            uname = ''
            normal_importance = ''
            normal_activeness = ''
            normal_influence = ''
        #print 'user_portrait_dict:', user_portrait_dict
        uid = user_portrait_dict['uid']
        user_item_dict = user_results[uid]
        origin_weibo_retweeted_top_item = sorted(user_item_dict['origin_weibo_retweeted_top'], key=lambda x:x[0], reverse=True)[0]
        origin_weibo_comment_top_item = sorted(user_item_dict['origin_weibo_comment_top'], key=lambda x:x[0], reverse=True)[0]
        retweeted_weibo_retweeted_top_item = sorted(user_item_dict['retweeted_weibo_retweeted_top'], key=lambda x:x[0], reverse=True)[0]
        retweeted_weibo_comment_top_item = sorted(user_item_dict['retweeted_weibo_comment_top'], key=lambda x:x[0], reverse=True)[0]
        
        results.append([uid, uname, normal_activeness, normal_importance, normal_influence, origin_weibo_retweeted_top_item ,\
                        origin_weibo_comment_top_item, retweeted_weibo_retweeted_top_item, \
                        retweeted_weibo_comment_top_item])

    #yuankun need
    influence_dict['origin_weibo_retweeted_average_number'] = origin_weibo_retweeted_total_number/origin_weibo_number/7
    influence_dict['origin_weibo_comment_average_number'] = origin_weibo_comment_total_number/origin_weibo_number/7
    influence_dict['retweeted_weibo_retweeted_average_number'] = retweeted_weibo_retweeted_total_number/retweeted_weibo_number/7
    influence_dict['retweeted_weibo_comment_average_number'] = retweeted_weibo_comment_total_number/retweeted_weibo_number/7
    influence_dict['origin_weibo_retweeted_top_number'] = origin_weibo_retweeted_top/len(uid_list)/7
    influence_dict['origin_weibo_comment_top_number'] = origin_weibo_comment_top/len(uid_list)/7
    influence_dict['retweeted_weibo_retweeted_top_number'] = retweeted_weibo_retweeted_top/len(uid_list)/7
    influence_dict['retweeted_weibo_comment_top_number'] = retweeted_weibo_comment_top/len(uid_list)/7
    influence_dict['fans_number'] = fans_number
    influence_dict['total_weibo_number'] = total_weibo_number
    #print 'results:', results
    return {'user_influence_list': json.dumps(results), 'total_weibo_number': total_weibo_number}, influence_dict
コード例 #5
0
#write in version:15-12-08
#input: uid_list
#output: {uid:influence, ...}
def get_influence(uid_list):
    result = {}
    now_ts = time.time()
    #run_type
    if RUN_TYPE = 1:
        now_date = ts2datetime(now_ts - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)

    index_time = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    try:
        es_result = es.mget(index=index_time, doc_type=index_type, body={'ids': uid_list})['docs']
    except Exception, e:
        raise e
    for es_item in es_result:
        uid = es_item['_id']
        if es_item['found'] == True:
            result[uid] = es_item['_source']['user_index']
        else:
            result[uid] = 0

    return result


#use to get user importance
#wirte in version:15-12-08
#input: domain, topic, user_fansnum, fansnum_max for one user
コード例 #6
0
 sensitive_string = "sensitive_score_" + tmp_ts
 query_sensitive_body = {
 "query":{
     "match_all":{}
     },
     "size":1,
     "sort":{sensitive_string:{"order":"desc"}}
     }
 try:
     top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
     top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
 except Exception, reason:
     print Exception, reason
     top_sensitive = 400
 index_type = 'bci'
 user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
 user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
 bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs']
 sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs']
 max_evaluate_influ = get_evaluate_max(index_name)
 for i in range(0, len(uid_list)):
     uid = uid_list[i]
     bci_dict = user_bci_result[i]
     profile_dict = user_profile_result[i]
     bci_history_dict = bci_history_result[i]
     sensitive_history_dict = sensitive_history_result[i]
     #print sensitive_history_dict
     try:
         bci_source = bci_dict['_source']
     except:
         bci_source = None