def compute_attribute(user_weibo_dict):
    """Compute the attributes of every user and bulk-index them with ``es``.

    For each uid key of ``user_weibo_dict`` the results of
    ``compute_text_attribute``, the user's flow information, the result of
    ``get_evaluate_index`` and the user's profile information are merged
    into one document, which is indexed under ``_id = str(uid)``.

    Flow information is fetched in batches of 1000 uids and only the users
    of the current batch are processed against that batch's flow result.
    The earlier version looped over *all* users for every batch, so users
    outside the batch were looked up in a flow result that had not been
    requested for them, and every user was processed once per batch.

    Args:
        user_weibo_dict: mapping of uid -> list of that user's weibo items
            (passed as-is to ``compute_text_attribute``).

    Returns:
        The string ``"1"``.
    """
    index_name = 'sensitive_user_portrait_0103'
    flow_batch_size = 1000   # uids per get_flow_information call
    bulk_flush_size = 200    # users per es.bulk call

    uid_list = list(user_weibo_dict.keys())
    if not uid_list:
        # Nothing to compute; skip the lookups entirely.
        return "1"

    # Background (profile) information update. The request covers the whole
    # uid list and does not depend on the batch, so it is issued once.
    register_result = get_profile_information(uid_list)

    bulk_action = []
    count = 0
    for start in range(0, len(uid_list), flow_batch_size):
        batch = uid_list[start:start + flow_batch_size]
        flow_result = get_flow_information(batch)  # flow data update
        for user in batch:
            weibo_list = user_weibo_dict[user]
            results = compute_text_attribute(user, weibo_list)  # text attributes
            results['uid'] = str(user)
            # NOTE(review): flow_result is keyed by str(uid) here while
            # register_result below is keyed by the raw uid -- confirm both
            # helpers really use those key types.
            flow_dict = flow_result[str(user)]
            results.update(flow_dict)
            user_info = {
                'uid': str(user),
                'domain': results['domain'],
                'topic': results['topic'],
                'activity_geo': results['geo_string'],
            }
            evaluation_index = get_evaluate_index(user_info, status='insert')
            results.update(evaluation_index)
            register_dict = register_result[user]
            results.update(register_dict)
            action = {'index': {'_id': str(user)}}
            bulk_action.extend([action, results])
            count += 1
            if count % bulk_flush_size == 0:
                es.bulk(bulk_action, index=index_name, doc_type="user", timeout=60)
                bulk_action = []
                print(count)
    if bulk_action:
        # Flush the users left over after the last full group of 200.
        es.bulk(bulk_action, index=index_name, doc_type="user", timeout=60)
    return "1"
def main():
    """Build one merged document per user and hand them all to ``save_user_results``.

    Reads the uid list, loads the users' weibo items, then for each user
    merges the text attributes, flow information, evaluation index and
    profile information into a single document that is queued as an
    ``index`` bulk action keyed by ``str(uid)``.

    Returns:
        Always ``True``; the value returned by ``save_user_results`` is
        not inspected.
    """
    # {user: [weibos]} for the uids returned by read_uid_list()
    user_weibo_dict = read_user_weibo(read_uid_list())
    known_uids = user_weibo_dict.keys()
    flow_by_uid = get_flow_information(known_uids)
    profile_by_uid = get_profile_information(known_uids)

    pending = []
    for user, weibos in user_weibo_dict.items():
        # The value is not used further in this function; the lookup is kept
        # because it raises when the user's weibo list is empty.
        _uname = weibos[0]['uname']

        document = compute_text_attribute(user, weibos)
        document['uid'] = str(user)
        document = dict(document, **flow_by_uid[str(user)])

        # Inputs for the evaluation index come from the merged document.
        evaluate_input = {
            'uid': str(user),
            'domain': document['domain'],
            'topic': document['topic'],
            'activity_geo': document['activity_geo'],
        }
        document = dict(document, **get_evaluate_index(evaluate_input, status='insert'))
        document = dict(document, **profile_by_uid[str(user)])

        pending.append({'index': {'_id': str(user)}})
        pending.append(document)

    # Save everything with a single bulk call.
    save_user_results(pending)
    return True
Exemple #3
0
def update_attribute_day():
    """Scan every document of ``user_portrait`` and bulk-update its evaluation index.

    For each scanned document the ``uid`` field is read from ``_source``,
    ``get_evaluate_index`` is called with ``status="update"``, and the result
    is queued as a partial-document ``update`` action keyed by ``str(uid)``.

    Queued actions are handed to ``save_user_results`` every 1000 users and
    once more when the scan is exhausted, so the pending list stays bounded
    instead of growing to the size of the whole index.

    This function does not return: it ends the process with ``sys.exit``.
    The exit status is 0 when the scan completes and 1 when an unexpected
    exception is raised while fetching a document. In the failure case the
    actions still queued are not saved.
    """
    index_name = "user_portrait"
    index_type = "user"
    flush_size = 1000  # users per save_user_results call

    s_re = scan(es, query={"query": {"match_all": {}}, "size": 1000}, index=index_name, doc_type=index_type)
    bulk_action = []
    while True:
        try:
            scan_re = next(s_re)["_source"]
        except StopIteration:
            print("all done")
            if bulk_action:
                save_user_results(bulk_action)
            sys.exit(0)
        except Exception as r:
            print("%s %s" % (Exception, r))
            # Report the failure through a non-zero exit status.
            sys.exit(1)

        uid = scan_re["uid"]
        evaluate_result = get_evaluate_index({"uid": uid}, status="update")
        action = {"update": {"_id": str(uid)}}
        bulk_action.extend([action, {"doc": dict(evaluate_result)}])

        # Each user contributes two entries (action line + document line).
        if len(bulk_action) >= 2 * flush_size:
            save_user_results(bulk_action)
            bulk_action = []
Exemple #4
0
def compute_attribute(uid_list=None):
    """Compute the attributes of the given users and bulk-index them.

    The users' weibo items are loaded with ``read_user_weibo``; only the
    uids present in that result are processed. For each of them the text
    attributes, user name (taken from the first weibo item), flow
    information, evaluation index and profile information are merged into
    one document that is queued as an ``index`` action keyed by ``str(uid)``.

    Args:
        uid_list: uids to process. Defaults to an empty list.

    Returns:
        The string ``"1"``.
    """
    if uid_list is None:
        # Fresh list per call instead of a shared mutable default.
        uid_list = []

    user_weibo_dict = read_user_weibo(uid_list)
    uid_list = list(user_weibo_dict.keys())
    flow_result = get_flow_information(uid_list)
    register_result = get_profile_information(uid_list)

    bulk_action = []
    count = 0
    for user in uid_list:
        weibo_list = user_weibo_dict[user]
        uname = weibo_list[0]['uname']
        results = compute_text_attribute(user, weibo_list)
        results['uname'] = uname
        results['uid'] = str(user)
        # NOTE(review): flow_result is keyed by str(uid) while register_result
        # below is keyed by the raw uid -- confirm against both helpers.
        flow_dict = flow_result[str(user)]
        results.update(flow_dict)
        user_info = {
            'uid': str(user),
            'domain': results['domain'],
            'topic': results['topic'],
            'activity_geo': results['geo_string'],
        }
        evaluation_index = get_evaluate_index(user_info, status='insert')
        results.update(evaluation_index)
        register_dict = register_result[user]
        results.update(register_dict)
        action = {'index': {'_id': str(user)}}
        bulk_action.extend([action, results])
        count += 1
        if count % 200 == 0:
            # NOTE(review): index_name is not defined in this function; it is
            # presumably a module-level name -- confirm it exists.
            es.bulk(bulk_action, index=index_name, doc_type="user", timeout=60)
            bulk_action = []
            print(count)
    if bulk_action:
        # NOTE(review): the remainder goes through save_user_results while
        # full groups of 200 use es.bulk directly -- verify both write to
        # the same index.
        save_user_results(bulk_action)
    return "1"
def compute2in(uid_list, user_weibo_dict, status='insert'):
    """Compute the attributes of each user and save them as bulk actions.

    For every user in ``user_weibo_dict`` the text attributes, user name
    (taken from the first weibo item), flow information, evaluation index
    and profile information are merged into one document. The documents
    are passed to ``save_user_results`` in a single call.

    Args:
        uid_list: uids passed to ``get_flow_information`` and
            ``get_profile_information``.
        user_weibo_dict: mapping of uid -> list of that user's weibo items.
        status: ``'insert'`` queues ``index`` actions holding the full
            document; any other value queues ``update`` actions holding
            ``{'doc': document}``.

    Returns:
        Always ``True``; the value returned by ``save_user_results`` is
        not inspected.
    """
    flow_result = get_flow_information(uid_list)
    register_result = get_profile_information(uid_list)

    # Local to this call: the list was previously never initialised here.
    bulk_action = []
    for user in user_weibo_dict:
        weibo_list = user_weibo_dict[user]
        uname = weibo_list[0]['uname']
        results = compute_text_attribute(user, weibo_list)
        results['uname'] = uname
        results['uid'] = str(user)
        # NOTE(review): flow_result is keyed by str(uid) while register_result
        # below is keyed by the raw uid -- confirm against both helpers.
        flow_dict = flow_result[str(user)]
        results = dict(results, **flow_dict)
        user_info = {
            'uid': str(user),
            'domain': results['domain'],
            'topic': results['topic'],
            'activity_geo': results['activity_geo'],
        }
        # NOTE(review): the evaluation index is always requested with
        # status='insert', even when this function runs in update mode --
        # confirm that is intended.
        evaluation_index = get_evaluate_index(user_info, status='insert')
        results = dict(results, **evaluation_index)
        register_dict = register_result[user]
        results = dict(results, **register_dict)
        if status == 'insert':
            action = {'index': {'_id': str(user)}}
        else:
            # The action metadata must be a dict ({'_id': uid}); the former
            # "{'_id', uid}" spelling built a set.
            action = {'update': {'_id': str(user)}}
            results = {'doc': results}
        bulk_action.extend([action, results])
    save_user_results(bulk_action)
    return True