Esempio n. 1
0
def recommentation_in(input_ts, recomment_type, submit_user):
    date = ts2datetime(input_ts)
    recomment_results = []
    # read from redis
    results = []
    hash_name = 'recomment_'+str(date) + "_" + recomment_type
    identify_in_hashname = "identify_in_" + str(date)
    submit_user_recomment = "recomment_" + submit_user + "_" + str(date) # 用户自推荐名单
    results = r.hgetall(hash_name)
    if not results:
        return []
    # search from user_profile to rich the show information
    recommend_list = set(r.hkeys(hash_name))
    identify_in_list = set(r.hkeys("compute"))
    submit_user_recomment = set(r.hkeys(submit_user_recomment))
    recomment_results = list(recommend_list - identify_in_list)
    recomment_results = list(set(recomment_results) - submit_user_recomment)

    if recomment_results:
        results = get_user_detail(date, recomment_results, 'show_in', recomment_type)
    else:
        results = []
    #test
    '''
    f = open('/home/user_portrait_0320/revised_user_portrait/user_portrait/user_portrait/recommentation/influence_recommend.csv', 'wb')
    writer = csv.writer(f)
    for item_result in results:
        writer.writerow(item_result)
    f.close()
    '''
    return results
Esempio n. 2
0
def recommentation_in_auto(seatch_date, submit_user):
    results = []
    #run type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time() - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    recomment_influence_hash_name = 'recomment_' + now_date + '_influence'
    recomment_sensitive_hash_name = 'recomment_' + now_date + '_sensitive'
    recomment_compute_hash_name = 'compute'
    #step1: get auto
    auto_result = r.hget(recomment_hash_name, 'auto')
    if auto_result:
        auto_user_list = json.loads(auto_result)
    else:
        auto_user_list = []
    #step2: get admin user result
    admin_result = r.hget(recomment_hash_name, submit_user)
    if admin_result:
        admin_user_list = json.loads(admin_result)
    else:
        admin_user_list = []
    #step3: get union user and filter compute/influence/sensitive
    union_user_auto_set = set(auto_user_list) | set(admin_user_list)
    influence_user = set(r.hkeys(recomment_influence_hash_name))
    sensitive_user = set(r.hkeys(recomment_sensitive_hash_name))
    compute_user = set(r.hkeys(recomment_compute_hash_name))
    filter_union_user = union_user_auto_set - (influence_user | sensitive_user | compute_user)
    auto_user_list = list(filter_union_user)
    #step4: get user detail
    results = get_user_detail(now_date, auto_user_list, 'show_in', 'auto')
    return results
Esempio n. 3
0
def recommentation_in(input_ts, recomment_type, submit_user):
    date = ts2datetime(input_ts)
    recomment_results = []
    # read from redis
    results = []
    hash_name = 'recomment_' + str(date) + "_" + recomment_type
    identify_in_hashname = "identify_in_" + str(date)
    submit_user_recomment = "recomment_" + submit_user + "_" + str(
        date)  # 用户自推荐名单
    results = r.hgetall(hash_name)
    if not results:
        return []
    # search from user_profile to rich the show information
    recommend_list = set(r.hkeys(hash_name))
    identify_in_list = set(r.hkeys("compute"))
    submit_user_recomment = set(r.hkeys(submit_user_recomment))
    recomment_results = list(recommend_list - identify_in_list)
    recomment_results = list(set(recomment_results) - submit_user_recomment)

    if recomment_results:
        results = get_user_detail(date, recomment_results, 'show_in',
                                  recomment_type)
    else:
        results = []
    return results
Esempio n. 4
0
def submit_identify_in_uname(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list = upload_data.split('\n')
    uid_list = []
    #step1: get uid list from uname
    profile_exist_result = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body={'query': {
            'terms': {
                'nick_name': uname_list
            }
        }},
        _source=False)['hits']['hits']
    for profile_item in profile_exist_result:
        uid = profile_item['_id']
        uid_list.append(uid)
    if not uid_list:
        return 'uname list valid'
    #step2: filter user not in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body={'ids':
                                                        uid_list})['docs']
    new_uid_list = [
        exist_item['_id'] for exist_item in exist_portrait_result
        if exist_item['found'] == False
    ]
    if not new_uid_list:
        return 'uname list all in'
    #step2.2: identify in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(new_uid_set - compute_set)
    if not in_uid_list:
        return 'uname list all in'
    #step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
Esempio n. 5
0
def new_identify_in(data, date, submit_user):
    in_status = 1
    compute_status = 0
    hashname_submit = "submit_recomment_" + date
    hashname_influence = "recomment_" + date + "_influence"
    hashname_sensitive = "recomment_" + date + "_sensitive"
    submit_user_recomment = "recomment_" + submit_user + "_" + str(
        date)  # 用户自推荐名单
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))  # 系统自动推荐名单
    for item in data:
        date = item[0]  # identify the date form '2013-09-01' with web
        uid = item[1]
        #status = item[2]
        if uid in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, uid))
            recommentor_list = (tmp['operation']).split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = "&".join(new_list)
        else:
            tmp = {"system": "0", "operation": submit_user}
        r.hset(hashname_submit, uid, json.dumps(tmp))
        r.hset(submit_user_recomment, uid, "0")
    return True
Esempio n. 6
0
def submit_identify_in_uname(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list_pre = upload_data.split('\n')
    uname_list = [item.split('\r')[0] for item in uname_list_pre]
    uid_list = []
    have_in_user_list = []
    invalid_user_list = []
    valid_uname_list = []
    #step1: get uid list from uname
    profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query':{'terms':{'nick_name': uname_list}}}, _source=False, fields=['nick_name'])['hits']['hits']
    for profile_item in profile_exist_result:
        uid = profile_item['_id']
        uid_list.append(uid)
        uname = profile_item['fields']['nick_name'][0]
        valid_uname_list.append(uname)
    invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    if len(invalid_user_list) != 0:
        return False, 'invalid user info', invalid_user_list
    #step2: filter user not in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==False]
    have_in_user_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==True]
    if not new_uid_list:
        return False, 'all user in'
    #step2.2: identify in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(new_uid_set - compute_set)
    if not in_uid_list:
        return False, 'all user in'
    #step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    #identify final submit user list
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system':'0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_user_list, have_in_user_list, final_submit_user_list
Esempio n. 7
0
def get_recommentation(submit_user):
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i*DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci", body={'ids':submit_user_recomment}, _source=True)['docs']
            user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':submit_user_recomment}, _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            for i in range(len(submit_user_recomment)):
                uid = submit_user_recomment[i]
                bci_dict = user_bci_result[i]
                profile_dict = user_profile_result[i]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                recomment_day = iter_date
                result.append([iter_date, uid, uname, location, fansnum, statusnum, influence, in_portrait])

    return result    
Esempio n. 8
0
def submit_identify_in_url(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']
    #step1: get uid list from input_data url
    url_list_pre = upload_data.split('\n')
    url_list = [item.split('\r')[0] for item in url_list_pre]
    uid_list = []
    invalid_uid_list = []
    have_in_uid_list = []
    for url_item in url_list:
        try:
            #url_item = 'http://weibo.com/p/1002065727942146/album?.....'
            url_list = url_item.split('/')
            uid = url_list[4][-10:]
            uid_list.append(uid)
        except:
            invalid_uid_list.append(url_item)
    if len(invalid_uid_list)!=0:
        return False, 'invalid user info', invalid_uid_list
    #step2: identify uid list is not exist in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=True)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==False]
    have_in_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==True]
    #step2.2: identify in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(new_uid_set - compute_set)
    if len(in_uid_list)==0:
        return False, 'all user in'
    #step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    #identify the final submit user
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
Esempio n. 9
0
def submit_identify_in_url(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']
    #step1: get uid list from input_data url
    url_list_pre = upload_data.split('\n')
    url_list = [item.split('\r')[0] for item in url_list_pre]
    uid_list = []
    invalid_uid_list = []
    have_in_uid_list = []
    for url_item in url_list:
        try:
            #url_item = 'http://weibo.com/p/1002065727942146/album?.....'
            url_list = url_item.split('/')
            uid = url_list[4][-10:]
            uid_list.append(uid)
        except:
            invalid_uid_list.append(url_item)
    if len(invalid_uid_list)!=0:
        return False, 'invalid user info', invalid_uid_list
    #step2: identify uid list is not exist in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=True)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==False]
    have_in_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==True]
    #step2.2: identify in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(new_uid_set - compute_set)
    if len(in_uid_list)==0:
        return False, 'all user in'
    #step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    #identify the final submit user
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
Esempio n. 10
0
def submit_identify_in_uname(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list = upload_data.split('\n') 
    uid_list = []
    #step1: get uid list from uname
    profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query':{'terms':{'nick_name': uname_list}}}, _source=False)['hits']['hits']
    for profile_item in profile_exist_result:
        uid = profile_item['_id']
        uid_list.append(uid)
    if not uid_list:
        return 'uname list valid'
    #step2: filter user not in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==False]
    if not new_uid_list:
        return 'uname list all in'
    #step2.2: identify in compute
    new_uid_set = set(new_uid_list)
    compute_set = r.hkeys('compute')
    in_uid_list = list(new_uid_set - compute_set)
    if not in_uid_list:
        return 'uname list all in'
    #step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, uid))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system':'0', 'operation': submit_user}
        r.hset(hashname_submit, uid, json.dumps(tmp))
        r.hset(submit_user_recomment, uid, '0')
    return True
Esempio n. 11
0
def submit_identify_in_uid(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    line_list = upload_data.split('\n')
    uid_list = []
    for line in line_list:
        uid = line[:10]
        if len(uid) == 10:
            uid_list.append(uid)
    #identify the uid is not exist in user_portrait and compute
    #step1: filter in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body={'ids': uid_list},
                                                  _source=False)['docs']
    for exist_item in exist_portrait_result:
        if exist_item['found'] == False:
            new_uid_list.append(exist_item['_id'])
    #step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    for in_item in in_uid_set:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashtname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
Esempio n. 12
0
def recommentation_in(input_ts, recomment_type):
    date = ts2datetime(input_ts)
    recomment_results = []
    # read from redis
    results = []
    hash_name = 'recomment_'+str(date) + "_" + recomment_type
    identify_in_hashname = "identify_in_" + str(date)
    results = r.hgetall(hash_name)
    if not results:
        return []
    # search from user_profile to rich the show information
    recommend_list = set(r.hkeys(hash_name))
    identify_in_list = set(r.hkeys("compute"))
    recomment_results = list(recommend_list - identify_in_list)

    if recomment_results:
        results = get_user_detail(date, recomment_results, 'show_in', recomment_type)
    else:
        results = []
    return results
Esempio n. 13
0
def admin_recommentation_in(input_ts):
    date = ts2datetime(input_ts)
    recomment_results = []
    identify_in_hashname = "identify_in_" + str(date)
    # read from redis
    results = []
    hashname_submit = "submit_recomment_" + date
    results = r.hgetall(hashname_submit)
    if not results:
        return []
    # search from user_profile to rich the show information
    submit_set = set(r.hkeys(hashname_submit))
    idntify_in_set = set(r.hkeys("compute")) # 已入库用户名单
    recomment_results = list(submit_set - idntify_in_set) #过滤一下
    if recomment_results:
        results = get_user_detail(date, recomment_results, 'show_in', "sensitive", "admin")
        sorted_results = sorted(results, key=lambda x:x[-1], reverse=True)
        results = sorted_results
    else:
        results = []
    return results
Esempio n. 14
0
def new_identify_in(data, date, submit_user):
    in_status = 1
    compute_status = 0
    hashname_submit = "submit_recomment_" + date
    hashname_influence = "recomment_" + date + "_influence"
    hashname_sensitive = "recomment_" + date + "_sensitive"
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive)) # 系统自动推荐名单
    for item in data:
        date = item[0] # identify the date form '2013-09-01' with web
        uid = item[1]
        #status = item[2]
        if uid in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, uid))
            recommentor_list = (tmp['operation']).split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = "&".join(new_list)
        else:
            tmp = {"system":"0", "operation":submit_user}
        r.hset(hashname_submit, uid, json.dumps(tmp))
    return True
Esempio n. 15
0
def admin_recommentation_in(input_ts):
    date = ts2datetime(input_ts)
    recomment_results = []
    identify_in_hashname = "identify_in_" + str(date)
    # read from redis
    results = []
    hashname_submit = "submit_recomment_" + date
    results = r.hgetall(hashname_submit)
    if not results:
        return []
    # search from user_profile to rich the show information
    submit_set = set(r.hkeys(hashname_submit))
    idntify_in_set = set(r.hkeys("compute")) # 已入库用户名单
    recomment_results = list(submit_set - idntify_in_set) #过滤一下
    if recomment_results:
        results = get_user_detail(date, recomment_results, 'show_in', "sensitive", "admin")
        sorted_results = sorted(results, key=lambda x:x[-1], reverse=True)
        results = sorted_results
    else:
        results = []
    return results
Esempio n. 16
0
def submit_identify_in_url(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    #step1: get uid list from input_data url
    url_list = upload_data.split('\n')
    uid_list = []
    for url_item in url_list:
        #url_item = 'weibo.com/p/1002065727942146/album?.....'
        url_list = url_item.split('/')
        uid = url_list[2][-10:]
        uid_list.append(uid)   
    #step2: identify uid list is not exist in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=True)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==False]
    #step2.2: identify in compute
    new_uid_set = set(new_uid_list)
    compute_set = r.hkeys('compute')
    in_uid_list = list(new_uid_set - compute_set)
    #step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, uid))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, uid, json.dumps(tmp))
        r.hset(submit_user_recomment, uid, '0')
    return True
Esempio n. 17
0
def submit_identify_in_uid(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    line_list = upload_data.split('\n')
    uid_list = []
    for line in line_list:
        uid = line[:10]
        if len(uid)==10:
            uid_list.append(uid)
    #identify the uid is not exist in user_portrait and compute
    #step1: filter in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=False)['docs']
    for exist_item in exist_portrait_result:
        if exist_item['found'] == False:
            new_uid_list.append(exist_item['_id'])
    #step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    for in_item in in_uid_set:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashtname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system':'0', 'operation':submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
Esempio n. 18
0
def recommentation_in_auto(search_date, submit_user):
    results = []
    #run type
    if RUN_TYPE == 1:
        now_date = search_date
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    recomment_influence_hash_name = 'recomment_' + now_date + '_influence'
    recomment_sensitive_hash_name = 'recomment_' + now_date + '_sensitive'
    recomment_submit_hash_name = 'recomment_' + submit_user + '_' + now_date
    recomment_compute_hash_name = 'compute'
    #step1: get auto
    auto_result = r.hget(recomment_hash_name, 'auto')
    if auto_result:
        auto_user_list = json.loads(auto_result)
    else:
        auto_user_list = []
    #step2: get admin user result
    admin_result = r.hget(recomment_hash_name, submit_user)
    if admin_result:
        admin_user_list = json.loads(admin_result)
    else:
        admin_user_list = []
    #step3: get union user and filter compute/influence/sensitive
    union_user_auto_set = set(auto_user_list) | set(admin_user_list)
    influence_user = set(r.hkeys(recomment_influence_hash_name))
    sensitive_user = set(r.hkeys(recomment_sensitive_hash_name))
    compute_user = set(r.hkeys(recomment_compute_hash_name))
    been_submit_user = set(r.hkeys(recomment_submit_hash_name))
    filter_union_user = union_user_auto_set - (
        influence_user | sensitive_user | compute_user | been_submit_user)
    auto_user_list = list(filter_union_user)
    #step4: get user detail
    if auto_user_list == []:
        return auto_user_list
    results = get_user_detail(now_date, auto_user_list, 'show_in', 'auto')
    return results
Esempio n. 19
0
def get_recommentation(submit_user):
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i * DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(
            iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(
                index=bci_index_name,
                doc_type="bci",
                body={'ids': submit_user_recomment},
                _source=True)['docs']
            user_profile_result = es_user_profile.mget(
                index='weibo_user',
                doc_type='user',
                body={'ids': submit_user_recomment},
                _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            for i in range(len(submit_user_recomment)):
                uid = submit_user_recomment[i]
                bci_dict = user_bci_result[i]
                profile_dict = user_profile_result[i]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    influence = math.log(
                        influence / max_evaluate_influ['user_index'] * 9 + 1,
                        10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                recomment_day = iter_date
                result.append([
                    iter_date, uid, uname, location, fansnum, statusnum,
                    influence, in_portrait
                ])

    return result
Esempio n. 20
0
def submit_identify_in_uid(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    line_list = upload_data.split('\n')
    uid_list = []
    invalid_uid_list = []
    for line in line_list:
        uid = line.split('\r')[0]
        #if len(uid)==10:
        #    uid_list.append(uid)
        if uid != '':
            uid_list.append(uid)
    if len(invalid_uid_list)!=0:
        return False, 'invalid user info', invalid_uid_list
    #identify the uid is not exist in user_portrait and compute
    #step1: filter in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=False)['docs']
    except:
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found'] == False:
                new_uid_list.append(exist_item['_id'])
            else:
                have_in_uid_list.append(exist_item['_id'])
    else:
        new_uid_list = uid_list
   
    #step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    print 'new_uid_set:', new_uid_set 
    print 'in_uid_set:', in_uid_set
    if len(in_uid_set)==0:
        return False, 'all user in'
    #identify the final add user
    final_submit_user_list = []
    for in_item in in_uid_set:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system':'0', 'operation':submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
Esempio n. 21
0
def submit_identify_in_uid(input_data):
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    line_list = upload_data.split('\n')
    uid_list = []
    invalid_uid_list = []
    for line in line_list:
        uid = line.split('\r')[0]
        #if len(uid)==10:
        #    uid_list.append(uid)
        if uid != '':
            uid_list.append(uid)
    if len(invalid_uid_list)!=0:
        return False, 'invalid user info', invalid_uid_list
    #identify the uid is not exist in user_portrait and compute
    #step1: filter in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=False)['docs']
    except:
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found'] == False:
                new_uid_list.append(exist_item['_id'])
            else:
                have_in_uid_list.append(exist_item['_id'])
    else:
        new_uid_list = uid_list
   
    #step2: filter in compute
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    print 'new_uid_set:', new_uid_set 
    print 'in_uid_set:', in_uid_set
    if len(in_uid_set)==0:
        return False, 'all user in'
    #identify the final add user
    final_submit_user_list = []
    for in_item in in_uid_set:
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system':'0', 'operation':submit_user}
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list