def main():
    RUN_TYPE = 0
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    date = ts2datetime(now_ts - DAY)
    # auto recommendation: steps 1-4
    #step1: read from top es_daily_rank
    top_user_set, user_dict = search_from_es(date)
    #step2: filter black_uid
    black_user_set = read_black_user()
    subtract_user_set = top_user_set - black_user_set
    #step3: filter users have been in
    subtract_user_set = list(subtract_user_set)
    candidate_results = filter_in(subtract_user_set)
    #step4: filter rules about ip count& reposts/bereposts count&activity count
    results = filter_rules(candidate_results)
    #step5: get sensitive user
    sensitive_user = list(get_sensitive_user(date))
    results = results - set(sensitive_user) # influence user - sensitive user
    new_date = ts2datetime(now_ts)
    hashname_influence = "recomment_" + new_date + "_influence"
    if results:
        for uid in results:
            #print uid
            r.hset(hashname_influence, uid, "0")

    hashname_sensitive = "recomment_" + new_date + "_sensitive"
    if sensitive_user:
        for uid in sensitive_user:
            #print "sensitive"
            r.hset(hashname_sensitive, uid, "0")
    """
def filter_recommend(top_user_set):
    recommend_keys = r.hkeys('recommend')
    recommend_list = []
    for key in recommend_keys:
        recommend_list.extend(json.loads(r.hget('recommend', key)))
    results = set(top_user_set) - set(recommend_list)

    return results
def change_status_compute_fail(mapping_dict):
    hash_name = 'compute'
    status = 1
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '1'
        new_mapping_dict[uid] = json.dumps(user_list)
    r.hmset(hash_name, new_mapping_dict)
def change_status_computed(mapping_dict):
    hash_name = "compute"
    status = 4
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = "4"
        new_mapping_dict[uid] = json.dumps(user_list)
    r.hmset(hash_name, new_mapping_dict)
def change_status_compute_fail(mapping_dict):
    hash_name = 'compute'
    status = 2
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '2'
        new_mapping_dict[uid] = json.dumps(user_list)
    r.hmset(hash_name, new_mapping_dict)
def change_status_computed(mapping_dict):
    hash_name = r_user_hash_name
    status = 4
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '4'
        new_mapping_dict[uid] = json.dumps(user_list)
    r.hmset(hash_name, new_mapping_dict)
def change_status_compute_fail(mapping_dict):
    hash_name = 'compute'
    status = 1
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '1'
        new_mapping_dict[uid] = json.dumps(user_list)
    r.hmset(hash_name, new_mapping_dict)
def scan_compute_redis():
    hash_name = "compute"
    results = r.hgetall("compute")
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == "2":
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, "3"])  # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            # mark status from 2 to 3 as identify_compute to computing
            r.hmset("compute", mapping_dict)
            # acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list
                )
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list
                )
            # compute text attribute
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts
            )

            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            iter_user_list = []
            mapping_dict = {}

    if iter_user_list != [] and mapping_dict != {}:
        r.hmset("compute", mapping_dict)
        # acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list
            )
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list
            )
        # compute text attribute
        compute_status = test_cron_text_attribute_v2(
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts
        )
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
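# The change_status_* variants above differ only in the status code written
# back ('4' computed; '1'/'2' re-queued after failure). A minimal
# consolidation sketch, assuming the same `r` client and JSON-encoded
# [in_date, status] values (change_status is a hypothetical name):
def change_status(mapping_dict, status):
    # rewrite the status field of each user's [in_date, status] record
    hash_name = 'compute'
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = str(status)
        new_mapping_dict[uid] = json.dumps(user_list)
    r.hmset(hash_name, new_mapping_dict)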
# Example 10
def get_recommentation(admin_user):
    submit_recommentation_count = 0
    compute_count = 0
    search_date = ts2datetime(time.time() - DAY)
    submit_recomment_key = 'recomment_' + admin_user + '_' + search_date
    submit_user_recomment = set(R_RECOMMENTATION.hkeys(submit_recomment_key))
    all_compute_set = set(R_RECOMMENTATION.hkeys('compute'))
    submit_recommentation_count = len(submit_user_recomment)
    compute_count = len(submit_user_recomment & all_compute_set)

    return submit_recommentation_count, compute_count
# Example 11
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date,
                                            '3'])  # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            #mark status from 2 to 3 as identify_compute to computing
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict,
                character_start_ts)

            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            iter_user_list = []
            mapping_dict = {}

    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict,
                                                     user_weibo_dict,
                                                     online_pattern_dict,
                                                     character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            #mark status from 2 to 3 (identify_compute -> computing)
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            
            if compute_status==True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            #users with no weibo text: reset status to '2' so their portrait attributes are computed later
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([in_date, '2'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}
            
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status==True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        #users with no weibo text: reset status to '2' so their portrait attributes are computed later
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            r.hmset('compute', change_mapping_dict)
def get_recommentation(admin_user):
    submit_recommentation_count = 0
    compute_count = 0
    search_date = ts2datetime(time.time() - DAY)
    submit_recomment_key = 'recomment_' + admin_user + '_' + search_date
    submit_user_recomment = set(R_RECOMMENTATION.hkeys(submit_recomment_key))
    all_compute_set = set(R_RECOMMENTATION.hkeys('compute'))
    submit_recommentation_count = len(submit_user_recomment)
    compute_count = len(submit_user_recomment & all_compute_set)
    
    return submit_recommentation_count, compute_count
def save_results(save_type, user, recomment_results):
    save_mark = False
    #run_type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time())
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME))
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    #print 'save operation results'
    R_RECOMMENTATION.hset(recomment_hash_name, user, json.dumps(recomment_results))
    save_mark = True
    return save_mark
def read_uid_list():
    date = ts2datetime(time.time()-24*3600)
    date = date.replace('-','')
    sensitive_dict = r.hgetall('identify_in_sensitive_'+str(date))
    influence_dict = r.hgetall('identify_in_influence_'+str(date))
    uid_list = []
    for uid in sensitive_dict:
        if sensitive_dict[uid] != '3':
            uid_list.append(uid)
    for uid in influence_dict:
        if influence_dict[uid] != '3':
            uid_list.append(uid)

    return uid_list
# Example 16
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    #test
    count = 0
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2': #imme
            #test
            #count += 1
            #if count >= 3:
            #    break
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict)
            
            if compute_status==True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            
            iter_user_list = []
            mapping_dict = {}
            
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict = read_flow_text(iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict)
        if compute_status==True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
def save_results(save_type, recomment_results):
    save_mark = False
    #run_type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time())
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    if save_type == 'hotspot':
        #print 'save hotspot results'
        R_RECOMMENTATION.hset(recomment_hash_name, 'auto', json.dumps(recomment_results))
        save_mark = True
    elif save_type == 'operation':
        #print 'save operation results'
        R_RECOMMENTATION.hmset(recomment_hash_name, recomment_results)
        save_mark = True
    return save_mark
# Example 18
def main():
    now_ts = time.time()
    delete_ts = datetime2ts(ts2datetime(now_ts-EXPIRE_TIME))  # timestamp to be deleted
    delete_date = ts2datetime(now_ts-EXPIRE_TIME)
    del_day = ts2datetime(now_ts-MONTH_TIME)

    index_name = flow_text_index_name_pre + del_day
    exist_es = es_flow_text.indices.exists(index=index_name)
    if exist_es:
        es_flow_text.indices.delete(index=index_name)
    index_bci = "bci_" + del_day.replace('-', '')
    exist_bci = ES_CLUSTER_FLOW1.indices.exists(index=index_bci)
    if exist_bci:
        ES_CLUSTER_FLOW1.indices.delete(index=index_bci)


    #delete @
    redis_cluster.delete("at_"+str(delete_ts))
    redis_cluster.delete("sensitive_at_"+str(delete_ts))

    #delete ip
    redis_ip.delete('ip_'+str(delete_ts))
    if WORK_TYPE == 0:
        exist_ip = es_cluster.indices.exists(index="ip_"+delete_date)
        if exist_ip:
            es_cluster.indices.delete(index="ip_"+delete_date)
    redis_ip.delete('sensitive_ip_'+str(delete_ts))
    if WORK_TYPE == 0:
        exist_ip = es_cluster.indices.exists(index="sensitive_ip_"+delete_date)
        if exist_ip:
            es_cluster.indices.delete(index="sensitive_ip_"+delete_date)

    #delete activity
    redis_activity.delete('activity_'+str(delete_ts))
    if WORK_TYPE == 0:
        exist_activity = es_cluster.indices.exists(index="activity_"+delete_date)
        if exist_activity:
            es_cluster.indices.delete(index="activity_"+delete_date)
    redis_activity.delete('sensitive_activity_'+str(delete_ts))
    if WORK_TYPE == 0:
        exist_activity = es_cluster.indices.exists(index="sensitive_activity_"+delete_date)
        if exist_activity:
            es_cluster.indices.delete(index="sensitive_activity_"+delete_date)

    #delete hashtag
    redis_cluster.delete('hashtag_'+str(delete_ts))
    redis_cluster.delete('sensitive_hashtag_'+str(delete_ts))

    #delete sensitive words
    redis_cluster.delete('sensitive_'+str(delete_ts))

    #delete recommendation
    r.delete('recomment_'+str(delete_date)+"_influence")
    r.delete('recomment_'+str(delete_date)+"_sensitive")
    r.delete("identify_in_sensitive_" + str(delete_date))
    r.delete("identify_in_influence_" + str(delete_date)))
def save_results(save_type, recomment_results):
    save_mark = False
    #run_type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time())
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    if save_type == 'hotspot':
        #print 'save hotspot results'
        R_RECOMMENTATION.hset(recomment_hash_name, 'auto',
                              json.dumps(recomment_results))
        save_mark = True
    elif save_type == 'operation':
        #print 'save operation results'
        R_RECOMMENTATION.hmset(recomment_hash_name, recomment_results)
        save_mark = True
    return save_mark
def get_recomment_history(admin_user, now_date):
    results = set()
    now_ts = datetime2ts(now_date)
    for i in range(RECOMMEND_IN_AUTO_DATE, 0, -1):
        iter_date = ts2datetime(now_ts - i * DAY)
        submit_user_recomment = 'recomment_' + admin_user + '_' + str(iter_date)
        recomment_user_list = set(R_RECOMMENTATION.hkeys(submit_user_recomment))
        results = results | recomment_user_list

    return results
def update_recommentation_compute(admin_user):
    status = False
    #step1: update compute count for the latest 6 days
    end_ts = datetime2ts(ts2datetime(time.time() - DAY))
    all_compute_set = set(R_RECOMMENTATION.hkeys('compute'))
    bulk_action = []
    for i in range(1, 6):
        iter_ts = end_ts - i* DAY
        iter_date = ts2datetime(iter_ts)
        submit_recomment_key = 'recomment_' + admin_user + '_' + iter_date
        submit_recomment_set = set(R_RECOMMENTATION.hkeys(submit_recomment_key))
        compute_count = len(submit_recomment_set & all_compute_set)
        user_results = {'compute_count': compute_count}
        action = {'update': {'_id': admin_user + '_' + str(iter_ts)}}
        bulk_action.extend([action, {'doc': user_results}])
    #step2: update bulk action
    #print 'bulk_action:', bulk_action
    es_operation.bulk(bulk_action, index=operation_index_name, doc_type=operation_index_type)
    status = True
    return status
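# For reference, each loop iteration above appends one Elasticsearch
# bulk-update pair; with admin_user 'admin' and iter_ts 1378051200 the two
# entries would be (illustrative values only):
#     {'update': {'_id': 'admin_1378051200'}}
#     {'doc': {'compute_count': 3}}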
# Example 23
def recommentation_in(input_ts, recomment_type, submit_user):
    date = ts2datetime(input_ts)
    recomment_results = []
    # read from redis
    results = []
    hash_name = 'recomment_'+str(date) + "_" + recomment_type
    identify_in_hashname = "identify_in_" + str(date)
    submit_user_recomment = "recomment_" + submit_user + "_" + str(date) # 用户自推荐名单
    results = r.hgetall(hash_name)
    if not results:
        return []
    # search from user_profile to rich the show information
    recommend_list = set(r.hkeys(hash_name))
    identify_in_list = set(r.hkeys("compute"))
    submit_user_recomment = set(r.hkeys(submit_user_recomment))
    recomment_results = list(recommend_list - identify_in_list)
    recomment_results = list(set(recomment_results) - submit_user_recomment)
    if recomment_results:
        results = get_user_detail(date, recomment_results, 'show_in', recomment_type)
    else:
        results = []
    return results
# Example 24
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        print 'user_list:', user_list
        in_date = user_list[0]
        status = user_list[1]
        if status == '1':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing
    print 'mapping_dict:', mapping_dict
    r.hmset('compute', mapping_dict)
    #acquire bulk user weibo data
    #user_weibo_dict = read_user_weibo(iter_user_list)
    #compute text attribute
    #compute_status = compute2in(iter_user_list, user_weibo_dict, status='insert')
    compute_status = False
    if compute_status==True:
        change_status_computed(mapping_dict)
# Example 25
def change_status_computed(mapping_dict):
    hash_name = 'compute'
    status = 4
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '4'
        in_date = user_list[0]
        new_mapping_dict[uid] = json.dumps(user_list)
        #revise identify_in_date
        influence_hashname = 'identify_in_influence_' + str(in_date)
        sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
        manual_hashname = "identify_in_manual_" + str(in_date)
        tmp = r.hget(influence_hashname, uid)
        tmp1 = r.hget(sensitive_hashname, uid)
        if tmp:
            r.hset(influence_hashname, uid, '4')
        elif tmp1:
            r.hset(sensitive_hashname, uid, '4')
        else:
            r.hset(manual_hashname, uid, '4')
    r.hmset(hash_name, new_mapping_dict)
def main():
    now_ts = time.time()
    delete_ts = datetime2ts(ts2datetime(now_ts-EXPIRE_TIME))  # timestamp to be deleted
    delete_date = ts2datetime(now_ts-EXPIRE_TIME)

    #delete @
    r_cluster.delete("at_"+str(delete_ts))

    #delete ip
    r_cluster.delete('new_ip_'+str(delete_ts))

    #delete activity
    r_cluster.delete('activity_'+str(delete_ts))

    #delete hashtag
    r_cluster.delete('hashtag_'+str(delete_ts))

    #delete sensitive words
    r_cluster.delete('sensitive_'+str(delete_ts))

    #delete recommendation
    r.delete('recomment_'+str(delete_date))
def change_status_computed(mapping_dict):
    hash_name = 'compute'
    status = 4
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '4'
        in_date = user_list[0]
        new_mapping_dict[uid] = json.dumps(user_list)
        #revise identify_in_date
        influence_hashname = 'identify_in_influence_'+str(in_date)
        sensitive_hashname = 'identify_in_sensitive_'+str(in_date)
        manual_hashname = "identify_in_manual_"+str(in_date)
        tmp = r.hget(influence_hashname, uid)
        tmp1 = r.hget(sensitive_hashname, uid)
        if tmp:
            r.hset(influence_hashname, uid, '4')
        elif tmp1:
            r.hset(sensitive_hashname, uid, '4')
        else:
            r.hset(manual_hashname, uid, '4')
    r.hmset(hash_name, new_mapping_dict)
# Example 28
def get_operate_information():
    result = dict()
    now_ts = time.time()
    date = ts2datetime(now_ts - 24*3600)
    #test
    date = '2013-09-07'
    delete_date = ''.join(date.split('-'))
    #test
    #delete_date = '20150727'
    result['in_count'] = len(r_recomment.hkeys('recomment_'+str(date)))
    out_count_list = r_recomment.hget('recommend_delete_list', delete_date)
    #print 'out_count_list:', out_count_list
    if out_count_list:
        result['out_count'] = len(json.loads(out_count_list))
    else:
        result['out_count'] = 0
    compute_list = r_recomment.hkeys('compute')
    '''
    if compute_list:
        result['compute'] = len(compute_list)
    '''
    #print 'operate compute:', result
    return result
# Example 29
def update_recommentation_compute(admin_user):
    status = False
    #step1: update compute count for the latest 6 days
    end_ts = datetime2ts(ts2datetime(time.time() - DAY))
    all_compute_set = set(R_RECOMMENTATION.hkeys('compute'))
    bulk_action = []
    for i in range(1, 6):
        iter_ts = end_ts - i * DAY
        iter_date = ts2datetime(iter_ts)
        submit_recomment_key = 'recomment_' + admin_user + '_' + iter_date
        submit_recomment_set = set(
            R_RECOMMENTATION.hkeys(submit_recomment_key))
        compute_count = len(submit_recomment_set & all_compute_set)
        user_results = {'compute_count': compute_count}
        action = {'update': {'_id': admin_user + '_' + str(iter_ts)}}
        bulk_action.extend([action, {'doc': user_results}])
    #step2: update bulk action
    #print 'bulk_action:', bulk_action
    es_operation.bulk(bulk_action,
                      index=operation_index_name,
                      doc_type=operation_index_type)
    status = True
    return status
def filter_rules(candidate_results):
    results = []
    #rule1: activity count
    filter_result1 = filter_activity(candidate_results)
    #rule2: ip count
    filter_result2 = filter_ip(filter_result1)
    #rule3: retweet count & beretweeted count
    filter_result3 = filter_retweet_count(filter_result2)
    #rule4: mention count
    results = filter_mention(filter_result3)
    #rule5: compute count
    compute_uid_set = r.hkeys("compute")
    results = set(results) - set(compute_uid_set)
    return results
# Example 32
def scan_compute_redis_v2():
    task_type = 'user'
    bulk_action = []
    count = 0
    iter_user_list = []
    verified_mark_dict = dict()
    relation_mark_dict = dict()
    submit_user_dict = dict()
    submit_ts_dict = dict()
    while True:
        r_user_item = r.rpop(r_user_update_hash_name)
        #print 'r_user_item:', r_user_item
        if r_user_item:
            #print 'r_user_item:', r_user_item
            r_user_item = json.loads(r_user_item)
            uid = r_user_item[0]
            relation_mark = r_user_item[1]
            iter_user_list.append(uid)
            relation_mark_dict[uid] = relation_mark
            count += 1
        else:
            break

        if count % 100 == 0 and count != 0:
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list)
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict,
                character_start_ts, relation_mark_dict, task_type,
                submit_user_dict, submit_ts_dict)

            iter_user_list = []
            relation_mark_dict = dict()

    if iter_user_list != []:
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list)
        compute_status = test_cron_text_attribute_v2(
            user_keywords_dict, user_weibo_dict, online_pattern_dict,
            character_start_ts, relation_mark_dict, task_type,
            submit_user_dict, submit_ts_dict)
# Example 33
def get_operate_information():
    result = dict()
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    date = ts2datetime(now_ts - DAY)
    delete_date = ''.join(date.split('-'))
    result['in_count'] = len(r_recomment.hkeys('recomment_' + str(date)))
    out_count_list = r_recomment.hget('recommend_delete_list', delete_date)
    if out_count_list:
        result['out_count'] = len(json.loads(out_count_list))
    else:
        result['out_count'] = 0
    compute_list = r_recomment.hkeys('compute')

    if compute_list:
        result['compute'] = len(compute_list)
    else:
        result['compute'] = 0

    return result
def change_status_compute_fail(mapping_dict):
    hash_name = 'compute'
    status = 1
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '1'
        new_mapping_dict[uid] = json.dumps(user_list)
        in_date = user_list[0]
        #revise identify_in_date
        influence_hashname = 'identify_in_influence_'+str(in_date)
        sensitive_hashname = 'identify_in_sensitive_'+str(in_date)
        tmp = r.hget(influence_hashname, uid)
        if tmp:
            r.hset(influence_hashname, uid, '1')
        else:
            r.hset(sensitive_hashname, uid, '1')
    r.hmset(hash_name, new_mapping_dict)
def createWordTree():
    awords = []
    sensitive_words = r.hkeys('sensitive_words')
    #for b in open('./../../sensitive_words.txt', 'rb'):
    #    awords.append(b.strip())

    awords = sensitive_words
    for word in awords:
        temp = wordTree
        for a in range(0,len(word)):
            index = ord(word[a])
            if a < (len(word) - 1):
                if temp[index] == None:
                    node = [[None for x in range(256)],0]
                    temp[index] = node
                elif temp[index] == 1:
                    node = [[None for x in range(256)],1]
                    temp[index] = node

                temp = temp[index][0]
            else:
                temp[index] = 1
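# createWordTree builds a 256-ary byte trie: an inner node is
# [children, terminal_flag] and a bare 1 marks a match with no longer word
# beyond it. A lookup sketch, assuming the module-level wordTree root is a
# plain 256-entry list of None (its definition is not shown above);
# contains_sensitive_word is a hypothetical helper:
def contains_sensitive_word(text):
    # try every start offset and walk the trie byte by byte
    for start in range(len(text)):
        temp = wordTree
        for pos in range(start, len(text)):
            node = temp[ord(text[pos])]
            if node is None:
                break            # no sensitive word continues with this byte
            if node == 1 or node[1] == 1:
                return True      # reached the end of a sensitive word
            temp = node[0]       # descend into the child table
    return False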
# Example 36
def main():
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    date = ts2datetime(now_ts - DAY)
    print date
    # auto recommendation: steps 1-4
    #step1: read from top es_daily_rank
    top_user_set, user_dict = search_from_es(date)
    #step2: filter black_uid
    black_user_set = read_black_user()
    subtract_user_set = top_user_set - black_user_set
    #step3: filter users have been in
    subtract_user_set = list(subtract_user_set)
    candidate_results = filter_in(subtract_user_set)
    #step4: filter rules about ip count& reposts/bereposts count&activity count
    results = filter_rules(candidate_results)
    new_date = ts2datetime(now_ts)
    hashname_influence = "recomment_" + new_date + "_influence"
    if results:
        for uid in results:
            r.hset(hashname_influence, uid, "0")
    #step5: get sensitive user
    print date,'date'
    sensitive_user = list(get_sensitive_user(date))
    hashname_sensitive = "recomment_" + new_date + "_sensitive"
    if sensitive_user:
        for uid in sensitive_user:
            print uid, hashname_sensitive
            r.hset(hashname_sensitive, uid, "0")

    results.extend(sensitive_user)
    results = set(results)
    # step6: write to recommentation csv/redis ++for super admin
    hashname_submit = "submit_recomment_" + new_date
    if results:
        for uid in results:
            r.hset(hashname_submit, uid, json.dumps({"system":1, "operation":"system"}))
def main():
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    date = ts2datetime(now_ts - DAY)
    # auto recommendation: steps 1-4
    #step1: read from top es_daily_rank
    top_user_set, user_dict = search_from_es(date)
    #step2: filter black_uid
    black_user_set = read_black_user()
    subtract_user_set = top_user_set - black_user_set
    #step3: filter users have been in
    subtract_user_set = list(subtract_user_set)
    candidate_results = filter_in(subtract_user_set)
    #step4: filter rules about ip count& reposts/bereposts count&activity count
    results = filter_rules(candidate_results)
    new_date = ts2datetime(now_ts)
    hashname_influence = "recomment_" + new_date + "_influence"
    if results:
        for uid in results:
            r.hset(hashname_influence, uid, "0")
    #step5: get sensitive user
    sensitive_user = list(get_sensitive_user(date))
    hashname_sensitive = "recomment_" + new_date + "_sensitive"
    if sensitive_user:
        for uid in sensitive_user:
            r.hset(hashname_sensitive, uid, "0")
    results.extend(sensitive_user)
    results = set(results)
    #step6: write to recommentation csv/redis
    hashname_submit = "submit_recomment_" + new_date
    if results:
        for uid in results:
            r.hset(hashname_submit, uid, json.dumps({"system":1, "operation":"system"}))
# Example 38
def save_result(results):
    hash_name = 'overview'
    for item in results:
        r_recomment.hset(hash_name, item, results[item])
    return True
# Example 39
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                if sensitive_history_dict.get('fields',0):
                    #print sensitive_history_dict['fields'][sensitive_string][0]
                    #print top_sensitive
                    sensitive_value = math.log(sensitive_history_dict['fields'][sensitive_string][0]/float(top_sensitive)*9+1, 10)*100
                    #print "sensitive_value", sensitive_value
                else:
                    sensitive_value = 0
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words, sensitive_value])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
                if auth:
                    hashname_submit = "submit_recomment_" + date
                    tmp_data = json.loads(r.hget(hashname_submit, uid))
                    recommend_list = (tmp_data['operation']).split('&')
                    admin_list = []
                    admin_list.append(tmp_data['system'])
                    admin_list.append(list(set(recommend_list)))
                    admin_list.append(len(recommend_list))
    return results

def get_evaluate_max(index_name):
    max_result = {}
    index_type = 'bci'
    evaluate_index = ['user_index']
    for evaluate in evaluate_index:
        query_body = {
        'query':{
            'match_all':{}
# re-execute this program every 30 minutes

import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    compute_attribute(user_weibo_dict)
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_'+str(date), uid, '3') # finish compute
        else:
            r.hset('identify_in_influence_'+str(date), uid, '3') # finish compute
def scan_compute_redis():
    task_mark = 'user'
    hash_name = r_user_hash_name
    results = r.hgetall(hash_name)
    iter_user_list = []
    mapping_dict = dict()
    verify_mark_dict = dict()
    relation_mark_dict = dict()
    submit_user_dict = dict()
    submit_ts_dict = dict()
    count = 0
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        verify_mark = user_list[2]
        relation_list = user_list[3]
        submit_user = user_list[4]
        submit_ts = datetime2ts(in_date)
        verify_mark_dict[uid] = verify_mark
        relation_mark_dict[uid] = relation_list
        submit_user_dict[uid] = submit_user
        submit_ts_dict[uid] = submit_ts
        if status == '2':  #imme
            #test
            count += 1
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps(
                [in_date, '3', verify_mark, relation_list,
                 submit_user])  # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            r.hmset(r_user_hash_name, mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict,
                character_start_ts, relation_mark_dict, task_mark,
                submit_user_dict, submit_ts_dict)

            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            #users with no weibo in the latest week: reset compute status to '2'
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(
                    user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([
                        in_date, '2', verify_mark_dict[change_user],
                        relation_mark_dict[change_user],
                        submit_user_dict[change_user],
                        submit_ts_dict[change_user]
                    ])
                r.hmset(r_user_hash_name, change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}
            verify_mark_dict = dict()
            relation_mark_dict = dict()
            submit_user_dict = dict()
            submit_ts_dict = dict()

    if iter_user_list != [] and mapping_dict != {}:
        r.hmset(r_user_hash_name, mapping_dict)
        #acquire bulk user weibo data
        print 'iter_user_list:', len(iter_user_list)
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list)
        #compute text attribute
        print 'user_weibo_dict:', len(user_weibo_dict)
        compute_status = test_cron_text_attribute_v2(
            user_keywords_dict, user_weibo_dict, online_pattern_dict,
            character_start_ts, relation_mark_dict, task_mark,
            submit_user_dict, submit_ts_dict)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        #users with no weibo in the latest week: reset compute status to '2'
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(
                user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([
                    in_date, '2', verify_mark_dict[change_user],
                    relation_mark_dict[change_user],
                    submit_user_dict[change_user], submit_ts_dict[change_user]
                ])
            r.hmset(r_user_hash_name, change_mapping_dict)
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    #test
    count = 0
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '1': #imme
            #test
            count += 1
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            
            if compute_status==True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            
            #users with no weibo in the latest week: reset compute status to '1'
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([in_date, '1'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}
            
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        print 'iter_user_list:', len(iter_user_list)
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        print 'user_weibo_dict:', len(user_weibo_dict)
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status==True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        #users with no weibo in the latest week: reset compute status to '1'
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '1'])
            r.hmset('compute', change_mapping_dict)
    }

    result = es_cluster.search(index=index_name,
                               doc_type="bci",
                               body=query_body)["hits"]["hits"]
    sensitive_uid = []
    for item in result:
        sensitive_uid.append(item['_source']['uid'])

    return sensitive_uid


if __name__ == "__main__":
    now_date = ts2datetime(time.time()).replace('-', '')
    former_date = ts2datetime(time.time() - 7 * 24 * 3600).replace('-', '')
    r_recommend.hdel('recommend_sensitive',
                     former_date)  # delete 7 days ago recommentation uid_list
    r_recommend.hdel('recommend_influence',
                     former_date)  # delete 7 days ago recommentation uid_list
    now_date = '20130901'  # test
    sensitive_weibo_uid = search_sensitive_weibo(
        now_date)  # sensitive words uid list, direct recommend in
    top_influence_uid = search_top_k(now_date,
                                     10000)  # top influence uid list, filter

    # step 1: no sensitive user in top influence
    revise_influence_uid_list = set(top_influence_uid) - set(
        sensitive_weibo_uid)
    black_uid_list = read_black_user_list()
    revise_influence_uid_list = set(revise_influence_uid_list) - set(
        black_uid_list)
    print 'filter black list: ', len(revise_influence_uid_list)
        "sort": [{"user_index": {"order": "desc"}}]
    }

    result = es_cluster.search(index=index_name,doc_type="bci", body=query_body)["hits"]["hits"]
    sensitive_uid = []
    for item in result:
        sensitive_uid.append(item['_source']['uid'])

    return sensitive_uid



if __name__ == "__main__":
    now_date = ts2datetime(time.time()).replace('-','')
    former_date = ts2datetime(time.time()-7*24*3600).replace('-','')
    r_recommend.hdel('recommend_sensitive', former_date) # delete 7 days ago recommentation uid_list
    r_recommend.hdel('recommend_influence', former_date) # delete 7 days ago recommentation uid_list
    now_date = '20130901' # test
    sensitive_weibo_uid = search_sensitive_weibo(now_date) # sensitive words uid list, direct recommend in
    top_influence_uid = search_top_k(now_date, 10000) # top influence uid list, filter

    # step 1: no sensitive user in top influence
    revise_influence_uid_list = set(top_influence_uid) - set(sensitive_weibo_uid)
    black_uid_list = read_black_user_list()
    revise_influence_uid_list = set(revise_influence_uid_list) - set(black_uid_list)
    print 'filter black list: ', len(revise_influence_uid_list)
    #total = set(sensitive_weibo_uid) | set(top_influence_uid)
    # step 2: no recommending
    sensitive_uid_recommending_filter = filter_recommend(sensitive_weibo_uid)
    top_influence_recommending_filter = filter_recommend(revise_influence_uid_list)
    # step 3: no one in portrait
def save_recommentation2redis(date, user_set):
    hash_name = 'recomment_' + str(date)
    status = 0
    for uid in user_set:
        r.hset(hash_name, uid, status)
    return True
# Example 47
def get_attr(date):
    results = dict()
    total_number = es.count(index="sensitive_user_portrait",
                            doc_type="user")['count']
    results['total_number'] = total_number

    max_result = get_evaluate_max()
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "term": {
                        "sensitive": 0
                    }
                }
            }
        }
    }
    influence_number = es.count(index="sensitive_user_portrait",
                                doc_type="user",
                                body=query_body)['count']
    results['sensitive_number'] = total_number - influence_number
    results['influence_number'] = influence_number

    # political tendency statistics
    query_body = query_body_module('politics')
    politic_array = es.search(
        index='sensitive_user_portrait', doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    politic_dict = dict()
    for item in politic_array:
        politic_dict[item['key']] = item['doc_count']
    results['politics'] = politic_dict

    # number of users recommended for admission
    recommend_in_sensitive = 0
    recommend_in_sensitive = r.hlen("recomment_" + date + "_sensitive")

    recommend_in_influence = 0
    recommend_in_influence = r.hlen("recomment_" + date + "_influence")
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive

    # group analysis tasks
    results['monitor_number'] = [4, 83]  # test
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        'task_type': "detect"
                    }
                }, {
                    "term": {
                        "state": 0
                    }
                }]
            }
        }
    }
    group_detect_number = es.count(index=group_index_name,
                                   doc_type=group_index_type,
                                   body=query_body)["count"]
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        'task_type': "analysis"
                    }
                }, {
                    "term": {
                        "state": 0
                    }
                }]
            }
        }
    }
    group_analysis_number = es.count(index=group_index_name,
                                     doc_type=group_index_type,
                                     body=query_body)["count"]
    results["group_detect_number"] = group_detect_number
    results["group_analysis_number"] = group_analysis_number

    # sensitive words
    query_body = query_body_module('sensitive_words_string')
    sw_list = es.search(
        index='sensitive_user_portrait', doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_words = []
    for item in sw_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_words.append(temp)
    results['sensitive_words'] = sensitive_words

    query_body = query_body_module('keywords_string')
    sg_list = es.search(
        index='sensitive_user_portrait', doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_geo = []
    for item in sg_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_geo.append(temp)
    results['keywords_string'] = sensitive_geo

    query_body = query_body_module('sensitive_hashtag_string')
    sh_list = es.search(
        index='sensitive_user_portrait', doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_hashtag = []
    for item in sh_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_hashtag.append(temp)
    results['sensitive_hashtag'] = sensitive_hashtag

    query_body = query_body_module('sensitive_activity_geo_aggs')
    sg_list = es.search(
        index='sensitive_user_portrait', doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_geo = []
    for item in sg_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_geo.append(temp)
    results['sensitive_geo'] = sensitive_geo
    '''
    query_body = query_body_module('domain_string')
    sd_list =  es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    domain = []
    for item in sd_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        domain.append(temp)
    results['domain'] = domain
    '''

    # tendency distribution

    # domain and topic
    rank_results = get_top_user()
    results.update(rank_results)

    # rank
    results['importance'] = search_in_portrait('importance', max_result)
    results['sensitive'] = search_in_portrait('sensitive', max_result)
    results['influence'] = search_in_portrait('influence', max_result)
    results['activeness'] = search_in_portrait('activeness', max_result)

    # retweet and comment counts of sensitive weibo posts
    mid_list = get_top_mid()
    sensitive_hot_retweet = sort_retweet_sensitive_weibo(mid_list)
    sensitive_hot_comment = sort_comment_sensitive_weibo(mid_list)
    sensitive_weibo_text = get_weibo_detail(mid_list)

    results['sensitive_hot_retweet'] = sensitive_hot_retweet
    results['sensitive_hot_comment'] = sensitive_hot_comment
    results['sensitive_weibo_text'] = sensitive_weibo_text

    r.set('overview', json.dumps(results))
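# query_body_module is not defined in this listing; the callers above read
# ['aggregations']['all_interests']['buckets'], which matches a terms
# aggregation. A plausible reconstruction (an assumption, not the original
# helper):
def query_body_module(attribute_name):
    return {
        'query': {'match_all': {}},
        'size': 0,  # aggregation only, no hits needed
        'aggs': {
            'all_interests': {
                'terms': {'field': attribute_name, 'size': 20}
            }
        }
    }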
def save_recommentation2redis(date, user_set):
    hash_name = 'recommend'
    date = date.replace('-','')
    if user_set:
        r.hset(hash_name, date, json.dumps(list(user_set)))
    return 1
def get_attr(date):
    results = dict()
    total_number = es.count(index="sensitive_user_portrait", doc_type="user")['count']
    results['total_number'] = total_number

    max_result = get_evaluate_max()
    query_body={
        "query":{
            "filtered":{
                "filter":{
                    "term":{
                        "sensitive": 0
                    }
                }
            }
        }
    }
    influence_number = es.count(index="sensitive_user_portrait", doc_type="user", body=query_body)['count']
    results['sensitive_number'] = total_number - influence_number
    results['influence_number'] = influence_number

    # 政治倾向性统计
    query_body = query_body_module('politics')
    politic_array =  es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    politic_dict = dict()
    for item in politic_array:
        politic_dict[item['key']] = item['doc_count']
    results['politics'] = politic_dict

    # 入库推荐人数
    recommend_in_sensitive = 0
    recommend_in_sensitive = r.hlen("recomment_" + date +'sensitive')

    recommend_in_influence = 0
    recommend_in_influence = r.hlen("recomment_" + date + "_influence")
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive

    # 群体分析任务
    results['monitor_number'] = [4, 83] # test
    query_body = {
        "query":{
            "bool":{
                "must":[
                    {"term":{'task_type':"detect"}},
                    {"term":{"state":0}}
                ]
            }
        }
    }
    group_detect_number = es.count(index=group_index_name, doc_type=group_index_type, body=query_body)["count"]
    query_body = {
        "query":{
            "bool":{
                "must":[
                    {"term":{'task_type':"analysis"}},
                    {"term":{"state":0}}
                ]
            }
        }
    }
    group_analysis_number = es.count(index=group_index_name, doc_type=group_index_type, body=query_body)["count"]
    results["group_detect_number"] = group_detect_number
    results["group_analysis_number"] = group_analysis_number


    # sensitive words
    query_body = query_body_module('sensitive_words_string')
    sw_list = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    results['sensitive_words'] = [[item['key'], item['doc_count']] for item in sw_list]

    query_body = query_body_module('keywords_string')
    kw_list = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    results['keywords_string'] = [[item['key'], item['doc_count']] for item in kw_list]

    query_body = query_body_module('sensitive_hashtag_string')
    sh_list = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    results['sensitive_hashtag'] = [[item['key'], item['doc_count']] for item in sh_list]

    query_body = query_body_module('sensitive_activity_geo_aggs')
    sg_list = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    results['sensitive_geo'] = [[item['key'], item['doc_count']] for item in sg_list]
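    # The aggregation blocks above all unpack the same
    # ['aggregations']['all_interests']['buckets'] shape; a helper
    # (hypothetical, not in the original) could fold query and unpack into one:
    #   def top_buckets(field_name):
    #       body = query_body_module(field_name)
    #       res = es.search(index='sensitive_user_portrait', doc_type='user', body=body)
    #       return [[b['key'], b['doc_count']] for b in res['aggregations']['all_interests']['buckets']]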


    '''
    query_body = query_body_module('domain_string')
    sd_list =  es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    domain = []
    for item in sd_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        domain.append(temp)
    results['domain'] = domain
    '''

    # tendency distribution


    # domain and topic
    rank_results = get_top_user()
    results.update(rank_results)



    # rank
    results['importance'] = search_in_portrait('importance', max_result)
    results['sensitive'] = search_in_portrait('sensitive', max_result)
    results['influence'] = search_in_portrait('influence', max_result)
    results['activeness'] = search_in_portrait('activeness', max_result)

    # retweet and comment counts of sensitive weibo
    mid_list = get_top_mid()
    sensitive_hot_retweet = sort_retweet_sensitive_weibo(mid_list)
    sensitive_hot_comment = sort_comment_sensitive_weibo(mid_list)
    sensitive_weibo_text = get_weibo_detail(mid_list)

    results['sensitive_hot_retweet'] = sensitive_hot_retweet
    results['sensitive_hot_comment'] = sensitive_hot_comment
    results['sensitive_weibo_text'] = sensitive_weibo_text

    r.set('overview', json.dumps(results))
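# query_body_module is imported from elsewhere and not shown in this listing.
# Judging from how its result is consumed (a terms aggregation read back via
# ['aggregations']['all_interests']['buckets']), it presumably builds a body
# roughly like this sketch (the aggregation size is an assumption):
def query_body_module_sketch(field_name):
    return {
        "query": {"match_all": {}},
        "size": 0,  # only the aggregation is needed, not the hits
        "aggs": {
            "all_interests": {
                "terms": {"field": field_name, "size": 20}
            }
        }
    }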
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    #test
    count = 0
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '1':  # 'imme': immediate users waiting to be computed
            #test
            count += 1
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date,
                                            '3'])  # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:  # flush every 100 users
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict,
                character_start_ts)

            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            # if a user posted no weibo in the latest week, reset compute status to '1'
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(
                    user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps(
                        [in_date, '1'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}

    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        print 'iter_user_list:', len(iter_user_list)
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list)
        #compute text attribute
        print 'user_weibo_dict:', len(user_weibo_dict)
        compute_status = test_cron_text_attribute_v2(user_keywords_dict,
                                                     user_weibo_dict,
                                                     online_pattern_dict,
                                                     character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # if a user posted no weibo in the latest week, reset compute status to '1'
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(
                user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '1'])
            r.hmset('compute', change_mapping_dict)
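# Status lifecycle of the 'compute' hash, as reconstructed from this listing:
# each value is json.dumps([in_date, status]) where
#   '1' / '2' -> waiting to be computed (immediate / recommended-in),
#   '3'       -> computing (set by scan_compute_redis before each batch),
#   '4'       -> computed successfully (change_status_computed);
# a failed batch is pushed back to '1' or '2' by change_status_compute_fail.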
import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute

reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

date = ts2datetime(time.time()-24*3600).replace('-', '')
temp = r.hget('compute_appoint', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text (extraction is not implemented in this listing;
    # user_weibo_dict is passed to compute_attribute empty)
    compute_attribute(user_weibo_dict)
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_'+str(date), uid, '3') # finish compute
        else:
            # branch truncated in the original listing; by analogy with the
            # identify_in_* handling in scan_compute_redis, it presumably
            # marks the influence hash (assumption, not from the source):
            r.hset('identify_in_influence_' + str(date), uid, '3')

# Example #52
def save_result(results):
    hash_name = 'overview'
    for item in results:
        r_recomment.hset(hash_name, item, results[item])
    return True
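# Note: r_recomment is presumably the same recommendation Redis handle that
# other examples import as R_RECOMMENTATION. Unlike the json-encoded
# r.set('overview', ...) variant above, this one stores each metric as its
# own hash field, so a single value could be fetched with
# r_recomment.hget('overview', 'total_number').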
# Example #53
def scan_compute_redis():
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date,
                                            '3'])  # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            manual_hashname = "identify_in_manual_" + str(in_date)
            tmp = r.hget(influence_hashname, uid)
            tmp1 = r.hget(sensitive_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            elif tmp1:
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            #mark status from 1 to 3 as identify_compute to computing
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                    iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                    iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(
                user_keywords_dict, user_weibo_dict, online_pattern_dict,
                character_start_ts)

            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)

            # users with no weibo text are reset to status '2' so they can be retried
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(
                    user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps(
                        [in_date, '2'])
                r.hmset('compute', change_mapping_dict)

            iter_user_list = []
            mapping_dict = {}

    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(
                iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(
                iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict,
                                                     user_weibo_dict,
                                                     online_pattern_dict,
                                                     character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # users with no weibo text are reset to status '2' so they can be retried
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(
                user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            r.hmset('compute', change_mapping_dict)
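# Note on the identify_in_* hashes touched above: identify_in_influence_<date>,
# identify_in_sensitive_<date> and identify_in_manual_<date> appear to record
# how each uid entered the queue, and are advanced to '3' in step with the
# 'compute' hash so per-source progress can be tracked.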
def save_recommentation2redis(date, user_set):
    hash_name = 'recommend'
    date = date.replace('-', '')
    if user_set:
        r.hset(hash_name, date, json.dumps(list(user_set)))
    return 1
# Example #55
# re-execute this program every 30 minutes

import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text (extraction is not implemented in this listing;
    # user_weibo_dict is passed to compute_attribute empty)
    compute_attribute(user_weibo_dict)
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid,
                   '3')  # finish compute