"sort": [{"user_index": {"order": "desc"}}]
    }

    result = es_cluster.search(index=index_name,doc_type="bci", body=query_body)["hits"]["hits"]
    sensitive_uid = []
    for item in result:
        sensitive_uid.append(item['_source']['uid'])

    return sensitive_uid



if __name__ == "__main__":
    # Script entry point: drops week-old recommendation entries, then builds
    # and filters the candidate uid lists for the current date.
    # Date keys are ts2datetime() strings with the dashes removed
    # (presumably YYYYMMDD, matching the literal used below).
    now_date = ts2datetime(time.time()).replace('-','')
    former_date = ts2datetime(time.time()-7*24*3600).replace('-','')
    r_recommend.hdel('recommend_sensitive', former_date) # remove the uid list stored under the date from 7 days ago
    r_recommend.hdel('recommend_influence', former_date) # remove the uid list stored under the date from 7 days ago
    # NOTE(review): the next line overrides the date computed above with a
    # fixed value; it is marked as a test setting -- confirm before a real run.
    now_date = '20130901' # test
    sensitive_weibo_uid = search_sensitive_weibo(now_date) # uids matched on sensitive words; per the original note these are recommended directly
    top_influence_uid = search_top_k(now_date, 10000) # top-influence uid candidates (second argument 10000, presumably k); filtered below

    # step 1: remove sensitive users from the top-influence candidates
    revise_influence_uid_list = set(top_influence_uid) - set(sensitive_weibo_uid)
    # Also remove every uid returned by read_black_user_list().
    black_uid_list = read_black_user_list()
    revise_influence_uid_list = set(revise_influence_uid_list) - set(black_uid_list)
    print 'filter black list: ', len(revise_influence_uid_list)
    #total = set(sensitive_weibo_uid) | set(top_influence_uid)
    # step 2: pass both lists through filter_recommend()
    # (presumably drops uids that are already being recommended -- verify
    # against filter_recommend's definition)
    sensitive_uid_recommending_filter = filter_recommend(sensitive_weibo_uid)
    top_influence_recommending_filter = filter_recommend(revise_influence_uid_list)
    # step 3: no one in portrait
    }

    result = es_cluster.search(index=index_name,
                               doc_type="bci",
                               body=query_body)["hits"]["hits"]
    sensitive_uid = []
    for item in result:
        sensitive_uid.append(item['_source']['uid'])

    return sensitive_uid


if __name__ == "__main__":
    # Script entry point: drops week-old recommendation entries, then builds
    # and filters the candidate uid lists for the current date.
    # Date keys are ts2datetime() strings with the dashes removed
    # (presumably YYYYMMDD, matching the literal used below).
    now_date = ts2datetime(time.time()).replace('-', '')
    former_date = ts2datetime(time.time() - 7 * 24 * 3600).replace('-', '')
    r_recommend.hdel('recommend_sensitive',
                     former_date)  # remove the uid list stored under the date from 7 days ago
    r_recommend.hdel('recommend_influence',
                     former_date)  # remove the uid list stored under the date from 7 days ago
    # NOTE(review): the next line overrides the date computed above with a
    # fixed value; it is marked as a test setting -- confirm before a real run.
    now_date = '20130901'  # test
    sensitive_weibo_uid = search_sensitive_weibo(
        now_date)  # uids matched on sensitive words; per the original note these are recommended directly
    top_influence_uid = search_top_k(now_date,
                                     10000)  # top-influence uid candidates (second argument 10000, presumably k); filtered below

    # step 1: remove sensitive users from the top-influence candidates
    revise_influence_uid_list = set(top_influence_uid) - set(
        sensitive_weibo_uid)
    # Also remove every uid returned by read_black_user_list().
    black_uid_list = read_black_user_list()
    revise_influence_uid_list = set(revise_influence_uid_list) - set(
        black_uid_list)
    print 'filter black list: ', len(revise_influence_uid_list)
from text_attribute import compute_attribute

reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process the entries queued under the 'compute_appoint' hash for yesterday's
# date: run the attribute computation, then mark every uid as finished ('3')
# in the per-day 'identify_in_*' hash that matches its source.
date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
temp = r.hget('compute_appoint', date)
if temp:
    # Each entry is indexed as entry[0] -> uid, entry[1] -> source flag.
    now_list = json.loads(temp)
    uid_list = [entry[0] for entry in now_list]
    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): nothing visible here fills user_weibo_dict from uid_list
    # before it is handed to compute_attribute -- confirm whether the
    # extraction step is missing.
    compute_attribute(user_weibo_dict)
    # Iterate the entries directly; the previous range(now_list) call raised
    # TypeError because now_list is a list, not an integer.
    for entry in now_list:
        uid = entry[0]
        source = entry[1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finished computing
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')

    # All entries for this date are handled, so drop the queue field.
    r.hdel('compute_appoint', date)


from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process the entries queued under the 'compute_now' hash for today's date:
# run the attribute computation, mark every uid as finished ('3') in the
# per-day 'identify_in_*' hash that matches its source, then update the queue.
date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    # Each entry is indexed as entry[0] -> uid, entry[1] -> source flag.
    now_list = json.loads(temp)
    uid_list = [entry[0] for entry in now_list]
    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): nothing visible here fills user_weibo_dict from uid_list
    # before it is handed to compute_attribute -- confirm whether the
    # extraction step is missing.
    compute_attribute(user_weibo_dict)
    # Iterate the entries directly; the previous range(now_list) call raised
    # TypeError because now_list is a list, not an integer.
    for entry in now_list:
        uid = entry[0]
        source = entry[1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finished computing
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')

    # Re-read the queue and keep only entries that were not in the batch just
    # processed. The field may be gone by now, so guard against None.
    latest = r.hget('compute_now', date)
    renow_list = json.loads(latest) if latest else []
    # Entries decoded from JSON arrays are lists (unhashable), so a set
    # difference would raise TypeError; filter by membership instead.
    revise_list = [entry for entry in renow_list if entry not in now_list]
    if revise_list:
        # hset needs a value; the original call passed none. Writing back the
        # unprocessed remainder is the presumed intent -- confirm with owner.
        r.hset('compute_now', date, json.dumps(revise_list))
    else:
        r.hdel('compute_now', date)


Ejemplo n.º 5
0
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process the entries queued under the 'compute_now' hash for today's date:
# run the attribute computation, mark every uid as finished ('3') in the
# per-day 'identify_in_*' hash that matches its source, then update the queue.
date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    # Each entry is indexed as entry[0] -> uid, entry[1] -> source flag.
    now_list = json.loads(temp)
    uid_list = [entry[0] for entry in now_list]
    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): nothing visible here fills user_weibo_dict from uid_list
    # before it is handed to compute_attribute -- confirm whether the
    # extraction step is missing.
    compute_attribute(user_weibo_dict)
    # Iterate the entries directly; the previous range(now_list) call raised
    # TypeError because now_list is a list, not an integer.
    for entry in now_list:
        uid = entry[0]
        source = entry[1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid,
                   '3')  # finished computing
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')

    # Re-read the queue and keep only entries that were not in the batch just
    # processed. The field may be gone by now, so guard against None.
    latest = r.hget('compute_now', date)
    renow_list = json.loads(latest) if latest else []
    # Entries decoded from JSON arrays are lists (unhashable), so a set
    # difference would raise TypeError; filter by membership instead.
    revise_list = [entry for entry in renow_list if entry not in now_list]
    if revise_list:
        # hset needs a value; the original call passed none. Writing back the
        # unprocessed remainder is the presumed intent -- confirm with owner.
        r.hset('compute_now', date, json.dumps(revise_list))
    else:
        r.hdel('compute_now', date)