"sort": [{"user_index": {"order": "desc"}}] } result = es_cluster.search(index=index_name,doc_type="bci", body=query_body)["hits"]["hits"] sensitive_uid = [] for item in result: sensitive_uid.append(item['_source']['uid']) return sensitive_uid if __name__ == "__main__": now_date = ts2datetime(time.time()).replace('-','') former_date = ts2datetime(time.time()-7*24*3600).replace('-','') r_recommend.hdel('recommend_sensitive', former_date) # delete 7 days ago recommentation uid_list r_recommend.hdel('recommend_influence', former_date) # delete 7 days ago recommentation uid_list now_date = '20130901' # test sensitive_weibo_uid = search_sensitive_weibo(now_date) # sensitive words uid list, direct recommend in top_influence_uid = search_top_k(now_date, 10000) # top influence uid list, filter # step 1: no sensitive user in top influence revise_influence_uid_list = set(top_influence_uid) - set(sensitive_weibo_uid) black_uid_list = read_black_user_list() revise_influence_uid_list = set(revise_influence_uid_list) - set(black_uid_list) print 'filter black list: ', len(revise_influence_uid_list) #total = set(sensitive_weibo_uid) | set(top_influence_uid) # step 2: no recommending sensitive_uid_recommending_filter = filter_recommend(sensitive_weibo_uid) top_influence_recommending_filter = filter_recommend(revise_influence_uid_list) # step 3: no one in portrait
} result = es_cluster.search(index=index_name, doc_type="bci", body=query_body)["hits"]["hits"] sensitive_uid = [] for item in result: sensitive_uid.append(item['_source']['uid']) return sensitive_uid if __name__ == "__main__": now_date = ts2datetime(time.time()).replace('-', '') former_date = ts2datetime(time.time() - 7 * 24 * 3600).replace('-', '') r_recommend.hdel('recommend_sensitive', former_date) # delete 7 days ago recommentation uid_list r_recommend.hdel('recommend_influence', former_date) # delete 7 days ago recommentation uid_list now_date = '20130901' # test sensitive_weibo_uid = search_sensitive_weibo( now_date) # sensitive words uid list, direct recommend in top_influence_uid = search_top_k(now_date, 10000) # top influence uid list, filter # step 1: no sensitive user in top influence revise_influence_uid_list = set(top_influence_uid) - set( sensitive_weibo_uid) black_uid_list = read_black_user_list() revise_influence_uid_list = set(revise_influence_uid_list) - set( black_uid_list) print 'filter black list: ', len(revise_influence_uid_list)
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process the batch stored in the 'compute_appoint' hash under yesterday's
# date (YYYYMMDD), mark every uid as finished, then delete the batch.
date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
temp = r.hget('compute_appoint', date)
if temp:
    # Each entry is indexed as entry[0] = uid and entry[1] = source flag.
    now_list = json.loads(temp)
    uid_list = [entry[0] for entry in now_list]

    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): user_weibo_dict is still empty when handed to
    # compute_attribute and uid_list is never consumed -- the extraction
    # step appears to be missing here; confirm against the full pipeline.
    compute_attribute(user_weibo_dict)

    # Iterate the entries directly: the original `range(now_list)` passed a
    # list to range() and raised TypeError before any status was written.
    for entry in now_list:
        uid = entry[0]
        source = entry[1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finish compute
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')

    r.hdel('compute_appoint', date)
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process the batch stored in the 'compute_now' hash under today's date
# (YYYYMMDD), mark every uid as finished, then keep only the entries that
# were queued while this batch was being processed.
date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    # Each entry is indexed as entry[0] = uid and entry[1] = source flag.
    now_list = json.loads(temp)
    uid_list = [entry[0] for entry in now_list]

    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): user_weibo_dict is still empty when handed to
    # compute_attribute and uid_list is never consumed -- the extraction
    # step appears to be missing here; confirm against the full pipeline.
    compute_attribute(user_weibo_dict)

    # Iterate the entries directly: the original `range(now_list)` passed a
    # list to range() and raised TypeError before any status was written.
    for entry in now_list:
        uid = entry[0]
        source = entry[1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finish compute
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')

    # Re-read the queue: entries may have been appended during the run.
    # The field may also be gone by now, so guard before decoding.
    latest = r.hget('compute_now', date)
    renow_list = json.loads(latest) if latest else []

    # JSON arrays decode to lists, which are unhashable; compare as tuples.
    # Assumes each entry is a flat list of scalars -- TODO confirm.
    processed = set(tuple(entry) for entry in now_list)
    revise_list = [entry for entry in renow_list
                   if tuple(entry) not in processed]

    if revise_list:
        # Write the unprocessed remainder back (the original call omitted
        # the value argument that hset requires).
        r.hset('compute_now', date, json.dumps(revise_list))
    else:
        r.hdel('compute_now', date)
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process the batch stored in the 'compute_now' hash under today's date
# (YYYYMMDD), mark every uid as finished, then keep only the entries that
# were queued while this batch was being processed.
date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    # Each entry is indexed as entry[0] = uid and entry[1] = source flag.
    now_list = json.loads(temp)
    uid_list = [entry[0] for entry in now_list]

    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): user_weibo_dict is still empty when handed to
    # compute_attribute and uid_list is never consumed -- the extraction
    # step appears to be missing here; confirm against the full pipeline.
    compute_attribute(user_weibo_dict)

    # Iterate the entries directly: the original `range(now_list)` passed a
    # list to range() and raised TypeError before any status was written.
    for entry in now_list:
        uid = entry[0]
        source = entry[1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finish compute
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')

    # Re-read the queue: entries may have been appended during the run.
    # The field may also be gone by now, so guard before decoding.
    latest = r.hget('compute_now', date)
    renow_list = json.loads(latest) if latest else []

    # JSON arrays decode to lists, which are unhashable; compare as tuples.
    # Assumes each entry is a flat list of scalars -- TODO confirm.
    processed = set(tuple(entry) for entry in now_list)
    revise_list = [entry for entry in renow_list
                   if tuple(entry) not in processed]

    if revise_list:
        # Write the unprocessed remainder back (the original call omitted
        # the value argument that hset requires).
        r.hset('compute_now', date, json.dumps(revise_list))
    else:
        r.hdel('compute_now', date)