def filter_recommend(top_user_set):
    """Return the members of top_user_set that have not been recommended yet.

    Every field value in the 'recommend' hash is a JSON-encoded list of
    uids; all of them are flattened into one list and subtracted from
    top_user_set.

    :param top_user_set: iterable of uids to filter
    :return: set of uids absent from every 'recommend' entry
    """
    # hvals fetches all field values in a single round trip, instead of
    # hkeys followed by one hget per key.
    recommend_list = []
    for value in r.hvals('recommend'):
        recommend_list.extend(json.loads(value))
    return set(top_user_set) - set(recommend_list)
def change_status_computed(mapping_dict):
    """Mark every uid in mapping_dict as computed (status '4').

    mapping_dict maps uid -> JSON-encoded [in_date, status].  For each
    uid the status is rewritten to '4' in the global 'compute' hash, and
    the per-day identify hash the uid was originally recorded in
    (influence, sensitive, or manual as a fallback) is updated as well.

    :param mapping_dict: dict of uid -> JSON '[in_date, status]'
    """
    hash_name = 'compute'
    # Original declared `status = 4` but never used it, hard-coding '4'
    # everywhere; keep a single string constant instead.
    status = '4'
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = status
        in_date = user_list[0]
        new_mapping_dict[uid] = json.dumps(user_list)
        # revise the per-day identify hash for in_date
        influence_hashname = 'identify_in_influence_' + str(in_date)
        sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
        manual_hashname = "identify_in_manual_" + str(in_date)
        if r.hget(influence_hashname, uid):
            r.hset(influence_hashname, uid, status)
        elif r.hget(sensitive_hashname, uid):
            r.hset(sensitive_hashname, uid, status)
        else:
            r.hset(manual_hashname, uid, status)
    r.hmset(hash_name, new_mapping_dict)
def change_status_computed(mapping_dict):
    """Flip every uid in *mapping_dict* to status '4' (computed).

    Rewrites the uid's entry in the 'compute' hash and in whichever
    per-day identify hash (influence / sensitive / manual) already
    tracks it.

    NOTE(review): this re-defines change_status_computed and shadows the
    earlier, identical definition in this file.
    """
    updated = {}
    for uid, raw in mapping_dict.items():
        record = json.loads(raw)
        record[1] = '4'
        day = record[0]
        updated[uid] = json.dumps(record)
        # choose the per-day identify hash the uid was recorded in
        by_influence = 'identify_in_influence_' + str(day)
        by_sensitive = 'identify_in_sensitive_' + str(day)
        by_manual = "identify_in_manual_" + str(day)
        in_influence = r.hget(by_influence, uid)
        in_sensitive = r.hget(by_sensitive, uid)
        if in_influence:
            r.hset(by_influence, uid, '4')
        elif in_sensitive:
            r.hset(by_sensitive, uid, '4')
        else:
            r.hset(by_manual, uid, '4')
    r.hmset('compute', updated)
def change_status_compute_fail(mapping_dict):
    """Mark every uid in mapping_dict as compute-failed (status '1').

    mapping_dict maps uid -> JSON-encoded [in_date, status].  The status
    is reset to '1' in the 'compute' hash and mirrored into the per-day
    identify hash the uid was recorded in.

    :param mapping_dict: dict of uid -> JSON '[in_date, status]'
    """
    hash_name = 'compute'
    status = '1'
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = status
        new_mapping_dict[uid] = json.dumps(user_list)
        in_date = user_list[0]
        # Revise the per-day identify hash.  Consistency fix: like
        # change_status_computed, fall back to the manual hash when the
        # uid is in neither the influence nor the sensitive hash —
        # previously the fail status was written into the sensitive hash
        # even for manually-submitted uids.
        influence_hashname = 'identify_in_influence_' + str(in_date)
        sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
        manual_hashname = "identify_in_manual_" + str(in_date)
        if r.hget(influence_hashname, uid):
            r.hset(influence_hashname, uid, status)
        elif r.hget(sensitive_hashname, uid):
            r.hset(sensitive_hashname, uid, status)
        else:
            r.hset(manual_hashname, uid, status)
    r.hmset(hash_name, new_mapping_dict)
def get_operate_information():
    """Collect in/out counters for the operate view.

    NOTE(review): the computed date is immediately overwritten with the
    hard-coded test date '2013-09-07' — kept as-is to preserve behavior;
    confirm whether this override should still be in place.
    """
    result = {}
    now_ts = time.time()
    date = ts2datetime(now_ts - 24 * 3600)
    # test override (left in the original code)
    date = '2013-09-07'
    delete_date = date.replace('-', '')
    result['in_count'] = len(r_recomment.hkeys('recomment_' + str(date)))
    deleted = r_recomment.hget('recommend_delete_list', delete_date)
    result['out_count'] = len(json.loads(deleted)) if deleted else 0
    # fetched but unused: the 'compute' counter is disabled in this
    # variant of the function
    compute_list = r_recomment.hkeys('compute')
    return result
def get_operate_information():
    """Collect in/out/compute counters for the operate view.

    Uses the real clock when RUN_TYPE == 1, otherwise the fixed
    RUN_TEST_TIME, and reports counters for the previous day.

    NOTE(review): re-defines get_operate_information, shadowing the
    earlier variant in this file.
    """
    result = {}
    # run_type: live clock vs. fixed test timestamp
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    date = ts2datetime(now_ts - DAY)
    delete_date = date.replace('-', '')
    result['in_count'] = len(r_recomment.hkeys('recomment_' + str(date)))
    deleted = r_recomment.hget('recommend_delete_list', delete_date)
    result['out_count'] = len(json.loads(deleted)) if deleted else 0
    compute_list = r_recomment.hkeys('compute')
    result['compute'] = len(compute_list) if compute_list else 0
    return result
def _run_compute_batch(iter_user_list, mapping_dict, in_date):
    """Process one batch of uids: mark computing, fetch weibo text, run
    the text-attribute computation, and record success/failure."""
    # mark status from identify_compute to computing ('3') in bulk
    r.hmset('compute', mapping_dict)
    # acquire bulk user weibo data
    if WEIBO_API_INPUT_TYPE == 0:
        user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
    else:
        user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
    # compute text attribute
    compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
    if compute_status == True:
        change_status_computed(mapping_dict)
    else:
        change_status_compute_fail(mapping_dict)
    # deal user no weibo to compute portrait attribute: reset them to
    # status '2' so a later scan retries them
    if len(user_keywords_dict) != len(iter_user_list):
        change_mapping_dict = dict()
        change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
        for change_user in change_user_list:
            change_mapping_dict[change_user] = json.dumps([in_date, '2'])
        # BUG FIX: was r.hmset(change_mapping_dict) — hmset requires the
        # hash name first and would raise; write into 'compute'
        r.hmset('compute', change_mapping_dict)


def scan_compute_redis():
    """Scan the 'compute' hash and process users with status '2'
    (waiting) in batches of 100.

    Each selected uid is flipped to status '3' (computing) in both the
    'compute' hash and its per-day identify hash, then the batch is fed
    through the text-attribute pipeline; statuses end at '4' (computed)
    or '1' (failed), and uids with no weibo text are reset to '2'.
    """
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            # revise the per-day identify hash (this variant predates the
            # manual hash and only distinguishes influence/sensitive)
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            if r.hget(influence_hashname, uid):
                r.hset(influence_hashname, uid, '3')
            else:
                r.hset(sensitive_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            _run_compute_batch(iter_user_list, mapping_dict, in_date)
            iter_user_list = []
            mapping_dict = {}
    # leftover partial batch
    if iter_user_list != [] and mapping_dict != {}:
        _run_compute_batch(iter_user_list, mapping_dict, in_date)
def scan_compute_redis():
    """Scan the 'compute' hash and process users with status '2'
    (waiting) in batches of 100.

    Each selected uid is flipped to status '3' (computing) in the
    'compute' hash and in its per-day identify hash (influence,
    sensitive, or manual fallback).  Batches are pushed through
    read_flow_text[_sentiment] and test_cron_text_attribute_v2; on the
    result, statuses become '4' (computed) or '1' (failed).  Uids with
    no weibo text are reset to '2' so a later scan retries them.

    NOTE(review): re-defines scan_compute_redis, shadowing the earlier
    variant in this file.
    """
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            # revise the per-day identify hash the uid was recorded in
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            manual_hashname = "identify_in_manual_" + str(in_date)
            if r.hget(influence_hashname, uid):
                r.hset(influence_hashname, uid, '3')
            elif r.hget(sensitive_hashname, uid):
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            # mark status from identify_compute to computing in bulk
            r.hmset('compute', mapping_dict)
            # acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            # compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            # deal user no weibo to compute portrait attribute
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([in_date, '2'])
                # BUG FIX: was r.hmset(change_mapping_dict) — hmset needs
                # the hash name first and would raise at runtime
                r.hmset('compute', change_mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # leftover partial batch
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        # acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        # compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # deal user no weibo to compute portrait attribute
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            # BUG FIX: same missing-hash-name call as above
            r.hmset('compute', change_mapping_dict)
# every 30 minutes reexecute this program import sys import time import json import redis from elasticsearch import Elasticsearch from text_attribute import compute_attribute reload(sys) sys.path.append('./../../') from global_utils import R_RECOMMENTATION as r from global_utils import es_sensitive_user_text as es_text from time_utils import datetime2ts, ts2datetime date = ts2datetime(time.time()).replace('-', '') temp = r.hget('compute_now', date) if temp: now_list = json.loads(temp) uid_list = [] count = 0 for item in now_list: uid_list.append(item[0]) user_weibo_dict = dict() # extract user weibo text compute_attribute(user_weibo_dict) for i in range(now_list): uid = now_list[i][0] source = now_list[i][1] if source == '1': r.hset('identify_in_sensitive_'+str(date), uid, '3') # finish comoute else:
sensitive_words = sensitive_dict.keys() else: sensitive_words = [] if sensitive_history_dict.get('fields',0): #print sensitive_history_dict['fields'][sensitive_string][0] #print top_sensitive sensitive_value = math.log(sensitive_history_dict['fields'][sensitive_string][0]/float(top_sensitive)*9+1, 10)*100 #print "sensitive_value", sensitive_value else: sensitive_value = 0 results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words, sensitive_value]) else: results.append([uid, uname, location, fansnum, statusnum, influence]) if auth: hashname_submit = "submit_recomment_" + date tmp_data = json.loads(r.hget(hashname_submit, uid)) recommend_list = (tmp_data['operation']).split('&') admin_list = [] admin_list.append(tmp_data['system']) admin_list.append(list(set(recommend_list))) admin_list.append(len(recommend_list)) return results def get_evaluate_max(index_name): max_result = {} index_type = 'bci' evaluate_index = ['user_index'] for evaluate in evaluate_index: query_body = { 'query':{ 'match_all':{}
# Python 2 cron script: reads yesterday's entry from the 'compute_appoint'
# hash, computes text attributes for the listed users, then marks each
# uid's per-day identify hash as finished.
import sys
import time
import json
import redis
from elasticsearch import Elasticsearch
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# yesterday's date formatted as YYYYMMDD
date = ts2datetime(time.time()-24*3600).replace('-', '')
temp = r.hget('compute_appoint', date)
if temp:
    # presumably a JSON list of [uid, source] pairs — verify against the
    # writer of 'compute_appoint'
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    # NOTE(review): user_weibo_dict is passed in empty — the extraction
    # step appears to be missing or stubbed out
    compute_attribute(user_weibo_dict)
    # NOTE(review): range(now_list) passes a list to range() and raises
    # TypeError — should presumably be range(len(now_list))
    for i in range(now_list):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_'+str(date), uid, '3')  # finish compute
        # NOTE(review): source is truncated here — the else-branch body is
        # missing from this chunk
        else:
def scan_compute_redis():
    """Scan the 'compute' hash and process users whose status is 1
    ("imme"), in batches of 100 uids.

    For each selected uid the status is flipped to '3' (computing) in
    the 'compute' hash and in the per-day identify hash (influence,
    sensitive, or manual fallback).  Batches go through
    read_flow_text[_sentiment] and test_cron_text_attribute_v2; statuses
    are then updated via change_status_computed /
    change_status_compute_fail, and uids with no weibo text in the
    latest week are reset to status '1' for retry.
    """
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    identify_in_dict = dict()  # NOTE(review): assigned but never used
    #test
    count = 0
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if int(status) == 1:  #imme
            #test
            count += 1
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_'+str(in_date)
            sensitive_hashname = 'identify_in_sensitive_'+str(in_date)
            manual_hashname = "identify_in_manual_"+str(in_date)
            tmp = r.hget(influence_hashname, uid)
            tmp1 = r.hget(sensitive_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            elif tmp1:
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        # flush a full batch of 100 uids
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            print iter_user_list
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status==True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            #when uid user no weibo at latest week to change compute status to 1
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([in_date, '1'])
                r.hmset('compute', change_mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # leftover partial batch (fewer than 100 uids)
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo date
        print 'iter_user_list:', iter_user_list
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        print 'user_weibo_dict:', len(user_weibo_dict)
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status==True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        #when uid user no weibo at latest week to change compute status to 1
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '1'])
            r.hmset('compute', change_mapping_dict)
# every 30 minutes reexecute this program import sys import time import json import redis from elasticsearch import Elasticsearch from text_attribute import compute_attribute reload(sys) sys.path.append('./../../') from global_utils import R_RECOMMENTATION as r from global_utils import es_sensitive_user_text as es_text from time_utils import datetime2ts, ts2datetime date = ts2datetime(time.time()).replace('-', '') temp = r.hget('compute_now', date) if temp: now_list = json.loads(temp) uid_list = [] count = 0 for item in now_list: uid_list.append(item[0]) user_weibo_dict = dict() # extract user weibo text compute_attribute(user_weibo_dict) for i in range(now_list): uid = now_list[i][0] source = now_list[i][1] if source == '1': r.hset('identify_in_sensitive_' + str(date), uid, '3') # finish comoute