def main():
    """Daily auto-recommendation: select influential users (minus black and
    sensitive users) and flag them in per-day redis hashes.

    Side effects: writes "0" flags into recomment_<date>_influence and
    recomment_<date>_sensitive via the module-level redis client `r`.
    """
    RUN_TYPE = 0  # local override: force test mode (shadows the module constant)
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    # BUG FIX: a leftover debug line unconditionally overwrote now_ts with
    # datetime2ts('2013-09-02'), making the branch above dead code; the
    # override has been removed so RUN_TEST_TIME is honoured.
    date = ts2datetime(now_ts - DAY)
    # auto recommendation: step 1-4
    # step1: read from top es_daily_rank
    top_user_set, user_dict = search_from_es(date)
    # step2: filter black_uid
    black_user_set = read_black_user()
    subtract_user_set = top_user_set - black_user_set
    # step3: filter users already in the portrait
    subtract_user_set = list(subtract_user_set)
    candidate_results = filter_in(subtract_user_set)
    # step4: filter rules about ip count & reposts/bereposts count & activity count
    results = filter_rules(candidate_results)
    # step5: get sensitive users; keep influence-only users
    sensitive_user = list(get_sensitive_user(date))
    # set() makes the subtraction safe whether filter_rules returns a list
    # or a set (the original subtracted directly and would raise on a list)
    results = set(results) - set(sensitive_user)  # influence user - sensitive user
    new_date = ts2datetime(now_ts)
    hashname_influence = "recomment_" + new_date + "_influence"
    if results:
        for uid in results:
            r.hset(hashname_influence, uid, "0")
    hashname_sensitive = "recomment_" + new_date + "_sensitive"
    if sensitive_user:
        for uid in sensitive_user:
            r.hset(hashname_sensitive, uid, "0")
    # NOTE(review): a stray opening triple-quote trailed this function in the
    # original source; it was dropped because it would swallow later code.
def save_results(save_type, user, recomment_results):
    """Persist one user's recommendation payload into today's
    'recomment_<date>_auto' redis hash.

    `save_type` is accepted for interface parity with the sibling variant
    but is not consulted here.  Returns True once the write has been issued.
    """
    #run_type: live mode uses wall-clock time, test mode uses RUN_TEST_TIME
    if RUN_TYPE == 1:
        base_ts = time.time()
    else:
        base_ts = datetime2ts(RUN_TEST_TIME)
    hash_key = 'recomment_' + ts2datetime(base_ts) + '_auto'
    R_RECOMMENTATION.hset(hash_key, user, json.dumps(recomment_results))
    return True
def save_results(save_type, recomment_results):
    """Persist recommendation results under 'recomment_<date>_auto'
    (yesterday's date in test mode, today's in live mode).

    save_type 'hotspot'  : store one JSON blob under the fixed field 'auto'.
    save_type 'operation': hmset the mapping field-by-field.
    Returns True when a known save_type handled the data, else False.
    """
    #run_type
    if RUN_TYPE == 1:
        base_ts = time.time()
    else:
        base_ts = datetime2ts(RUN_TEST_TIME) - DAY
    hash_key = 'recomment_' + ts2datetime(base_ts) + '_auto'
    if save_type == 'hotspot':
        R_RECOMMENTATION.hset(hash_key, 'auto', json.dumps(recomment_results))
        return True
    if save_type == 'operation':
        R_RECOMMENTATION.hmset(hash_key, recomment_results)
        return True
    return False
def change_status_computed(mapping_dict):
    """Mark every uid in mapping_dict as status '4' (computed).

    mapping_dict maps uid -> json.dumps([in_date, status]).  Updates the
    shared 'compute' hash and, per uid, flips the matching per-day
    identify_in_influence_<date> field to '4' when present, otherwise
    writes '4' into identify_in_sensitive_<date> (creating the field).
    """
    hash_name = 'compute'
    # removed dead local `status = 4`: it was never read (the literal '4'
    # is written directly below)
    new_mapping_dict = {}
    for uid in mapping_dict:
        user_list = json.loads(mapping_dict[uid])
        user_list[1] = '4'  # status 4 == computed
        in_date = user_list[0]
        new_mapping_dict[uid] = json.dumps(user_list)
        #revise identify_in_date
        influence_hashname = 'identify_in_influence_' + str(in_date)
        sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
        tmp = r.hget(influence_hashname, uid)
        if tmp:
            r.hset(influence_hashname, uid, '4')
        else:
            r.hset(sensitive_hashname, uid, '4')
    r.hmset(hash_name, new_mapping_dict)
def main(): #run_type if RUN_TYPE == 1: now_ts = time.time() else: now_ts = datetime2ts(RUN_TEST_TIME) date = ts2datetime(now_ts - DAY) print date # auto recommendation: step 1:4 #step1: read from top es_daily_rank top_user_set, user_dict = search_from_es(date) #step2: filter black_uid black_user_set = read_black_user() subtract_user_set = top_user_set - black_user_set #step3: filter users have been in subtract_user_set = list(subtract_user_set) candidate_results = filter_in(subtract_user_set) #step4: filter rules about ip count& reposts/bereposts count&activity count results = filter_rules(candidate_results) new_date = ts2datetime(now_ts) hashname_influence = "recomment_" + new_date + "_influence" if results: for uid in results: r.hset(hashname_influence, uid, "0") #step5: get sensitive user print date,'date' sensitive_user = list(get_sensitive_user(date)) hashname_sensitive = "recomment_" + new_date + "_sensitive" if sensitive_user: for uid in sensitive_user: print uid, hashname_sensitive r.hset(hashname_sensitive, uid, "0") results.extend(sensitive_user) results = set(results) # step6: write to recommentation csv/redis ++for super admin hashname_submit = "submit_recomment_" + new_date if results: for uid in results: r.hset(hashname_submit, uid, json.dumps({"system":1, "operation":"system"}))
def change_status_computed(mapping_dict):
    """Flag each uid in mapping_dict as computed (status '4') in the shared
    'compute' hash and in whichever per-day identify_in_* hash (influence,
    sensitive, or manual fallback) already tracks the uid."""
    updated = {}
    for uid in mapping_dict:
        record = json.loads(mapping_dict[uid])
        record[1] = '4'
        in_date = record[0]
        updated[uid] = json.dumps(record)
        #revise identify_in_date
        influence_key = 'identify_in_influence_' + str(in_date)
        sensitive_key = 'identify_in_sensitive_' + str(in_date)
        manual_key = "identify_in_manual_" + str(in_date)
        in_influence = r.hget(influence_key, uid)
        in_sensitive = r.hget(sensitive_key, uid)
        if in_influence:
            r.hset(influence_key, uid, '4')
        elif in_sensitive:
            r.hset(sensitive_key, uid, '4')
        else:
            r.hset(manual_key, uid, '4')
    r.hmset('compute', updated)
def change_status_compute_fail(mapping_dict):
    """Flag each uid in mapping_dict as failed (status '1') in the shared
    'compute' hash and in whichever per-day identify_in_* hash (influence,
    sensitive, or manual fallback) already tracks the uid."""
    updated = {}
    for uid in mapping_dict:
        record = json.loads(mapping_dict[uid])
        record[1] = '1'
        updated[uid] = json.dumps(record)
        in_date = record[0]
        #revise identify_in_date
        influence_key = 'identify_in_influence_' + str(in_date)
        sensitive_key = 'identify_in_sensitive_' + str(in_date)
        manual_key = "identify_in_manual_" + str(in_date)
        in_influence = r.hget(influence_key, uid)
        in_sensitive = r.hget(sensitive_key, uid)
        if in_influence:
            r.hset(influence_key, uid, '1')
        elif in_sensitive:
            r.hset(sensitive_key, uid, '1')
        else:
            r.hset(manual_key, uid, '1')
    r.hmset('compute', updated)
def main():
    """Daily auto-recommendation pipeline: take yesterday's top influential
    users, remove black-listed and already-registered users, apply the rule
    filter, merge in sensitive users, and record everything in the per-day
    influence/sensitive hashes and the submit hash."""
    #run_type
    now_ts = time.time() if RUN_TYPE == 1 else datetime2ts(RUN_TEST_TIME)
    date = ts2datetime(now_ts - DAY)
    # auto recommendation: step 1-4
    #step1: read from top es_daily_rank
    top_user_set, user_dict = search_from_es(date)
    #step2: filter black_uid
    blacklist = read_black_user()
    survivors = list(top_user_set - blacklist)
    #step3: filter users already in the portrait
    candidate_results = filter_in(survivors)
    #step4: filter rules about ip count & reposts/bereposts count & activity count
    results = filter_rules(candidate_results)
    new_date = ts2datetime(now_ts)
    hashname_influence = "recomment_" + new_date + "_influence"
    if results:
        for uid in results:
            r.hset(hashname_influence, uid, "0")
    #step5: get sensitive user
    sensitive_user = list(get_sensitive_user(date))
    hashname_sensitive = "recomment_" + new_date + "_sensitive"
    if sensitive_user:
        for uid in sensitive_user:
            r.hset(hashname_sensitive, uid, "0")
    results.extend(sensitive_user)
    results = set(results)
    #step6: write to recommentation csv/redis
    hashname_submit = "submit_recomment_" + new_date
    if results:
        for uid in results:
            r.hset(hashname_submit, uid, json.dumps({"system": 1, "operation": "system"}))
def scan_compute_redis():
    """Scan the 'compute' hash for users with status '2' (waiting) and run
    the text-attribute computation over them in batches of 100.

    Each selected uid is marked '3' (computing) both in 'compute' and in its
    per-day identify_in_influence_/identify_in_sensitive_ hash; after each
    batch the outcome is recorded via change_status_computed /
    change_status_compute_fail, and uids with no recent weibo are reset to
    status '2'.

    BUG FIX: both `r.hmset(change_mapping_dict)` calls were missing the
    hash-name argument (redis hmset signature is (name, mapping) and would
    raise); they now target the 'compute' hash like the sibling variant of
    this function does.
    """
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            tmp = r.hget(influence_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            else:
                r.hset(sensitive_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            #mark status from 1 to 3 as identify_compute to computing
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            #deal user no weibo to compute portrait attribute
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    # NOTE(review): in_date is whatever the last scanned uid
                    # carried -- matches the original logic; confirm intent
                    change_mapping_dict[change_user] = json.dumps([in_date, '2'])
                r.hmset('compute', change_mapping_dict)  # was: r.hmset(change_mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # leftover partial batch (fewer than 100 users)
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        #deal user no weibo to compute portrait attribute
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            r.hmset('compute', change_mapping_dict)  # was: r.hmset(change_mapping_dict)
def scan_compute_redis():
    """Scan the 'compute' hash for users with status '2' (waiting) and run
    the text-attribute computation over them in batches of 100.

    Each selected uid is marked '3' (computing) in 'compute' and in its
    per-day identify_in_influence_/identify_in_sensitive_/identify_in_manual_
    hash; after each batch the outcome is recorded via
    change_status_computed / change_status_compute_fail, and uids with no
    recent weibo are reset to status '2'.

    BUG FIX: both `r.hmset(change_mapping_dict)` calls were missing the
    hash-name argument (redis hmset signature is (name, mapping) and would
    raise); they now target the 'compute' hash.
    """
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            #revise identify_in_date
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            manual_hashname = "identify_in_manual_" + str(in_date)
            tmp = r.hget(influence_hashname, uid)
            tmp1 = r.hget(sensitive_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            elif tmp1:
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            #mark status from 1 to 3 as identify_compute to computing
            r.hmset('compute', mapping_dict)
            #acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            #compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            #deal user no weibo to compute portrait attribute
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    # NOTE(review): in_date is whatever the last scanned uid
                    # carried -- matches the original logic; confirm intent
                    change_mapping_dict[change_user] = json.dumps([in_date, '2'])
                r.hmset('compute', change_mapping_dict)  # was: r.hmset(change_mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # leftover partial batch (fewer than 100 users)
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset('compute', mapping_dict)
        #acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        #compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        #deal user no weibo to compute portrait attribute
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            r.hmset('compute', change_mapping_dict)  # was: r.hmset(change_mapping_dict)
def save_recommentation2redis(date, user_set):
    """Flag every uid in user_set with status 0 inside the per-day
    'recomment_<date>' redis hash.  Always returns True."""
    hash_key = 'recomment_' + str(date)
    for uid in user_set:
        r.hset(hash_key, uid, 0)
    return True
def save_recommentation2redis(date, user_set):
    """Record each recommended uid under 'recomment_<date>' with an initial
    status of 0.  Always returns True."""
    bucket = 'recomment_' + str(date)
    initial_status = 0
    for candidate in user_set:
        r.hset(bucket, candidate, initial_status)
    return True
from text_attribute import compute_attribute
reload(sys)
sys.path.append('./../../')
from global_utils import R_RECOMMENTATION as r
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process yesterday's appointed compute batch: mark every (uid, source)
# pair as finished ('3') in its identify_in_* hash, then drop the batch.
date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
temp = r.hget('compute_appoint', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    compute_attribute(user_weibo_dict)
    # BUG FIX: the original iterated `range(now_list)` -- range() was given
    # a list and would raise TypeError; iterate the indices instead.
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finish compute
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')
    r.hdel('compute_appoint', date)
def save_result(results):
    """Write every field of the `results` mapping into the 'overview'
    redis hash.  Always returns True."""
    for field in results:
        r_recomment.hset('overview', field, results[field])
    return True
# step 1: no sensitive user in top influence revise_influence_uid_list = set(top_influence_uid) - set(sensitive_weibo_uid) black_uid_list = read_black_user_list() revise_influence_uid_list = set(revise_influence_uid_list) - set(black_uid_list) print 'filter black list: ', len(revise_influence_uid_list) #total = set(sensitive_weibo_uid) | set(top_influence_uid) # step 2: no recommending sensitive_uid_recommending_filter = filter_recommend(sensitive_weibo_uid) top_influence_recommending_filter = filter_recommend(revise_influence_uid_list) # step 3: no one in portrait sensitive_uid_in_filter = filter_in(sensitive_uid_recommending_filter) top_influence_in_filter = filter_in(top_influence_recommending_filter) print len(sensitive_uid_in_filter) print len(top_influence_in_filter) top_influence_filter_result = filter_rules(top_influence_in_filter) if sensitive_uid_in_filter: r_recommend.hset('recommend_sensitive', now_date, json.dumps(sensitive_uid_in_filter)) else: r_recommend.hset('recommend_sensitive', now_date, '0') if top_influence_filter_result: r_recommend.hset('recommend_influence', now_date, json.dumps(top_influence_in_filter)) else: r_recommend.hset('recommend_influence', now_date, '0')
def save_recommentation2redis(date, user_set):
    """Store user_set as a JSON list in the 'recommend' hash, keyed by the
    compact date (dashes stripped).  Empty sets are skipped.  Returns 1."""
    compact_date = date.replace('-', '')
    if user_set:
        r.hset('recommend', compact_date, json.dumps(list(user_set)))
    return 1
from global_utils import es_sensitive_user_text as es_text
from time_utils import datetime2ts, ts2datetime

# Process today's 'compute_now' batch: mark each (uid, source) pair as
# finished ('3') in its identify_in_* hash, then clear the processed
# entries from the batch.
date = ts2datetime(time.time()).replace('-', '')
temp = r.hget('compute_now', date)
if temp:
    now_list = json.loads(temp)
    uid_list = []
    count = 0
    for item in now_list:
        uid_list.append(item[0])
    user_weibo_dict = dict()
    # extract user weibo text
    compute_attribute(user_weibo_dict)
    # BUG FIX: the original iterated `range(now_list)` -- range() was given
    # a list and would raise TypeError; iterate the indices instead.
    for i in range(len(now_list)):
        uid = now_list[i][0]
        source = now_list[i][1]
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, '3')  # finish compute
        else:
            r.hset('identify_in_influence_' + str(date), uid, '3')
    # NOTE(review): set() over list-of-lists raises (unhashable); presumably
    # the batch entries here are plain uids or tuples -- verify upstream.
    renow_list = json.loads(r.hget('compute_now', date))
    revise_set = set(renow_list) - set(now_list)
    if revise_set:
        # BUG FIX: original called r.hset('compute_now', date) with no value
        # argument (TypeError); keep only the still-unprocessed entries.
        r.hset('compute_now', date, json.dumps(list(revise_set)))
    else:
        r.hdel('compute_now', date)
sensitive_weibo_uid) black_uid_list = read_black_user_list() revise_influence_uid_list = set(revise_influence_uid_list) - set( black_uid_list) print 'filter black list: ', len(revise_influence_uid_list) #total = set(sensitive_weibo_uid) | set(top_influence_uid) # step 2: no recommending sensitive_uid_recommending_filter = filter_recommend(sensitive_weibo_uid) top_influence_recommending_filter = filter_recommend( revise_influence_uid_list) # step 3: no one in portrait sensitive_uid_in_filter = filter_in(sensitive_uid_recommending_filter) top_influence_in_filter = filter_in(top_influence_recommending_filter) print len(sensitive_uid_in_filter) print len(top_influence_in_filter) top_influence_filter_result = filter_rules(top_influence_in_filter) if sensitive_uid_in_filter: r_recommend.hset('recommend_sensitive', now_date, json.dumps(sensitive_uid_in_filter)) else: r_recommend.hset('recommend_sensitive', now_date, '0') if top_influence_filter_result: r_recommend.hset('recommend_influence', now_date, json.dumps(top_influence_in_filter)) else: r_recommend.hset('recommend_influence', now_date, '0')
def save_recommentation2redis(date, user_set):
    """Persist the recommended uid collection (if any) as a JSON list under
    the dash-free date in the 'recommend' hash.  Returns 1."""
    key = date.replace('-', '')
    if user_set:
        payload = json.dumps(list(user_set))
        r.hset('recommend', key, payload)
    return 1
def scan_compute_redis(): hash_name = 'compute' results = r.hgetall('compute') iter_user_list = [] mapping_dict = dict() identify_in_dict = dict() #test count = 0 for uid in results: user_list = json.loads(results[uid]) in_date = user_list[0] status = user_list[1] if int(status) == 1: #imme #test count += 1 iter_user_list.append(uid) mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing #revise identify_in_date influence_hashname = 'identify_in_influence_'+str(in_date) sensitive_hashname = 'identify_in_sensitive_'+str(in_date) manual_hashname = "identify_in_manual_"+str(in_date) tmp = r.hget(influence_hashname, uid) tmp1 = r.hget(sensitive_hashname, uid) if tmp: r.hset(influence_hashname, uid, '3') elif tmp1: r.hset(sensitive_hashname, uid, '3') else: r.hset(manual_hashname, uid, '3') if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0: print iter_user_list r.hmset('compute', mapping_dict) #acquire bulk user weibo data if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list) #compute text attribute compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status==True: change_status_computed(mapping_dict) else: change_status_compute_fail(mapping_dict) #when uid user no weibo at latest week to change compute status to 1 if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: change_mapping_dict[change_user] = json.dumps([in_date, '1']) r.hmset('compute', change_mapping_dict) iter_user_list = [] mapping_dict = {} if iter_user_list != [] and mapping_dict != {}: r.hmset('compute', mapping_dict) #acquire bulk user weibo date print 
'iter_user_list:', iter_user_list if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list) #compute text attribute print 'user_weibo_dict:', len(user_weibo_dict) compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status==True: change_status_computed(mapping_dict) else: change_status_compute_fail(mapping_dict) #when uid user no weibo at latest week to change compute status to 1 if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: change_mapping_dict[change_user] = json.dumps([in_date, '1']) r.hmset('compute', change_mapping_dict)