def scan_compute_redis():
    """Scan the 'compute' redis hash for users in status '2' (waiting),
    mark them as '3' (computing), mirror that status into the per-day
    identify_in_influence_/identify_in_sensitive_ hashes, and run
    text-attribute computation in batches of 100 uids.

    Side effects only; no return value.
    """
    hash_name = 'compute'
    results = r.hgetall(hash_name)
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            # revise identify_in_date: the uid was queued either in the
            # influence hash or the sensitive hash for that day; update
            # whichever one actually holds it
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            tmp = r.hget(influence_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            else:
                r.hset(sensitive_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            # mark status from 2 to 3 (identify_compute -> computing)
            r.hmset(hash_name, mapping_dict)
            # acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            # compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            # deal user no weibo to compute portrait attribute: reset them
            # to status '2' so they are picked up again later
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([in_date, '2'])
                # BUGFIX: hmset requires the hash name as first argument;
                # the original r.hmset(change_mapping_dict) raises TypeError
                r.hmset(hash_name, change_mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # flush the final partial batch (fewer than 100 uids)
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset(hash_name, mapping_dict)
        # acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        # compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # deal user no weibo to compute portrait attribute
        # NOTE(review): in_date here is the value from the last loop
        # iteration — confirm all uids in a batch share the same in_date
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            # BUGFIX: same missing hash-name argument as above
            r.hmset(hash_name, change_mapping_dict)
def deal_bulk_action(user_info_list, fansnum_max):
    """Compute portrait attributes for a batch of users and bulk-update
    them in the user-portrait ES index.

    Parameters:
      user_info_list -- dict mapping uid -> {'domain': ..., 'fansnum': ...}
      fansnum_max    -- ceiling used by get_importance for normalization

    Attributes written per uid: topic, topic_string, keywords (top 50 by
    weight), keywords_string, online_pattern, online_pattern_aggs,
    importance.
    """
    start_ts = time.time()
    uid_list = user_info_list.keys()
    # acquire bulk user weibo data
    if WEIBO_API_INPUT_TYPE == 0:
        user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(uid_list)
    else:
        user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(uid_list)
    # compute attribute--keywords, topic, online_pattern
    # get user topic results by bulk action
    topic_results_dict, topic_results_label = topic_classfiy(uid_list, user_keywords_dict)
    # get bulk action
    bulk_action = []
    for uid in uid_list:
        results = {}
        results['uid'] = uid
        # add user topic attribute
        user_topic_dict = topic_results_dict[uid]
        user_label_dict = topic_results_label[uid]
        results['topic'] = json.dumps(user_topic_dict)
        results['topic_string'] = topic_en2ch(user_label_dict)
        # add user keywords attribute: top-50 keywords by weight
        keywords_dict = user_keywords_dict[uid]
        keywords_top50 = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)[:50]
        keywords_top50_string = '&'.join([keyword_item[0] for keyword_item in keywords_top50])
        results['keywords'] = json.dumps(keywords_top50)
        results['keywords_string'] = keywords_top50_string
        # add online_pattern
        # BUGFIX: the original called .keys() on the json.dumps() result
        # (a str), which always raised and left online_pattern_aggs empty;
        # join the pattern dict's keys before serializing it
        user_online_pattern = online_pattern_dict[uid]
        results['online_pattern'] = json.dumps(user_online_pattern)
        try:
            results['online_pattern_aggs'] = '&'.join(user_online_pattern.keys())
        except Exception:
            # pattern may not be a dict for some uids — fall back to empty
            results['online_pattern_aggs'] = ''
        # add user importance
        user_domain = user_info_list[uid]['domain'].encode('utf-8')
        user_fansnum = user_info_list[uid]['fansnum']
        results['importance'] = get_importance(user_domain, results['topic_string'], user_fansnum, fansnum_max)
        # bulk action: ES partial-update pair (action header + doc)
        action = {'update': {'_id': uid}}
        bulk_action.extend([action, {'doc': results}])
    es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type)
    end_ts = time.time()
    #log_should_delete
    #print '%s sec count %s' % (end_ts - start_ts, len(uid_list))
    #log_should_delete
    start_ts = end_ts
def scan_compute_redis():
    """Scan the 'compute' redis hash for users in status '2', mark them
    '3' (computing) and run text-attribute computation in batches of 100
    uids, then flush the final partial batch.

    Side effects only; no return value.
    """
    hash_name = 'compute'
    results = r.hgetall(hash_name)
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':  # imme
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            r.hmset(hash_name, mapping_dict)
            # acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            # compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # flush the final partial batch
    if iter_user_list != [] and mapping_dict != {}:
        # BUGFIX: original used r.mset('compute', mapping_dict), which
        # writes top-level string keys instead of fields of the 'compute'
        # hash (and has the wrong signature); use hmset like the batch
        # branch above
        r.hmset(hash_name, mapping_dict)
        # acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        # compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
def scan_compute_redis(): iter_user_list = [] mapping_dict = dict() #test count = 0 while 1: uid = r.rpop("uid_list") #用户列表 print uid count += 1 if not uid: break iter_user_list.append(uid) if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0: #acquire bulk user weibo data out_list = es_km_storage(iter_uid_list) if out_list: iter_uid_list = out_list if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment( iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text( iter_user_list) #compute text attribute compute_status = test_cron_text_attribute_v2( user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status == True: print "finish iteration" else: for uid in iter_user_list: r.lpush("uid_list", uid) #when uid user no weibo at latest week to change compute status to 1 if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set( user_keywords_dict.keys()) for change_user in change_user_list: r.lpush("uid_list", change_user) iter_user_list = [] mapping_dict = {} if iter_user_list != []: #acquire bulk user weibo date print 'iter_user_list:', len(iter_user_list) out_list = es_km_storage(iter_user_list) iter_user_list = out_list if iter_user_list: if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment( iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text( iter_user_list) #compute text attribute print 'user_weibo_dict:', len(user_weibo_dict) compute_status = test_cron_text_attribute_v2( user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status == True: print "finish iteration" else: for uid in iter_user_list: r.lpush("uid_list", uid) #when uid user no 
weibo at latest week to change compute status to 1 if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set( user_keywords_dict.keys()) for change_user in change_user_list: r.lpush("uid_list", change_user)
def scan_compute_redis(): hash_name = 'compute' results = r.hgetall('compute') iter_user_list = [] mapping_dict = dict() #test count = 0 for uid in results: user_list = json.loads(results[uid]) in_date = user_list[0] status = user_list[1] if status == '1': #imme #test count += 1 iter_user_list.append(uid) mapping_dict[uid] = json.dumps([in_date, '3']) # mark status:3 computing if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0: r.hmset('compute', mapping_dict) #acquire bulk user weibo data if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list) #compute text attribute compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status==True: change_status_computed(mapping_dict) else: change_status_compute_fail(mapping_dict) #when uid user no weibo at latest week to change compute status to 1 if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: change_mapping_dict[change_user] = json.dumps([in_date, '1']) r.hmset('compute', change_mapping_dict) iter_user_list = [] mapping_dict = {} if iter_user_list != [] and mapping_dict != {}: r.hmset('compute', mapping_dict) #acquire bulk user weibo date print 'iter_user_list:', len(iter_user_list) if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list) #compute text attribute print 'user_weibo_dict:', len(user_weibo_dict) compute_status = test_cron_text_attribute_v2(user_keywords_dict, 
user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status==True: change_status_computed(mapping_dict) else: change_status_compute_fail(mapping_dict) #when uid user no weibo at latest week to change compute status to 1 if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: change_mapping_dict[change_user] = json.dumps([in_date, '1']) r.hmset('compute', change_mapping_dict)
def scan_compute_redis():
    """Scan the 'compute' redis hash for users in status '2', mark them
    '3' (computing), mirror that status into the per-day influence /
    sensitive / manual identify hashes, and run text-attribute
    computation in batches of 100 uids.

    Side effects only; no return value.
    """
    hash_name = 'compute'
    results = r.hgetall(hash_name)
    iter_user_list = []
    mapping_dict = dict()
    for uid in results:
        user_list = json.loads(results[uid])
        in_date = user_list[0]
        status = user_list[1]
        if status == '2':
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
            # revise identify_in_date: uid was queued through influence,
            # sensitive or manual identification; update the one that
            # actually holds it
            influence_hashname = 'identify_in_influence_' + str(in_date)
            sensitive_hashname = 'identify_in_sensitive_' + str(in_date)
            manual_hashname = "identify_in_manual_" + str(in_date)
            tmp = r.hget(influence_hashname, uid)
            tmp1 = r.hget(sensitive_hashname, uid)
            if tmp:
                r.hset(influence_hashname, uid, '3')
            elif tmp1:
                r.hset(sensitive_hashname, uid, '3')
            else:
                r.hset(manual_hashname, uid, '3')
        if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0:
            # mark status from 2 to 3 (identify_compute -> computing)
            r.hmset(hash_name, mapping_dict)
            # acquire bulk user weibo data
            if WEIBO_API_INPUT_TYPE == 0:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
            else:
                user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
            # compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            # deal user no weibo to compute portrait attribute: reset to '2'
            if len(user_keywords_dict) != len(iter_user_list):
                change_mapping_dict = dict()
                change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
                for change_user in change_user_list:
                    change_mapping_dict[change_user] = json.dumps([in_date, '2'])
                # BUGFIX: hmset requires the hash name as first argument;
                # the original r.hmset(change_mapping_dict) raises TypeError
                r.hmset(hash_name, change_mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # flush the final partial batch
    if iter_user_list != [] and mapping_dict != {}:
        r.hmset(hash_name, mapping_dict)
        # acquire bulk user weibo data
        if WEIBO_API_INPUT_TYPE == 0:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_user_list)
        else:
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_user_list)
        # compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
        # deal user no weibo to compute portrait attribute: reset to '2'
        if len(user_keywords_dict) != len(iter_user_list):
            change_mapping_dict = dict()
            change_user_list = set(iter_user_list) - set(user_keywords_dict.keys())
            for change_user in change_user_list:
                change_mapping_dict[change_user] = json.dumps([in_date, '2'])
            # BUGFIX: same missing hash-name argument as above
            r.hmset(hash_name, change_mapping_dict)
def scan_compute_redis():
    """Scan the 'compute' redis hash for users in status '2' and run the
    text-attribute pipeline over them in batches of 100.

    NOTE: a '#test' limiter stops after 3 matching users — kept exactly
    as in the original debug version.  Side effects only; no return value.
    """
    hash_name = 'compute'
    results = r.hgetall('compute')
    iter_user_list = []
    mapping_dict = dict()
    #test
    count = 0
    for uid in results:
        in_date, status = json.loads(results[uid])[:2]
        if status == '2':  # imme
            #test
            count += 1
            if count >= 3:
                break
            iter_user_list.append(uid)
            mapping_dict[uid] = json.dumps([in_date, '3'])  # mark status:3 computing
        pending = len(iter_user_list)
        if pending and pending % 100 == 0:
            r.hmset('compute', mapping_dict)
            # acquire bulk user weibo data
            reader = read_flow_text_sentiment if WEIBO_API_INPUT_TYPE == 0 else read_flow_text
            user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = reader(iter_user_list)
            # compute text attribute
            compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
            if compute_status == True:
                change_status_computed(mapping_dict)
            else:
                change_status_compute_fail(mapping_dict)
            iter_user_list = []
            mapping_dict = {}
    # flush the final partial batch (fewer than 100 uids)
    if iter_user_list and mapping_dict:
        r.hmset('compute', mapping_dict)
        # acquire bulk user weibo data
        reader = read_flow_text_sentiment if WEIBO_API_INPUT_TYPE == 0 else read_flow_text
        user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = reader(iter_user_list)
        # compute text attribute
        compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts)
        if compute_status == True:
            change_status_computed(mapping_dict)
        else:
            change_status_compute_fail(mapping_dict)
def scan_compute_redis(): in_portrait_list = [] new_portrait_list = [] non_portrait_list = [] task_detail = r.rpop("user_portrait_task") if not task_detail: sys.exit(0) task_detail = json.loads(task_detail) task_name = task_detail[0] task_time = task_detail[1] task_uid_list = task_detail[2] iter_user_list = [] mapping_dict = dict() #test count = 0 for uid in task_uid_list: count += 1 iter_user_list.append(uid) if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0: #acquire bulk user weibo data in_list, out_list = es_km_storage(iter_uid_list) in_portrait_list.extend(in_list) if out_list: iter_uid_list = out_list if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment(iter_uid_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text(iter_uid_list) #compute text attribute iter_in_list = user_keywords_dict.keys() compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status==True: new_portrait_list.extend(iter_in_list) print "finish iteration" non_in = list(set(iter_uid_list) - set(iter_in_list)) non_portrait_list.extend(non_in) else: non_portrait_list.extend(iter_in_list) #when uid user no weibo at latest week to change compute status to 1 """ if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: r.lpush("uid_list",change_user) """ iter_user_list = [] mapping_dict = {} if iter_user_list != []: #acquire bulk user weibo date print 'iter_user_list:', len(iter_user_list) in_list, out_list = es_km_storage(iter_user_list) in_portrait_list.extend(in_list) iter_user_list = out_list if iter_user_list: print iter_user_list[0][0], type(iter_user_list) if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, 
online_pattern_dict, character_start_ts, filter_keywords_dict = read_flow_text_sentiment(iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict = read_flow_text(iter_user_list) #compute text attribute print 'user_weibo_dict:', len(user_weibo_dict) iter_in_list = user_keywords_dict.keys() compute_status = test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status==True: new_portrait_list.extend(iter_in_list) print "finish iteration" non_in = list(set(iter_user_list) - set(iter_in_list)) non_portrait_list.extend(non_in) else: non_portrait_list.extend(iter_in_list) #when uid user no weibo at latest week to change compute status to 1 """ if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: r.lpush("uid_list",change_user) """ results = dict() results["task_name"] = task_name results["task_time"] = task_time results["in_portrait_list"] = json.dumps(in_portrait_list) results["new_in_list"] = json.dumps(new_portrait_list) results["not_in_list"] = json.dumps(non_portrait_list) es_km.index(index="user_portrait_task_results", doc_type="user", id=task_name, body=results) try: es_km.update(index="user_status", doc_type="user", id=task_name, body={"doc":{"status": "2"}})["_source"] except Exception: print Exception
def scan_compute_redis(): in_portrait_list = [] new_portrait_list = [] non_portrait_list = [] task_detail = r.rpop("user_portrait_task") if not task_detail: sys.exit(0) task_detail = json.loads(task_detail) task_name = task_detail[0] task_time = task_detail[1] task_uid_list = task_detail[2] iter_user_list = [] mapping_dict = dict() #test count = 0 for uid in task_uid_list: count += 1 iter_user_list.append(uid) if len(iter_user_list) % 100 == 0 and len(iter_user_list) != 0: #acquire bulk user weibo data in_list, out_list = es_km_storage(iter_uid_list) in_portrait_list.extend(in_list) if out_list: iter_uid_list = out_list if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text_sentiment( iter_uid_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts = read_flow_text( iter_uid_list) #compute text attribute iter_in_list = user_keywords_dict.keys() compute_status = test_cron_text_attribute_v2( user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status == True: new_portrait_list.extend(iter_in_list) print "finish iteration" non_in = list(set(iter_uid_list) - set(iter_in_list)) non_portrait_list.extend(non_in) else: non_portrait_list.extend(iter_in_list) #when uid user no weibo at latest week to change compute status to 1 """ if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: r.lpush("uid_list",change_user) """ iter_user_list = [] mapping_dict = {} if iter_user_list != []: #acquire bulk user weibo date print 'iter_user_list:', len(iter_user_list) in_list, out_list = es_km_storage(iter_user_list) in_portrait_list.extend(in_list) iter_user_list = out_list if iter_user_list: print iter_user_list[0][0], type(iter_user_list) if WEIBO_API_INPUT_TYPE == 0: user_keywords_dict, user_weibo_dict, 
online_pattern_dict, character_start_ts, filter_keywords_dict = read_flow_text_sentiment( iter_user_list) else: user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts, filter_keywords_dict = read_flow_text( iter_user_list) #compute text attribute print 'user_weibo_dict:', len(user_weibo_dict) iter_in_list = user_keywords_dict.keys() compute_status = test_cron_text_attribute_v2( user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts) if compute_status == True: new_portrait_list.extend(iter_in_list) print "finish iteration" non_in = list(set(iter_user_list) - set(iter_in_list)) non_portrait_list.extend(non_in) else: non_portrait_list.extend(iter_in_list) #when uid user no weibo at latest week to change compute status to 1 """ if len(user_keywords_dict) != len(iter_user_list): change_mapping_dict = dict() change_user_list = set(iter_user_list) - set(user_keywords_dict.keys()) for change_user in change_user_list: r.lpush("uid_list",change_user) """ results = dict() results["task_name"] = task_name results["task_time"] = task_time results["in_portrait_list"] = json.dumps(in_portrait_list) results["new_in_list"] = json.dumps(new_portrait_list) results["not_in_list"] = json.dumps(non_portrait_list) es_km.index(index="user_portrait_task_results", doc_type="user", id=task_name, body=results) try: es_km.update(index="user_status", doc_type="user", id=task_name, body={"doc": { "status": "2" }})["_source"] except Exception: print Exception