def executeES(indexName, typeName, listData):
    """Store a batch of JSON-encoded weibo feedback records.

    Every record is decoded, stamped with ``update_time`` and (except for
    ``weibo_feedback_group``) enriched with sensor / trace-follow /
    sensitivity marks on a best-effort basis.  What happens next depends on
    ``indexName``:

    * ``weibo_feedback_follow`` / ``weibo_feedback_fans``: the relation is
      passed to ``update_weibo_xnr_relations``; nothing is written with
      ``es.index`` in these branches.
    * ``weibo_feedback_comment`` / ``_retweet`` / ``_at`` / ``_like`` /
      ``_private``: the record is indexed into ``<indexName>_<date>`` where
      the date comes from ``ts2datetime(record['timestamp'])``.
    * anything else: the record is indexed into ``indexName`` as-is.

    NOTE(review): a second ``executeES`` with the same signature is defined
    further down in this source; if both are module-level, the later one
    replaces this one at import time -- confirm which is meant to be live.

    Args:
        indexName: base Elasticsearch index name; selects the branch above.
        typeName: Elasticsearch doc type passed through to ``es.index``.
        listData: iterable of JSON strings, each decoding to a dict.

    Returns:
        None.

    Raises:
        KeyError: if a record lacks a field its branch requires (for
            example ``uid``, ``root_uid``, ``mid``, ``sex`` or ``timestamp``).
    """
    current_time = int(time.time())

    # Only difference between the follow and the fans record is which flag
    # is set on the relation document.
    relation_flag_by_index = {
        'weibo_feedback_follow': 'pingtaiguanzhu',
        'weibo_feedback_fans': 'pingtaifensi',
    }
    sex_codes = {'male': 1, 'female': 2}

    def index_by_date(record, mappings_func):
        # The base name is never modified: the previous retweet branch did
        # ``indexName += date`` first, which doubled the date suffix and
        # made every later record in the batch miss its branch.
        date_time = ts2datetime(record['timestamp'])
        doc_id = record["mid"]
        # Called with the date before writing; presumably prepares the
        # dated index mapping -- confirm against the mappings module.
        mappings_func(date_time)
        # Single-argument print(...) prints the same text under the Python 2
        # print statement used by the rest of this file.
        print('save to es!!!! %s' % (es.index(index=indexName + '_' + date_time,
                                               doc_type=typeName,
                                               id=doc_id,
                                               body=record),))

    for list_data in listData:
        data = json.loads(list_data)
        data['update_time'] = current_time

        if indexName != 'weibo_feedback_group':
            # Prefer the mapping for ``uid``; fall back to ``root_uid``.
            xnr_user_no = (uid2xnr_user_no(data["uid"])
                           or uid2xnr_user_no(data["root_uid"]))
            try:
                data['sensor_mark'] = judge_sensing_sensor(
                    xnr_user_no, data['uid'])
                data['trace_follow_mark'] = judge_trace_follow(
                    xnr_user_no, data['uid'])
                data['sensitive_info'] = get_sensitive_info(
                    data['timestamp'], data['mid'])
                data['sensitive_user'] = get_sensitive_user(
                    data['timestamp'], data['uid'])
            except Exception as e:
                # Enrichment stays best-effort (marks set before the failure
                # are kept), but the failure is no longer silent.
                print('feedback enrichment failed: %r' % (e,))

        if indexName in relation_flag_by_index:
            # Current storage scheme for follow / fans relations.
            root_uid = data['root_uid']
            uid = data['uid']
            xnr_user_no = uid2xnr_user_no(root_uid)
            user_data = {
                'platform': 'weibo',
                'xnr_no': xnr_user_no,
                'xnr_uid': root_uid,
                'uid': uid,
                'nickname': data.get('nick_name', ''),
                'sex': sex_codes.get(data['sex'], 0),
                'geo': data.get('geo', ''),
                'fensi_num': data.get('fans', 0),
                'guanzhu_num': data.get('follower', 0),
                'photo_url': data.get('photo_url', ''),
                relation_flag_by_index[indexName]: 1,
            }
            update_weibo_xnr_relations(
                root_uid, uid, user_data, update_portrait_info=True)
        elif indexName == 'weibo_feedback_comment':
            index_by_date(data, weibo_feedback_comment_mappings)
        elif indexName == 'weibo_feedback_retweet':
            index_by_date(data, weibo_feedback_retweet_mappings)
        elif indexName == 'weibo_feedback_at':
            index_by_date(data, weibo_feedback_at_mappings)
        elif indexName == 'weibo_feedback_like':
            index_by_date(data, weibo_feedback_like_mappings)
        elif indexName == 'weibo_feedback_private':
            index_by_date(data, weibo_feedback_private_mappings)
        else:
            print('save to es!!!! %s' % (es.index(index=indexName,
                                                   doc_type=typeName,
                                                   id=data["mid"],
                                                   body=data),))

    print('update %s ES done' % indexName)
def executeES(indexName, typeName, listData):
    """Index a batch of JSON-encoded weibo feedback records.

    Every record is decoded and, unless ``indexName`` is
    ``weibo_feedback_group``, enriched with ``sensor_mark``,
    ``trace_follow_mark``, ``sensitive_info`` and ``sensitive_user``.
    Routing then depends on ``indexName``:

    * ``weibo_feedback_follow``: the relation is recorded through
      ``save_to_fans_follow_ES`` / ``save_to_redis_fans_follow`` (when an
      xnr user is found for ``root_uid``) and the record is always indexed
      under the id ``<root_uid>_<mid>``.
    * ``weibo_feedback_fans``: same bookkeeping, but the record is indexed
      only when fetching that id with ``es.get`` fails.
    * ``weibo_feedback_comment`` / ``_retweet`` / ``_at`` / ``_like`` /
      ``_private``: indexed into ``<indexName>_<date>`` with id ``mid``.
    * anything else: indexed into ``indexName`` with id ``mid``.

    Args:
        indexName: base Elasticsearch index name; selects the branch above.
        typeName: Elasticsearch doc type passed through to ``es``.
        listData: iterable of JSON strings, each decoding to a dict.

    Returns:
        None.

    Raises:
        KeyError: if a record lacks a field its branch requires.
    """
    # (save_type passed to the relation helpers) per relation index.
    relation_save_type = {
        'weibo_feedback_follow': 'followers',
        'weibo_feedback_fans': 'fans',
    }

    def save(index, doc_id, record):
        # Single-argument print(...) prints the same text under the Python 2
        # print statement used by the rest of this file.
        print('save to es!!!! %s' % (es.index(index=index,
                                               doc_type=typeName,
                                               id=doc_id,
                                               body=record),))

    def index_by_date(record, mappings_func):
        date_time = ts2datetime(record['timestamp'])
        doc_id = record["mid"]
        # Called with the date before writing; presumably prepares the
        # dated index mapping -- confirm against the mappings module.
        mappings_func(date_time)
        save(indexName + '_' + date_time, doc_id, record)

    for list_data in listData:
        data = json.loads(list_data)

        xnr_user_no = None
        if indexName != 'weibo_feedback_group':
            xnr_user_no = uid2xnr_user_no(data["root_uid"])
            data['sensor_mark'] = judge_sensing_sensor(
                xnr_user_no, data['uid'])
            data['trace_follow_mark'] = judge_trace_follow(
                xnr_user_no, data['uid'])
            data['sensitive_info'] = get_sensitive_info(
                data['timestamp'], data['mid'])
            data['sensitive_user'] = get_sensitive_user(
                data['timestamp'], data['uid'])

        if indexName in relation_save_type:
            # Relation records get a composite id and are mirrored into the
            # fans/followers stores.
            doc_id = data["root_uid"] + '_' + data["mid"]
            save_type = relation_save_type[indexName]
            # xnr_user_no was already resolved from root_uid above; the
            # second identical lookup is not repeated.
            if xnr_user_no:
                save_to_fans_follow_ES(
                    xnr_user_no, data["uid"], save_type, 'follow')
                save_to_redis_fans_follow(
                    xnr_user_no, data["uid"], save_type)

            if indexName == 'weibo_feedback_follow':
                save(indexName, doc_id, data)
            else:
                # Fans records are written only once: a failing get is
                # taken to mean "not stored yet".
                try:
                    es.get(index=indexName, doc_type=typeName, id=doc_id)
                except Exception:
                    save(indexName, doc_id, data)
        elif indexName == 'weibo_feedback_comment':
            index_by_date(data, weibo_feedback_comment_mappings)
        elif indexName == 'weibo_feedback_retweet':
            index_by_date(data, weibo_feedback_retweet_mappings)
        elif indexName == 'weibo_feedback_at':
            index_by_date(data, weibo_feedback_at_mappings)
        elif indexName == 'weibo_feedback_like':
            index_by_date(data, weibo_feedback_like_mappings)
        elif indexName == 'weibo_feedback_private':
            index_by_date(data, weibo_feedback_private_mappings)
        else:
            save(indexName, data["mid"], data)

    print('update %s ES done' % indexName)
def get_related_recommendation(task_detail):
    """Build the list of recommended accounts for one xnr user.

    Candidate uids come from a terms aggregation over the flow-text index,
    ordered by the average of a sort field and restricted by wildcard
    clauses built from the xnr user's ``monitor_keywords``.  Each candidate
    is then expanded with its portrait document (when one exists) or with a
    minimal fallback record, plus a list of its weibo.

    Args:
        task_detail: mapping with the keys ``'xnr_user_no'`` and
            ``'sort_item'``.  ``sort_item`` selects the ranking:

            * ``'friend'``: candidates are limited to the result of
              ``get_friends_list`` for the user's followers and ranked by
              average ``fansnum`` (``FRIEND_LIST`` is used directly when
              ``S_TYPE == 'test'``).
            * ``'influence'``: ranked by average ``user_fansnum``.
            * any other value: used as the field name to average;
              ``'sensitive'`` results carry the value under ``'sensitive'``
              instead of ``'fansnum'``.

    Returns:
        list of dicts, one per portrait hit or fallback record, each with
        ``nick_name``, ``photo_url``, ``weibo_type``, ``sensor_mark``,
        ``weibo_list`` and ``portrait_status`` among its keys.
    """
    xnr_user_no = task_detail['xnr_user_no']
    sort_item = task_detail['sort_item']

    xnr_source = es.get(index=weibo_xnr_index_name,
                        doc_type=weibo_xnr_index_type,
                        id=xnr_user_no)['_source']
    keyword_clauses = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in xnr_source['monitor_keywords'].split(',')
    ]

    if S_TYPE == 'test':
        current_date = S_DATE
    else:
        # Aggregate over the previous day's flow-text index.
        current_date = ts2datetime(int(time.time() - 24 * 3600))
    flow_text_index_name = flow_text_index_name_pre + current_date

    def ranked_buckets(query, avg_field):
        # Top uids ordered by the average of ``avg_field``.
        body = {
            'query': query,
            'aggs': {
                'uid_list': {
                    'terms': {
                        'field': 'uid',
                        'size': TOP_ACTIVE_SOCIAL,
                        'order': {'avg_sort': 'desc'},
                    },
                    'aggs': {'avg_sort': {'avg': {'field': avg_field}}},
                }
            },
        }
        found = es_flow_text.search(index=flow_text_index_name,
                                    doc_type='text',
                                    body=body)
        return found['aggregations']['uid_list']['buckets']

    # uid -> aggregated sort value; stays empty for FRIEND_LIST in test mode.
    sort_values = {}

    if sort_item != 'friend':
        avg_field = 'user_fansnum' if sort_item == 'influence' else sort_item
        uid_list = []
        query = {'bool': {'should': keyword_clauses}}
        for bucket in ranked_buckets(query, avg_field):
            avg_value = bucket['avg_sort']['value']
            if avg_value is None:
                # A bucket with no value for the field has a null average;
                # treat it as 0 like the friend ranking does.
                avg_value = 0
            if avg_field == 'user_fansnum':
                sort_values[bucket['key']] = int(avg_value)
            else:
                sort_values[bucket['key']] = round(avg_value, 2)
            uid_list.append(bucket['key'])
    elif S_TYPE == 'test':
        uid_list = FRIEND_LIST
    else:
        # The followers list is only needed for the friend ranking.
        try:
            followers = es.get(
                index=weibo_xnr_fans_followers_index_name,
                doc_type=weibo_xnr_fans_followers_index_type,
                id=xnr_user_no)['_source']['followers_list']
        except Exception:
            followers = []
        friends = list(set(get_friends_list(list(set(followers)))))
        query = {
            'bool': {
                'must': [
                    {'terms': {'uid': friends}},
                    {'bool': {'should': keyword_clauses}},
                ]
            }
        }
        uid_list = []
        for bucket in ranked_buckets(query, 'fansnum'):
            sort_values[bucket['key']] = int(bucket['avg_sort']['value'] or 0)
            uid_list.append(bucket['key'])

    def apply_sort_value(record, uid):
        # Without an aggregated value (FRIEND_LIST in test mode) the record
        # keeps whatever fansnum it already has instead of raising KeyError.
        if uid not in sort_values:
            return
        if sort_item == 'sensitive':
            record['sensitive'] = sort_values[uid]
        else:
            record['fansnum'] = sort_values[uid]

    # The flow-text index list does not depend on the uid; resolve it once.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name = get_flow_text_index_list(current_time)

    results_all = []
    for uid in uid_list:
        portrait_query = {
            'query': {'filtered': {'filter': {'term': {'uid': uid}}}}
        }
        portrait_hits = es_user_portrait.search(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body=portrait_query)['hits']['hits']

        if not portrait_hits:
            # No portrait document: build a minimal record.
            record = {
                'uid': uid,
                # Nickname / photo lookup is disabled; uid is a placeholder.
                'nick_name': uid,
                'photo_url': '',
                'weibo_type': judge_follow_type(xnr_user_no, uid),
                'sensor_mark': judge_sensing_sensor(xnr_user_no, uid),
                'portrait_status': False,
            }
            weibo_query = {
                'query': {'term': {'uid': uid}},
                'sort': {'retweeted': {'order': 'desc'}},
            }
            weibo_hits = es_flow_text.search(
                index=index_name,
                doc_type=flow_text_index_type,
                body=weibo_query)['hits']['hits']
            weibo_list = []
            for hit in weibo_hits:
                weibo = hit['_source']
                # The last weibo's fan count is the provisional fansnum.
                record['fansnum'] = weibo['user_fansnum']
                weibo_list.append(weibo)
            record['weibo_list'] = weibo_list
            record['friendsnum'] = 0
            record['statusnum'] = 0
            apply_sort_value(record, uid)
            results_all.append(record)
            continue

        for hit in portrait_hits:
            record = hit['_source']
            portrait_uid = record['uid']
            # Nickname / photo lookup is disabled; uid is a placeholder.
            record['nick_name'] = portrait_uid
            record['photo_url'] = ''
            record['weibo_type'] = judge_follow_type(
                xnr_user_no, portrait_uid)
            record['sensor_mark'] = judge_sensing_sensor(
                xnr_user_no, portrait_uid)
            # Dropped independently, so a missing 'group' no longer leaves
            # 'activity_geo_dict' in the result.
            record.pop('group', None)
            record.pop('activity_geo_dict', None)
            apply_sort_value(record, portrait_uid)

            weibo_query = {
                'query': {
                    'bool': {
                        'must': [
                            {'term': {'uid': portrait_uid}},
                            {'terms': {'message_type': [1, 3]}},
                        ]
                    }
                },
                'sort': {'retweeted': {'order': 'desc'}},
                'size': 5,
            }
            weibo_hits = es_flow_text.search(
                index=index_name,
                doc_type=flow_text_index_type,
                body=weibo_query)['hits']['hits']
            record['weibo_list'] = [w['_source'] for w in weibo_hits]
            record['portrait_status'] = True
            results_all.append(record)

    return results_all