def all_makeup_info(id, sort_norm, time):
    """Collect profile info, warehousing status and history metrics for one uid.

    Looks the user up in the whole-network user index, marks whether the user
    already exists in the portrait index, then attaches the BCI / sensitivity
    history values selected by ``sort_norm`` and ``time``.
    """
    item = {}
    profile_query = {
        "query": {"bool": {"must": [{"term": {"user.uid": id}}], "must_not": [], "should": []}},
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {},
        "fields": ["uid", "nick_name", "user_location", "fansnum", "statusnum"],
    }
    profile_hits = es.search(index=WEBUSER_INDEX_NAME, doc_type=WEBUSER_INDEX_TYPE, body=profile_query)['hits']
    if profile_hits['total'] != 0:
        first = profile_hits['hits'][0]['fields']
        item['uid'] = first['uid'][0]
        item['fans'] = first['fansnum'][0]
        item['location'] = first['user_location'][0]
        item['uname'] = first['nick_name'][0]
        item['weibo_count'] = first['statusnum'][0]
    else:
        item['uid'] = None
        item['fans'] = None
        item['location'] = None
        item['uname'] = None
        item['weibo_count'] = None
    # the uid is always echoed back, even when the profile lookup missed
    item['uid'] = id
    portrait_query = {
        "query": {"bool": {"must": [{"term": {"user.uid": id}}], "must_not": [], "should": []}},
        "size": 10,
        "sort": [],
        "facets": {},
        "fields": [],
    }
    portrait_hits = es.search(index=USER_INDEX_NAME, doc_type=USER_INDEX_TYPE, body=portrait_query)['hits']
    item['is_warehousing'] = portrait_hits['total'] != 0
    field_bci, field_sen = get_all_filed(sort_norm, time)
    item['bci'] = history_info(BCIHIS_INDEX_NAME, BCIHIS_INDEX_TYPE, id, field_bci)
    item['sen'] = history_info(SESHIS_INDEX_NAME, SESHIS_INDEX_TYPE, id, field_sen)
    return item
def in_makeup_info(id, sort_norm, time):
    """Collect in-portrait profile info plus four history metrics for one uid.

    Mirrors all_makeup_info but reads the portrait user index and adds the
    importance / activeness series on top of BCI and sensitivity.
    """
    item = {}
    query = {
        "query": {"bool": {"must": [{"term": {"user.uid": id}}], "must_not": [], "should": []}},
        "size": 10,
        "sort": [],
        "facets": {},
        "fields": ["uid", "uname", "location", "topic_string", "domain", "fansnum"],
    }
    hits = es.search(index=USER_INDEX_NAME, doc_type=USER_INDEX_TYPE, body=query)['hits']
    if hits['total'] != 0:
        first = hits['hits'][0]['fields']
        item['domain'] = first['domain'][0]
        item['uid'] = first['uid'][0]
        item['topic'] = first['topic_string'][0]
        item['location'] = first['location'][0]
        item['uname'] = first['uname'][0]
        item['fans'] = first['fansnum'][0]
    else:
        item['domain'] = None
        item['uid'] = None
        item['topic'] = None
        item['location'] = None
        item['uname'] = None
        item['fans'] = None
    # uid is always echoed back, even on a miss
    item['uid'] = id
    field_bci, field_sen, field_imp, field_act = get_in_filed(sort_norm, time)
    item['bci'] = history_info(BCI_INDEX_NAME, BCI_INDEX_TYPE, id, field_bci)
    item['sen'] = history_info(SES_INDEX_NAME, SES_INDEX_TYPE, id, field_sen)
    item['imp'] = history_info(IMP_INDEX_NAME, IMP_INDEX_TYPE, id, field_imp)
    item['act'] = history_info(ACT_INDEX_NAME, ACT_INDEX_TYPE, id, field_act)
    return item
def find_domain(): #domain = [] #for item in domains: # domain.append([domains[item]]) #print domain index_name = 'user_portrait_1222' task_doc_type = 'user' uid = '' domain = '' uid_domain = [] query_body = { "query":{ "filtered":{ "filter":{ "bool":{ "must":[ {"terms":{"domain":["媒体","高校","法律机构及人士","政府机构及人士"]}} ] } } } }, "size":1000 } search_results = es.search(index=index_name, doc_type=task_doc_type, body=query_body)['hits']['hits'] print len(search_results) for i in range(0,len(search_results)): uid = search_results[i]['_source']['uid'] domain = search_results[i]['_source']['domain'] uid_domain.append([uid,domain]) write_json(uid_domain)
def history_info(index_name , index_type , uid , fields): length = len(fields) query = { "query": { "bool": { "must": [ { "term": { "uid": uid } } ] } }, "fields": fields } try: result = es.search(index = index_name , doc_type = index_type , body = query) if result['timed_out'] == False and result['hits']['total'] != 0 : item = result['hits']['hits'][0]['fields'] return item[fields][0] else : return None except Exception , e: print "Exception : " + str(e) return None
def search_user_task(user_name):
    """Return the keyword-rank tasks submitted by ``user_name``.

    Results are newest-first by create_time; each entry carries the task
    parameters plus ``number`` (defaults to 100 on old documents).
    Defect fixed: a leftover ``if 1:`` wrapper (dead construct from a
    removed try block) and commented-out code were removed; behavior is
    unchanged.
    """
    c_result = {}
    query = {
        "query": {"bool": {"must": [{"term": {"submit_user": str(user_name)}}]}},
        "size": MAX_ITEMS,
        "sort": [{"create_time": {"order": "desc"}}],
        "fields": ["status", "search_type", "keyword", "submit_user", "sort_scope",
                   "sort_norm", "start_time", "user_ts", "end_time", "create_time", 'number'],
    }
    return_list = []
    result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, body=query)['hits']
    c_result['flag'] = True
    for item in result['hits']:
        fields = item['fields']
        result_temp = {}
        result_temp['submit_user'] = fields['submit_user'][0]
        result_temp['search_type'] = fields['search_type'][0]
        # keyword is stored as a JSON-encoded string
        result_temp['keyword'] = json.loads(fields['keyword'][0])
        result_temp['sort_scope'] = fields['sort_scope'][0]
        result_temp['sort_norm'] = fields['sort_norm'][0]
        result_temp['start_time'] = fields['start_time'][0]
        result_temp['end_time'] = fields['end_time'][0]
        result_temp['status'] = fields['status'][0]
        result_temp['create_time'] = ts2date(fields['create_time'][0])
        result_temp['search_id'] = fields['user_ts'][0]
        # 'number' is optional; default to 100 when absent or empty
        tmp = fields.get('number', 0)
        if tmp:
            result_temp['number'] = int(tmp[0])
        else:
            result_temp['number'] = 100
        return_list.append(result_temp)
    c_result['data'] = return_list
    return c_result
def search_user_task(user_name): c_result = {} query = {"query":{"bool":{"must":[{"term":{"user_rank_task.submit_user":user_name}}],"must_not":[],"should":[]}},"from":0,"size":MAX_ITEMS,"sort":[],"facets":{},"fields":["status","search_type","keyword","submit_user","sort_scope","sort_norm","start_time","user_ts","end_time"]} try: return_list = [] result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX , doc_type=USER_RANK_KEYWORD_TASK_TYPE , body=query)['hits'] c_result['flag'] = True for item in result['hits']: result_temp = {} result_temp['submit_user'] = item['fields']['submit_user'][0] result_temp['search_type'] = item['fields']['search_type'][0] result_temp['keyword'] = item['fields']['keyword'][0] result_temp['sort_scope'] = item['fields']['sort_scope'][0] result_temp['sort_norm'] = item['fields']['sort_norm'][0] result_temp['start_time'] = item['fields']['start_time'][0] result_temp['end_time'] = item['fields']['end_time'][0] result_temp['status'] = item['fields']['status'][0] result_temp['search_id'] = item['fields']['user_ts'][0] return_list.append(result_temp) c_result['data'] = return_list return c_result except Exception , e1 : c_result['flag'] = False c_result['data'] = e1 print e1 return c_result
def delOfflineTask(search_id):
    """Delete the offline rank task identified by ``search_id`` (user_ts).

    Raises IndexError if no task matches (same as before the fix).
    """
    query = {
        "query": {"bool": {"must": [{"term": {"user_rank_task.user_ts": search_id}}],
                           "must_not": [], "should": []}},
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {},
    }
    # bug fix: the original chained assignment
    # ("result = es.searresult = es.search(...)") also stashed the response
    # as a bogus attribute on the shared ES client object
    result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                       doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                       body=query)['hits']['hits'][0]
    task_id = result['_id']
    es.delete(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, id=task_id)
    return True
def history_info(index_name, index_type, uid, fields): length = len(fields) query = { "query": { "bool": { "must": [{ "term": { "uid": uid } }] } }, "fields": fields } try: result = es.search(index=index_name, doc_type=index_type, body=query) if result['timed_out'] == False and result['hits']['total'] != 0: item = result['hits']['hits'][0]['fields'] return item[fields][0] else: return None except Exception, e: print "Exception : " + str(e) return None
def getResult(search_id):
    """Return the finished result of a rank task, or [] when absent/unfinished.

    Robustness fix: an unknown ``search_id`` used to raise IndexError on the
    empty hit list; it now returns [] like an unfinished task.
    """
    query = {
        "query": {"bool": {"must": [{"term": {"user_rank_task.user_ts": search_id}}],
                           "must_not": [], "should": []}},
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {},
    }
    result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, body=query)['hits']
    if not result['hits']:
        return []
    item = result['hits'][0]
    if item['_source']['status'] == 1:
        source = item['_source']
        result_obj = {}
        result_obj['keyword'] = source['keyword']
        result_obj['sort_scope'] = source['sort_scope']
        result_obj['sort_norm'] = source['sort_norm']
        result_obj['start_time'] = source['start_time']
        result_obj['end_time'] = source['end_time']
        # result payload is stored as a JSON string
        result_obj['result'] = json.loads(source['result'])
        return result_obj
    else:
        return []
def attribute_pattern_detect(input_dict):
    """Detect users by portrait attributes and/or behavior patterns.

    input_dict carries 'task_information' (with 'task_name') and
    'query_condition' (with 'filter', 'attribute', 'pattern').
    Returns a list of uids truncated to filter['count'], or the string
    'task is not exist' / a False-ish process mark on failure.
    """
    results = {}
    task_information_dict = input_dict['task_information']
    task_name = task_information_dict['task_name']
    task_exist_mark = identify_task_exist(task_name)
    if task_exist_mark == False:
        return 'task is not exist'
    query_condition_dict = input_dict['query_condition']
    filter_dict = query_condition_dict['filter']
    attribute_list = query_condition_dict['attribute']
    pattern_list = query_condition_dict['pattern']
    if len(attribute_list) != 0:
        # type1: have attribute condition, then filter by pattern
        # step1: search user_portrait by attribute condition and filter condition
        count = MAX_DETECT_COUNT
        for filter_item in filter_dict:
            if filter_item == 'count':
                # over-fetch so later pattern filtering still yields enough users
                count = filter_dict[filter_item] * DETECT_COUNT_EXPAND
            else:
                # every other filter key is a numeric range folded into the query
                filter_value_from = filter_dict[filter_item]['gte']
                filter_value_to = filter_dict[filter_item]['lt']
                attribute_list.append({'range': {filter_item: {'gte': filter_value_from, 'lt': filter_value_to}}})
        try:
            user_portrait_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                    body={'query': {'bool': {'should': attribute_list}}, 'size': count}, _source=False)['hits']['hits']
        except:
            user_portrait_result = []
        # step1.2: change process proportion to 30%
        process_mark = change_process_proportion(task_name, 30)
        if process_mark == 'task is not exist':
            return 'task is not exist'
        elif process_mark == False:
            return process_mark
        if len(pattern_list) != 0:
            # step2: filter user by pattern condition
            filter_user_result = attribute_filter_pattern(user_portrait_result, pattern_list)
        else:
            # step2: get user_list from user_portrait_result
            filter_user_result = [item['_id'] for item in user_portrait_result]
        # change process mark to 60%
        process_mark = change_process_proportion(task_name, 60)
        if process_mark == 'task is not exist':
            return 'task is not exist'
        elif process_mark == False:
            return process_mark
    else:
        # type2: no attribute condition, just use pattern condition
        # step1: search pattern list and filter by in-user_portrait and filter_dict
        filter_user_result = pattern_filter_attribute(pattern_list, filter_dict)
        # step2.2: change process proportion to 60%
        process_mark = change_process_proportion(task_name, 60)
        if process_mark == 'task is not exist':
            return 'task is not exist'
        elif process_mark == False:
            return process_mark
    # step3: truncate the user list to the requested count
    count = filter_dict['count']
    results = filter_user_result[:count]
    return results
def search_low_number(low_range, index_name=index_destination, index_type=index_destination_doctype):
    """Return the ids of users whose low_number exceeds ``low_range``."""
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {"low_number": {"gt": low_range}}},
            }
        },
        "size": 1000,
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)["hits"]["hits"]
    user_list = []
    if hits:
        user_list = [hit['_id'] for hit in hits]
    return user_list
def export_random_user(): import random query_body={ 'query':{ 'match_all':{} }, 'size':50000 } result=es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)['hits']['hits'] id_list = [user['_id'] for user in result] random.shuffle(id_list) print type(id_list), len(id_list) id_list = id_list[:9000] print len(id_list) final_results = [] for idx, uid in enumerate(id_list): try: user_bci = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)['_source'] user_profile = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type, id=uid)['_source'] hb = dict(user_bci.items() + user_profile.items()) final_results.append(hb) print idx, 'over!!' except: print 'not found', uid print 'final len', len(final_results) fw = file('random_user.json', 'w') fw.write(json.dumps(final_results)) fw.close()
def export_date(): query_body={ 'query':{ 'match_all':{} }, 'size':1000, 'sort':{'influence':{'order':'desc'}} } result=es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)['hits']['hits'] id_list = [user['_id'] for user in result] print len(id_list) final_results = [] for idx, uid in enumerate(id_list): print idx, 'over!!' try: user_bci = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)['_source'] user_profile = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type, id=uid)['_source'] hb = dict(user_bci.items() + user_profile.items()) final_results.append(hb) except: print 'not found', uid print 'final len', len(final_results) fw = file('high_influence_user.json', 'w') fw.write(json.dumps(final_results)) fw.close()
def filter_top_influence_user(index_name, domain=[], topic=[], size=1000, influence=0): query_body = { "query": { "filtered":{ "filter": { "bool": { "must": [ #{"terms": {"topic_string": topic}}, #{"terms": {"domain": domain}}, {"range": { "influence": { "gte": influence } }} ] } } } }, "sort": {"influence": {"order": "desc"}}, "size": size } if domain: query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"domain": domain}}) if topic: query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"terms": {"topic_string": topic}}) search_results = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)["hits"]["hits"] uid_list = [] for item in search_results: uid_list.append(item['_id']) print len(uid_list) return uid_list
def history_sort( prefix ,index_name , index_type , uid_list , time , ischange = False ,key_search = False, number=100): es = es_user_profile # 全网是81的es sort_field = prefix ts = datetime2ts(ts2datetime(TIME.time()-DAY)) if time == 1 : if ischange: sort_field += "day_change" else: if prefix == "bci_": sort_field = "bci_day_last" else: sort_field = "sensitive_score_%s" %ts elif time == 7: if ischange: sort_field += "week_change" else: sort_field += "week_ave" else: if ischange: sort_field += "month_change" else: if sort_field == "sensitive_": sort_field = "senstiive_month_ave" else: sort_field += "month_ave" query = {} if key_search: query = { "query": { "filtered": { "filter": { "terms": { "uid": uid_list } } } }, "sort": [{ sort_field : { "order": "desc" } }], "size" : number } else : query = { "query":{ "match_all":{}}, "sort": [{ sort_field : { "order": "desc" } }], "size" : number } result = es.search(index = index_name , doc_type = index_type , body = query, _source=False)['hits']['hits'] sorted_uid_list = [] for item in result : sorted_uid_list.append(item['_id'].encode("utf-8") ) #jln #none_in_list = set(uid_list) - set(sorted_uid_list) # if none_in_list: # sorted_uid_list.extend(list(none_in_list)) return sorted_uid_list
def get_influence_top():
    """Fetch the top-100 portrait users by influence.

    NOTE(review): ``es_result`` is fetched but never used and the function
    implicitly returns None -- this looks like a truncated helper; confirm
    the intended return value before relying on it.
    """
    result = []
    index_name = 'user_portrait'
    index_type = 'user'
    query_body = {'query': {'match_all': {}}, 'sort': [{'influence': {'order': 'desc'}}], 'size': 100}
    try:
        es_result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    except Exception, e:
        raise e
def search_user_task(user_name):
    """Return the keyword-rank tasks submitted by ``user_name``, newest first.

    Defect fixed: the body was wrapped in a pointless ``if 1:`` (leftover
    from a removed try block); it and the commented-out code were removed
    with no behavior change.
    """
    c_result = {}
    query = {
        "query": {"bool": {"must": [{"term": {"submit_user": str(user_name)}}]}},
        "size": MAX_ITEMS,
        "sort": [{"create_time": {"order": "desc"}}],
        "fields": ["status", "search_type", "keyword", "submit_user", "sort_scope",
                   "sort_norm", "start_time", "user_ts", "end_time", "create_time", 'number'],
    }
    return_list = []
    result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                       doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                       body=query)['hits']
    c_result['flag'] = True
    for item in result['hits']:
        fields = item['fields']
        result_temp = {}
        result_temp['submit_user'] = fields['submit_user'][0]
        result_temp['search_type'] = fields['search_type'][0]
        # keyword is stored as a JSON-encoded string
        result_temp['keyword'] = json.loads(fields['keyword'][0])
        result_temp['sort_scope'] = fields['sort_scope'][0]
        result_temp['sort_norm'] = fields['sort_norm'][0]
        result_temp['start_time'] = fields['start_time'][0]
        result_temp['end_time'] = fields['end_time'][0]
        result_temp['status'] = fields['status'][0]
        result_temp['create_time'] = ts2date(fields['create_time'][0])
        result_temp['search_id'] = fields['user_ts'][0]
        # 'number' is optional; default to 100 when absent or empty
        tmp = fields.get('number', 0)
        if tmp:
            result_temp['number'] = int(tmp[0])
        else:
            result_temp['number'] = 100
        return_list.append(result_temp)
    c_result['data'] = return_list
    return c_result
def get_domain_top_user(domain_top):
    """For each (domain, ...) entry return the top-5 users by influence.

    Returns {domain: [[uid, uname, photo_url], ...]}; missing profile
    fields fall back to 'unknown'.
    Defects fixed: unused locals (``domain_user``, ``count``) and a large
    block of commented-out test fixtures removed; behavior unchanged.
    """
    result = {}
    k = 5  # users returned per domain
    for item in domain_top:
        domain = item[0]
        result[domain] = []
        query_body = {
            'query': {'filtered': {'filter': {'term': {'domain': domain}}}},
            'size': k,
            'sort': [{'influence': {'order': 'desc'}}],
        }
        profile_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                                                 body=query_body, _source=False,
                                                 fields=['uid', 'uname', 'photo_url'])['hits']['hits']
        for profile in profile_result:
            uid = profile['_id']
            try:
                uname = profile['fields']['uname'][0]
                photo_url = profile['fields']['photo_url'][0]
            except:
                uname = 'unknown'
                photo_url = 'unknown'
            result[domain].append([uid, uname, photo_url])
    return result
def history_sort(prefix, index_name, index_type, uid_list, time, ischange=False, key_search=False): es = es_user_profile sort_field = prefix if time == 1: sort_field += "day_change" elif time == 7: if ischange: sort_field += "week_change" else: sort_field += "week_ave" else: if ischange: sort_field += "month_change" else: sort_field += "month_ave" query = {} if key_search: query = { "query": { "filtered": { "filter": { "terms": { "uid": uid_list } } } }, "sort": { sort_field: { "order": "desc" } }, "size": MAX_SIZE } else: query = {"sort": {sort_field: {"order": "desc"}}, "size": MAX_SIZE} try: result = es.search(index=index_name, doc_type=index_type, body=query)['hits']['hits'] uid_list = [] for item in result: uid_list.append(item['_id'].encode("utf-8")) return uid_list except Exception, e: print e raise Exception(index_name + " " + index_type + " " + str(query).replace("\'", "\""))
def sort_task(user, keyword, status, start_time, end_time, submit_time):
    """List a user's rank tasks filtered by keyword/status/time, newest first.

    All filter arguments are optional in effect: empty ``keyword``, the
    sentinel ``status == 2``, missing time bounds and empty ``submit_time``
    each skip their clause. Returns a list of rows, each row being
    [search_type, keyword-list, start, end, range, create_time, status,
    sort_norm, sort_scope, task_id].
    """
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"submit_user": user}}
                        ]
                    }
                }
            }
        },
        "size": 10000,
        "sort": {"submit_time": {"order": "desc"}}
    }
    query_list = []
    if keyword:
        # comma-separated keywords -> terms filter
        keyword_list = keyword.split(',')
        query_list.append({"terms": {"keyword_string": keyword_list}})
    if status != 2:
        # status 2 is the "any status" sentinel
        query_list.append({"term": {"status": status}})
    if start_time and end_time:
        start_ts = datetime2ts(start_time)
        end_ts = datetime2ts(end_time)
        # both task start and end must fall inside the window
        query_list.append({"range": {"start_time": {"gte": start_ts, "lte": end_ts}}})
        query_list.append({"range": {"end_time": {"gte": start_ts, "lte": end_ts}}})
    if submit_time:
        query_list.append({"term": {"submit_time": submit_time}})
    if query_list:
        query_body["query"]["filtered"]["filter"]["bool"]["must"].extend(query_list)
    #print query_body
    search_results = es.search(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, body=query_body)["hits"]["hits"]
    results = []
    if search_results:
        for item in search_results:
            iter_item = item['_source']
            tmp = []
            tmp.append(iter_item['search_type'])
            tmp.append(json.loads(iter_item['keyword']))  # stored as JSON string
            tmp.append(ts2datetime(iter_item['start_time']))
            tmp.append(ts2datetime(iter_item['end_time']))
            tmp.append(iter_item['range'])
            tmp.append(ts2date(iter_item['create_time']))
            tmp.append(iter_item['status'])
            tmp.append(iter_item['sort_norm'])
            tmp.append(iter_item['sort_scope'])
            tmp.append(item['_id'])  # task_name / task id
            results.append(tmp)
    return results
def get_tag_history(admin_user, now_date):
    """Return the set of portrait uids matching ``admin_user``'s recent tags.

    Bug fixed: the attribute lookup used ``es_tag.get()`` with a query body,
    but get() takes a document id and rejects ``body``; the resulting
    TypeError was swallowed by the except, so the tag list was always empty.
    It now goes through ``search()`` as the filtered query requires.
    Also removed the unused ``search_tag_list`` local.
    """
    results = set()
    now_ts = datetime2ts(now_date)
    # last RECOMMEND_IN_AUTO_DATE days (kept for the commented date filter below)
    query_date_list = []
    for i in range(RECOMMEND_IN_AUTO_DATE, 0, -1):
        query_date_list.append(ts2datetime(now_ts - i * DAY))
    attribute_query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            #{'terms': {'date': query_date_list}},
                            {'term': {'user': admin_user}}
                        ]
                    }
                }
            }
        }
    }
    try:
        attribute_result = es_tag.search(index=attribute_index_name, doc_type=attribute_index_type,
                                         body=attribute_query_body)['hits']['hits']
    except:
        attribute_result = []
    tag_query_list = []
    for attribute_item in attribute_result:
        source = attribute_item['_source']
        attribute_name = source['attribute_name']
        attribute_value_string = source['attribute_value']
        # tags are stored as "<attribute_name>-<attribute_value>"
        tag_query_list.extend([attribute_name + '-' + value for value in attribute_value_string])
    submit_user_attribute = admin_user + '-tag'
    portrait_query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {submit_user_attribute: tag_query_list}
                }
            }
        },
        'size': RECOMMEND_IN_AUTO_SIZE
    }
    try:
        portrait_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                                                  body=portrait_query_body, _source=False)['hits']['hits']
    except:
        portrait_result = []
    results = set([item['_id'] for item in portrait_result])
    return results
def get_max_index(term):
    """Fetch the portrait doc with the highest value of ``term``.

    NOTE(review): the search result is bound to ``iter_max_value`` but never
    used, and the function implicitly returns None -- looks like a truncated
    helper; confirm the intended return value.
    """
    query_body = {
        'query': {'match_all': {}},
        'size': 1,
        'sort': [{term: {'order': 'desc'}}]
    }
    try:
        iter_max_value = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                                                 body=query_body)['hits']['hits']
    except Exception, e:
        raise e
def get_influence_vary_top():
    """Fetch the 10000 docs with the largest 'vary' from the vary/bci index.

    NOTE(review): ``es_result`` is never consumed and the function implicitly
    returns None -- looks truncated; confirm the intended return value.
    """
    result = []
    query_body = {
        'query': {'match_all': {}},
        'size': 10000,
        'sort': [{'vary': {'order': 'desc'}}]
    }
    try:
        es_result = es.search(index='vary', doc_type='bci', body=query_body)['hits']['hits']
    except Exception, e:
        raise e
def create_task_list():
    """Queue unfinished sensing tasks (finish=0, processing_status=1) to redis.

    1. search from manage_sensing_task
    2. push one JSON payload per task to the redis list 'task_name'

    NOTE(review): ``now_ts`` is pinned to the hard-coded 2016-05-15 timestamp
    1463241600 (offset by sys.argv[1] hours when RUN_TYPE != 0) -- this looks
    like replay/test wiring; confirm before production use.
    """
    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    if RUN_TYPE == 0:
        now_ts = 1463241600  # 1378008000
    else:
        i = int(sys.argv[1])
        now_ts = 1463241600 + 3600 * i
        #now_ts = date_hour2ts(ts2date_hour(time.time()))
    print_log = "&".join([file_path, "start", ts2date(now_ts)])
    print print_log
    #ts = ts - 3600
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"finish": "0"}},
                            {"term": {"processing_status": "1"}}
                        ]
                    }
                }
            }
        }
    }
    search_results = es.search(index=index_name, doc_type=task_doc_type, body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task_name
            task.append(item['keywords'])   # keywords
            task.append(item['stop_time'])  # stop time
            task.append(item['create_by'])  # task creator
            task.append(now_ts)             # scheduling timestamp
            r.lpush('task_name', json.dumps(task))
            count += 1
    print count
    print_log = "&".join([file_path, "end", ts2date(time.time())])
    print print_log
def get_topic_top_user(topic_top):
    """For each (topic, ...) entry return the top-5 users by influence.

    Matches topic via a wildcard over topic_string. Returns
    {topic: [[uid, uname, photo_url], ...]}; missing profile fields fall
    back to 'unknown'.
    Defects fixed: unused locals (``topic_user``, ``count``) and a large
    block of commented-out test fixtures removed; behavior unchanged.
    """
    result = {}
    k = 5  # users returned per topic
    for item in topic_top:
        topic = item[0]
        result[topic] = []
        query_body = {
            'query': {'wildcard': {'topic_string': '*' + topic + '*'}},
            'size': k,
            'sort': [{'influence': {'order': 'desc'}}],
        }
        profile_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                                                 body=query_body, _source=False,
                                                 fields=['uid', 'uname', 'photo_url'])['hits']['hits']
        for profile in profile_result:
            uid = profile['_id']
            try:
                uname = profile['fields']['uname'][0]
                photo_url = profile['fields']['photo_url'][0]
            except:
                uname = 'unknown'
                photo_url = 'unknown'
            result[topic].append([uid, uname, photo_url])
    return result
def search_user_task(user_name): c_result = {} query = { "query": { "bool": { "must": [{ "term": { "user_rank_task.submit_user": user_name } }], "must_not": [], "should": [] } }, "from": 0, "size": MAX_ITEMS, "sort": [], "facets": {}, "fields": [ "status", "search_type", "keyword", "submit_user", "sort_scope", "sort_norm", "start_time", "user_ts", "end_time" ] } try: return_list = [] result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, body=query)['hits'] c_result['flag'] = True for item in result['hits']: result_temp = {} result_temp['submit_user'] = item['fields']['submit_user'][0] result_temp['search_type'] = item['fields']['search_type'][0] result_temp['keyword'] = item['fields']['keyword'][0] result_temp['sort_scope'] = item['fields']['sort_scope'][0] result_temp['sort_norm'] = item['fields']['sort_norm'][0] result_temp['start_time'] = item['fields']['start_time'][0] result_temp['end_time'] = item['fields']['end_time'][0] result_temp['status'] = item['fields']['status'][0] result_temp['search_id'] = item['fields']['user_ts'][0] return_list.append(result_temp) c_result['data'] = return_list return c_result except Exception, e1: c_result['flag'] = False c_result['data'] = e1 print e1 return c_result
def create_task_list(ts): # 1. search from manage_sensing_task # 2. push to redis list-----task_work # print start info current_path = os.getcwd() file_path = os.path.join(current_path, 'task_list.py') now_ts = str(int(time.time())) print_log = "&".join([file_path, "start", now_ts]) print print_log query_body = { "query":{ "filtered":{ "filter":{ "bool":{ "must":[ {"term":{"finish": "0"}}, {"term":{"processing_status": "1"}} ] } } } } } search_results = es.search(index=index_name, doc_type=task_doc_type, body=query_body)['hits']['hits'] count = 0 if search_results: for iter_item in search_results: item = iter_item['_source'] task = [] task.append(item['task_name']) # task_name task.append(json.loads(item['social_sensors'])) # social sensors task.append(json.loads(item['keywords'])) # filter keywords task.append(json.loads(item['sensitive_words'])) #load sensitive_words task.append(item['stop_time']) # stop time task.append(item['warning_status']) # last step status task.append(item['task_type']) # task type task.append(ts) task.append(item['create_by']) r.lpush('task_name', json.dumps(task)) count += 1 print count now_ts = str(int(time.time())) print_log = "&".join([file_path, "end", now_ts]) print print_log
def getResult(search_id):
    """Return the finished result of a rank task, or [] when absent/unfinished.

    Robustness fix: an unknown ``search_id`` used to raise IndexError on the
    empty hit list; it now returns [] like an unfinished task.
    """
    query = {
        "query": {"bool": {"must": [{"term": {"user_rank_task.user_ts": search_id}}],
                           "must_not": [], "should": []}},
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {},
    }
    result = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                       doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                       body=query)['hits']
    if not result['hits']:
        return []
    item = result['hits'][0]
    if item['_source']['status'] == 1:
        source = item['_source']
        result_obj = {}
        result_obj['keyword'] = source['keyword']
        result_obj['sort_scope'] = source['sort_scope']
        result_obj['sort_norm'] = source['sort_norm']
        result_obj['start_time'] = source['start_time']
        result_obj['end_time'] = source['end_time']
        # result payload is stored as a JSON string
        result_obj['result'] = json.loads(source['result'])
        return result_obj
    else:
        return []
def modify_evaluate_index(filter_from, filter_to, evaluate_index):
    """Start of an evaluate-index rescaling routine.

    NOTE(review): only step1 exists -- the search result is bound to
    ``evaluate_index_max`` but never used, ``filter_from``/``filter_to`` and
    the abnormal_* locals are never read, and the function implicitly
    returns None. Looks truncated; confirm before use.
    """
    abnormal_filter_from = 0
    abnormal_filter_to = 0
    #step1: get evaluate_index max value
    query_body = {
        'query': {
            'match_all': {},
        },
        'size': 1,
        'sort': [{evaluate_index: {'order': 'desc'}}]
    }
    try:
        evaluate_index_max = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                                                     body=query_body)['hits']['hits']
    except Exception, e:
        raise e
def es_get_userlist_by_all(fieldname, uid, key_search=False): sort = {fieldname: {"order": "desc"}} query = {} if key_search: query = { "query": { "filtered": { "filter": { "terms": { "uid": uid } } } }, "sort": sort, "fields": ["uid"], "size": MAX_SIZE } else: print "aa" query = { "query": { "bool": { "must": [], "must_not": [], "should": [] } }, "sort": sort, "facets": {}, "fields": ["uid"], "size": MAX_SIZE } try: print str(query).replace("\'", "\"") es = es_user_profile result = es.search(index=WEIBO_USER_INDEX_NAME, doc_type=WEIBO_USER_INDEX_TYPE, body=query)['hits']['hits'] uid_list = [] for item in result: uid_list.append(item['_id'].encode("utf-8")) return uid_list except Exception, e: print e raise Exception('user_list failed!')
def es_get_userlist_by_all(fieldname , uid, key_search = False): sort = { fieldname : { "order": "desc" } } query = {} if key_search: query = { "query": { "filtered": { "filter": { "terms": { "uid": uid } } } }, "sort": sort, "fields": [ "uid" ], "size" : MAX_SIZE } else : print "aa" query = { "query": { "bool": { "must": [], "must_not": [], "should": [] } }, "sort": sort , "facets": {}, "fields": [ "uid" ], "size" : MAX_SIZE } try: print str(query).replace("\'","\"") es = es_user_profile result = es.search(index = WEIBO_USER_INDEX_NAME , doc_type = WEIBO_USER_INDEX_TYPE , body = query)['hits']['hits'] uid_list = [] for item in result : uid_list.append(item['_id'].encode("utf-8") ) return uid_list except Exception,e: print e raise Exception('user_list failed!')
def scan_offline_task():
    """Move pending (status=0) rank tasks onto the redis work queue.

    Each matching task is serialized in a fixed positional order the worker
    relies on, pushed to the 'task_user_rank' list, then its document is
    re-indexed with status=-1 to mark it as dispatched.
    """
    query = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "status": 0
                    }
                }]
            }
        },
        "size": 1000
    }
    results = es_user_portrait.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                                      doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                      body=query)['hits']['hits']
    print USER_RANK_KEYWORD_TASK_INDEX, USER_RANK_KEYWORD_TASK_TYPE
    if results:
        for item in results:
            task_id = item['_id']
            iter_item = item['_source']
            search_type = iter_item['search_type']
            pre = iter_item['pre']
            during = iter_item['during']
            start_time = iter_item['start_time']
            keyword = json.loads(iter_item['keyword'])  # stored as JSON string
            search_key = iter_item['user_ts']
            number = iter_item['number']
            sort_norm = iter_item['sort_norm']
            sort_scope = iter_item['sort_scope']
            time = iter_item['time']
            isall = iter_item['isall']
            print redis_task
            # positional payload contract shared with the queue consumer --
            # do not reorder
            redis_task.lpush(
                "task_user_rank",
                json.dumps([
                    task_id, search_type, pre, during, start_time, keyword,
                    search_key, sort_norm, sort_scope, time, isall, number
                ]))
            # mark the task dispatched so it is not picked up again
            iter_item['status'] = -1
            task_id = item['_id']
            #print item
            es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   id=task_id,
                                   body=iter_item)
def scan_offlice_task():
    """Run every pending (status "0") legacy rank task synchronously.

    NOTE(review): the misspelled name ('offlice') is kept -- callers bind to it.
    """
    query = {
        "query": {"bool": {"must": [{"term": {"user_rank_task.status": "0"}}],
                           "must_not": [], "should": []}},
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {},
    }
    results = es_9200.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                             doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                             body=query)['hits']
    if results['total'] > 0:
        for hit in results['hits']:
            src = hit['_source']
            key_words_search(src['search_type'], src['pre'], src['during'],
                             src['start_time'], src['keyword'], src['user_ts'],
                             src['sort_norm'], src['sort_scope'], src['time'],
                             src['isall'])
def get_single_user_portrait(seed_user_dict):
    """Look up one user's portrait document.

    seed_user_dict -- dict containing either 'uid' (preferred, direct GET by id)
                      or 'uname' (term search against the portrait index)

    Returns the portrait '_source' dict, or {} when the user is not found or
    the lookup fails.
    """
    if 'uid' in seed_user_dict:
        uid = seed_user_dict['uid']
        try:
            user_portrait_result = es_user_portrait.get(index=portrait_index_name,
                                                        doc_type=portrait_index_type,
                                                        id=uid)['_source']
        except:
            user_portrait_result = {}
    else:
        uname = seed_user_dict['uname']
        # BUGFIX: the original referenced an undefined name `quuery`, and then
        # indexed the search response with ['_source'] (not present at the top
        # level of a search response). Both errors were silently swallowed by
        # the bare except, so this branch always returned {}.
        query = {'term': {'uname': uname}}
        try:
            hits = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'query': {'bool': {'must': query}}})['hits']['hits']
            user_portrait_result = hits[0]['_source'] if hits else {}
        except:
            user_portrait_result = {}
    return user_portrait_result
def history_sort( prefix ,index_name , index_type , uid_list , time , ischange = False ,key_search = False):
    """Return uids from a history index, ordered descending by the field
    built from `prefix` plus a time-window suffix:

      time == 1 -> <prefix>day_change
      time == 7 -> <prefix>week_change (ischange) or <prefix>week_ave
      otherwise -> <prefix>month_change (ischange) or <prefix>month_ave

    When key_search is True the search is filtered to `uid_list`; otherwise
    the whole index is ranked.  Raises Exception(...) on search failure.
    """
    if time == 1:
        suffix = "day_change"
    elif time == 7:
        suffix = "week_change" if ischange else "week_ave"
    else:
        suffix = "month_change" if ischange else "month_ave"
    sort_field = prefix + suffix
    sort_clause = [{sort_field: {"order": "desc"}}]
    if key_search:
        query = {
            "query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}},
            "sort": sort_clause,
            "fields": [],
            "size": MAX_SIZE
        }
    else:
        query = {
            "sort": sort_clause,
            "fields": [],
            "size": MAX_SIZE
        }
    try:
        hits = es.search(index=index_name, doc_type=index_type, body=query)['hits']['hits']
        return [h['_id'].encode("utf-8") for h in hits]
    except Exception as e:
        print(e)
        raise Exception(index_name + " " + index_type + " " + str(query).replace("\'", "\""))
def find_domain():
    """Return the uid of the first portrait document whose domain is one of
    media / university / law, or None when nothing matches.

    BUGFIX: the original computed the uid but never returned it (the result
    was discarded), and raised IndexError when the search came back empty.
    """
    index_name = 'user_portrait_1222'
    task_doc_type = 'user'
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"domain": ['媒体', '高校', '法律']}}
                        ]
                    }
                }
            }
        }
    }
    search_results = es.search(index=index_name, doc_type=task_doc_type,
                               body=query_body)['hits']['hits']
    if not search_results:
        return None
    return search_results[0]['_source']['uid']
def get_evaluate_max():
    """Return the maximum value of each evaluation metric over the
    user_portrait index, as {'activeness': max, 'importance': max,
    'influence': max} (one size-1 desc-sorted search per metric).

    Metrics with no documents are omitted from the result.
    Search failures propagate to the caller.
    BUGFIX: the original built max_result but never returned it.
    """
    max_result = {}
    index_name = 'user_portrait'
    index_type = 'user'
    evaluate_index = ['activeness', 'importance', 'influence']
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        try:
            result = es.search(index=index_name, doc_type=index_type,
                               body=query_body)['hits']['hits']
        except Exception as e:
            raise e
        # guard: an empty index would otherwise raise IndexError
        if result:
            max_result[evaluate] = result[0]['_source'][evaluate]
    return max_result
def search_get_portrait():
    """Dump the uid of every portrait whose keywords_string contains '文革'
    (up to 1000 hits) to a fixed output file, one uid per line.

    BUGFIX: the file handle was leaked when a write failed; `with` now
    guarantees the file is closed on every path.
    """
    query_body = {
        'query': {
            'wildcard': {'keywords_string': '*' + '文革' + '*'}
        },
        'size': 1000
    }
    result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,\
            body=query_body)['hits']['hits']
    with open('/home/user_portrait_0320/revised_user_portrait/user_portrait/user_portrait/attribute/uid_list_0520.txt', 'w') as f:
        for item in result:
            uid = item['_source']['uid']
            f.write("%s\n" % uid )
def key_words_search( search_type , pre , during , start_time , keyword , search_key = '' , sort_norm = '', sort_scope = '' ,time = 1 , isall = False):
    """Collect the uids of users whose weibo text matches `keyword` across
    `during` daily indices starting at `start_time`, after marking the
    corresponding rank task (matched by user_ts == search_key) as running.

    search_type -- "hashtag" matches "#key#" text prefixes; anything else a
                   plain text prefix
    pre         -- daily index name prefix (index name is pre + date string)
    during      -- number of daily indices to scan
    keyword     -- comma-separated keyword list

    Returns the set of matching uids (utf-8 encoded).
    Raises Exception('user_list failed!') when a daily search fails.

    BUGFIXES vs original: `item['result'] = json.dumps(results)` referenced
    an undefined name `results` (NameError on every call) -- replaced with an
    empty placeholder; the collected uid_set was never returned; a useless
    `i += 1` inside the for loop was removed.
    """
    query = {"query": {"bool": {"must": [{"term": {"user_rank_task.user_ts": search_key}}],
                                "must_not": [], "should": []}},
             "from": 0, "size": 10, "sort": [], "facets": {}}
    result = es_9200.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                            doc_type=USER_RANK_KEYWORD_TASK_TYPE, body=query)['hits']['hits']
    search_id = result[0]['_id']
    item = result[0]['_source']
    item['status'] = -1  # 任务 (mark the task as running)
    item['result'] = json.dumps([])  # placeholder until real results are stored
    es_9200.index(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                  id=search_id, body=item)
    # build one prefix clause per keyword
    should = []
    for key in keyword.split(","):
        if search_type == "hashtag":
            should.append({"prefix": {"text.text": "#" + key + "#"}})
        else:
            should.append({"prefix": {"text.text": key}})
    # skip forward past missing daily indices; each skipped day consumes
    # one unit of `during`
    date = start_time
    index_name = pre + start_time
    while not es_9206.indices.exists(index=index_name):
        date = ts2datetime(datetime2ts(date) + DAY)
        index_name = pre + date
        during -= 1
    uid_set = set()
    for _ in range(during):
        print(index_name)
        query = {"query": {"bool": {"must": [], "must_not": [], "should": should}},
                 "size": MAX_ITEMS, "sort": [], "facets": {}, "fields": ['uid']}
        try:
            hits = es_9206.search(index=index_name, doc_type='text', body=query)['hits']['hits']
            print("Fetch " + str(len(hits)))
            for hit in hits:
                uid_set.add(hit['fields']['uid'][0].encode("utf-8"))
        except Exception as e:
            print(e)
            raise Exception('user_list failed!')
        # advance to the next day's index
        date = ts2datetime(datetime2ts(date) + DAY)
        index_name = pre + date
    return uid_set
def search_low_number(low_range, index_name=copy_user_portrait, index_type="bci"):
    """Return the ids of up to 1000 documents whose low_number field is
    strictly greater than `low_range` (range filter, gt)."""
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "range": {
                        "low_number": {"gt": low_range}
                    }
                }
            }
        },
        "size": 1000
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)["hits"]["hits"]
    return [hit['_id'] for hit in hits]
def scan_offline_task():
    """Queue every pending (status == 0) keyword-rank task onto the
    'task_user_rank' redis list and flip its stored status to -1.

    NOTE(review): this re-defines scan_offline_task; an earlier, nearly
    identical definition in this file is shadowed by this one.
    """
    query = {"query": {"bool": {"must": [{"term": {"status": 0}}]}}, "size": 1000}
    pending = es_user_portrait.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                                      doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                      body=query)['hits']['hits']
    for hit in pending:
        task_id = hit['_id']
        task = hit['_source']
        payload = [
            task_id,
            task['search_type'],
            task['pre'],
            task['during'],
            task['start_time'],
            json.loads(task['keyword']),
            task['user_ts'],
            task['sort_norm'],
            task['sort_scope'],
            task['time'],
            task['isall'],
            task['number'],
        ]
        redis_task.lpush("task_user_rank", json.dumps(payload))
        # mark the task as picked up so the next scan skips it
        task['status'] = -1
        es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX,
                               doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                               id=task_id, body=task)
def es_search( pre , scope , arg , index_name , type_name , time , ischange = False , uid_list = [] ,key_search = False):
    """Rank uids from `index_name`/`type_name` by a history field derived
    from `pre` and the time window, optionally prefix-filtered on `scope`.

    pre        -- field-name prefix, combined with the window into e.g.
                  pre + 'day_change', pre + 'week_ave', pre + 'month_change'
    scope      -- field name used for the optional prefix filter
    arg        -- prefix value; falsy means no filter
    time       -- 1, 7 or 30; any other value leaves the sort clause empty,
                  i.e. results come back unsorted (preserved original behavior)
    ischange   -- for the 7/30-day windows choose *_change instead of *_ave
    uid_list   -- uids to restrict to when key_search is True (read-only,
                  so the mutable default is safe)
    key_search -- True: filter by uid_list; False: prefix-filter by scope/arg

    Returns up to MAX_SIZE uids (utf-8 encoded) in descending field order.
    Raises Exception carrying the index/type/query on search failure.
    (Removed: unused local `today = TIME.time()` and leftover debug prints.)
    """
    if time == 1:
        sort_field = pre + 'day_change'
    elif time == 7:
        sort_field = pre + ('week_change' if ischange else 'week_ave')
    elif time == 30:
        sort_field = pre + ('month_change' if ischange else 'month_ave')
    else:
        sort_field = ''  # unknown window: no sorting
    must = [{"prefix": {scope: arg}}] if arg else []
    sort = [{sort_field: {"order": "desc"}}] if sort_field else []
    if not key_search:
        query = {
            "query": {"bool": {"must": must, "must_not": [], "should": []}},
            "sort": sort,
            "facets": {},
            "fields": ["uid"],
            "size": MAX_SIZE
        }
    else:
        query = {
            "query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}},
            "sort": sort,
            "fields": [],
            "size": MAX_SIZE
        }
    try:
        result = es.search(index=index_name, doc_type=type_name, body=query)['hits']['hits']
        return [item['_id'].encode("utf-8") for item in result]
    except Exception as e:
        print(e)
        raise Exception(index_name + " " + type_name + " " + str(query).replace("\'", "\""))