def search_task(task_name, submit_date, state, status):
    """Search group tasks in the ``group_result`` index.

    Every non-empty argument contributes clauses that must all match:

    * ``task_name`` and ``state`` are split on single spaces and each token
      becomes a ``*token*`` wildcard clause on that field.
    * ``submit_date`` and ``status`` each become one ``match`` clause.

    When no argument is given, all tasks are returned (``match_all``).
    Hits are sorted by ``count`` descending and capped at 10000.

    Returns:
        A list of ``[task_name, submit_date, count, state, status]`` rows,
        or ``None`` when the response carries no ``hits.hits`` section.

    Raises:
        Whatever ``es.search`` raises; errors are not intercepted.
    """
    must_clauses = []
    if task_name:
        must_clauses.extend(
            {'wildcard': {'task_name': '*' + token + '*'}}
            for token in task_name.split(' '))
    if submit_date:
        must_clauses.append({'match': {'submit_date': submit_date}})
    if state:
        must_clauses.extend(
            {'wildcard': {'state': '*' + token + '*'}}
            for token in state.split(' '))
    if status:
        must_clauses.append({'match': {'status': status}})

    # Only the query part differs between the filtered and unfiltered case,
    # so the request is built and sent once.
    if must_clauses:
        query = {'bool': {'must': must_clauses}}
    else:
        query = {'match_all': {}}
    source = es.search(
        index='group_result',
        doc_type='group',
        body={
            'query': query,
            'sort': [{'count': {'order': 'desc'}}],
            'size': 10000,
        },
    )

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        return None

    return [
        [
            task['_source']['task_name'],
            task['_source']['submit_date'],
            task['_source']['count'],
            task['_source']['state'],
            task['_source']['status'],
        ]
        for task in task_dict_list
    ]
def _portrait_top_sort_value(field):
    """Return the largest sort value of *field* in the copy-portrait index (0 if no hits)."""
    body = {"query": {"match_all": {}}, "sort": {field: {"order": "desc"}}, "size": 1}
    hits = es.search(
        index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=body
    )["hits"]["hits"]
    if not hits:
        return 0
    return hits[0]["sort"][0]


def _portrait_count_above(field, value):
    """Count copy-portrait documents whose *field* is strictly greater than *value*."""
    body = {"query": {"filtered": {"filter": {"range": {field: {"gt": value}}}}}}
    return es.count(
        index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=body
    )["count"]


def _percent_of(value, top):
    """Return ``int(value * 100 / top)``, or 0 when *top* is zero or missing."""
    if not top:
        return 0
    return int(value * 100.0 / top)


def conclusion_on_influence(uid):
    """Summarise how user *uid* compares with the whole copy-portrait index.

    Returns:
        ``[activeness_pct, activeness_count, influence_pct, influence_count,
        importance_pct, importance_count, total_number]`` where ``*_pct`` is
        the user's average value as an integer percentage of the index-wide
        maximum, ``*_count`` is the number of documents with a strictly
        greater value, and ``total_number`` is the document count of the
        index.  When the user's document cannot be fetched, the first six
        entries are 0.
    """
    total_number = es.count(
        index=copy_portrait_index_name, doc_type=copy_portrait_index_type
    )["count"]

    try:
        influ_result = es.get(
            index=copy_portrait_index_name, doc_type=copy_portrait_index_type, id=uid
        )["_source"]
    except Exception:
        return [0, 0, 0, 0, 0, 0, total_number]

    aver_activeness = influ_result.get("aver_activeness", 0)
    aver_influence = influ_result.get("aver_influence", 0)
    aver_importance = influ_result.get("aver_importance", 0)

    # _percent_of guards the division: a zero maximum (e.g. all values are 0)
    # would otherwise raise ZeroDivisionError.
    return [
        _percent_of(aver_activeness, _portrait_top_sort_value("aver_activeness")),
        _portrait_count_above("aver_activeness", aver_activeness),
        _percent_of(aver_influence, _portrait_top_sort_value("aver_influence")),
        _portrait_count_above("aver_influence", aver_influence),
        _percent_of(aver_importance, _portrait_top_sort_value("aver_importance")),
        _portrait_count_above("aver_importance", aver_importance),
        total_number,
    ]
def show_social_sensing_task():
    """Return the names of all social sensing tasks whose ``finish`` is "1".

    Names are ordered by ``create_at`` descending; at most 10000 are returned.
    """
    finished_only = {"term": {"finish": "1"}}
    query_body = {
        "query": {"filtered": {"filter": finished_only}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    response = es.search(index=index_manage_social_task, doc_type=task_doc_type, body=query_body)
    hits = response['hits']['hits']
    if not hits:
        return []
    return [hit['_source']['task_name'] for hit in hits]
def get_top_all_influence(key, ts):
    """Return the largest value of *key* in the daily ``bci_<date>`` index.

    The index for the day of *ts* is used when it exists, otherwise the
    index for the previous day (``ts - DAY``).  Only the first existing
    index is queried.

    Returns:
        The ``key`` value of the top hit, or 2000 when neither index exists
        or the queried index has no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }
    for day_ts in (ts, ts - DAY):
        index_name = "bci_" + ts2datetime(day_ts).replace('-', '')
        # Each candidate is checked once; the original re-checked the chosen
        # index a second time before searching.
        if not es.indices.exists(index=index_name):
            continue
        hits = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
        if hits:
            return hits[0]['_source'][key]
        break
    return 2000
def get_attribute_name():
    """Fetch the hits of a ``match_all`` search on the attribute index.

    Any exception raised by the search is re-raised.

    NOTE(review): in the visible code neither ``attribute_result`` nor
    ``attribute_name_list`` is used or returned afterwards -- confirm
    whether the remainder of this function is missing.
    """
    attribute_name_list = []
    everything = {'query': {'match_all': {}}}
    try:
        response = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                             body=everything)
        attribute_result = response['hits']['hits']
    except Exception as err:
        raise err
def get_evaluate_max():
    """Return the maximum of each evaluation index over the portrait index.

    For ``influence``, ``activeness``, ``importance`` and ``sensitive`` the
    document with the largest value is fetched.

    Returns:
        A dict mapping each index name to its maximum value.  ``MAX_VALUE``
        is used for an index whose lookup fails or yields no usable hit.
    """
    max_result = {}
    for evaluate in ('influence', 'activeness', 'importance', 'sensitive'):
        query_body = {
            'query': {'match_all': {}},
            'sort': [{evaluate: {'order': 'desc'}}],
            'size': 1,
        }
        try:
            hits = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body=query_body)['hits']['hits']
            max_evaluate = hits[0]['_source'][evaluate]
        except Exception:
            # Best effort: search errors, empty results and missing fields
            # all fall back to the configured default.
            max_evaluate = MAX_VALUE
        max_result[evaluate] = max_evaluate
    return max_result
def get_evaluate_max():
    """Return the maximum of each evaluation index over the portrait index.

    Returns:
        A dict mapping ``activeness``, ``importance`` and ``influence`` to
        the value held by the top-ranked document for that field.

    Raises:
        Whatever ``es_user_portrait.search`` raises, and ``IndexError`` when
        the search returns no hits.
    """
    max_result = {}
    index_name = portrait_index_name
    index_type = portrait_index_type
    for evaluate in ('activeness', 'importance', 'influence'):
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # Search errors propagate unchanged; the former ``except ... raise e``
        # only replaced the original traceback.
        result = es_user_portrait.search(index=index_name, doc_type=index_type,
                                         body=query_body)['hits']['hits']
        max_result[evaluate] = result[0]['_source'][evaluate]
    # The collected maxima were previously built but never handed back.
    return max_result
def delete_attribute(attribute_name):
    """Delete a custom attribute and strip it from the users that carry it.

    The attribute document (id *attribute_name*) is removed from the
    attribute index, then every user document returned by a search on the
    attribute's values is re-indexed without the ``attribute_name`` field.

    Returns:
        ``False`` when the attribute document cannot be fetched, ``True``
        otherwise (including when no user carries the attribute or the user
        search fails).

    NOTE(review): the user search uses the default page size and requires
    all attribute values to match (``must``) -- confirm both are intended.
    """
    try:
        result = es.get(index=attribute_index_name, doc_type=attribute_index_type,
                        id=attribute_name)['_source']
    except Exception:
        return False
    attribute_value = json.loads(result['value'])
    es.delete(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)

    # delete attribute in user_portrait
    query = [{'match': {attribute_name: value}} for value in attribute_value]
    try:
        attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type,
                                          body={'query': {'bool': {'must': query}}})['hits']['hits']
    except Exception:
        attribute_user_result = []
    if not attribute_user_result:
        return True

    bulk_action = []
    for user_dict in attribute_user_result:
        user_item = user_dict.get('_source')
        if user_item is None:
            # The original wrote ``next`` here, which is a no-op expression;
            # skipping the hit is what was meant.
            continue
        # Was ``pop(attribute)``: ``attribute`` is not defined in this function.
        user_item.pop(attribute_name, None)
        action = {'index': {'_id': str(user_item['uid'])}}
        bulk_action.extend([action, user_item])
    if bulk_action:
        # Was ``doc_type=index_type`` (undefined here); the documents were
        # read from ``user_index_type`` so they are written back to it.
        es.bulk(bulk_action, index=user_index_name, doc_type=user_index_type)
    return True
def get_social_domain(uid_set):
    """Count the users of *uid_set* per ``domain`` via a terms aggregation.

    Returns a dict mapping each bucket key to its ``doc_count``.  The raw
    bucket list is also printed to stdout.
    """
    uid_filter = {'terms': {'uid': list(uid_set)}}
    query_body = {
        'query': {'filtered': {'filter': uid_filter}},
        'aggs': {'all_domain': {'terms': {'field': 'domain'}}},
    }
    response = es_user_portrait.search(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body=query_body)
    search_result = response['aggregations']['all_domain']['buckets']
    print(' '.join(['search_result:', str(search_result)]))
    return dict((bucket['key'], bucket['doc_count']) for bucket in search_result)
def get_uid(uname):
    """Return the ``_id`` of the first profile hit whose ``nick_name`` is *uname*.

    Returns ``None`` when the search fails or yields no hit.
    """
    name_query = {"query": {"bool": {"must": {"term": {"nick_name": uname}}}}}
    try:
        response = es_user_portrait.search(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body=name_query)
        first_hit = response['hits']['hits'][0]
        return first_hit['_id']
    except:
        return None
def show_social_sensing_task(user):
    """Return the names of finished social sensing tasks created by *user*.

    A task qualifies when ``finish`` is "1" and ``create_by`` equals *user*.
    Names are ordered by ``create_at`` descending; at most 10000 are returned.
    """
    conditions = [
        {"term": {"finish": "1"}},
        {"term": {"create_by": user}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": conditions}}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    response = es.search(index=index_manage_social_task, doc_type=task_doc_type, body=query_body)
    hits = response['hits']['hits']
    if not hits:
        return []
    return [hit['_source']['task_name'] for hit in hits]
def get_social_topic(uid_set):
    """Count the users of *uid_set* per ``topic_string`` via a terms aggregation.

    Returns a dict mapping each bucket key to its ``doc_count``.
    """
    uid_filter = {'terms': {'uid': list(uid_set)}}
    query_body = {
        'query': {'filtered': {'filter': uid_filter}},
        'aggs': {'all_topic': {'terms': {'field': 'topic_string'}}},
    }
    response = es_user_portrait.search(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body=query_body)
    buckets = response['aggregations']['all_topic']['buckets']
    return dict((bucket['key'], bucket['doc_count']) for bucket in buckets)
def search_portrait(condition_num, query, sort, size):
    """Search the portrait index, sorted by *sort* descending.

    With ``condition_num > 0`` the clauses in *query* must all match;
    otherwise every document is searched.  At most *size* hits are requested.

    NOTE(review): the visible code stops after the search; ``result`` and
    ``user_result`` are not processed or returned here -- confirm whether
    the remainder of this function is missing.
    """
    user_result = []
    index_name = portrait_index_name
    index_type = portrait_index_type
    if condition_num > 0:
        request_body = {
            'query': {'bool': {'must': query}},
            'sort': [{sort: {'order': 'desc'}}],
            'size': size,
        }
        response = es_user_portrait.search(index=index_name, doc_type=index_type,
                                           body=request_body)
        result = response['hits']['hits']
    else:
        try:
            request_body = {
                'query': {'match_all': {}},
                'sort': [{sort: {"order": "desc"}}],
                'size': size,
            }
            response = es_user_portrait.search(index=index_name, doc_type=index_type,
                                               body=request_body)
            result = response['hits']['hits']
        except Exception as e:
            raise e
def search_attribute(query_body, condition_num):
    """Search the attribute index when no condition is given.

    Only the ``condition_num == 0`` case is handled in the visible code: it
    runs a ``match_all`` search and re-raises any exception.

    NOTE(review): ``query_body`` is unused and ``result`` is not returned in
    the visible code -- confirm whether the remainder is missing.
    """
    result = []
    if condition_num == 0:
        everything = {'query': {'match_all': {}}}
        try:
            response = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                                 body=everything)
            result = response['hits']['hits']
        except Exception as e:
            raise e
def get_top_influence(key):
    """Return the largest value of *key* in the portrait index.

    Returns:
        The ``key`` value of the top-ranked document, or 2000 when the
        search returns no hits (the fallback the other variants of this
        function use; previously ``result`` was left unassigned).
    """
    query_body = {"query": {"match_all": {}}, "sort": {key: {"order": "desc"}}, "size": 1}
    search_result = es.search(index=portrait_index_name, doc_type=portrait_index_type,
                              body=query_body)["hits"]["hits"]
    if not search_result:
        return 2000
    return search_result[0]["_source"][key]
def ajax_show_task():
    """Return, as JSON, the sensing tasks created by the requesting user.

    Request arguments:
        finish: "0" (unfinished) or "1" (finished) restricts by ``finish``;
            any value that is not exactly one character (default "01")
            applies no ``finish`` restriction.
        user: value matched against ``create_by`` (default "").

    Each task's ``history_status`` is JSON-decoded and sorted descending
    (an empty value becomes ``[]``).  A failing search yields ``"[]"``.
    """
    status = request.args.get("finish", "01")
    user = request.args.get('user', '')

    must_terms = [{"term": {"create_by": user}}]
    if len(status) == 1:
        must_terms.append({"term": {"finish": status}})
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_terms}}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }

    try:
        search_results = es.search(index=index_manage_sensing_task, doc_type=task_doc_type,
                                   body=query_body)['hits']['hits']
    except:
        search_results = []

    results = []
    for hit in search_results or []:
        task = hit['_source']
        history_status = json.loads(task['history_status'])
        if history_status:
            task['history_status'] = sorted(history_status, reverse=True)
        else:
            task['history_status'] = []
        results.append(task)
    return json.dumps(results)
def search_attribute(query_body, condition_num):
    """Search the attribute index when no condition is given.

    Only the ``condition_num == 0`` case is handled in the visible code: it
    runs a ``match_all`` search for up to 100000 hits and re-raises any
    exception.

    NOTE(review): ``query_body`` is unused and neither ``result`` nor
    ``item_list`` is returned in the visible code -- confirm whether the
    remainder is missing.
    """
    item_list = []
    default_size = 100000
    if condition_num == 0:
        everything = {'query': {'match_all': {}}, 'size': default_size}
        try:
            response = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                                 body=everything)
            result = response['hits']['hits']
        except Exception as e:
            raise e
def ajax_show_task():
    """Return, as JSON, the sensing tasks selected by the ``finish`` argument.

    ``finish`` (default "01") selects tasks by their ``finish`` field: two
    characters match either value, one character matches that value; any
    other length prints "error" and sends an empty filter.

    For every task ``keywords`` is JSON-decoded.  A non-empty
    ``history_status`` is reduced to its last entry plus every earlier entry
    whose last element is non-zero, sorted by first element descending.
    """
    status = request.args.get("finish", "01")
    length = len(status)

    finish_filter = {}
    if length == 2:
        finish_filter['terms'] = {"finish": [status[0], status[1]]}
    elif length == 1:
        finish_filter['term'] = {"finish": status}
    else:
        print("error")

    query_body = {
        "query": {"filtered": {"filter": finish_filter}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    hits = es.search(index=index_manage_sensing_task, doc_type=task_doc_type,
                     body=query_body)['hits']['hits']

    results = []
    for hit in hits or []:
        task = hit['_source']
        history_status = json.loads(task['history_status'])
        task['keywords'] = json.loads(task['keywords'])
        if history_status:
            # Always keep the latest entry; keep older ones only when their
            # last element is non-zero.
            kept = [history_status[-1]]
            kept.extend(entry for entry in history_status[:-1] if int(entry[-1]) != 0)
            task['history_status'] = sorted(kept, key=lambda entry: entry[0], reverse=True)
        else:
            task['history_status'] = history_status
        results.append(task)
    return json.dumps(results)
def get_evaluate_max():
    """Return the maximum of each evaluation index over the portrait index.

    Returns:
        A dict mapping ``influence``, ``activeness`` and ``importance`` to
        the value held by the top-ranked document for that field.

    Raises:
        Whatever ``es.search`` raises, and ``IndexError`` when the search
        returns no hits.
    """
    max_result = {}
    for evaluate in ("influence", "activeness", "importance"):
        query_body = {"query": {"match_all": {}}, "size": 1, "sort": [{evaluate: {"order": "desc"}}]}
        # Search errors propagate unchanged; the former ``except ... raise e``
        # only replaced the original traceback.
        result = es.search(index=portrait_index_name, doc_type=portrait_index_type,
                           body=query_body)["hits"]["hits"]
        max_result[evaluate] = result[0]["_source"][evaluate]
    # The collected maxima were previously built but never handed back.
    return max_result
def search_portrait(condition_num, query, sort, size):
    """Search the ``user_portrait`` index with the clauses in *query*.

    Only the ``condition_num > 0`` case is handled in the visible code: all
    clauses must match, *sort* is passed through as the sort specification
    and at most *size* hits are requested.  Exceptions are re-raised.

    NOTE(review): ``result`` and ``user_result`` are not processed or
    returned in the visible code -- confirm whether the remainder is missing.
    """
    user_result = []
    index_name = 'user_portrait'
    index_type = 'user'
    if condition_num > 0:
        try:
            request_body = {'query': {'bool': {'must': query}}, 'sort': sort, 'size': size}
            response = es_user_portrait.search(index=index_name, doc_type=index_type,
                                               body=request_body)
            result = response['hits']['hits']
        except Exception as e:
            raise e
def ajax_show_task():
    """Return, as JSON, the sensing tasks created by the requesting user.

    Request arguments:
        finish: a single character ("0" unfinished, "1" finished) restricts
            by ``finish``; any other length (default "01") does not.
        user: value matched against ``create_by`` (default "admin").

    A non-empty ``history_status`` is reduced to its last entry plus every
    earlier entry whose last element is non-zero, sorted by first element
    descending.  A failing search yields ``"[]"``.  The result list is also
    printed to stdout.
    """
    status = request.args.get("finish", "01")
    user = request.args.get('user', 'admin')

    must_terms = [{"term": {"create_by": user}}]
    if len(status) == 1:
        must_terms.append({"term": {"finish": status}})
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_terms}}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }

    try:
        search_results = es.search(index=index_manage_sensing_task, doc_type=task_doc_type,
                                   body=query_body)['hits']['hits']
    except:
        search_results = []

    results = []
    for hit in search_results or []:
        task = hit['_source']
        history_status = json.loads(task['history_status'])
        if history_status:
            kept = [history_status[-1]]
            kept.extend(entry for entry in history_status[:-1] if int(entry[-1]) != 0)
            task['history_status'] = sorted(kept, key=lambda entry: entry[0], reverse=True)
        else:
            task['history_status'] = history_status
        results.append(task)
    print(results)
    return json.dumps(results)
def ajax_get_group_list():
    """Return, as JSON, the group tasks of type "analysis" with status 1.

    Each row is ``[task_name, submit_user, submit_date, count, state,
    uid_list]``; ``state`` defaults to "" and ``uid_list`` is JSON-decoded
    when possible, otherwise passed through as stored.  Rows are ordered by
    ``submit_date`` descending, at most 10000.
    """
    conditions = [
        {"term": {"task_type": "analysis"}},
        {"term": {"status": 1}},  # attention
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": conditions}}}},
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000,
    }
    hits = es.search(index=index_group_manage, doc_type=doc_type_group,
                     body=query_body, timeout=600)['hits']['hits']

    results = []
    for hit in hits or []:
        source = hit['_source']
        row = [
            source['task_name'],
            source['submit_user'],
            source['submit_date'],
            source['count'],
            source.get('state', ""),
        ]
        try:
            row.append(json.loads(source['uid_list']))
        except:
            row.append(source['uid_list'])
        results.append(row)
    return json.dumps(results)
def save_detect_single_task(input_dict):
    """Look up the seed user of a detect task in the portrait index.

    ``input_dict['query_condition']['seed_user']`` maps field names to
    values; a document matches when any of these ``term`` conditions holds.
    One hit is requested.  Exceptions from the search are re-raised.

    NOTE(review): the visible code stops after this first step;
    ``seed_user_result`` and ``results`` are not used or returned here --
    confirm whether the remainder is missing.
    """
    results = {}
    # step1: identify the seed user is in user_portrait
    seed_user = input_dict['query_condition']['seed_user']
    query_list = [{'term': {field: seed_user[field]}} for field in seed_user]
    query = [{'bool': {'should': query_list}}]
    try:
        response = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'query': {'bool': {'must': query}}, 'size': 1})
        seed_user_result = response['hits']['hits']
    except Exception as e:
        raise e
def get_top_influence(key):
    """Return the largest value of *key* in the portrait index.

    Returns:
        The ``key`` value of the top-ranked document, or 2000 when the
        search returns no hits (the fallback the other variants of this
        function use; previously ``result`` was left unassigned).
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }
    search_result = es.search(index=portrait_index_name, doc_type=portrait_index_type,
                              body=query_body)["hits"]["hits"]
    if not search_result:
        return 2000
    return search_result[0]['_source'][key]
def save_detect_single_task(input_dict):
    """Look up the seed user of a detect task in the portrait index.

    ``input_dict['query_condition']['seed_user']`` maps field names to
    values; a document matches when any of these ``term`` conditions holds.
    One hit is requested.  Exceptions from the search are re-raised.

    NOTE(review): the visible code stops after this first step;
    ``seed_user_result`` and ``results`` are not used or returned here --
    confirm whether the remainder is missing.
    """
    results = {}
    # step1: identify the seed user is in user_portrait
    seed_user = input_dict['query_condition']['seed_user']
    any_field_matches = {'bool': {'should': [
        {'term': {field: seed_user[field]}} for field in seed_user
    ]}}
    query = [any_field_matches]
    try:
        response = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'query': {'bool': {'must': query}}, 'size': 1})
        seed_user_result = response['hits']['hits']
    except Exception as e:
        raise e
def get_top_influence():
    """Return the largest ``influence`` in the portrait index, or 2000 if it has no hits."""
    by_influence_desc = {"influence": {"order": "desc"}}
    query_body = {"query": {"match_all": {}}, "sort": by_influence_desc, "size": 1}
    response = es.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)
    hits = response['hits']['hits']
    if not hits:
        return 2000
    return hits[0]['_source']['influence']
def get_top_influence(key="influence"):
    """Return the largest value of *key* in the portrait index.

    Args:
        key: field to rank by and to read from the top document.

    Returns:
        The ``key`` value of the top-ranked document, or 2000 when the
        search returns no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }
    search_result = es.search(index=portrait_index_name, doc_type=portrait_index_type,
                              body=query_body)['hits']['hits']
    if not search_result:
        return 2000
    # Read the field that was sorted on; the original always read
    # 'influence', which is wrong for any other ``key``.
    return search_result[0]['_source'][key]
def get_sort(uid):
    """Return, as a string, how many BCI documents have ``bci_week_ave`` >= the user's.

    Returns ``None`` when the user's ``bci_week_ave`` cannot be fetched.
    """
    try:
        document = es.get(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, id=uid,
                          fields=['bci_week_ave'])
        u_bci = document['fields']['bci_week_ave'][0]
    except:
        return None

    at_least_user = {'range': {'bci_week_ave': {'gte': u_bci}}}
    query_body = {'query': {'filtered': {'filter': at_least_user}}}
    response = es.search(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, body=query_body)
    return str(response['hits']['total'])
def ajax_get_group_list():
    """Return, as JSON, the group tasks submitted by the requesting user.

    The filter requires ``submit_user`` to equal the ``user`` request
    argument (default "") and lists two optional alternatives: task type
    "analysis", or task type "detect" with ``detect_process`` 100.

    Each row is ``[task_name, submit_user, submit_date, count, state,
    uid_list]``.  ``uid_list`` is JSON-decoded when possible, otherwise the
    stored value is used; ``count`` is the length of whichever value is
    returned.  Rows are ordered by ``submit_date`` descending, at most 10000.
    """
    user = request.args.get('user', '')
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "should": [
                            {"term": {"task_type": "analysis"}},
                            {"bool": {
                                "must": [
                                    {"term": {"task_type": "detect"}},
                                    {"term": {"detect_process": 100}},
                                ]
                            }},
                        ],
                        "must": {"term": {"submit_user": user}},
                    }
                }
            }
        },
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000,
    }
    search_results = es.search(index=index_group_manage, doc_type=doc_type_group,
                               body=query_body, timeout=600)['hits']['hits']

    results = []
    for hit in search_results or []:
        item = hit['_source']
        raw_uid_list = item['uid_list']
        # Decode once and append once.  The original appended the decoded
        # list before computing its length, so a failure at that point left
        # a duplicate element in the row.
        try:
            uid_list = json.loads(raw_uid_list)
            count = len(uid_list)
        except Exception:
            uid_list = raw_uid_list
            count = len(raw_uid_list)
        results.append([
            item['task_name'],
            item['submit_user'],
            item['submit_date'],
            count,
            item.get('state', ""),
            uid_list,
        ])
    return json.dumps(results)
def filter_in_uname(input_dict):
    """Return portrait entries for the user names that are keys of *input_dict*.

    Names are looked up in batches of ``FILTER_ITER_COUNT``.  A batch whose
    search fails contributes nothing.

    Returns:
        A list of ``[doc_id, uname, photo_url, input_dict[uname]]`` rows,
        one per hit.
    """
    input_uname = list(input_dict)
    in_portrait_result = []
    for start in range(0, len(input_uname), FILTER_ITER_COUNT):
        iter_user_list = input_uname[start:start + FILTER_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.search(
                index=portrait_index_name,
                doc_type=portrait_index_type,
                # Ask for as many hits as names in the batch; without an
                # explicit size only the default first page comes back.
                body={'query': {'terms': {'uname': iter_user_list}},
                      'size': len(iter_user_list)},
                _source=False,
                fields=['photo_url', 'uname'],
            )['hits']['hits']
        except Exception:
            portrait_result = []
        for item in portrait_result:
            # Values requested through ``fields`` are returned as lists
            # under the hit's 'fields' key, not at the top level of the hit.
            hit_fields = item['fields']
            uname = hit_fields['uname'][0]
            in_portrait_result.append(
                # The original indexed the list of names with the name itself.
                [item['_id'], uname, hit_fields['photo_url'][0], input_dict[uname]]
            )
    return in_portrait_result
def get_evaluate_max():
    """Return the maximum of each evaluation index over the portrait index.

    Returns:
        A dict mapping ``influence``, ``activeness`` and ``importance`` to
        the value held by the top-ranked document for that field.

    Raises:
        Whatever ``es.search`` raises, and ``IndexError`` when the search
        returns no hits.
    """
    max_result = {}
    for evaluate in ('influence', 'activeness', 'importance'):
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # Search errors propagate unchanged; the former ``except ... raise e``
        # only replaced the original traceback.
        result = es.search(index=portrait_index_name, doc_type=portrait_index_type,
                           body=query_body)['hits']['hits']
        max_result[evaluate] = result[0]['_source'][evaluate]
    # The collected maxima were previously built but never handed back.
    return max_result
def ajax_show_task():
    """Return, as JSON, the sensing tasks selected by the ``finish`` argument.

    ``finish`` (default "01") selects tasks by their ``finish`` field: two
    characters match either value, one character matches that value; any
    other length prints "error" and sends an empty filter.

    For every task ``keywords`` is JSON-decoded.  A non-empty
    ``history_status`` is reduced to its last entry plus every earlier entry
    whose last element is non-zero, sorted by first element descending.
    """
    status = request.args.get("finish", "01")
    status_length = len(status)

    query_body = {
        "query": {"filtered": {"filter": {}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    task_filter = query_body['query']['filtered']['filter']
    if status_length == 2:
        task_filter['terms'] = {"finish": [status[0], status[1]]}
    elif status_length == 1:
        task_filter['term'] = {"finish": status}
    else:
        print("error")

    response = es.search(index=index_manage_sensing_task, doc_type=task_doc_type, body=query_body)
    found = response['hits']['hits']

    results = []
    if not found:
        return json.dumps(results)
    for hit in found:
        task = hit['_source']
        history_status = json.loads(task['history_status'])
        task['keywords'] = json.loads(task['keywords'])
        if history_status:
            latest = history_status[-1]
            earlier = [entry for entry in history_status[:-1] if int(entry[-1]) != 0]
            task['history_status'] = sorted([latest] + earlier,
                                            key=lambda entry: entry[0], reverse=True)
        else:
            task['history_status'] = history_status
        results.append(task)
    return json.dumps(results)
def get_top_all_influence(key, ts):
    """Return the largest value of *key* in the ``bci_<date>`` index of the day before *ts*.

    Returns 2000 when that index does not exist or has no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }
    previous_day = ts2datetime(ts - DAY).replace('-', '')
    index_name = "bci_" + previous_day
    if not es.indices.exists(index=index_name):
        return 2000
    hits = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
    if not hits:
        return 2000
    return hits[0]['_source'][key]
def get_evaluate_max():
    """Return the maximum of each evaluation index over the portrait index.

    For ``influence``, ``activeness``, ``importance`` and ``sensitive`` the
    document with the largest value is fetched.

    Returns:
        A dict mapping each index name to its maximum value.  ``MAX_VALUE``
        is used for an index whose lookup fails or yields no usable hit.
    """
    max_result = {}
    for evaluate in ('influence', 'activeness', 'importance', 'sensitive'):
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        try:
            hits = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body=query_body)['hits']['hits']
            max_evaluate = hits[0]['_source'][evaluate]
        except Exception:
            # Best effort: search errors, empty results and missing fields
            # all fall back to the configured default.
            max_evaluate = MAX_VALUE
        max_result[evaluate] = max_evaluate
    return max_result
def get_max_value(es, index_name, _type):
    """Return the largest ``bci_week_ave`` in the given index.

    Args:
        es: client object providing ``search``.
        index_name: index to query.
        _type: document type to query.

    Returns:
        The ``bci_week_ave`` of the top-ranked document, or 1 when the
        search fails or returns no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {'bci_week_ave': {"order": "desc"}},
        "size": 1,
    }
    try:
        hits = es.search(index=index_name, doc_type=_type, body=query_body)['hits']['hits']
    except:
        hits = []
    if not hits:
        return 1
    return hits[0]['_source']['bci_week_ave']
def ajax_get_group_list():
    """Return, as JSON, the group tasks of type "analysis" with status 1.

    Each row is ``[task_name, submit_user, submit_date, count, state,
    uid_list]``; ``state`` defaults to "" and ``uid_list`` is JSON-decoded
    when possible, otherwise passed through as stored.  Rows are ordered by
    ``submit_date`` descending, at most 10000.
    """
    must_match = [
        {"term": {"task_type": "analysis"}},
        {"term": {"status": 1}},  # attention
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_match}}}},
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000,
    }
    response = es.search(index=index_group_manage, doc_type=doc_type_group,
                         body=query_body, timeout=600)
    found = response['hits']['hits']

    results = []
    if not found:
        return json.dumps(results)
    for hit in found:
        task = hit['_source']
        row = [task['task_name'], task['submit_user'], task['submit_date'], task['count']]
        row.append(task.get('state', ""))
        try:
            decoded = json.loads(task['uid_list'])
            row.append(decoded)
        except:
            row.append(task['uid_list'])
        results.append(row)
    return json.dumps(results)
def _max_source_value(client, index, doc_type, field):
    """Return *field* of the document that ranks first when sorting by it descending."""
    body = {'query': {'match_all': {}}, 'sort': {field: {'order': 'desc'}}, 'size': 1}
    top_hit = client.search(index=index, doc_type=doc_type, body=body)['hits']['hits'][0]
    return top_hit['_source'][field]


def _log_scaled(value, maximum):
    """Return ``log10(value / maximum * 9 + 1)``, or 0.0 when *maximum* is zero."""
    maximum = float(maximum)
    if maximum == 0:
        return 0.0
    return math.log(float(value) / maximum * 9 + 1, 10)


def get_person_value(uid):
    """Compute a composite score for user *uid*.

    The score is ``(verified * 0.1 + years * 0.1 + fans * 0.3 +
    influence * 0.5) * 50`` where

    * ``verified`` is ``verified_value[verified_type]`` (0 if unavailable),
    * ``years`` is the number of 365-day periods since ``create_at``,
      rounded up,
    * ``fans`` and ``influence`` are the user's ``user_fansnum`` and
      ``influence`` relative to the index-wide maximum, log-scaled.

    Returns:
        The score as a float, or the string ``'no'`` when any of the user's
        three documents cannot be fetched or the profile reports not found.
    """
    try:
        value_static = es_bci_history.get(index=bci_history_index_name,
                                          doc_type=bci_history_index_type, id=uid)
        value_inf = es_user_portrait.get(index=portrait_index_name,
                                         doc_type=portrait_index_type, id=uid)
        static = es_user_profile.get(index=profile_index_name,
                                     doc_type=profile_index_type, id=uid)
    except Exception:
        return 'no'
    # Checked before the max-fans query so a missing profile costs no search.
    if not static['found']:
        return 'no'
    profile = static['_source']

    # verification type
    try:
        ver_calue = verified_value[profile['verified_type']]
    except Exception:
        ver_calue = 0

    # account age
    times = math.ceil((time.time() - int(profile['create_at'])) / 31536000)

    # fans count, relative to the largest fans count in the history index
    fans_max = _max_source_value(es_bci_history, bci_history_index_name,
                                 bci_history_index_type, 'user_fansnum')
    fans_value = _log_scaled(value_static['_source']['user_fansnum'], fans_max)

    # influence, relative to the largest influence in the portrait index
    influence_max = _max_source_value(es_user_portrait, portrait_index_name,
                                      portrait_index_type, 'influence')
    influence_value = _log_scaled(value_inf['_source']['influence'], influence_max)

    return (ver_calue * 0.1 + times * 0.1 + fans_value * 0.3 + influence_value * 0.5) * 50
def _bci_rank(index, doc_type, score, extra_filters=()):
    """Count documents in *index* with ``bci_week_ave`` >= *score* that also pass *extra_filters*."""
    must = [{'range': {'bci_week_ave': {'gte': score}}}]
    must.extend(extra_filters)
    query_body = {'query': {'filtered': {'filter': {'bool': {'must': must}}}}}
    return es.search(index=index, doc_type=doc_type, body=query_body)['hits']['total']


def get_sort(uid, fe):
    """Return, as JSON, the user's BCI score and rank within a topic and overall.

    Keys of the result:

    * ``in_score``: ``bci_week_ave`` from the BCI index.
    * ``in_top``: number of BCI documents with ``topic_string`` equal to
      *fe* and a score at least as high.
    * ``all_score``: ``bci_week_ave`` from ``bci_history``, log-scaled
      against the index maximum and multiplied by 100.
    * ``all_top``: number of ``bci_history`` documents with a score at
      least as high.

    When a score cannot be fetched, that score and its rank are "".
    Previously the rank query still ran with an unset or stale score.
    """
    result = {}

    try:
        topic_bci = es.get(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, id=uid,
                           fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
    except Exception:
        result['in_score'] = ""
        result['in_top'] = ""
    else:
        result['in_score'] = topic_bci
        result['in_top'] = _bci_rank(BCI_INDEX_NAME, BCI_INDEX_TYPE, topic_bci,
                                     [{'term': {'topic_string': fe}}])

    try:
        history_bci = es.get(index='bci_history', doc_type='bci', id=uid,
                             fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
    except Exception:
        result['all_score'] = ""
        result['all_top'] = ""
    else:
        try:
            bci_max = get_max_value(es_user_profile, "bci_history", "bci")
            result['all_score'] = math.log(history_bci / float(bci_max) * 9 + 1, 10) * 100
        except Exception:
            result['all_score'] = ""
        result['all_top'] = _bci_rank('bci_history', 'bci', history_bci)

    return json.dumps(result)
def imagine(submit_user, uid, query_fields_dict,index_name=portrait_index_name, doctype=portrait_index_type):
    # Find users related to `uid` by wildcard-matching selected portrait
    # fields, and return the seed user's own values followed by one dict per
    # related user.
    #
    # query_fields_dict maps field names to weights (used as wildcard
    # 'boost') and must contain a truthy 'size' entry (the number of related
    # users wanted).  It is modified in place: unusable keys and 'size' are
    # popped.
    #
    # Returns None if the seed document cannot be fetched, [] if no usable
    # field or tag remains, otherwise
    # [[uid, uname, activeness, importance, influence], {related user}, ...].
    #
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; the placements marked below should be confirmed.
    default_setting_dict = query_fields_dict
    print es,portrait_index_name,portrait_index_type,uid
    try :
        personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']
    except:
        return None
    # The submitting user's custom tags are stored under "<submit_user>-tag"
    # as '&'-separated "name-value" pairs.
    user_tag = submit_user + "-tag"
    user_tag_string = personal_info.get(user_tag, "")
    if user_tag_string:
        tag_pairs_list = user_tag_string.split('&')
    else:
        tag_pairs_list = []
    tag_dict = dict()
    if tag_pairs_list:
        for item in tag_pairs_list:
            iter_pair = item.split('-')
            tag_dict[iter_pair[0]] = iter_pair[1]
    keys_list = []
    for k, v in query_fields_dict.iteritems():
        if v:
            keys_list.append(k) # keys that take part in the association
    # Raises ValueError when 'size' is absent or falsy in query_fields_dict.
    keys_list.remove('size')
    search_dict = {}
    iter_list = []
    tag_attri_vaule = []
    # Filter the search keys and drop the ones that cannot be used
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key] or not query_fields_dict[iter_key]:
                query_fields_dict.pop(iter_key)
                continue
            else:
                iter_list.append(iter_key)
                temp = personal_info[iter_key]
                search_dict[iter_key] = temp.split('&')
        else:
            query_fields_dict.pop(iter_key)
            # NOTE(review): assumed to belong to this else-branch -- confirm.
            if tag_dict.get(iter_key,''):
                tag_attri_vaule.append(iter_key+"-"+tag_dict[iter_key])
    if len(iter_list) == 0 and len(tag_attri_vaule) == 0:
        return []
    query_body = { 'query':{ 'function_score':{ 'query':{ 'bool':{ 'must':[ ] } } } } }
    # NOTE(review): `number` is not used anywhere below.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150 # default number
    query_number = query_fields_dict['size'] # required number
    query_fields_dict.pop('size')
    if tag_attri_vaule:
        query_body['query']['function_score']['query']['bool']['must'].append({"terms":{user_tag:tag_attri_vaule}})
    for (k,v) in query_fields_dict.items():
        temp = {}
        temp_list = []
        if k in personal_info and v != 0:
            for iter_key in search_dict[k]:
                temp_list.append({'wildcard':{k:{'wildcard':'*'+iter_key+'*', 'boost': v}}})
            # NOTE(review): assumed to sit inside the `if`, after the inner
            # loop -- confirm.
            query_body['query']['function_score']['query']['bool']['must'].append({'bool':{'should':temp_list}})
    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid','uname', 'activeness','importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']
    result_list = []
    count = 0
    # The best score among hits other than the seed user is the 100% mark
    # for 'similiar'.  NOTE(review): top_score is only assigned when there
    # is more than one hit, yet it is read for every hit that is kept below.
    if len(result) > 1 and result:
        if result[0]['_id'] != uid:
            top_score = result[0]['_score']
        else:
            top_score = result[1]['_score']
    #get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()
    for item in result:
        return_dict = {}
        # NOTE(review): `uid` is rebound inside the field loop below, so from
        # the second kept hit onwards this compares against the previous
        # hit's uid; `uid in filter_uid` likewise tests that value rather
        # than item['_id'].
        if uid == item['_id'] or uid in filter_uid:
            score = item['_score']
            continue
        for field in field_list:
            if field == 'uid':
                uid = item['_source'][field]
                normal_value = uid
                return_dict['uid'] = uid
            elif field in evaluate_index_list:
                # Scale against the index maximum: log10(value / max * 9 + 1) * 100.
                value = item['_source'][field]
                normal_value = math.log(value / float(evaluate_max_dict[field] )* 9 + 1, 10) * 100
                return_dict[field] = normal_value
            else:
                normal_value = item['_source'][field]
                return_dict[field] = normal_value
        return_dict['similiar'] = item['_score']/float(top_score)*100
        result_list.append(return_dict)
        count += 1
        if count == query_number:
            break
    #return result_list
    # First row of the output: the seed user's own values, scaled the same way.
    temp_list = []
    for field in field_list:
        if field in evaluate_index_list:
            value = personal_info[field]
            normal_value = math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        else:
            normal_value = personal_info[field]
        temp_list.append(normal_value)
    results = []
    results.append(temp_list)
    results.extend(result_list)
    return results
try: result = es_tag.get(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute)['_source'] except Exception, e: #raise e return status es_tag.delete(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute) # delete attribute in user_portrait # user_portrait中,以attribute_name-attribute_value给用户赋值 query = [] portrait_attribute_field = [] attribute_value = result['attribute_value'].split('&') for value in attribute_value: portrait_attribute_field.append(attribute_name +"-"+value) query.append({'match':{submit_user_tag: portrait_attribute_field}}) try: attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type, \ body={'query':{'bool':{'should':query}}, "size": 100000000})['hits']['hits'] except: attribute_user_result = [] if attribute_user_result==[]: status = True return status bulk_action = [] count = 0 for user_dict in attribute_user_result: try: user_item = user_dict['_source'] except: next tmp = user_item[submit_user_tag] delete_set = set(tmp.split('&')) - set(portrait_attribute_field) user_item[submit_user_tag] = "&".join(list(delete_set))
# Search user_portrait with an arbitrary list of query conditions.
def search_portrait(condition_num, query, sort, size):
    """Search the ``user_portrait`` index and return the users not deleted.

    Args:
        condition_num: Number of conditions in ``query``. When it is not
            positive a ``match_all`` query is issued instead.
        query: List of clauses placed under ``bool.must``.
        sort: With conditions, passed to Elasticsearch unchanged as the
            ``sort`` clause. Without conditions, used as the name of the
            field to sort on in descending order.
        size: Maximum number of hits requested.

    Returns:
        A list of ``[uid, uname, location, activeness, importance,
        influence, score]`` rows, one per hit whose uid is not in the set
        returned by ``all_delete_uid()``.

    Raises:
        Whatever ``es_user_portrait.search`` raises. Errors are not caught
        here: the previous ``except ...: raise e`` re-raised the same
        exception but replaced its traceback under Python 2.
    """
    if condition_num > 0:
        body = {'query': {'bool': {'must': query}}, 'sort': sort, 'size': size}
    else:
        body = {
            'query': {'match_all': {}},
            'sort': [{sort: {"order": "desc"}}],
            'size': size,
        }
    hits = es_user_portrait.search(
        index='user_portrait', doc_type='user', body=body)['hits']['hits']
    if not hits:
        return []

    # Only look up the deleted uids when there is something to filter.
    deleted_uids = all_delete_uid()
    user_result = []
    for item in hits:
        user_dict = item['_source']
        if user_dict['uid'] in deleted_uids:
            continue
        user_result.append([
            user_dict['uid'],
            user_dict['uname'],
            user_dict['location'],
            user_dict['activeness'],
            user_dict['importance'],
            user_dict['influence'],
            item['_score'],
        ])
    return user_result
status = True return status # use to search attribute table def search_attribute(query_body, condition_num): item_list = [] default_size = 100000 if condition_num==0: try: result = es.search(index=attribute_index_name, doc_type=attribute_index_type, \ body={'query':{'match_all':{}}, 'size':default_size})['hits']['hits'] except Exception, e: raise e else: try: result = es.search(index=attribute_index_name, doc_type=attribute_index_type, \ body={'query':{'bool':{'must':query_body}}, 'size':default_size})['hits']['hits'] except Exception, e: raise e if result: for item in result: print 'item:', item source = item['_source'] item_list.append(source) return item_list # use to change attribtue def change_attribute(attribute_name, value, user, state): status = False # identify the attribute_name is in ES - custom attribute try: result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for users whose portrait fields resemble those of ``uid``.

    Args:
        uid: Id of the user whose related users are searched.
        query_fields_dict: Maps portrait field names to a boost weight, for
            example ``"domain": 2``. The original notes list the fields
            domain, topic_string, keywords, activity_geo, hashtag,
            character_sentiment and character_text. It must also contain
            ``"field"`` (``activeness``, ``importance`` or ``influence``;
            any other value gives the extra score a factor of 0) and
            ``"size"`` (number of related users wanted). The dict is
            modified in place: ``field``, ``size`` and every key whose value
            is empty in the user's portrait are removed.
        index_name: Index that is counted and searched.
        doctype: Document type that is counted and searched.

    Returns:
        ``[]`` when no requested field has a value for this user. Otherwise
        a list made of, in order:
        the raw ``[uid, uname, activeness, importance, influence]`` of the
        user; one ``[uid, uname, activeness, importance, influence,
        similarity]`` row per related user, with the three evaluation
        indexes log-scaled against ``get_evaluate_max()`` and
        ``similarity`` the hit score as a percentage of the best score
        among the other users; and finally the document count returned by
        ``es.count``.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)["_source"]

    # Keys to correlate on: everything except the two control entries.
    # list(...) keeps this working where dict.keys() is a view, not a list.
    keys_list = list(query_fields_dict)
    keys_list.remove("field")
    keys_list.remove("size")

    # Filter the search keys: drop those that are empty in this user's
    # portrait and split the others into their '&'-separated terms.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split("&")
    if not search_dict:
        return []

    factor_by_field = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    score_field = query_fields_dict.pop("field")
    if score_field in ("activeness", "importance", "influence"):
        score_standard = {"modifier": "log1p", "field": score_field,
                          "factor": factor_by_field[score_field]}
    else:
        # Unknown field: keep the function but let it contribute nothing.
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}

    must_clauses = []
    query_body = {
        "query": {
            "function_score": {
                "query": {"bool": {"must": must_clauses}},
                "field_value_factor": score_standard,
                "boost_mode": "sum",
            }
        }
    }
    # Counted before any clause is added, i.e. with an empty "must" list.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)["count"]
    query_body["size"] = 150  # default number of candidates fetched
    query_number = query_fields_dict.pop("size")  # number of results required

    # One "must" clause per field; inside it any of the user's terms may match.
    for k, v in query_fields_dict.items():
        should = [{"wildcard": {k: {"wildcard": "*" + term + "*", "boost": v}}}
                  for term in search_dict[k]]
        must_clauses.append({"bool": {"should": should}})

    deleted_uids = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)["hits"]["hits"]
    field_list = ["uid", "uname", "activeness", "importance", "influence"]
    evaluate_index_list = ["activeness", "importance", "influence"]

    # Reference score: the first hit that is not the user itself. The old
    # code read result[1] unguarded and left top_score unbound when the
    # guard failed.
    top_score = None
    for item in result:
        if item["_id"] != uid:
            top_score = item["_score"]
            break
    # Maxima used to normalise the evaluation indexes of the hits.
    evaluate_max_dict = get_evaluate_max() if result else {}

    # Kept from the original condition: a deleted querying user gets no rows.
    query_user_deleted = uid in deleted_uids
    return_list = []
    for item in result:
        candidate = item["_id"]
        # Skip the user itself and candidates that were deleted (the old
        # test looked up ``uid`` instead of the candidate).
        if query_user_deleted or candidate == uid or candidate in deleted_uids:
            continue
        source = item["_source"]
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                # float() avoids Python 2 integer division on int values.
                ratio = source[field] / float(evaluate_max_dict[field])
                info.append(math.log(ratio * 9 + 1, 10) * 100)
            else:
                info.append(source[field])
        info.append(item["_score"] / float(top_score) * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    results = [[personal_info[field] for field in field_list]]
    results.extend(return_list)
    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for users whose portrait fields resemble those of ``uid``.

    Args:
        uid: Id of the user whose related users are searched.
        query_fields_dict: Maps portrait field names to a boost weight and
            must also contain ``"size"`` (number of related users wanted).
            The dict is modified in place: ``size`` and every key whose
            value is empty in the user's portrait are removed.
        index_name: Index that is counted and searched.
        doctype: Document type that is counted and searched.

    Returns:
        ``[]`` when no requested field has a value for this user. Otherwise
        a list made of, in order:
        ``[uid, uname, activeness, importance, influence]`` of the user;
        one ``[uid, uname, activeness, importance, influence, similarity]``
        row per related user, where ``similarity`` is the hit score as a
        percentage of the best score among the other users; and finally the
        document count returned by ``es.count``. In every row the three
        evaluation indexes are log-scaled against ``get_evaluate_max()``.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    # Keys to correlate on: everything in the request except the size.
    # list(...) keeps this working where dict.keys() is a view, not a list.
    keys_list = list(query_fields_dict)
    keys_list.remove('size')

    # Filter the search keys: drop those that are empty in this user's
    # portrait and split the others into their '&'-separated terms.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split('&')
    if not search_dict:
        return []

    must_clauses = []
    query_body = {'query': {'function_score': {'query': {'bool': {'must': must_clauses}}}}}
    # Counted before any clause is added, i.e. with an empty "must" list.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of candidates fetched
    query_number = query_fields_dict.pop('size')  # number of results required

    # One "must" clause per field; inside it any of the user's terms may match.
    for k, v in query_fields_dict.items():
        should = [{'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                  for term in search_dict[k]]
        must_clauses.append({'bool': {'should': should}})

    deleted_uids = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Fetched unconditionally: the querying user's own row needs the maxima
    # even when nothing matched (this used to raise NameError).
    evaluate_max_dict = get_evaluate_max()

    def display_value(source, field):
        """Return source[field], log-scaled when it is an evaluation index."""
        if field not in evaluate_index_list:
            return source[field]
        # float() avoids Python 2 integer division on int values.
        ratio = source[field] / float(evaluate_max_dict[field])
        return math.log(ratio * 9 + 1, 10) * 100

    # Reference score: the first hit that is not the user itself. The old
    # code read result[1] unguarded and left top_score unbound when the
    # guard failed.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # Kept from the original condition: a deleted querying user gets no rows.
    query_user_deleted = uid in deleted_uids
    return_list = []
    for item in result:
        candidate = item['_id']
        # Skip the user itself and candidates that were deleted (the old
        # test looked up ``uid`` instead of the candidate).
        if query_user_deleted or candidate == uid or candidate in deleted_uids:
            continue
        info = [display_value(item['_source'], field) for field in field_list]
        info.append(item['_score'] / float(top_score) * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    results = [[display_value(personal_info, field) for field in field_list]]
    results.extend(return_list)
    return results
def conclusion_on_influence(uid):
    """Rank one user's average activeness, influence and importance.

    For each of ``aver_activeness``, ``aver_influence`` and
    ``aver_importance`` the user's value (0 when the field is absent) is
    compared with the highest value in the copy-portrait index, and the
    documents with a strictly greater value are counted.

    Args:
        uid: Document id of the user in the copy-portrait index.

    Returns:
        ``[activeness_pct, activeness_count, influence_pct,
        influence_count, importance_pct, importance_count, total_number]``.
        Each ``*_pct`` is the user's value as an integer percentage of the
        index maximum (0 when that maximum is 0 or missing), each
        ``*_count`` is the number of documents above the user, and
        ``total_number`` is the document count of the index. When the
        user's document cannot be read the six leading entries are 0.
    """
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    total_number = es.count(index=index_name, doc_type=index_type)["count"]
    try:
        influ_result = es.get(index=index_name, doc_type=index_type, id=uid)['_source']
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass.
        return [0, 0, 0, 0, 0, 0, total_number]

    result = []
    # The order of the fields fixes the order of the returned pairs.
    for field in ("aver_activeness", "aver_influence", "aver_importance"):
        value = influ_result.get(field, 0)

        # Highest value in the index: sort descending and read the sort key.
        top_query_body = {
            "query": {"match_all": {}},
            "sort": {field: {"order": "desc"}},
            "size": 1,
        }
        top_value = es.search(index=index_name, doc_type=index_type,
                              body=top_query_body)['hits']['hits'][0]['sort'][0]

        # Number of documents strictly above this user.
        above_query_body = {
            "query": {"filtered": {"filter": {"range": {field: {"gt": value}}}}}
        }
        above_count = es.count(index=index_name, doc_type=index_type,
                               body=above_query_body)['count']

        # Guard the division: a zero or missing maximum used to raise.
        percent = int(value * 100.0 / top_value) if top_value else 0
        result.extend([percent, above_count])

    result.append(total_number)
    return result
es.index(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name, body=input_data) status = True return status # use to search attribute table def search_attribute(query_body, condition_num): result = [] if condition_num==0: try: result = es.search(index=attribute_index_name, doc_type=attribute_index_type, \ body={'query':{'match_all':{}}})['hits']['hits'] except Exception, e: raise e else: try: result = es.search(index=attribute_index_name, doc_type=attribute_index_type, \ body={'query':{'bool':{'must':query_body}}})['hits']['hits'] except Exception, e: raise e if result: for item in result: source = item['_source'] result.append(source) return result # use to change attribtue def change_attribute(attribute_name, value, user, state): status = False # identify the attribute_name is in ES - custom attribute try: result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source'] except:
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for users whose portrait fields resemble those of ``uid``.

    Args:
        uid: Id of the user whose related users are searched.
        query_fields_dict: Maps portrait field names to a boost weight and
            must also contain ``"size"`` (number of related users wanted).
            The dict is modified in place: ``size`` and every key whose
            value is empty in the user's portrait are removed.
        index_name: Index that is counted and searched.
        doctype: Document type that is counted and searched.

    Returns:
        ``[]`` when no requested field has a value for this user. Otherwise
        a list made of, in order:
        ``[uid, uname, activeness, importance, influence]`` of the user;
        one ``[uid, uname, activeness, importance, influence, similarity]``
        row per related user, where ``similarity`` is the hit score as a
        percentage of the best score among the other users; and finally the
        document count returned by ``es.count``. In every row the three
        evaluation indexes are log-scaled against ``get_evaluate_max()``.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    # Keys to correlate on: everything in the request except the size.
    # list(...) keeps this working where dict.keys() is a view, not a list.
    keys_list = list(query_fields_dict)
    keys_list.remove('size')

    # Filter the search keys: drop those that are empty in this user's
    # portrait and split the others into their '&'-separated terms.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split('&')
    if not search_dict:
        return []

    must_clauses = []
    query_body = {'query': {'function_score': {'query': {'bool': {'must': must_clauses}}}}}
    # Counted before any clause is added, i.e. with an empty "must" list.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of candidates fetched
    query_number = query_fields_dict.pop('size')  # number of results required

    # One "must" clause per field; inside it any of the user's terms may match.
    for k, v in query_fields_dict.items():
        should = [{'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                  for term in search_dict[k]]
        must_clauses.append({'bool': {'should': should}})

    deleted_uids = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Fetched unconditionally: the querying user's own row needs the maxima
    # even when nothing matched (this used to raise NameError).
    evaluate_max_dict = get_evaluate_max()

    def display_value(source, field):
        """Return source[field], log-scaled when it is an evaluation index."""
        if field not in evaluate_index_list:
            return source[field]
        # float() avoids Python 2 integer division on int values.
        ratio = source[field] / float(evaluate_max_dict[field])
        return math.log(ratio * 9 + 1, 10) * 100

    # Reference score: the first hit that is not the user itself. The old
    # code read result[1] unguarded and left top_score unbound when the
    # guard failed.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # Kept from the original condition: a deleted querying user gets no rows.
    query_user_deleted = uid in deleted_uids
    return_list = []
    for item in result:
        candidate = item['_id']
        # Skip the user itself and candidates that were deleted (the old
        # test looked up ``uid`` instead of the candidate).
        if query_user_deleted or candidate == uid or candidate in deleted_uids:
            continue
        info = [display_value(item['_source'], field) for field in field_list]
        info.append(item['_score'] / float(top_score) * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    results = [[display_value(personal_info, field) for field in field_list]]
    results.extend(return_list)
    return results
return status es_tag.delete(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute) # delete attribute in user_portrait # user_portrait中,以attribute_name-attribute_value给用户赋值 query = [] portrait_attribute_field = [] print "result", result attribute_value = result['attribute_value'].split('&') print "attribute_value", attribute_value for value in attribute_value: portrait_attribute_field.append(attribute_name + "-" + value) query.append({'terms': {submit_user_tag: portrait_attribute_field}}) try: attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type, \ body={'query':{'bool':{'must':query}}, "size": 100000000})['hits']['hits'] except: attribute_user_result = [] if attribute_user_result == []: status = True return status bulk_action = [] count = 0 for user_dict in attribute_user_result: try: user_item = user_dict['_source'] except: next tmp = user_item[submit_user_tag] delete_set = set(tmp.split('&')) - set(portrait_attribute_field) print "delete_set", delete_set
def search_task(task_name, submit_date, state, status, submit_user):
    """Search tasks of type ``analysis`` in the group index.

    Every argument is optional; a falsy value adds no condition.

    Args:
        task_name: Space-separated words; each must occur in ``task_name``
            (wildcard match).
        submit_date: Date converted with ``datetime2ts``; tasks submitted
            within ``DAY`` of that timestamp match.
        state: Space-separated words; each must occur in ``state``.
        status: Value matched against ``status``.
        submit_user: Exact value of ``submit_user``.

    Returns:
        A list of ``[task_name, submit_date, count, state, status]`` rows
        (the search asks for descending ``count``). ``state`` defaults to
        ``''`` and ``status`` to ``0`` when a task lacks them. ``None`` when
        the response has no ``hits.hits`` entry.

    Raises:
        Whatever the Elasticsearch client raises. Errors are not caught
        here: the previous ``except Exception as e: raise e`` re-raised the
        same exception but replaced its traceback under Python 2.
    """
    query = []
    if task_name:
        for item in task_name.split(' '):
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
    if submit_date:
        submit_date_start = datetime2ts(submit_date)
        submit_date_end = submit_date_start + DAY
        query.append({'range': {'submit_date': {'gte': submit_date_start,
                                                'lt': submit_date_end}}})
    if state:
        for item in state.split(' '):
            query.append({'wildcard': {'state': '*' + item + '*'}})
    if status:
        query.append({'match': {'status': status}})
    if submit_user:
        query.append({'term': {'submit_user': submit_user}})

    # NOTE(review): the original used es_group_result when at least one
    # condition was given and es otherwise, with an identical request.
    # That split is preserved here - confirm whether both should be the
    # same client.
    client = es_group_result if query else es
    query.append({'term': {'task_type': 'analysis'}})
    source = client.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={
            'query': {'bool': {'must': query}},
            'sort': [{'count': {'order': 'desc'}}],
            'size': MAX_VALUE,
        },
    )

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        # Narrowed from a bare except: only a malformed response maps to None.
        return None

    result = []
    for task_dict in task_dict_list:
        task = task_dict['_source']
        result.append([
            task['task_name'],
            task['submit_date'],
            task['count'],
            task.get('state', ''),
            task.get('status', 0),
        ])
    return result
result = es_tag.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source'] except Exception, e: raise e return status es_tag.delete(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name) # delete attribute in user_portrait query = [] attribute_value = result['attribute_value'].split('&') for value in attribute_value: query.append({'match': {attribute_name: value}}) try: attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type, \ body={'query':{'bool':{'should':query}}})['hits']['hits'] except: attribute_user_result = [] if attribute_user_result == []: status = True return status bulk_action = [] for user_dict in attribute_user_result: try: user_item = user_dict['_source'] except: next user_item.pop(attribute_name) user = user_item['uid'] action = {'index': {'_id': str(user)}} bulk_action.extend([action, user_item])
def imagine(submit_user, uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for users resembling ``uid`` on portrait fields and custom tags.

    Args:
        submit_user: Name of the requesting user; the portrait field
            ``submit_user + "-tag"`` holds that user's custom tags as
            ``name-value`` pairs joined by ``&``.
        uid: Id of the user whose related users are searched.
        query_fields_dict: Maps field or tag names to a boost weight and
            must contain a truthy ``"size"`` (number of related users
            wanted). Entries with a falsy weight are ignored. The dict is
            modified in place (``size``, tag names and fields that are
            empty for this user are removed) and the same object is
            returned as the last element of the result.
        index_name: Index that is counted and searched.
        doctype: Document type that is counted and searched.

    Returns:
        ``[]`` when neither a portrait field nor a tag is usable. Otherwise
        a list made of, in order:
        ``[uid, uname, activeness, importance, influence]`` of the user;
        one ``[uid, uname, activeness, importance, influence, similarity]``
        row per related user (an empty ``uid``/``uname`` is replaced by the
        document id), where ``similarity`` is the hit score as a percentage
        of the best score among the other users; the document count
        returned by ``es.count``; and finally ``query_fields_dict``. In
        every row the three evaluation indexes are log-scaled against
        ``get_evaluate_max()``.
    """
    # Same object, not a copy: it is returned after the in-place edits below.
    default_setting_dict = query_fields_dict
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    user_tag = submit_user + "-tag"
    user_tag_string = personal_info.get(user_tag, "")
    tag_dict = dict()
    if user_tag_string:
        for item in user_tag_string.split('&'):
            iter_pair = item.split('-')
            tag_dict[iter_pair[0]] = iter_pair[1]

    # Keys to correlate on: those with a truthy weight, minus 'size'.
    # .items() replaces the Python-2-only iteritems().
    keys_list = [k for k, v in query_fields_dict.items() if v]
    keys_list.remove('size')

    # Filter the search keys. Portrait fields are split into their
    # '&'-separated terms (empty ones are dropped); any other key is
    # treated as a custom tag and matched through the submitter's tag field.
    search_dict = {}
    tag_attri_vaule = []
    for iter_key in keys_list:
        if iter_key in personal_info:
            if personal_info[iter_key]:
                search_dict[iter_key] = personal_info[iter_key].split('&')
            else:
                query_fields_dict.pop(iter_key)
        else:
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key, ''):
                tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])
    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {'query': {'function_score': {'query': {'bool': {'must': must_clauses}}}}}
    # Counted before any clause is added, i.e. with an empty "must" list.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of candidates fetched
    query_number = query_fields_dict.pop('size')  # number of results required

    if tag_attri_vaule:
        must_clauses.append({"terms": {user_tag: tag_attri_vaule}})
    # One "must" clause per field; inside it any of the user's terms may match.
    for k, v in query_fields_dict.items():
        if k in personal_info and v != 0:
            should = [{'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                      for term in search_dict[k]]
            must_clauses.append({'bool': {'should': should}})

    deleted_uids = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Fetched unconditionally: the querying user's own row needs the maxima
    # even with fewer than two hits (this used to raise NameError).
    evaluate_max_dict = get_evaluate_max()

    def scaled(value, field):
        """Log-scale an evaluation index against its maximum."""
        return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    # Reference score: the first hit that is not the user itself. It was
    # left unbound when the search returned a single hit.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # Kept from the original condition: a deleted querying user gets no rows.
    query_user_deleted = uid in deleted_uids
    return_list = []
    for item in result:
        candidate = item['_id']
        # Skip the user itself and candidates that were deleted (the old
        # test looked up ``uid`` instead of the candidate).
        if query_user_deleted or candidate == uid or candidate in deleted_uids:
            continue
        source = item['_source']
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                info.append(scaled(source[field], field))
            else:
                # An empty uid/uname falls back to the document id.
                info.append(source[field] or candidate)
        info.append(item['_score'] / float(top_score) * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    personal_row = []
    for field in field_list:
        if field in evaluate_index_list:
            personal_row.append(scaled(personal_info[field], field))
        else:
            personal_row.append(personal_info[field])

    results = [personal_row]
    results.extend(return_list)
    results.append(default_setting_dict)
    return results
def search_task(task_name, submit_date, state, status, submit_user):
    """Search tasks of type ``analysis`` in the group index.

    Every argument is optional; a falsy value adds no condition.

    Args:
        task_name: Space-separated words; each must occur in ``task_name``
            (wildcard match).
        submit_date: Date converted with ``datetime2ts``; tasks submitted
            within ``DAY`` of that timestamp match.
        state: Space-separated words; each must occur in ``state``.
        status: Value matched against ``status``.
        submit_user: Exact value of ``submit_user``.

    Returns:
        A list of ``[task_name, submit_date, count, state, status]`` rows
        (the search asks for descending ``count``). ``state`` defaults to
        ``''`` and ``status`` to ``0`` when a task lacks them. ``None`` when
        the response has no ``hits.hits`` entry.

    Raises:
        Whatever the Elasticsearch client raises. Errors are not caught
        here: the previous ``except Exception as e: raise e`` re-raised the
        same exception but replaced its traceback under Python 2.
    """
    query = []
    if task_name:
        for item in task_name.split(' '):
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
    if submit_date:
        submit_date_start = datetime2ts(submit_date)
        submit_date_end = submit_date_start + DAY
        query.append({'range': {'submit_date': {'gte': submit_date_start,
                                                'lt': submit_date_end}}})
    if state:
        for item in state.split(' '):
            query.append({'wildcard': {'state': '*' + item + '*'}})
    if status:
        query.append({'match': {'status': status}})
    if submit_user:
        query.append({'term': {'submit_user': submit_user}})

    # NOTE(review): the original used es_group_result when at least one
    # condition was given and es otherwise, with an identical request.
    # That split is preserved here - confirm whether both should be the
    # same client.
    client = es_group_result if query else es
    query.append({'term': {'task_type': 'analysis'}})
    source = client.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={
            'query': {'bool': {'must': query}},
            'sort': [{'count': {'order': 'desc'}}],
            'size': MAX_VALUE,
        },
    )

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        # Narrowed from a bare except: only a malformed response maps to None.
        return None

    result = []
    for task_dict in task_dict_list:
        task = task_dict['_source']
        result.append([
            task['task_name'],
            task['submit_date'],
            task['count'],
            task.get('state', ''),
            task.get('status', 0),
        ])
    return result
def imagine(uid, query_fields_dict, index_name="user_portrait", doctype='user'):
    """Search for users whose portrait fields resemble those of ``uid``.

    Args:
        uid: Id of the user whose related users are searched.
        query_fields_dict: Maps portrait field names to a boost weight, for
            example ``"domain": 2``. The original notes list the fields
            domain, topic, keywords, psycho_status, psycho_feature,
            activity_geo and hashtag. It must also contain ``"field"``
            (``activeness``, ``importance`` or ``influence``; any other
            value gives the extra score a factor of 0) and ``"size"``
            (number of related users wanted). The dict is modified in
            place: ``field``, ``size`` and every key whose value is empty
            in the user's portrait are removed.
        index_name: Index that is counted and searched. The user's own
            portrait is always read from ``"user_portrait"``.
        doctype: Document type that is counted and searched.

    Returns:
        ``[]`` when no requested field has a value for this user. Otherwise
        a list made of, in order:
        ``[uid, uname, activeness, importance, influence]`` of the user;
        one ``[uid, uname, activeness, importance, influence, score]`` row
        per related user; and finally the document count returned by
        ``es.count``.
    """
    personal_info = es.get(index="user_portrait", doc_type="user",
                           id=uid, _source=True)['_source']

    # Keys to correlate on: everything except the two control entries.
    # list(...) keeps this working where dict.keys() is a view, not a list.
    keys_list = list(query_fields_dict)
    keys_list.remove('field')
    keys_list.remove('size')

    # Drop the fields that are empty in this user's portrait and split the
    # others into their '&'-separated terms.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split('&')
    if not search_dict:
        return []

    factor_by_field = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    score_field = query_fields_dict.pop('field')
    if score_field in ("activeness", "importance", "influence"):
        score_standard = {"modifier": "log1p", "field": score_field,
                          "factor": factor_by_field[score_field]}
    else:
        # Unknown field: keep the function but let it contribute nothing.
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {'bool': {'must': must_clauses}},
                "field_value_factor": score_standard,
                'boost_mode': "sum",
            }
        }
    }
    # Counted before any clause is added, i.e. with an empty "must" list.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100  # default number of candidates fetched
    query_number = query_fields_dict.pop('size')  # number of results required

    # One "must" clause per field; inside it any of the user's terms may match.
    for k, v in query_fields_dict.items():
        should = [{'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                  for term in search_dict[k]]
        must_clauses.append({'bool': {'should': should}})

    deleted_uids = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']

    # Kept from the original condition: a deleted querying user gets no rows.
    query_user_deleted = uid in deleted_uids
    return_list = []
    for item in result:
        candidate = item['_id']
        # Skip the user itself and candidates that were deleted (the old
        # test looked up ``uid`` instead of the candidate).
        if query_user_deleted or candidate == uid or candidate in deleted_uids:
            continue
        info = [item['_source'][field] for field in field_list]
        info.append(item['_score'])
        return_list.append(info)
        if len(return_list) == query_number:
            break
    return_list.append(number)

    results = [[personal_info[field] for field in field_list]]
    results.extend(return_list)
    return results