def get_user_portrait(uid_list, specify_field=None):
    """Fetch portrait rows for a batch of user ids.

    Every row has the shape
    ``[uid, <one value per field>, <one normalized value per index>]``.
    The field columns are ``specify_field`` when it is given and non-empty,
    otherwise the eight default profile fields. The trailing columns are
    always activeness, importance, influence and sensitive, each passed
    through ``normalize_index`` together with the matching entry of
    ``get_evaluate_max()``.

    Args:
        uid_list: ids looked up with a single ``mget`` call.
        specify_field: optional sequence of field names that replaces the
            default profile columns.

    Returns:
        A list with one row per entry of the ``mget`` response. The
        ``topic_string`` value is split on ``'&'``. An id without a
        document yields the id followed by one empty string per column.
        An empty ``uid_list`` returns ``[]`` without querying.
    """
    if not uid_list:
        return []

    index_list = ['activeness', 'importance', 'influence', 'sensitive']
    if specify_field:
        # Copy so the caller's sequence is never aliased by the result code.
        fields_list = list(specify_field)
    else:
        fields_list = ['uname', 'domain', 'topic_string', 'politics',
                       'fansnum', 'statusnum', 'friendsnum', 'location']

    max_result = get_evaluate_max()
    # Request exactly the fields that are read below, so a caller-supplied
    # field is present in the response instead of raising KeyError.
    search_results = es.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={"ids": uid_list},
        _source=False,
        fields=fields_list + index_list)["docs"]

    # Missing users are padded to the same width as found users.
    column_count = len(fields_list) + len(index_list)

    results = []
    for item in search_results:
        row = [item['_id']]
        if item['found']:
            hit = item['fields']
            for name in fields_list:
                value = hit[name][0]
                if name == "topic_string":
                    value = value.split('&')
                row.append(value)
            for name in index_list:
                row.append(normalize_index(hit[name][0], max_result[name]))
        else:
            row.extend([''] * column_count)
        results.append(row)
    return results
def get_user_portrait(uid_list, specify_field=None):
    """Fetch portrait rows for a batch of user ids.

    Every row has the shape
    ``[uid, <one value per field>, <one normalized value per index>]``.
    The field columns are ``specify_field`` when it is given and non-empty,
    otherwise the eight default profile fields. The trailing columns are
    always activeness, importance, influence and sensitive, each passed
    through ``normalize_index`` together with the matching entry of
    ``get_evaluate_max()``.

    Args:
        uid_list: ids looked up with a single ``mget`` call.
        specify_field: optional sequence of field names that replaces the
            default profile columns.

    Returns:
        A list with one row per entry of the ``mget`` response. The
        ``topic_string`` value is split on ``'&'``. An id without a
        document yields the id followed by one empty string per column.
        An empty ``uid_list`` returns ``[]`` without querying.
    """
    if not uid_list:
        return []

    index_list = ['activeness', 'importance', 'influence', 'sensitive']
    if specify_field:
        # Copy so the caller's sequence is never aliased by the result code.
        fields_list = list(specify_field)
    else:
        fields_list = ['uname', 'domain', 'topic_string', 'politics',
                       'fansnum', 'statusnum', 'friendsnum', 'location']

    max_result = get_evaluate_max()
    # Request exactly the fields that are read below, so a caller-supplied
    # field is present in the response instead of raising KeyError.
    search_results = es.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={"ids": uid_list},
        _source=False,
        fields=fields_list + index_list)["docs"]

    # Missing users are padded to the same width as found users.
    column_count = len(fields_list) + len(index_list)

    results = []
    for item in search_results:
        row = [item['_id']]
        if item['found']:
            hit = item['fields']
            for name in fields_list:
                value = hit[name][0]
                if name == "topic_string":
                    value = value.split('&')
                row.append(value)
            for name in index_list:
                row.append(normalize_index(hit[name][0], max_result[name]))
        else:
            row.extend([''] * column_count)
        results.append(row)
    return results
def get_user_portrait_byidname(uid, isuid=True, specify_field=None):
    """Look up a single user's portrait by document id or by user name.

    Args:
        uid: the document id when ``isuid`` is true; otherwise the value
            matched against the ``uname`` field with a term query.
        isuid: choose the by-id ``mget`` lookup (default) or the ``uname``
            search limited to one hit.
        specify_field: optional sequence of field names to return instead
            of the default profile fields.

    Returns:
        A list with one entry: ``[doc_id, <one value per field>]`` for the
        matched document, or ``None`` when no document was found. A
        ``keywords_dict`` value is decoded with ``json.loads``; every other
        field (including ``topic_string``) is returned as stored.

    Raises:
        KeyError: if a requested field is absent from the matched document.
    """
    if specify_field:
        fields_list = list(specify_field)
    else:
        # NOTE(review): assumes these names exist in the portrait mapping;
        # confirm 'hashtag' and 'keywords_string' against the index.
        fields_list = ['uname', 'domain', 'topic_string', 'politics',
                       'fansnum', 'statusnum', 'friendsnum', 'location',
                       'hashtag', 'activity_geo', 'keywords_string']

    def build_row(item):
        # Shared by both lookup paths: id first, then fields in order.
        row = [item['_id']]
        hit = item['fields']
        for name in fields_list:
            value = hit[name][0]
            if name == "keywords_dict":
                value = json.loads(value)
            row.append(value)
        return row

    # Both lookups request exactly the fields read in build_row, so the
    # requested and consumed field names cannot drift apart.
    if isuid:
        docs = es.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": [uid]},
            _source=False,
            fields=fields_list)["docs"]
        return [build_row(item) if item['found'] else None for item in docs]

    query_body = {
        "query": {
            "bool": {
                "should": [
                    {"term": {"uname": uid}}
                ]
            }
        },
        "size": 1
    }
    hits = es.search(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body=query_body,
        fields=fields_list)['hits']['hits']
    if not hits:
        return [None]
    return [build_row(item) for item in hits]
def filter_in(top_user_set):
    """Return the ids from ``top_user_set`` that have no portrait document.

    Args:
        top_user_set: collection of user ids to check.

    Returns:
        A ``set`` holding every id for which the ``mget`` lookup did not
        report ``found`` as ``True``. Empty input returns an empty set
        without contacting Elasticsearch.

    Raises:
        Any exception raised by the Elasticsearch client during the lookup.
    """
    if not top_user_set:
        return set()

    ids = list(top_user_set)
    # Client errors propagate on purpose: without a response there is no
    # way to tell which ids are already stored.
    response = es_sensitive_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': ids})
    already_in = {
        item['_id'] for item in response['docs'] if item['found'] is True
    }
    return set(ids) - already_in
def filter_in(top_user_set):
    """Return the ids from ``top_user_set`` that have no portrait document.

    The lookup is a single ``mget`` against index ``'user_portrait'`` and
    doc type ``'user'``. Two progress lines (input size and number of ids
    already stored) are printed to stdout.

    Args:
        top_user_set: collection of user ids to check.

    Returns:
        A ``set`` holding every id for which the lookup did not report
        ``found`` as ``True``. Empty input returns an empty set without
        contacting Elasticsearch or printing.

    Raises:
        Any exception raised by the Elasticsearch client during the lookup,
        with its original traceback.
    """
    if not top_user_set:
        return set()

    ids = list(top_user_set)
    # No try/except here: catching and re-raising would only hide where
    # the client error originated.
    response = es_sensitive_user_portrait.mget(
        index='user_portrait',
        doc_type='user',
        body={'ids': ids})
    filter_list = [
        item['_id'] for item in response['docs'] if item['found'] is True
    ]
    # Single-argument print() emits the same text under Python 2 and 3.
    print('before filter in:  %d' % len(ids))
    print('filter_list:  %d' % len(filter_list))
    return set(ids) - set(filter_list)
def filter_in(top_user_set):
    """Return the ids from ``top_user_set`` that have no portrait document.

    Args:
        top_user_set: collection of user ids to check.

    Returns:
        A ``set`` holding every id for which the ``mget`` lookup did not
        report ``found`` as ``True``. Empty input returns an empty set
        without contacting Elasticsearch.

    Raises:
        Any exception raised by the Elasticsearch client during the lookup.
    """
    if not top_user_set:
        return set()

    ids = list(top_user_set)
    # Client errors propagate on purpose: without a response there is no
    # way to tell which ids are already stored.
    response = es_sensitive_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': ids})
    already_in = {
        item['_id'] for item in response['docs'] if item['found'] is True
    }
    return set(ids) - already_in
def filter_in(top_user_set):
    """Return the ids from ``top_user_set`` that have no portrait document.

    The lookup is a single ``mget`` against index ``'user_portrait'`` and
    doc type ``'user'``. Two progress lines (input size and number of ids
    already stored) are printed to stdout.

    Args:
        top_user_set: collection of user ids to check.

    Returns:
        A ``set`` holding every id for which the lookup did not report
        ``found`` as ``True``. Empty input returns an empty set without
        contacting Elasticsearch or printing.

    Raises:
        Any exception raised by the Elasticsearch client during the lookup,
        with its original traceback.
    """
    if not top_user_set:
        return set()

    ids = list(top_user_set)
    # No try/except here: catching and re-raising would only hide where
    # the client error originated.
    response = es_sensitive_user_portrait.mget(
        index='user_portrait',
        doc_type='user',
        body={'ids': ids})
    filter_list = [
        item['_id'] for item in response['docs'] if item['found'] is True
    ]
    # Single-argument print() emits the same text under Python 2 and 3.
    print('before filter in:  %d' % len(ids))
    print('filter_list:  %d' % len(filter_list))
    return set(ids) - set(filter_list)
def get_user_portrait_byidname(uid, isuid=True, specify_field=None):
    """Look up a single user's portrait by document id or by user name.

    Args:
        uid: the document id when ``isuid`` is true; otherwise the value
            matched against the ``uname`` field with a term query.
        isuid: choose the by-id ``mget`` lookup (default) or the ``uname``
            search limited to one hit.
        specify_field: optional sequence of field names to return instead
            of the default profile fields.

    Returns:
        A list with one entry: ``[doc_id, <one value per field>]`` for the
        matched document, or ``None`` when no document was found. A
        ``keywords_dict`` value is decoded with ``json.loads``; every other
        field (including ``topic_string``) is returned as stored.

    Raises:
        KeyError: if a requested field is absent from the matched document.
    """
    if specify_field:
        fields_list = list(specify_field)
    else:
        # NOTE(review): assumes these names exist in the portrait mapping;
        # confirm 'hashtag' and 'keywords_string' against the index.
        fields_list = ['uname', 'domain', 'topic_string', 'politics',
                       'fansnum', 'statusnum', 'friendsnum', 'location',
                       'hashtag', 'activity_geo', 'keywords_string']

    def build_row(item):
        # Shared by both lookup paths: id first, then fields in order.
        row = [item['_id']]
        hit = item['fields']
        for name in fields_list:
            value = hit[name][0]
            if name == "keywords_dict":
                value = json.loads(value)
            row.append(value)
        return row

    # Both lookups request exactly the fields read in build_row, so the
    # requested and consumed field names cannot drift apart.
    if isuid:
        docs = es.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": [uid]},
            _source=False,
            fields=fields_list)["docs"]
        return [build_row(item) if item['found'] else None for item in docs]

    query_body = {
        "query": {
            "bool": {
                "should": [
                    {"term": {"uname": uid}}
                ]
            }
        },
        "size": 1
    }
    hits = es.search(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body=query_body,
        fields=fields_list)['hits']['hits']
    if not hits:
        return [None]
    return [build_row(item) for item in hits]