def search_specified_group(postname="报"):
    """Collect the ids of high-follower users whose ``uname`` ends with *postname*.

    Runs a bool query against the ``user_portrait_1222`` index (doc type
    ``user``) that requires both a wildcard match of ``"*" + postname`` on
    ``uname`` and ``fansnum >= 100000``, capped at 10000 hits.  The id and
    uname of every hit are printed, followed by the total number of hits.

    Args:
        postname: Suffix to look for at the end of ``uname``.

    Returns:
        list: The ``_id`` of every hit, in the order returned by the search.
    """
    name_clause = {"wildcard": {"uname": {"wildcard": "*" + postname}}}
    fans_clause = {"range": {"fansnum": {"gte": 100000}}}
    query_body = {
        "query": {"bool": {"must": [name_clause, fans_clause]}},
        "size": 10000,
    }

    response = es_profile.search(
        index="user_portrait_1222", doc_type="user", body=query_body)

    uid_list = []
    for hit in response["hits"]["hits"]:
        uid_list.append(hit['_id'])
        # Single-argument print: same output as the statement form
        # (id, space, uname, space, newline, then print's own newline).
        print("%s %s \n" % (hit['_id'], hit['_source']['uname']))

    print("%s %d" % ("该群体有:", len(uid_list)))
    return uid_list
def nickname2uid(nickname_list):
    """Translate nicknames into the uids of the matching profile documents.

    Issues a filtered ``terms`` query on ``nick_name`` against the profile
    index (``profile_index_name`` / ``profile_index_type``), limited to
    ``MAX_SEARCH_SIZE`` hits, and gathers the ``uid`` field of each hit.

    Args:
        nickname_list: Nicknames to look up.

    Returns:
        list: Distinct uids found; the order is that of iterating a set.
    """
    nickname_filter = {'terms': {'nick_name': nickname_list}}
    query_body = {
        'query': {'filtered': {'filter': nickname_filter}},
        'size': MAX_SEARCH_SIZE,
    }

    response = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body=query_body,
    )
    hits = response['hits']['hits']

    # A set collapses duplicate uids before the result is handed back as a list.
    unique_uids = {hit['_source']['uid'] for hit in (hits or ())}
    return list(unique_uids)
def get_fansnum_max():
    """Return the largest ``fansnum`` stored in the user profile index.

    Searches ``profile_index_name`` / ``profile_index_type`` with a
    ``match_all`` query sorted by ``fansnum`` descending and limited to one
    hit, then reads ``fansnum`` from that hit's ``_source``.

    The previous version ran the search but discarded the result, so it
    always returned None.  Its ``except ...: raise e`` wrapper only re-raised
    the same exception (resetting the traceback on Python 2) and has been
    dropped; errors from the search still propagate unchanged.

    Returns:
        The ``fansnum`` value of the top hit, or None when the search returns
        no hits (the value the old code produced in every case).

    Raises:
        KeyError: If the top hit's ``_source`` has no ``fansnum`` field.
        Exception: Whatever the Elasticsearch client raises for the search.
    """
    query_body = {
        'query': {'match_all': {}},
        'size': 1,
        'sort': [{'fansnum': {'order': 'desc'}}],
    }
    fansnum_max_results = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body=query_body,
    )['hits']['hits']

    if not fansnum_max_results:
        return None
    return fansnum_max_results[0]['_source']['fansnum']
def sort_total_number(prefix, uid_list, time, key_search, number):
    """Return document ids from ``bci_history`` ranked by a sort field, descending.

    The sort field is derived from *prefix*:

    * ``"weibo_"`` combined with *time* of 1, 7 or 30 gives
      ``weibo_day_last``, ``weibo_week_sum`` or ``weibo_month_sum``;
    * any other prefix is used as the sort field as-is.

    Previously an unsupported combination left the sort field unbound and
    the function failed with an UnboundLocalError while building the query;
    it now fails early with a descriptive ValueError.

    Args:
        prefix: ``"weibo_"`` or the name of the field to sort on.
        uid_list: When non-empty, restrict the search to documents whose
            ``uid`` is in this list; otherwise every document is considered.
        time: Window selector (1, 7 or 30); only read when *prefix* is
            ``"weibo_"``.  Anything accepted by ``int()`` is allowed.
        key_search: Unused; kept so existing callers keep working.
        number: Maximum number of ids to return (the query ``size``).

    Returns:
        list: ``_id`` of each hit, best-ranked first.

    Raises:
        ValueError: If *prefix* is ``"weibo_"`` and *time* is not 1, 7 or 30.
    """
    if prefix == "weibo_":
        suffix_by_window = {1: 'day_last', 7: 'week_sum', 30: 'month_sum'}
        window = int(time)
        if window not in suffix_by_window:
            raise ValueError(
                "unsupported time window for prefix 'weibo_': %r" % (time,))
        order = prefix + suffix_by_window[window]
    else:
        order = prefix

    if uid_list:
        query = {"filtered": {"filter": {"terms": {"uid": uid_list}}}}
    else:
        query = {"match_all": {}}
    query_body = {
        "query": query,
        "sort": {order: {"order": "desc"}},
        "size": number,
    }

    # Only the ids are needed, so the document bodies are not fetched.
    search_results = es_user_profile.search(
        index="bci_history", doc_type="bci", body=query_body,
        _source=False)['hits']['hits']
    return [item['_id'] for item in search_results]
def search_specified_group(postname="律师"):
    """Find high-follower users by nickname suffix and store them as a sensing task.

    Queries the ``weibo_user`` index (doc type ``user``) for documents whose
    ``nick_name`` matches the wildcard ``"*" + postname`` and whose
    ``fansnum`` is at least 10000 (at most 10000 hits), printing each hit's
    id and nickname and then the number of hits.  The collected ids are then
    written to the ``manage_sensing_task`` index as the task document with id
    ``admin-top lawers``, after which ``"1"`` is printed.

    Args:
        postname: Suffix to look for at the end of ``nick_name``.

    Returns:
        list: The ``_id`` of every hit, in search order.
    """
    nickname_clause = {"wildcard": {"nick_name": {"wildcard": "*" + postname}}}
    fans_clause = {"range": {"fansnum": {"gte": 10000}}}
    query_body = {
        "query": {"bool": {"must": [nickname_clause, fans_clause]}},
        "size": 10000,
    }

    response = es_profile.search(
        index="weibo_user", doc_type="user", body=query_body)

    uid_list = []
    for hit in response["hits"]["hits"]:
        uid_list.append(hit['_id'])
        # Single-argument print: same output as the statement form.
        print("%s %s \n" % (hit['_id'], hit['_source']['nick_name']))
    print("%s %d" % ("该群体有:", len(uid_list)))

    # Task document; the list-valued fields are stored as JSON strings.
    task = {
        'social_sensors': json.dumps(uid_list),
        'stop_time': 1460086441,
        'create_at': 1377964800,
        'task_name': "top lawers",
        'remark': "粉丝10000以上的律师",
        "history_status": json.dumps([]),
        'burst_reason': '',
        'processing_status': "1",
        "warning_status": '0',
        "finish": "0",
        "create_by": 'admin',
    }
    es_profile.index(index="manage_sensing_task", doc_type='task',
                     id='admin-top lawers', body=task)
    print("1")
    return uid_list
def get_evaluate_max(index_name, index_type, field):
    """Return the largest value of *field* in the given index.

    Runs a ``match_all`` search sorted by *field* descending, limited to one
    hit, and reads *field* from that hit's ``_source``.

    The previous version computed the value but never returned it, and its
    ``max_evaluate = 1`` fallback sat after a ``raise`` where it could never
    run.  Failures still propagate to the caller exactly as before; the
    no-op ``except ...: raise e`` wrapper was removed so the original
    traceback is kept.

    Args:
        index_name: Index to search.
        index_type: Document type to search.
        field: Field to sort on and read back.

    Returns:
        The value of *field* in the top-ranked document.

    Raises:
        IndexError: If the search returns no hits.
        KeyError: If the top hit's ``_source`` lacks *field*.
        Exception: Whatever the Elasticsearch client raises for the search.
    """
    query_body = {
        'query': {'match_all': {}},
        'size': 1,
        'sort': [{field: {'order': 'desc'}}],
    }
    result = es_user_profile.search(
        index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    max_evaluate = result[0]['_source'][field]
    return max_evaluate
def search_specified_group(postname="律师"):
    """Look up popular users by nickname suffix and register them as a sensing task.

    Steps, in order:

    1. Search ``weibo_user`` (doc type ``user``) for documents with
       ``nick_name`` matching ``"*" + postname`` and ``fansnum >= 10000``,
       returning up to 10000 hits.
    2. Print the id and nickname of each hit, then the hit count.
    3. Index a task document with id ``admin-top lawers`` into
       ``manage_sensing_task`` (doc type ``task``) and print ``"1"``.

    Args:
        postname: Suffix to look for at the end of ``nick_name``.

    Returns:
        list: The ``_id`` of every hit, in search order.
    """
    must_clauses = [
        {"wildcard": {"nick_name": {"wildcard": "*" + postname}}},
        {"range": {"fansnum": {"gte": 10000}}},
    ]
    query_body = {"query": {"bool": {"must": must_clauses}}, "size": 10000}

    found = es_profile.search(
        index="weibo_user", doc_type="user", body=query_body)["hits"]["hits"]

    uid_list = []
    for doc in found:
        doc_id = doc['_id']
        uid_list.append(doc_id)
        # Single-argument print: same output as the statement form.
        print("%s %s \n" % (doc_id, doc['_source']['nick_name']))
    print("%s %d" % ("该群体有:", len(uid_list)))

    # The sensor list and the (empty) history are stored JSON-encoded.
    task_doc = dict(
        social_sensors=json.dumps(uid_list),
        stop_time=1460086441,
        create_at=1377964800,
        task_name="top lawers",
        remark="粉丝10000以上的律师",
        history_status=json.dumps([]),
        burst_reason='',
        processing_status="1",
        warning_status='0',
        finish="0",
        create_by='admin',
    )
    es_profile.index(index="manage_sensing_task", doc_type='task',
                     id='admin-top lawers', body=task_doc)
    print("1")
    return uid_list
def sort_total_number(prefix, uid_list, time, key_search, number):
    """Return document ids from ``bci_history`` ranked by a sort field, descending.

    The sort field is derived from *prefix*:

    * ``"weibo_"`` combined with *time* of 1, 7 or 30 gives
      ``weibo_day_last``, ``weibo_week_sum`` or ``weibo_month_sum``;
    * any other prefix is used as the sort field as-is.

    Previously an unsupported combination left the sort field unbound and
    the function failed with an UnboundLocalError while building the query;
    it now fails early with a descriptive ValueError.

    Args:
        prefix: ``"weibo_"`` or the name of the field to sort on.
        uid_list: When non-empty, restrict the search to documents whose
            ``uid`` is in this list; otherwise every document is considered.
        time: Window selector (1, 7 or 30); only read when *prefix* is
            ``"weibo_"``.  Anything accepted by ``int()`` is allowed.
        key_search: Unused; kept so existing callers keep working.
        number: Maximum number of ids to return (the query ``size``).

    Returns:
        list: ``_id`` of each hit, best-ranked first.

    Raises:
        ValueError: If *prefix* is ``"weibo_"`` and *time* is not 1, 7 or 30.
    """
    if prefix == "weibo_":
        suffix_by_window = {1: 'day_last', 7: 'week_sum', 30: 'month_sum'}
        window = int(time)
        if window not in suffix_by_window:
            raise ValueError(
                "unsupported time window for prefix 'weibo_': %r" % (time,))
        order = prefix + suffix_by_window[window]
    else:
        order = prefix

    if uid_list:
        query = {"filtered": {"filter": {"terms": {"uid": uid_list}}}}
    else:
        query = {"match_all": {}}
    query_body = {
        "query": query,
        "sort": {order: {"order": "desc"}},
        "size": number,
    }

    # Only the ids are needed, so the document bodies are not fetched.
    search_results = es_user_profile.search(
        index="bci_history", doc_type="bci", body=query_body,
        _source=False)['hits']['hits']
    return [item['_id'] for item in search_results]
def count_minweibouser_influence(index_name):
    """Return the smallest ``user_index`` value in *index_name*.

    Runs a ``match_all`` search on doc type ``weibo_bci_index_type`` sorted
    by ``user_index`` ascending and limited to one hit.

    The result now defaults to 0 up front.  Previously a search that
    succeeded with no hits left the result unbound and the ``return`` raised
    UnboundLocalError.  The handler was also narrowed from a bare ``except``
    so KeyboardInterrupt and SystemExit are no longer swallowed.

    Args:
        index_name: Index to search.

    Returns:
        The ``user_index`` of the lowest-ranked document, or 0 when the
        search fails, returns no hits, or the hit lacks the field.
    """
    query_body = {
        'query': {'match_all': {}},
        'size': 1,
        'sort': {'user_index': {'order': 'asc'}},
    }
    min_user_index = 0
    try:
        hits = es_user_profile.search(
            index=index_name, doc_type=weibo_bci_index_type,
            body=query_body)['hits']['hits']
        if hits:
            min_user_index = hits[0]['_source']['user_index']
    except Exception:
        # Best effort: any search or lookup failure falls back to 0.
        min_user_index = 0
    return min_user_index
def lookup_active_weibouser(today_date_time):
    """Build profile records for the users with the highest ``user_index`` on a day.

    Steps:

    1. Derive the day's index names from ``ts2datetime(today_date_time)`` and
       call ``weibo_active_user_mappings`` on the active-user index name
       (presumably this prepares that index -- confirm against the helper).
    2. Fetch the overall max / min ``user_index`` for the day's BCI index.
    3. Fetch the 100 documents with the highest ``user_index`` and look up
       the matching profile documents by ``uid``.
    4. Emit one dict per BCI document with the normalised influence, the
       counters copied from the BCI document, and nickname / location /
       photo URL from the profile (empty strings when unavailable).

    Fixes relative to the previous version:

    * The profile lookup now sets ``size`` to the number of requested uids.
      Without it Elasticsearch returns only its default page of 10 hits, so
      most of the 100 users lost their profile fields.
    * Normalisation no longer divides by zero when max equals min (influence
      is 0.0 in that case), and always uses true division.
    * The bare ``except`` clauses were narrowed so KeyboardInterrupt and
      SystemExit are not swallowed.

    Args:
        today_date_time: Timestamp handed to ``ts2datetime`` to pick the day.

    Returns:
        list: One dict per user with keys ``influence``, ``fans_num``,
        ``friends_num``, ``total_number``, ``uid``, ``uname``, ``location``
        and ``url``; an empty list if any step of the lookup fails.
    """
    date_str = ts2datetime(today_date_time)
    weibo_active_user_index_name = weibo_active_user_index_name_pre + date_str
    weibo_active_user_mappings(weibo_active_user_index_name)
    bci_index_name = weibo_bci_index_name_pre + ''.join(date_str.split('-'))

    user_max_index = count_maxweibouser_influence(bci_index_name)
    user_min_index = count_minweibouser_influence(bci_index_name)

    query_body = {
        'query': {'match_all': {}},
        # Only the top-ranked users by influence are needed.  (The original
        # note said 50, but the query has always requested 100.)
        'size': 100,
        'sort': {'user_index': {'order': 'desc'}},
    }

    results = []
    try:
        bci_hits = es_user_portrait.search(
            index=bci_index_name, doc_type=weibo_bci_index_type,
            body=query_body)['hits']['hits']
        search_uid_list = [hit['_source']['user'] for hit in bci_hits]
        print(len(search_uid_list))

        # Ask for as many hits as uids; the default page size is only 10.
        profile_query = {
            'query': {'terms': {'uid': search_uid_list}},
            'size': len(search_uid_list),
        }
        profile_hits = es_user_profile.search(
            index=profile_index_name, doc_type=profile_index_type,
            body=profile_query)['hits']['hits']
        weibo_user_dict = dict(
            (hit['_source']['uid'], hit['_source']) for hit in profile_hits)

        index_span = user_max_index - user_min_index
        for hit in bci_hits:
            source = hit['_source']
            if index_span:
                influence = float(source['user_index'] - user_min_index) / index_span
            else:
                # Max equals min: every user sits at the same level.
                influence = 0.0

            user_dict = {
                'influence': influence,
                'fans_num': source['user_fansnum'],
                'friends_num': source['user_friendsnum'],
                'total_number': source['total_number'],
                'uid': source['user'],
            }
            try:
                weibo_user_info = weibo_user_dict[user_dict['uid']]
                user_dict['uname'] = weibo_user_info['nick_name']
                user_dict['location'] = weibo_user_info['user_location']
                user_dict['url'] = weibo_user_info['photo_url']
            except (KeyError, TypeError):
                # No profile, or an incomplete one: blank all three fields.
                user_dict['uname'] = ''
                user_dict['location'] = ''
                user_dict['url'] = ''
            results.append(user_dict)
    except Exception:
        # Best effort: any failure discards partial output.
        results = []

    print(len(results))
    return results