def count_maxweibouser_influence(end_time): date_time = ts2datetimestr(end_time) index_name = weibo_bci_index_name_pre + date_time print 'max index_name:', index_name query_body = { 'query': { 'match_all': {} }, 'size': 1, 'sort': { 'user_index': { 'order': 'desc' } } } try: #if S_TYPE == 'test': # temp_index_name='bci_20161121' # max_result=es_user_profile.search(index=temp_index_name,doc_type=weibo_bci_index_type,body=query_body)['hits']['hits'] #else: max_result = es_user_profile.search(index=index_name, doc_type=weibo_bci_index_type, body=query_body)['hits']['hits'] for item in max_result: max_user_index = item['_source']['user_index'] except: max_user_index = 1 return max_user_index
def get_nick_name_unique(nick_name):
    """Check *nick_name* against the profile index and the XNR index.

    The same ``term`` query on the ``nick_name`` field is run against
    ``profile_index_name`` (via ``es_user_profile``) and
    ``weibo_xnr_index_name`` (via ``es``).

    Args:
        nick_name: the nickname to look up.

    Returns:
        False when both searches return at least one hit, True otherwise.
    """
    term_query = {'query': {'term': {'nick_name': nick_name}}}

    profile_hits = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body=term_query)['hits']['hits']
    xnr_hits = es.search(
        index=weibo_xnr_index_name,
        doc_type=weibo_xnr_index_type,
        body=term_query)['hits']['hits']

    # NOTE(review): the name is reported as taken only when it appears in
    # BOTH indices; a uniqueness check would usually use "either" -- confirm
    # the intended rule with the callers before changing it.
    return not (profile_hits and xnr_hits)
def lookup_active_weibouser(classify_id, weiboxnr_id, start_time, end_time): time_gap = end_time - start_time now_time = time.time() test_time_gap = datetime2ts( ts2datetime(now_time)) - datetime2ts(S_DATE_BCI) #print 'from, to:', ts2date(start_time), ts2date(end_time) today_date_time = end_time - DAY if S_TYPE == 'test': today_date_time = datetime2ts(S_DATE_BCI) start_time = start_time - test_time_gap end_time = end_time - test_time_gap from_date_ts = datetime2ts(ts2datetime(start_time)) to_date_ts = datetime2ts(ts2datetime(end_time)) #print 's_date_bci:', S_DATE_BCI #print 'from_date_ts, to_date_ts:', ts2date(from_date_ts), ts2date(to_date_ts) bci_index_name = weibo_bci_index_name_pre + ''.join( ts2datetime(today_date_time).split('-')) print 'bci_index_name:', bci_index_name print 'end_time:', ts2date(end_time) #step1: users condition #make sure the users range by classify choice userlist = lookup_weiboxnr_concernedusers(weiboxnr_id) if classify_id == 1: #concrenedusers condition_list = [{'bool': {'must': {'terms': {'uid': userlist}}}}] elif classify_id == 2: #unconcrenedusers condition_list = [{ 'bool': { 'must_not': [{ 'terms': { 'uid': userlist } }] } }] elif classify_id == 0: condition_list = [{'match_all': {}}] print userlist, classify_id, condition_list #step 2:lookup users user_max_index = count_maxweibouser_influence(end_time - DAY) results = [] for item in condition_list: query_body = { 'query': item, 'size': HOT_WEIBO_NUM, #查询影响力排名前50的用户即可 'sort': { 'user_index': { 'order': 'desc' } } } try: #print 'query_body:', query_body flow_text_exist=es_user_portrait.search(index=bci_index_name,\ doc_type=weibo_bci_index_type,body=query_body)['hits']['hits'] search_uid_list = [ item['_source']['user'] for item in flow_text_exist ] weibo_user_exist = es_user_profile.search(index=profile_index_name,\ doc_type=profile_index_type,body={'query':{'terms':{'uid':search_uid_list}}})['hits']['hits'] #print 'weibo_user_exist:', weibo_user_exist weibo_user_dict = dict() for 
item in weibo_user_exist: uid = item['_source']['uid'] weibo_user_dict[uid] = item['_source'] for item in flow_text_exist: #print 'item:', item['_source'] influence = item['_source']['user_index'] / user_max_index * 100 fans_num = item['_source']['user_fansnum'] friends_num = item['_source']['user_friendsnum'] total_number = item['_source']['total_number'] uid = item['_source']['user'] try: weibo_user_info = weibo_user_dict[uid] uname = weibo_user_info['nick_name'] location = weibo_user_info['user_location'] url = weibo_user_info['photo_url'] except: uname = '' location = '' url = '' #print 'uid:', uid results.append({'uid':uid, 'influence':influence, 'fans_num':fans_num, \ 'total_number':total_number, 'friends_num':friends_num,\ 'uname': uname, 'location':location, 'url': url}) #print 'results:', results ''' uid=item['_source']['uid'] #微博数 item['_source']['weibos_sum']=count_weibouser_weibosum(uid,end_time) #影响力 user_index=count_weibouser_index(uid,end_time) if user_max_index >0: item['_source']['influence']=user_index/user_max_index*100 else: item['_source']['influence']=0 if item['_source']['influence']>=INFLUENCE_MIN: results.append(item['_source']) ''' except: results = [] return results