def get_retweeted_top():
    """Return up to 100 portrait users ranked by their top retweeted weibo.

    Queries the previous day's ``bci_<YYYYMMDD>`` index, sorted descending by
    ``origin_weibo_retweeted_top_number``, and keeps only the users that can
    also be fetched from the user-portrait index.

    Returns:
        ``{'top_retweeted_user': <json string>}`` where the JSON encodes a
        list of ``[uid, uname, top_mid, top_retweeted_number]`` rows sorted
        by ``top_retweeted_number`` descending, or ``None`` when the bci
        search itself fails.
    """
    max_hits = 100000   # 'size' sent to Elasticsearch
    top_limit = 100     # maximum number of rows returned

    # run_type: RUN_TYPE == 1 uses the current time, anything else uses the
    # configured RUN_TEST_TIME.
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    # The index queried is the one for (now - DAY).
    date = ts2datetime(now_ts - DAY)
    index_time = ''.join(date.split('-'))
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': max_hits,
        'sort': [{
            'origin_weibo_retweeted_top_number': {
                'order': 'desc'
            }
        }]
    }

    try:
        hits = es_cluster.search(index='bci_' + index_time,
                                 doc_type=index_type,
                                 body=query_body)['hits']['hits']
    except Exception:
        # Best effort: callers receive None when the daily index is unusable.
        return None

    top_results = []
    for item in hits:
        # Cap on rows actually collected, so that a record skipped below does
        # not consume one of the top_limit slots.
        if len(top_results) >= top_limit:
            break
        uid = item['_id']
        try:
            portrait = es_user_portrait.get(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            id=uid)
            uname = portrait['_source']['uname']
            bci_source = item['_source']
            row = [
                uid,
                uname,
                bci_source['origin_weibo_top_retweeted_id'],
                bci_source['origin_weibo_retweeted_top_number'],
            ]
        except Exception:
            # User missing from the portrait index, or an incomplete record:
            # skip it and move on to the next hit.
            continue
        top_results.append(row)

    top_results.sort(key=lambda row: row[3], reverse=True)
    return {'top_retweeted_user': json.dumps(top_results)}
def search_from_es(date):
    """Fetch the ``bci_<YYYYMMDD>`` hits for *date*, sorted by ``user_index``.

    Args:
        date: date string whose ``-`` separators are stripped to build the
            index name (e.g. ``'2013-09-07'`` -> ``'bci_20130907'``).

    Returns:
        A ``(user_set, result)`` tuple: ``result`` is the list of raw hits
        and ``user_set`` is the set of their ``_id`` values. When the search
        fails, an error marker is printed and ``(set(), [])`` is returned.
    """
    index_time = 'bci_' + ''.join(date.split('-'))
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        # NOTE(review): k is not assigned in this function; presumably a
        # module-level constant -- confirm it is defined in this file.
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}]
    }

    try:
        result = ES_DAILY_RANK.search(index=index_time,
                                      doc_type=index_type,
                                      body=query_body)['hits']['hits']
    except Exception:
        # Best effort: report the failure and hand back empty containers.
        print('cron/recommend_in/recommend_in.py&error-1&')
        return set(), []

    user_set = set(user_dict['_id'] for user_dict in result)
    return user_set, result
def get_retweeted_top():
    """Return up to 100 portrait users ranked by their top retweeted weibo.

    Searches a daily bci index sorted descending by
    ``origin_weibo_retweeted_top_number`` and keeps only the users that can
    also be fetched from the ``user_portrait`` index.

    Returns:
        ``{'top_retweeted_user': <json string>}`` where the JSON encodes a
        list of ``[uid, uname, top_mid, top_retweeted_number]`` rows in the
        order Elasticsearch returned them, or ``None`` when the bci search
        itself fails.
    """
    max_hits = 100000   # 'size' sent to Elasticsearch
    top_limit = 100     # maximum number of rows returned

    now_ts = time.time()
    date = ts2datetime(now_ts - 3600 * 24)
    index_time = ''.join(date.split('-'))
    # test
    # NOTE(review): this hard-coded value overrides the index name computed
    # from yesterday's date -- confirm whether it should still be active.
    index_time = '20130907'
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': max_hits,
        'sort': [{'origin_weibo_retweeted_top_number': {'order': 'desc'}}]
    }

    try:
        hits = es_cluster.search(index=index_time,
                                 doc_type=index_type,
                                 body=query_body)['hits']['hits']
    except Exception:
        # Best effort: callers receive None when the index is unusable.
        return None

    top_results = []
    for item in hits:
        # Cap on rows actually collected, so that a record skipped below does
        # not consume one of the top_limit slots.
        if len(top_results) >= top_limit:
            break
        uid = item['_id']
        try:
            portrait = es.get(index='user_portrait', doc_type='user', id=uid)
            uname = portrait['_source']['uname']
            bci_source = item['_source']
            row = [
                uid,
                uname,
                bci_source['origin_weibo_top_retweeted_id'],
                bci_source['origin_weibo_retweeted_top_number'],
            ]
        except Exception:
            # User missing from the portrait index, or an incomplete record:
            # skip it and move on to the next hit.
            continue
        top_results.append(row)

    return {'top_retweeted_user': json.dumps(top_results)}
def search_from_es(date):
    """Fetch the daily index hits for *date*, sorted by ``user_index``.

    Args:
        date: date string whose ``-`` separators are stripped to build the
            index name (e.g. ``'2013-09-07'`` -> ``'20130907'``).

    Returns:
        A ``(user_set, result)`` tuple: ``result`` is the list of raw hits
        and ``user_set`` is the set of their ``_id`` values. When the search
        fails, a message is printed and ``(None, None)`` is returned.
    """
    # test
    # NOTE(review): result size is fixed here -- confirm this test value is
    # the intended limit.
    k = 10000
    index_time = ''.join(date.split('-'))
    print('index_time: %s' % index_time)
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}]
    }

    try:
        result = ES_DAILY_RANK.search(index=index_time,
                                      doc_type=index_type,
                                      body=query_body)['hits']['hits']
    except Exception:
        # Best effort: report the failure and signal it with a pair of Nones.
        print('recommentation in: there is not %s es' % index_time)
        return None, None

    user_set = set(user_dict['_id'] for user_dict in result)
    print('len user_set: %s' % len(user_set))
    return user_set, result