Esempio n. 1
0
def get_retweeted_top():
    """Return the most-retweeted users that also exist in the portrait index.

    The reference timestamp is the current time when ``RUN_TYPE == 1`` and
    ``datetime2ts(RUN_TEST_TIME)`` otherwise.  The daily index
    ``'bci_' + <date without dashes>`` for ``now_ts - DAY`` is searched with
    hits sorted by ``origin_weibo_retweeted_top_number`` (descending), and
    the hits are walked in that order until 100 rows have been collected.

    A hit is skipped when its uid cannot be fetched from the user-portrait
    index or when any of the required fields is missing.

    Returns:
        ``{'top_retweeted_user': <json text>}`` where the JSON encodes a list
        of ``[uid, uname, top_mid, top_retweeted_number]`` rows sorted by
        ``top_retweeted_number`` descending, or ``None`` if the search on
        the daily index raises.
    """
    max_users = 100  # cap on the number of rows returned
    k = 100000       # number of hits requested from the daily index

    # run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    # NOTE(review): DAY is presumably one day in seconds, making this the
    # previous day's index -- confirm against its definition.
    date = ts2datetime(now_ts - DAY)
    index_time = ''.join(date.split('-'))

    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': k,
        'sort': [{
            'origin_weibo_retweeted_top_number': {
                'order': 'desc'
            }
        }]
    }
    try:
        result = es_cluster.search(index='bci_' + index_time,
                                   doc_type=index_type,
                                   body=query_body)['hits']['hits']
    except Exception:
        # Best effort: callers receive None when the daily index cannot be
        # queried.  `Exception` (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return None

    top_results = []
    for item in result:
        # Count only rows that were actually kept, so a hit discarded below
        # no longer uses up one of the 100 slots.
        if len(top_results) >= max_users:
            break
        uid = item['_id']
        try:
            exist_result = es_user_portrait.get(index=portrait_index_name,
                                                doc_type=portrait_index_type,
                                                id=uid)
            source = exist_result['_source']
            uname = source['uname']
            top_mid = item['_source']['origin_weibo_top_retweeted_id']
            top_retweeted_number = item['_source'][
                'origin_weibo_retweeted_top_number']
        except Exception:
            # uid missing from the portrait index, or a required field is
            # absent: skip this hit.
            continue
        top_results.append([uid, uname, top_mid, top_retweeted_number])

    top_results = sorted(top_results, key=lambda row: row[3], reverse=True)

    return {'top_retweeted_user': json.dumps(top_results)}
def search_from_es(date):
    """Search the daily ``bci_*`` index for *date*, ranked by ``user_index``.

    Args:
        date: string whose ``'-'`` separators are removed to build the
            index name ``'bci_' + <date without dashes>``.

    Returns:
        A ``(uid_set, hits)`` tuple: the set of ``_id`` values of the hits
        and the raw hit list, sorted by ``user_index`` descending.  When the
        search raises, an error marker is printed and ``(set(), [])`` is
        returned.
    """
    index_time = 'bci_' + ''.join(date.split('-'))
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        # NOTE(review): `k` is not defined inside this function; presumably
        # a module-level constant -- confirm it exists at import time.
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}]
    }
    try:
        result = ES_DAILY_RANK.search(index=index_time,
                                      doc_type=index_type,
                                      body=query_body)['hits']['hits']
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.  Single-argument print(...) emits
        # the same text on Python 2 and Python 3.
        print('cron/recommend_in/recommend_in.py&error-1&')
        return set(), []
    return set(hit['_id'] for hit in result), result
Esempio n. 3
0
def search_from_es(date):
    """Search the daily ``bci_*`` index for *date*, ranked by ``user_index``.

    Args:
        date: string whose ``'-'`` separators are removed to build the
            index name ``'bci_' + <date without dashes>``.

    Returns:
        A ``(uid_set, hits)`` tuple: the set of ``_id`` values of the hits
        and the raw hit list, sorted by ``user_index`` descending.  When the
        search raises, an error marker is printed and ``(set(), [])`` is
        returned.
    """
    index_time = 'bci_' + ''.join(date.split('-'))
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        # NOTE(review): `k` is not defined inside this function; presumably
        # a module-level constant -- confirm it exists at import time.
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}]
    }
    try:
        result = ES_DAILY_RANK.search(index=index_time,
                                      doc_type=index_type,
                                      body=query_body)['hits']['hits']
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.  Single-argument print(...) emits
        # the same text on Python 2 and Python 3.
        print('cron/recommend_in/recommend_in.py&error-1&')
        return set(), []
    return set(hit['_id'] for hit in result), result
Esempio n. 4
0
def get_retweeted_top():
    """Return the most-retweeted users that also exist in ``user_portrait``.

    Searches a daily index for hits sorted by
    ``origin_weibo_retweeted_top_number`` (descending) and walks them in
    that order until 100 rows have been collected.  A hit is skipped when
    its uid cannot be fetched from the ``user_portrait`` index or when any
    required field is missing.

    Returns:
        ``{'top_retweeted_user': <json text>}`` where the JSON encodes a list
        of ``[uid, uname, top_mid, top_retweeted_number]`` rows in search
        order, or ``None`` if the search on the daily index raises.
    """
    max_users = 100  # cap on the number of rows returned
    k = 100000       # number of hits requested from the daily index

    now_ts = time.time()
    date = ts2datetime(now_ts - 3600 * 24)
    index_time = ''.join(date.split('-'))
    # test
    # NOTE(review): this hard-coded index overrides the date-derived name
    # computed just above; drop the override to query the previous day.
    index_time = '20130907'
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': k,
        'sort': [{'origin_weibo_retweeted_top_number': {'order': 'desc'}}]
    }
    try:
        result = es_cluster.search(index=index_time,
                                   doc_type=index_type,
                                   body=query_body)['hits']['hits']
    except Exception:
        # Best effort: callers receive None when the index cannot be
        # queried.  `Exception` (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return None

    top_results = []
    for item in result:
        # Count only rows that were actually kept, so a hit discarded below
        # no longer uses up one of the 100 slots.
        if len(top_results) >= max_users:
            break
        uid = item['_id']
        try:
            exist_result = es.get(index='user_portrait', doc_type='user',
                                  id=uid)
            source = exist_result['_source']
            uname = source['uname']
            top_mid = item['_source']['origin_weibo_top_retweeted_id']
            top_retweeted_number = item['_source'][
                'origin_weibo_retweeted_top_number']
        except Exception:
            # uid missing from the portrait index, or a required field is
            # absent: skip this hit.
            continue
        top_results.append([uid, uname, top_mid, top_retweeted_number])

    # The original final line was garbled into a comment, so the function
    # fell off the end and returned None; the payload is returned again.
    return {'top_retweeted_user': json.dumps(top_results)}
def search_from_es(date):
    """Search the daily index named after *date*, ranked by ``user_index``.

    Args:
        date: string whose ``'-'`` separators are removed to build the
            index name.

    Returns:
        A ``(uid_set, hits)`` tuple: the set of ``_id`` values of the hits
        and the raw hit list, sorted by ``user_index`` descending.  When the
        search raises, a message is printed and ``(None, None)`` is
        returned.
    """
    # test
    k = 10000  # number of hits requested
    index_time = ''.join(date.split('-'))
    # Single formatted argument: prints the same text under Python 2's
    # print statement and Python 3's print function.
    print('index_time: %s' % index_time)
    index_type = 'bci'
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}]
    }
    try:
        result = ES_DAILY_RANK.search(index=index_time,
                                      doc_type=index_type,
                                      body=query_body)['hits']['hits']
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        print('recommentation in: there is not %s es' % index_time)
        return None, None
    uids = [hit['_id'] for hit in result]
    print('len user_set: %d' % len(uids))
    return set(uids), result