コード例 #1
0
def count_maxweibouser_influence(end_time):
    """Return the highest ``user_index`` stored in one BCI index.

    The index queried is ``weibo_bci_index_name_pre`` followed by
    ``ts2datetimestr(end_time)``. A single document is requested, sorted by
    ``user_index`` in descending order.

    Args:
        end_time: timestamp handed to ``ts2datetimestr`` to pick the index.

    Returns:
        The ``user_index`` of the top-ranked document, or ``1`` when the
        search fails, the hit lacks the field, or the index has no
        documents. ``1`` is a neutral divisor for callers that normalise
        against this value.
    """
    date_time = ts2datetimestr(end_time)
    index_name = weibo_bci_index_name_pre + date_time
    # Single pre-formatted argument: same output under the Python 2 print
    # statement and the Python 3 print function.
    print('max index_name: %s' % (index_name,))
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': 1,
        'sort': {
            'user_index': {
                'order': 'desc'
            }
        }
    }
    # Set the fallback first so an empty hit list can no longer leave the
    # return value unbound.
    max_user_index = 1
    try:
        max_result = es_user_profile.search(index=index_name,
                                            doc_type=weibo_bci_index_type,
                                            body=query_body)['hits']['hits']
        for hit in max_result:
            max_user_index = hit['_source']['user_index']
    except Exception:
        # Best effort: a missing index or malformed hit falls back to 1.
        max_user_index = 1
    return max_user_index
コード例 #2
0
ファイル: utils.py プロジェクト: lvleilei/xnr1
def get_nick_name_unique(nick_name):
    """Report whether ``nick_name`` counts as unique.

    The same exact-term query on the ``nick_name`` field is run against the
    user-profile index and the Weibo XNR index.

    Args:
        nick_name: nickname to look up.

    Returns:
        ``False`` when both indexes return at least one hit, ``True``
        otherwise.
    """
    term_query = {'query': {'term': {'nick_name': nick_name}}}

    profile_hits = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body=term_query)['hits']['hits']
    xnr_hits = es.search(
        index=weibo_xnr_index_name,
        doc_type=weibo_xnr_index_type,
        body=term_query)['hits']['hits']

    # NOTE(review): the name is rejected only when it is found in BOTH
    # indexes; a hit in just one still counts as unique. Confirm whether
    # "either index" was intended.
    return not (profile_hits and xnr_hits)
コード例 #3
0
def _weibouser_classify_conditions(classify_id, userlist):
    """Build the list of ES query clauses selecting users for ``classify_id``."""
    if classify_id == 1:  # concerned users only
        return [{'bool': {'must': {'terms': {'uid': userlist}}}}]
    if classify_id == 2:  # everyone except the concerned users
        return [{'bool': {'must_not': [{'terms': {'uid': userlist}}]}}]
    if classify_id == 0:  # no restriction
        return [{'match_all': {}}]
    # Unknown classification: select nothing instead of failing later on an
    # unbound variable.
    return []


def lookup_active_weibouser(classify_id, weiboxnr_id, start_time, end_time):
    """Return the top users of a BCI index, ranked by ``user_index``.

    Args:
        classify_id: ``1`` restricts the query to the uids returned by
            ``lookup_weiboxnr_concernedusers(weiboxnr_id)``, ``2`` excludes
            those uids, ``0`` matches every user. Any other value selects
            nothing and yields an empty list.
        weiboxnr_id: id passed to ``lookup_weiboxnr_concernedusers``.
        start_time: kept for interface compatibility; the queries below do
            not filter on it.
        end_time: timestamp; the BCI index for ``end_time - DAY`` is queried.
            When ``S_TYPE == 'test'`` the index for ``S_DATE_BCI`` is used
            instead and ``end_time`` is shifted back by the gap between
            today and ``S_DATE_BCI``.

    Returns:
        A list of dicts with the keys ``uid``, ``influence``, ``fans_num``,
        ``total_number``, ``friends_num``, ``uname``, ``location`` and
        ``url``, in descending ``user_index`` order. ``influence`` is the
        user's ``user_index`` as a percentage of the maximum reported by
        ``count_maxweibouser_influence`` (0 when that maximum is not
        positive). An empty list is returned when a query fails.
    """
    today_date_time = end_time - DAY
    if S_TYPE == 'test':
        # Test data lives in one fixed daily index; shift the requested
        # window back so it lines up with that day.
        bci_date_ts = datetime2ts(S_DATE_BCI)
        test_time_gap = datetime2ts(ts2datetime(time.time())) - bci_date_ts
        today_date_time = bci_date_ts
        end_time = end_time - test_time_gap

    bci_index_name = weibo_bci_index_name_pre + \
        ts2datetime(today_date_time).replace('-', '')
    # Single pre-formatted arguments keep the debug output identical under
    # the Python 2 print statement and the Python 3 print function.
    print('bci_index_name: %s' % (bci_index_name,))
    print('end_time: %s' % (ts2date(end_time),))

    # step 1: pick the user range according to the classification choice
    userlist = lookup_weiboxnr_concernedusers(weiboxnr_id)
    condition_list = _weibouser_classify_conditions(classify_id, userlist)
    print('%s %s %s' % (userlist, classify_id, condition_list))

    # step 2: look the users up
    user_max_index = count_maxweibouser_influence(end_time - DAY)
    results = []
    for condition in condition_list:
        query_body = {
            'query': condition,
            # only the top-ranked users by influence are needed
            'size': HOT_WEIBO_NUM,
            'sort': {
                'user_index': {
                    'order': 'desc'
                }
            }
        }
        try:
            bci_hits = es_user_portrait.search(
                index=bci_index_name,
                doc_type=weibo_bci_index_type,
                body=query_body)['hits']['hits']
            if not bci_hits:
                continue

            search_uid_list = [hit['_source']['user'] for hit in bci_hits]
            # Without an explicit size Elasticsearch returns only 10 hits,
            # which would leave most of the ranked users without a profile.
            profile_query = {
                'query': {'terms': {'uid': search_uid_list}},
                'size': len(search_uid_list),
            }
            profile_hits = es_user_profile.search(
                index=profile_index_name,
                doc_type=profile_index_type,
                body=profile_query)['hits']['hits']
            weibo_user_dict = {
                hit['_source']['uid']: hit['_source'] for hit in profile_hits
            }

            for hit in bci_hits:
                source = hit['_source']
                uid = source['user']
                if user_max_index > 0:
                    # float() avoids Python 2 floor division on int scores.
                    influence = \
                        float(source['user_index']) / user_max_index * 100
                else:
                    influence = 0
                try:
                    weibo_user_info = weibo_user_dict[uid]
                    uname = weibo_user_info['nick_name']
                    location = weibo_user_info['user_location']
                    url = weibo_user_info['photo_url']
                except (KeyError, TypeError):
                    # No usable profile for this uid: report blank details.
                    uname = ''
                    location = ''
                    url = ''
                results.append({
                    'uid': uid,
                    'influence': influence,
                    'fans_num': source['user_fansnum'],
                    'total_number': source['total_number'],
                    'friends_num': source['user_friendsnum'],
                    'uname': uname,
                    'location': location,
                    'url': url,
                })
        except Exception:
            # Best effort: any query or schema failure yields an empty result.
            results = []

    return results