def search_max_single_field(field, index_name, doctype, top_k=3):
    """Return up to ``top_k`` ranked rows for users found in the portrait index.

    Candidates are fetched 100 at a time with
    ``search_k(es, index_name, doctype, start, field, 100)`` (``es`` is a
    module-level client).  Each page's uids are looked up with ``mget`` in
    the ``user_portrait`` and ``weibo_user`` indices; candidates missing
    from the portrait index are skipped.

    NOTE(review): the ranking assumes ``search_k`` yields records already
    ordered by ``field`` (best first) -- confirm against ``search_k``.

    Args:
        field: Field name handed to ``search_k``, e.g.
            ``"origin_weibo_retweeted_top_number"`` or
            ``"origin_weibo_comment_top_number"``.  A name containing
            ``"retweeted"`` selects the retweet columns of each record,
            anything else selects the comment columns.
        index_name: Index handed to ``search_k``.
        doctype: Document type handed to ``search_k``.
        top_k: Maximum number of rows to return (converted with ``int``).

    Returns:
        A list of 7-item lists
        ``[rank, photo_url, uid, nick_name, top_number, weibo_url, '1']``.
        ``rank`` starts at 1; ``photo_url`` and ``nick_name`` are ``''``
        when the user has no profile document.  The list is shorter than
        ``top_k`` when the candidates run out, and empty when
        ``top_k <= 0``.
    """
    limit = int(top_k)
    if limit <= 0:
        return []

    # Pick the record columns once instead of re-testing `field` per row.
    if 'retweeted' in field:
        mid_key = 'origin_weibo_top_retweeted_id'
        number_key = 'origin_weibo_retweeted_top_number'
    else:
        mid_key = 'origin_weibo_top_comment_id'
        number_key = 'origin_weibo_comment_top_number'

    page_size = 100
    return_list = []
    start = 0

    while True:
        user_list = search_k(es, index_name, doctype, start, field, page_size)
        if not user_list:
            # Candidates exhausted: stop instead of looping forever or
            # issuing an mget with an empty id list.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]  # uid list

        search_result = es_portrait.mget(index="user_portrait",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        # mget answers in request order, so the three sequences line up.
        for record, portrait, profile in zip(user_list, search_result,
                                             profile_result):
            if not portrait.get('found'):
                continue

            uid = portrait.get('_id', '')
            info = [len(return_list) + 1, '', uid, '', '', '', '1']

            if profile.get('found'):
                profile_source = profile['_source']
                info[1] = profile_source.get('photo_url', '')
                info[3] = profile_source.get('nick_name', '')

            info[5] = weiboinfo2url(uid, record[mid_key])
            info[4] = record[number_key]

            return_list.append(info)
            if len(return_list) >= limit:
                return return_list
Example #2
0
def search_tag(es, number, active_index, active_type, portrait_index,
               portrait_type, tag):
    """Return up to ``number`` ranked rows for users whose domain has ``tag``.

    Candidates are fetched 10000 at a time with
    ``search_k(es, active_index, active_type, start, "user_index", 10000)``.
    Each page's uids are looked up with ``mget`` in the portrait index and
    in the ``weibo_user`` profile index.  A candidate is kept when its
    portrait document exists and ``tag in portrait['_source']['domain']``.

    NOTE(review): the ranking assumes ``search_k`` yields records already
    ordered by ``user_index`` (best first) -- confirm against ``search_k``.

    Args:
        es: Client handed to ``search_k``.
        number: Maximum number of rows to return (converted with ``int``).
        active_index: Index handed to ``search_k``.
        active_type: Document type handed to ``search_k``.
        portrait_index: Index used for the portrait ``mget``.
        portrait_type: Document type used for the portrait ``mget``.
        tag: Value tested with ``in`` against the portrait ``domain`` field.

    Returns:
        A list of 7-item lists
        ``[rank, photo_url, uid, nick_name, user_index, activeness,
        importance]``.  ``rank`` starts at 1; missing optional values are
        ``''``.  The scan stops early once the candidates run out or more
        than 100000 portrait documents have been examined; the list is
        empty when ``number <= 0``.
    """
    limit = int(number)
    if limit <= 0:
        return []

    page_size = 10000
    max_scanned = 100000
    return_list = []
    scanned = 0
    start = 0

    while True:
        user_list = search_k(es, active_index, active_type, start,
                             "user_index", page_size)
        if not user_list:
            # Candidates exhausted.  Without this the scan cap below is
            # never reached, because empty pages do not advance `scanned`.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        # Walk the three sequences in lockstep rather than recovering the
        # position with list.index(), which is quadratic and returns the
        # first equal document when a uid appears twice.
        for record, portrait, profile in zip(user_list, search_result,
                                             profile_result):
            if not portrait.get('found'):
                continue
            portrait_source = portrait['_source']
            domain = portrait_source.get('domain')
            # A portrait without a domain cannot carry the tag.
            if domain is None or tag not in domain:
                continue

            info = [len(return_list) + 1, '', '', '', '', '', '']
            if profile.get('found'):
                profile_source = profile['_source']
                info[1] = profile_source.get('photo_url', '')
                info[3] = profile_source.get('nick_name', '')
            info[2] = portrait.get('_id', '')
            info[4] = record['user_index']
            info[5] = portrait_source.get('activeness', '')
            info[6] = portrait_source.get('importance', '')

            return_list.append(info)
            if len(return_list) >= limit:
                return return_list

        scanned += len(search_result)
        if scanned > max_scanned:
            return return_list
def search_tag(es, number, active_index, active_type, portrait_index, portrait_type, field_dict):
    """Return up to ``number`` ranked rows for users matching ``field_dict``.

    Candidates are fetched 1000 at a time with
    ``search_k(es, active_index, active_type, start, "user_index", 1000)``.
    Each page's uids are looked up with ``mget`` in the portrait index and
    in the ``weibo_user`` profile index.  A candidate is kept when its
    portrait document exists and ``value in portrait['_source'][key]`` for
    the first ``key: value`` pair of ``field_dict``.

    NOTE(review): the ranking assumes ``search_k`` yields records already
    ordered by ``user_index`` (best first) -- confirm against ``search_k``.

    Args:
        es: Client handed to ``search_k``.
        number: Maximum number of rows to return (converted with ``int``).
        active_index: Index handed to ``search_k``.
        active_type: Document type handed to ``search_k``.
        portrait_index: Index used for the portrait ``mget``.
        portrait_type: Document type used for the portrait ``mget``.
        field_dict: Mapping such as ``{"domain": "art"}``.  Only its first
            entry is used; any further entries are ignored.

    Returns:
        A list of 7-item lists
        ``[rank, photo_url, uid, nick_name, user_index, activeness,
        importance]``.  ``rank`` starts at 1; missing optional values are
        ``''``.  The scan stops early once the candidates run out or more
        than 10000 portrait documents have been examined; the list is
        empty when ``number <= 0``.

    Raises:
        IndexError: If ``field_dict`` is empty.
    """
    # dict.keys()[0] / dict.values()[0] only work on Python 2; taking the
    # first item this way works on both major versions and is done once
    # instead of once per document.
    field_name, wanted = list(field_dict.items())[0]

    limit = int(number)
    if limit <= 0:
        return []

    page_size = 1000
    max_scanned = 10000
    return_list = []
    scanned = 0
    start = 0

    while True:
        user_list = search_k(es, active_index, active_type, start, "user_index", page_size)
        if not user_list:
            # Candidates exhausted.  Without this the scan cap below is
            # never reached, because empty pages do not advance `scanned`.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        # Walk the three sequences in lockstep rather than recovering the
        # position with list.index(), which is quadratic and returns the
        # first equal document when a uid appears twice.
        for record, portrait, profile in zip(user_list, search_result,
                                             profile_result):
            if not portrait.get('found'):
                continue
            portrait_source = portrait['_source']
            field_value = portrait_source.get(field_name)
            # A portrait without the field cannot match the filter.
            if field_value is None or wanted not in field_value:
                continue

            info = [len(return_list) + 1, '', '', '', '', '', '']
            if profile.get('found'):
                profile_source = profile['_source']
                info[1] = profile_source.get('photo_url', '')
                info[3] = profile_source.get('nick_name', '')
            info[2] = portrait.get('_id', '')
            info[4] = record['user_index']
            info[5] = portrait_source.get('activeness', '')
            info[6] = portrait_source.get('importance', '')

            return_list.append(info)
            if len(return_list) >= limit:
                return return_list

        scanned += len(search_result)
        if scanned > max_scanned:
            return return_list
def search_max_single_field(field, index_name, doctype, top_k=3):
    """Return up to ``top_k`` ranked rows for users found in the portrait index.

    Candidates are fetched 100 at a time with
    ``search_k(es, index_name, doctype, start, field, 100)`` (``es`` is a
    module-level client).  Each page's uids are looked up with ``mget`` in
    the portrait index (``portrait_index_name`` / ``portrait_index_type``)
    and the profile index (``profile_index_name`` / ``profile_index_type``);
    candidates missing from the portrait index are skipped.

    NOTE(review): the ranking assumes ``search_k`` yields records already
    ordered by ``field`` (best first) -- confirm against ``search_k``.

    Args:
        field: Field name handed to ``search_k``, e.g.
            ``"origin_weibo_retweeted_top_number"`` or
            ``"origin_weibo_comment_top_number"``.  A name containing
            ``"retweeted"`` selects the retweet columns of each record,
            anything else selects the comment columns.
        index_name: Index handed to ``search_k``.
        doctype: Document type handed to ``search_k``.
        top_k: Maximum number of rows to return (converted with ``int``).

    Returns:
        A list of 7-item lists
        ``[rank, photo_url, uid, nick_name, top_number, weibo_url, '1']``.
        ``rank`` starts at 1; ``photo_url`` and ``nick_name`` are ``''``
        when the user has no profile document.  The list is shorter than
        ``top_k`` when the candidates run out, and empty when
        ``top_k <= 0``.
    """
    limit = int(top_k)
    if limit <= 0:
        return []

    # Pick the record columns once instead of re-testing `field` per row.
    if 'retweeted' in field:
        mid_key = 'origin_weibo_top_retweeted_id'
        number_key = 'origin_weibo_retweeted_top_number'
    else:
        mid_key = 'origin_weibo_top_comment_id'
        number_key = 'origin_weibo_comment_top_number'

    page_size = 100
    return_list = []
    start = 0

    while True:
        user_list = search_k(es, index_name, doctype, start, field, page_size)
        if not user_list:
            # Candidates exhausted: stop instead of looping forever or
            # issuing an mget with an empty id list.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]  # uid list

        search_result = es_portrait.mget(index=portrait_index_name,
                                         doc_type=portrait_index_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index=profile_index_name,
                                         doc_type=profile_index_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        # mget answers in request order, so the three sequences line up.
        for record, portrait, profile in zip(user_list, search_result,
                                             profile_result):
            if not portrait.get('found'):
                continue

            uid = portrait.get('_id', '')
            info = [len(return_list) + 1, '', uid, '', '', '', '1']

            if profile.get('found'):
                profile_source = profile['_source']
                info[1] = profile_source.get('photo_url', '')
                info[3] = profile_source.get('nick_name', '')

            info[5] = weiboinfo2url(uid, record[mid_key])
            info[4] = record[number_key]

            return_list.append(info)
            if len(return_list) >= limit:
                return return_list