Example #1
0
def search_specified_group(postname="报"):
    query_body = {
        "query":{
            "bool": {
                "must": [
                    {"wildcard": {
                        "uname": {
                            "wildcard": "*" + postname
                        }
                    }},
                    {"range": {
                        "fansnum": {
                            "gte": 100000
                        }
                    }}
                ]
            }
        },
        "size": 10000
    }

    search_results = es_profile.search(index="user_portrait_1222", doc_type="user", body=query_body)["hits"]["hits"]
    uid_list = []
    for item in search_results:
        uid_list.append(item['_id'])
        print item['_id'], item['_source']['uname'], '\n'
    print "该群体有:", len(uid_list)
    return uid_list
Example #2
0
def nickname2uid(nickname_list):
    """Map nicknames to the uids stored in the user-profile index.

    Issues a ``filtered``/``terms`` query on ``nick_name`` against
    ``profile_index_name`` / ``profile_index_type``, requesting up to
    ``MAX_SEARCH_SIZE`` hits.

    :param nickname_list: nicknames passed verbatim to the ``terms`` filter.
    :return: list of the distinct ``uid`` values found in the hits'
        ``_source``; the order is that of set iteration, not input order.
    """
    nickname_filter = {'terms': {'nick_name': nickname_list}}
    query_body = {
        'query': {'filtered': {'filter': nickname_filter}},
        'size': MAX_SEARCH_SIZE,
    }

    response = es_user_profile.search(index=profile_index_name,
                                      doc_type=profile_index_type,
                                      body=query_body)
    # A set collapses hits that share the same uid.
    distinct_uids = set(hit['_source']['uid']
                        for hit in response['hits']['hits'])
    return list(distinct_uids)
Example #3
0
def get_fansnum_max():
    """Return the largest ``fansnum`` value in the user-profile index.

    Asks ``profile_index_name`` / ``profile_index_type`` for the single
    document that sorts first by ``fansnum`` descending and returns that
    document's ``fansnum``.

    :return: the ``fansnum`` of the top-ranked hit.
    :raises IndexError: when the search yields no hits.
    :raises KeyError: when the top hit's ``_source`` has no ``fansnum``.

    Any error raised by the search itself propagates unchanged, with its
    original traceback.

    NOTE(review): this function previously fetched the hit and returned
    nothing; returning the top hit's ``fansnum`` mirrors what
    ``get_evaluate_max`` extracts for an arbitrary field -- confirm that
    this is what callers expect.
    """
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': 1,
        'sort': [{'fansnum': {'order': 'desc'}}]
    }
    top_hits = es_user_profile.search(index=profile_index_name,
                                      doc_type=profile_index_type,
                                      body=query_body)['hits']['hits']
    return top_hits[0]['_source']['fansnum']
Example #4
0
def sort_total_number(prefix, uid_list, time, key_search, number):
    """Return the ids of the top *number* ``bci_history`` documents.

    Documents are sorted descending by a field derived from *prefix*:

    * for ``prefix == "weibo_"`` the field is ``weibo_day_last``,
      ``weibo_week_sum`` or ``weibo_month_sum`` for a *time* of 1, 7 or 30
      respectively;
    * for any other prefix, *prefix* itself is used as the sort field and
      *time* is ignored.

    :param prefix: sort-field prefix (or the full field name, see above).
    :param uid_list: when non-empty, restricts the search to these ``uid``
        values via a ``terms`` filter; when empty, all documents match.
    :param time: window selector; converted with ``int()`` when
        ``prefix == "weibo_"``.
    :param key_search: unused; kept for interface compatibility.
    :param number: maximum number of hits to request.
    :return: list of hit ``_id`` values in ranking order.
    :raises ValueError: when ``prefix == "weibo_"`` and *time* is not one
        of 1, 7 or 30.
    """
    if prefix == "weibo_":
        suffix_by_window = {1: 'day_last', 7: 'week_sum', 30: 'month_sum'}
        window = int(time)
        if window not in suffix_by_window:
            # Without this check the sort field would be left undefined and
            # the function would fail later with an UnboundLocalError.
            raise ValueError(
                "unsupported time window %r for prefix %r; "
                "expected 1, 7 or 30" % (time, prefix))
        order = prefix + suffix_by_window[window]
    else:
        order = prefix

    # Only the query clause depends on whether uids were supplied.
    if uid_list:
        query = {"filtered": {"filter": {"terms": {"uid": uid_list}}}}
    else:
        query = {"match_all": {}}

    query_body = {
        "query": query,
        "sort": {
            order: {
                "order": "desc"
            }
        },
        "size": number
    }

    # _source=False: only the document ids are needed.
    hits = es_user_profile.search(index="bci_history",
                                  doc_type="bci",
                                  body=query_body,
                                  _source=False)['hits']['hits']
    return [hit['_id'] for hit in hits]
def search_specified_group(postname="律师"):
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    "wildcard": {
                        "nick_name": {
                            "wildcard": "*" + postname
                        }
                    }
                }, {
                    "range": {
                        "fansnum": {
                            "gte": 10000
                        }
                    }
                }]
            }
        },
        "size": 10000
    }

    search_results = es_profile.search(index="weibo_user",
                                       doc_type="user",
                                       body=query_body)["hits"]["hits"]
    uid_list = []
    for item in search_results:
        uid_list.append(item['_id'])
        print item['_id'], item['_source']['nick_name'], '\n'
    print "该群体有:", len(uid_list)

    result = dict()
    result['social_sensors'] = json.dumps(uid_list)
    result['stop_time'] = 1460086441
    result['create_at'] = 1377964800
    result['task_name'] = "top lawers"
    result['remark'] = "粉丝10000以上的律师"
    result["history_status"] = json.dumps([])
    result['burst_reason'] = ''
    result['processing_status'] = "1"
    result["warning_status"] = '0'
    result["finish"] = "0"
    result["create_by"] = 'admin'

    es_profile.index(index="manage_sensing_task",
                     doc_type='task',
                     id='admin-top lawers',
                     body=result)
    print "1"

    return uid_list
Example #6
0
def get_evaluate_max(index_name, index_type, field):
    """Return the largest value of *field* in the given index.

    Requests the single document that sorts first by *field* descending
    and returns that document's value for *field*.

    :param index_name: index to search.
    :param index_type: document type to search.
    :param field: name of the field to sort on and read back.
    :return: ``_source[field]`` of the top-ranked hit.
    :raises IndexError: when the search yields no hits.
    :raises KeyError: when the top hit's ``_source`` lacks *field*.

    Errors are not replaced by a default value: whatever the search or the
    extraction raises propagates to the caller with its original traceback.
    """
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': 1,
        'sort': [{field: {'order': 'desc'}}]
    }
    result = es_user_profile.search(index=index_name,
                                    doc_type=index_type,
                                    body=query_body)['hits']['hits']
    max_evaluate = result[0]['_source'][field]
    return max_evaluate
def search_specified_group(postname="律师"):
    query_body = {
        "query":{
            "bool": {
                "must": [
                    {"wildcard": {
                        "nick_name": {
                            "wildcard": "*" + postname
                        }
                    }},
                    {"range": {
                        "fansnum": {
                            "gte": 10000
                        }
                    }}
                ]
            }
        },
        "size": 10000
    }

    search_results = es_profile.search(index="weibo_user", doc_type="user", body=query_body)["hits"]["hits"]
    uid_list = []
    for item in search_results:
        uid_list.append(item['_id'])
        print item['_id'], item['_source']['nick_name'], '\n'
    print "该群体有:", len(uid_list)

    result = dict()
    result['social_sensors'] = json.dumps(uid_list)
    result['stop_time'] = 1460086441
    result['create_at'] = 1377964800
    result['task_name'] = "top lawers"
    result['remark'] = "粉丝10000以上的律师"
    result["history_status"] = json.dumps([])
    result['burst_reason'] = ''
    result['processing_status'] = "1"
    result["warning_status"] = '0'
    result["finish"] = "0"
    result["create_by"] = 'admin'

    es_profile.index(index="manage_sensing_task", doc_type='task', id='admin-top lawers', body=result)
    print "1"

    return uid_list
Example #8
0
def sort_total_number(prefix, uid_list, time, key_search, number):
    """Rank ``bci_history`` documents and return the ids of the top hits.

    The sort field is chosen as follows:

    * ``prefix == "weibo_"``: ``weibo_day_last`` (time 1),
      ``weibo_week_sum`` (time 7) or ``weibo_month_sum`` (time 30);
    * otherwise: *prefix* is used directly as the field name and *time*
      is not consulted.

    :param prefix: sort-field prefix, or the complete field name.
    :param uid_list: optional restriction; a non-empty list limits the
        search to documents whose ``uid`` is in the list.
    :param time: 1, 7 or 30 (anything ``int()`` accepts) when
        ``prefix == "weibo_"``.
    :param key_search: unused; kept so existing callers keep working.
    :param number: number of hits to request.
    :return: list of ``_id`` values, highest sort value first.
    :raises ValueError: for ``prefix == "weibo_"`` with a *time* other
        than 1, 7 or 30.
    """
    if prefix == "weibo_":
        days = int(time)
        if days == 1:
            order = prefix + 'day_last'
        elif days == 7:
            order = prefix + 'week_sum'
        elif days == 30:
            order = prefix + 'month_sum'
        else:
            # Fail with a clear message instead of leaving ``order``
            # unassigned, which used to surface as an UnboundLocalError.
            raise ValueError(
                "unsupported time window %r for prefix %r; "
                "expected 1, 7 or 30" % (time, prefix))
    else:
        order = prefix

    query_body = {
        "query": {"match_all": {}},
        "sort": {order: {"order": "desc"}},
        "size": number
    }
    if uid_list:
        # Narrow the search to the requested users.
        query_body["query"] = {
            "filtered": {
                "filter": {
                    "terms": {
                        "uid": uid_list
                    }
                }
            }
        }

    # Ids are all that is returned, so the document bodies are not fetched.
    search_results = es_user_profile.search(index="bci_history", doc_type="bci", body=query_body, _source=False)['hits']['hits']
    return [item['_id'] for item in search_results]
Example #9
0
def count_minweibouser_influence(index_name):
    """Return the smallest ``user_index`` in *index_name*, or 0 as fallback.

    Requests the single document that sorts first by ``user_index``
    ascending (doc type ``weibo_bci_index_type``) and returns its
    ``user_index``.

    This is a best-effort lookup: 0 is returned when the search fails, when
    the hit lacks ``user_index``, or when the index holds no documents.

    :param index_name: name of the index to query.
    :return: the minimum ``user_index`` value, or 0.
    """
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': 1,
        'sort': {
            'user_index': {
                'order': 'asc'
            }
        }
    }
    # Pre-set the fallback so that an empty hit list also yields 0 instead
    # of leaving the result undefined.
    min_user_index = 0
    try:
        hits = es_user_profile.search(index=index_name,
                                      doc_type=weibo_bci_index_type,
                                      body=query_body)['hits']['hits']
        if hits:
            min_user_index = hits[0]['_source']['user_index']
    except Exception:
        # Deliberately lenient, but KeyboardInterrupt/SystemExit still
        # propagate.
        min_user_index = 0
    return min_user_index
Example #10
0
def lookup_active_weibouser(today_date_time):
    """Build display records for the day's top users ranked by ``user_index``.

    Steps:

    1. Derive the active-user index name and the BCI index name from
       ``ts2datetime(today_date_time)`` and call
       ``weibo_active_user_mappings`` with the former (presumably to make
       sure that index/mapping exists -- confirm against its definition).
    2. Fetch the 100 documents with the highest ``user_index`` from the BCI
       index.
    3. Fetch the profile documents of those users and merge nickname,
       location and photo URL into each record.

    Each record is a dict with the keys ``influence``, ``fans_num``,
    ``friends_num``, ``total_number``, ``uid``, ``uname``, ``location`` and
    ``url``. ``influence`` is ``user_index`` min-max scaled with the values
    from ``count_maxweibouser_influence`` / ``count_minweibouser_influence``;
    it is 0.0 when max and min coincide. Users without a complete profile
    get empty strings for ``uname``, ``location`` and ``url``.

    The number of uids found and the number of records built are printed.
    This function itself does not index the records anywhere.

    :param today_date_time: timestamp handed to ``ts2datetime``.
    :return: list of record dicts in ranking order; an empty list when any
        step of the lookup fails.
    """
    date_str = ts2datetime(today_date_time)
    weibo_active_user_index_name = weibo_active_user_index_name_pre + date_str
    weibo_active_user_mappings(weibo_active_user_index_name)

    # The BCI index name carries the date without dashes.
    bci_index_name = weibo_bci_index_name_pre + ''.join(date_str.split('-'))

    user_max_index = count_maxweibouser_influence(bci_index_name)
    user_min_index = count_minweibouser_influence(bci_index_name)

    results = []

    query_body = {
        'query': {
            'match_all': {}
        },
        # NOTE(review): an earlier comment here said the top 50 users would
        # be enough, yet 100 are requested -- confirm the intended number.
        'size': 100,
        'sort': {
            'user_index': {
                'order': 'desc'
            }
        }
    }
    try:
        flow_text_exist = es_user_portrait.search(
            index=bci_index_name,
            doc_type=weibo_bci_index_type,
            body=query_body)['hits']['hits']
        search_uid_list = [item['_source']['user'] for item in flow_text_exist]
        print(len(search_uid_list))

        # Elasticsearch returns only 10 hits unless ``size`` is given, which
        # would leave most of the users without profile data.
        profile_query = {
            'query': {'terms': {'uid': search_uid_list}},
            'size': len(search_uid_list)
        }
        weibo_user_exist = es_user_profile.search(
            index=profile_index_name,
            doc_type=profile_index_type,
            body=profile_query)['hits']['hits']
        weibo_user_dict = dict(
            (hit['_source']['uid'], hit['_source'])
            for hit in weibo_user_exist)

        # float() keeps Python 2 from truncating when both bounds are ints.
        index_span = float(user_max_index - user_min_index)

        for item in flow_text_exist:
            source = item['_source']
            user_dict = dict()
            if index_span:
                user_dict['influence'] = (
                    source['user_index'] - user_min_index) / index_span
            else:
                # max == min: scaling is undefined; report 0.0 rather than
                # failing and losing every record.
                user_dict['influence'] = 0.0
            user_dict['fans_num'] = source['user_fansnum']
            user_dict['friends_num'] = source['user_friendsnum']
            user_dict['total_number'] = source['total_number']
            user_dict['uid'] = source['user']

            weibo_user_info = weibo_user_dict.get(user_dict['uid'])
            try:
                user_dict['uname'] = weibo_user_info['nick_name']
                user_dict['location'] = weibo_user_info['user_location']
                user_dict['url'] = weibo_user_info['photo_url']
            except (KeyError, TypeError):
                # No profile (None) or an incomplete one: blank all three
                # fields so every record has the same shape.
                user_dict['uname'] = ''
                user_dict['location'] = ''
                user_dict['url'] = ''

            results.append(user_dict)

    except Exception:
        # Best effort: any failure yields an empty result instead of an
        # error, but KeyboardInterrupt/SystemExit still propagate.
        results = []
    print(len(results))
    return results