def search_sensitive_weibo(index_name):
    """Collect the ``uid`` of users recommended for sensitive weibo activity.

    Searches doc type ``bci`` of ``index_name`` on ``es_cluster`` with a
    bool/should filter made of two range clauses:
    ``s_retweeted_weibo_number > 0`` and ``s_origin_weibo_number > 0``.

    :param index_name: name of the index to search.
    :return: list with the ``_source['uid']`` of every returned hit, in the
        order the hits were returned.
    """
    # NOTE(review): the "filtered" query wrapper belongs to older
    # Elasticsearch releases -- confirm it matches the cluster version.
    retweeted_clause = {"range": {"s_retweeted_weibo_number": {"gt": 0}}}
    origin_clause = {"range": {"s_origin_weibo_number": {"gt": 0}}}
    search_request = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {"should": [retweeted_clause, origin_clause]},
                },
            },
        },
        # Very large cap on the number of hits requested in one response.
        "size": 10000000,
    }

    response = es_cluster.search(index=index_name,
                                 doc_type="bci",
                                 body=search_request)
    return [hit['_source']['uid'] for hit in response['hits']['hits']]
def search_rank(index_name, start_point, size, index_type="bci"):
    """Fetch one page of hits ordered by ``user_index``, highest first.

    :param index_name: name of the index to search.
    :param start_point: value sent as the query's ``from`` offset.
    :param size: value sent as the query's ``size`` (page length).
    :param index_type: doc type to search; defaults to ``"bci"``.
    :return: the ``['hits']['hits']`` list of the ``es.search`` response;
        the search is issued with ``_source=False``.
    """
    descending_user_index = [{"user_index": {"order": "desc"}}]
    page_request = {
        "query": {"match_all": {}},
        "sort": descending_user_index,
        "from": start_point,
        "size": size,
    }

    response = es.search(index=index_name,
                         doc_type=index_type,
                         body=page_request,
                         _source=False)
    return response['hits']['hits']
# Example #3
# 0
def search_sensitive_weibo(index_name):
    """Return up to 2000 ``uid`` values ordered by origin-weibo comment peak.

    Runs a ``match_all`` search on doc type ``bci`` of ``index_name`` via
    ``es_cluster``, sorted descending by
    ``s_origin_weibo_comment_top_number`` and limited to 2000 hits.

    :param index_name: name of the index to search.
    :return: list with the ``_source['uid']`` of every returned hit, in
        sort order.
    """
    ordering = {'s_origin_weibo_comment_top_number': {"order": "desc"}}
    search_request = {
        "query": {"match_all": {}},
        "sort": ordering,
        "size": 2000,
    }

    response = es_cluster.search(index=index_name,
                                 doc_type="bci",
                                 body=search_request)
    return [hit['_source']['uid'] for hit in response['hits']['hits']]
def search_from_es(date):
    """Query the daily ``bci_<date>`` index for its top ``k`` users.

    The index name is ``'bci_'`` followed by ``date`` with every ``'-'``
    removed; it is printed before the search runs. The search is a
    ``match_all`` sorted descending by ``user_index``, limited to ``k`` hits
    (``k`` is not defined in this function and must exist at module level),
    issued on ``es_bci`` with ``_source=False`` and ``fields=['user_index']``.

    :param date: date string; dashes are stripped to build the index name.
    :return: tuple ``(ids, hits)`` where ``ids`` is the set of hit ``_id``
        values and ``hits`` is the raw ``['hits']['hits']`` list.
    """
    index_type = 'bci'
    index_time = 'bci_' + date.replace('-', '')
    print(index_time)

    search_request = {
        'query': {'match_all': {}},
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}],
    }
    response = es_bci.search(index=index_time,
                             doc_type=index_type,
                             body=search_request,
                             _source=False,
                             fields=['user_index'])
    hits = response['hits']['hits']
    hit_ids = set(hit['_id'] for hit in hits)
    return hit_ids, hits
def search_top_k(index_name, top_k):
    """Return the ``uid`` of the ``top_k`` documents ranked by ``user_index``.

    Runs a ``match_all`` search on doc type ``bci`` of ``index_name`` via
    ``es_cluster``, sorted descending by ``user_index``.

    :param index_name: name of the index to search.
    :param top_k: value sent as the query's ``size``.
    :return: list with the ``_source['uid']`` of every returned hit, in
        sort order.
    """
    search_request = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{"user_index": {"order": "desc"}}],
    }

    response = es_cluster.search(index=index_name,
                                 doc_type="bci",
                                 body=search_request)
    return [hit['_source']['uid'] for hit in response["hits"]["hits"]]
# Example #6
# 0
def get_evaluate_max(index_name):
    """Return the value of each evaluation field taken from its top hit.

    For every field in ``evaluate_index`` (currently only ``user_index``) a
    one-hit ``match_all`` search sorted descending by that field is issued on
    doc type ``bci`` of ``index_name`` via ``es_cluster``; the field's value
    is read from the ``_source`` of that single hit.

    :param index_name: name of the index to search.
    :return: dict mapping each field name to the value found in its top hit.
    :raises IndexError: if a search returns no hits.
    :raises Exception: any error raised by ``es_cluster.search`` propagates
        unchanged.
    """
    index_type = 'bci'
    evaluate_index = ['user_index']
    max_result = {}
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # No try/except here: the original caught the exception only to
        # re-raise the same object, which changed nothing for callers.
        result = es_cluster.search(index=index_name,
                                   doc_type=index_type,
                                   body=query_body)['hits']['hits']
        max_result[evaluate] = result[0]['_source'][evaluate]
    # The collected values were previously discarded (implicit None return).
    return max_result
def search_rank(index_name, start_point, size, index_type="bci"):
    """Return a slice of the ``user_index`` ranking for ``index_name``.

    Issues a ``match_all`` search on ``es`` sorted descending by
    ``user_index``, with ``_source=False``.

    :param index_name: name of the index to search.
    :param start_point: value sent as the query's ``from`` offset.
    :param size: value sent as the query's ``size``.
    :param index_type: doc type to search; defaults to ``"bci"``.
    :return: the ``['hits']['hits']`` list of the search response.
    """
    ranking_query = {}
    ranking_query["query"] = {"match_all": {}}
    ranking_query["sort"] = [{"user_index": {"order": "desc"}}]
    ranking_query["from"] = start_point
    ranking_query["size"] = size

    search_kwargs = {
        "index": index_name,
        "doc_type": index_type,
        "body": ranking_query,
        "_source": False,
    }
    return es.search(**search_kwargs)['hits']['hits']
def search_top_k(index_name, top_k):
    """List the ``uid`` of the highest-``user_index`` documents.

    A ``match_all`` search limited to ``top_k`` hits and sorted descending by
    ``user_index`` is run on doc type ``bci`` of ``index_name`` through
    ``es_cluster``.

    :param index_name: name of the index to search.
    :param top_k: value sent as the query's ``size``.
    :return: list of ``_source['uid']`` values, one per hit, in sort order.
    """
    by_user_index_desc = {"user_index": {"order": "desc"}}
    top_query = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [by_user_index_desc],
    }

    hits = es_cluster.search(
        index=index_name, doc_type="bci", body=top_query
    )["hits"]["hits"]
    return [entry['_source']['uid'] for entry in hits]
def search_from_es(date):
    """Fetch the top ``k`` hits of the ``bci_<date>`` index and their ids.

    ``date`` has its ``'-'`` characters removed and is prefixed with
    ``'bci_'`` to form the index name, which is printed. ``k`` is read from
    module scope (it is not defined in this function). The ``match_all``
    search is sorted descending by ``user_index`` and sent to ``es_bci`` with
    ``_source=False`` and ``fields=['user_index']``.

    :param date: date string used to build the index name.
    :return: ``(set of hit '_id' values, raw hits list)``.
    """
    compact_date = date.replace('-', '')
    index_time = 'bci_' + compact_date
    index_type = 'bci'
    print(index_time)

    ordering = [{'user_index': {'order': 'desc'}}]
    top_query = {'query': {'match_all': {}}, 'size': k, 'sort': ordering}

    hits = es_bci.search(
        index=index_time,
        doc_type=index_type,
        body=top_query,
        _source=False,
        fields=['user_index']
    )['hits']['hits']

    seen_ids = set()
    for hit in hits:
        seen_ids.add(hit['_id'])
    return seen_ids, hits
def search_sensitive_weibo(index_name):
    """Return the ``uid`` of users with sensitive retweeted or origin weibo.

    The search hits doc type ``bci`` of ``index_name`` on ``es_cluster`` and
    filters with a bool/should over two range clauses:
    ``s_retweeted_weibo_number > 0`` and ``s_origin_weibo_number > 0``.

    :param index_name: name of the index to search.
    :return: list of ``_source['uid']`` values, one per returned hit.
    """
    # NOTE(review): "filtered" is a query form from older Elasticsearch
    # releases -- confirm the target cluster still accepts it.
    positive_count_clauses = [
        {"range": {field_name: {"gt": 0}}}
        for field_name in ("s_retweeted_weibo_number", "s_origin_weibo_number")
    ]
    sensitive_query = {
        "query": {
            "filtered": {
                "filter": {"bool": {"should": positive_count_clauses}},
            },
        },
        # Very large cap on the number of hits requested in one response.
        "size": 10000000,
    }

    hits = es_cluster.search(
        index=index_name, doc_type="bci", body=sensitive_query
    )['hits']['hits']
    return [entry['_source']['uid'] for entry in hits]