def all_makeup_info(id, sort_norm, time):
    """Assemble a summary dict for one user from several ES indices.

    The web-user index supplies the basic profile fields, the user index is
    probed to see whether any document exists for the same uid (reported as
    ``is_warehousing``), and ``history_info`` supplies one value each for
    ``bci`` and ``sen``.

    :param id: uid used in every term query; always stored as ``item['uid']``.
    :param sort_norm: forwarded unchanged to ``get_all_filed``.
    :param time: forwarded unchanged to ``get_all_filed``.
    :return: dict with keys ``uid``, ``fans``, ``location``, ``uname``,
        ``weibo_count``, ``is_warehousing``, ``bci`` and ``sen``.  The
        profile values are ``None`` when the web-user index has no match.
    """
    # (output key, stored field) pairs, read in this order from the first hit.
    profile_map = (
        ('uid', 'uid'),
        ('fans', 'fansnum'),
        ('location', 'user_location'),
        ('uname', 'nick_name'),
        ('weibo_count', 'statusnum'),
    )
    profile_query = {
        "query": {"bool": {"must": [{"term": {"user.uid": id}}],
                           "must_not": [], "should": []}},
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {},
        "fields": ["uid", "nick_name", "user_location", "fansnum", "statusnum"],
    }
    profile_hits = es.search(index=WEBUSER_INDEX_NAME,
                             doc_type=WEBUSER_INDEX_TYPE,
                             body=profile_query)['hits']
    if profile_hits['total'] != 0:
        stored = profile_hits['hits'][0]['fields']
        item = dict((key, stored[field][0]) for key, field in profile_map)
    else:
        item = dict((key, None) for key, _ in profile_map)

    # The requested uid always wins over whatever the index returned.
    item['uid'] = id

    presence_query = {
        "query": {"bool": {"must": [{"term": {"user.uid": id}}],
                           "must_not": [], "should": []}},
        "size": 10,
        "sort": [],
        "facets": {},
        "fields": [],
    }
    presence_hits = es.search(index=USER_INDEX_NAME,
                              doc_type=USER_INDEX_TYPE,
                              body=presence_query)['hits']
    item['is_warehousing'] = True if presence_hits['total'] != 0 else False

    field_bci, field_sen = get_all_filed(sort_norm, time)
    item['bci'] = history_info(BCIHIS_INDEX_NAME, BCIHIS_INDEX_TYPE, id, field_bci)
    item['sen'] = history_info(SESHIS_INDEX_NAME, SESHIS_INDEX_TYPE, id, field_sen)
    return item
def in_makeup_info(id, sort_norm, time):
    """Assemble a summary dict for one user from the user index.

    Reads the profile fields from the first hit in the user index (or
    ``None`` for each when nothing matches) and attaches one value each for
    ``bci``, ``sen``, ``imp`` and ``act`` via ``history_info``.

    :param id: uid used in every query; always stored as ``item['uid']``.
    :param sort_norm: forwarded unchanged to ``get_in_filed``.
    :param time: forwarded unchanged to ``get_in_filed``.
    :return: dict with keys ``domain``, ``uid``, ``topic``, ``location``,
        ``uname``, ``fans``, ``bci``, ``sen``, ``imp`` and ``act``.
    """
    # (output key, stored field) pairs, read in this order from the first hit.
    profile_map = (
        ('domain', 'domain'),
        ('uid', 'uid'),
        ('topic', 'topic_string'),
        ('location', 'location'),
        ('uname', 'uname'),
        ('fans', 'fansnum'),
    )
    profile_query = {
        "query": {"bool": {"must": [{"term": {"user.uid": id}}],
                           "must_not": [], "should": []}},
        "size": 10,
        "sort": [],
        "facets": {},
        "fields": ["uid", "uname", "location", "topic_string", "domain", "fansnum"],
    }
    profile_hits = es.search(index=USER_INDEX_NAME,
                             doc_type=USER_INDEX_TYPE,
                             body=profile_query)['hits']
    if profile_hits['total'] != 0:
        stored = profile_hits['hits'][0]['fields']
        item = dict((key, stored[field][0]) for key, field in profile_map)
    else:
        item = dict((key, None) for key, _ in profile_map)

    # The requested uid always wins over whatever the index returned.
    item['uid'] = id

    field_bci, field_sen, field_imp, field_act = get_in_filed(sort_norm, time)
    item['bci'] = history_info(BCI_INDEX_NAME, BCI_INDEX_TYPE, id, field_bci)
    item['sen'] = history_info(SES_INDEX_NAME, SES_INDEX_TYPE, id, field_sen)
    item['imp'] = history_info(IMP_INDEX_NAME, IMP_INDEX_TYPE, id, field_imp)
    item['act'] = history_info(ACT_INDEX_NAME, ACT_INDEX_TYPE, id, field_act)
    return item
def find_domain():
    """Export ``[uid, domain]`` pairs for users in four fixed domains.

    Searches index ``user_portrait_1222`` (doc type ``user``) for up to 1000
    documents whose ``domain`` is one of the listed values, prints how many
    hits came back, and hands the collected pairs to ``write_json``.
    Returns nothing.
    """
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"domain": ["媒体","高校","法律机构及人士","政府机构及人士"]}}
                        ]
                    }
                }
            }
        },
        "size": 1000
    }
    hits = es.search(index='user_portrait_1222', doc_type='user',
                     body=query_body)['hits']['hits']
    print(len(hits))
    uid_domain = [[hit['_source']['uid'], hit['_source']['domain']]
                  for hit in hits]
    write_json(uid_domain)
def history_info(index_name, index_type, uid, fields):
    """Fetch one stored field value for ``uid`` from the given index.

    :param index_name: index to search.
    :param index_type: doc type to search.
    :param uid: value matched against the ``uid`` term.
    :param fields: sent as the request's ``fields`` and also used as the key
        into the hit's ``fields`` dict -- presumably a single field name
        string; verify against callers.
    :return: first element of the requested field on the first hit, or
        ``None`` when the search timed out, matched nothing, or raised.
    """
    # Kept from the original: rejects unsized ``fields`` (TypeError) up front.
    len(fields)

    body = {
        "query": {"bool": {"must": [{"term": {"uid": uid}}]}},
        "fields": fields,
    }
    try:
        response = es.search(index=index_name, doc_type=index_type, body=body)
        usable = response['timed_out'] == False and response['hits']['total'] != 0
        if not usable:
            return None
        stored = response['hits']['hits'][0]['fields']
        return stored[fields][0]
    except Exception as e:
        # Best-effort lookup: any failure is reported and mapped to None.
        print("Exception : " + str(e))
        return None
Esempio n. 5
0
def search_user_task(user_name):
    """List the keyword-rank tasks submitted by ``user_name``.

    Queries the task index for documents whose ``submit_user`` equals
    ``str(user_name)``, sorted by ``create_time`` descending and capped at
    ``MAX_ITEMS``.

    :param user_name: submitter to filter on (stringified for the query).
    :return: ``{'flag': True, 'data': [task, ...]}`` where each task dict
        carries ``submit_user``, ``search_type``, ``keyword`` (JSON-decoded),
        ``sort_scope``, ``sort_norm``, ``start_time``, ``end_time``,
        ``status``, ``create_time`` (via ``ts2date``), ``search_id`` and
        ``number`` (100 when the stored field is absent or empty).
    """
    query = {
        "query": {"bool": {"must": [{"term": {"submit_user": str(user_name)}}]}},
        "size": MAX_ITEMS,
        "sort": [{"create_time": {"order": "desc"}}],
        "fields": ["status", "search_type", "keyword", "submit_user",
                   "sort_scope", "sort_norm", "start_time", "user_ts",
                   "end_time", "create_time", 'number'],
    }
    hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                     doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                     body=query)['hits']

    tasks = []
    for hit in hits['hits']:
        stored = hit['fields']
        task = {
            'submit_user': stored['submit_user'][0],
            'search_type': stored['search_type'][0],
            'keyword': json.loads(stored['keyword'][0]),
            'sort_scope': stored['sort_scope'][0],
            'sort_norm': stored['sort_norm'][0],
            'start_time': stored['start_time'][0],
            'end_time': stored['end_time'][0],
            'status': stored['status'][0],
            'create_time': ts2date(stored['create_time'][0]),
            'search_id': stored['user_ts'][0],
        }
        number = stored.get('number', 0)
        task['number'] = int(number[0]) if number else 100
        tasks.append(task)

    return {'flag': True, 'data': tasks}
def search_user_task(user_name):
    """List the keyword-rank tasks submitted by ``user_name``.

    :param user_name: value matched against ``user_rank_task.submit_user``.
    :return: ``{'flag': True, 'data': [task, ...]}`` on success, where each
        task dict holds the first value of every requested field plus
        ``search_id`` (taken from ``user_ts``).  On any exception the error
        is printed and ``{'flag': False, 'data': <exception>}`` is returned.
    """
    query = {
        "query": {"bool": {"must": [{"term": {"user_rank_task.submit_user": user_name}}],
                           "must_not": [], "should": []}},
        "from": 0,
        "size": MAX_ITEMS,
        "sort": [],
        "facets": {},
        "fields": ["status", "search_type", "keyword", "submit_user",
                   "sort_scope", "sort_norm", "start_time", "user_ts", "end_time"],
    }
    # Fields copied verbatim under their own name, in the original read order.
    copied = ('submit_user', 'search_type', 'keyword', 'sort_scope',
              'sort_norm', 'start_time', 'end_time', 'status')
    try:
        hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                         doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                         body=query)['hits']
        tasks = []
        for hit in hits['hits']:
            stored = hit['fields']
            task = dict((name, stored[name][0]) for name in copied)
            task['search_id'] = stored['user_ts'][0]
            tasks.append(task)
        return {'flag': True, 'data': tasks}
    except Exception as e1:
        print(e1)
        return {'flag': False, 'data': e1}
Esempio n. 7
0
def delOfflineTask(search_id):
    """Delete the keyword-rank task whose ``user_ts`` equals ``search_id``.

    The task is located with a term query and then removed by its ``_id``.

    :param search_id: value matched against ``user_rank_task.user_ts``.
    :return: ``True`` once the delete call has been issued; ``False`` when
        no task matches (previously this raised ``IndexError``).
    """
    query = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "user_rank_task.user_ts": search_id
                    }
                }],
                "must_not": [],
                "should": []
            }
        },
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {}
    }
    # The original line read ``result = es.searresult = es.search(...)``,
    # which also planted a stray ``searresult`` attribute on the client.
    hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                     doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                     body=query)['hits']['hits']
    if not hits:
        return False
    es.delete(index=USER_RANK_KEYWORD_TASK_INDEX,
              doc_type=USER_RANK_KEYWORD_TASK_TYPE,
              id=hits[0]['_id'])
    return True
Esempio n. 8
0
def history_info(index_name, index_type, uid, fields):
    """Return one stored field value for ``uid``, or ``None`` if unavailable.

    :param index_name: index to search.
    :param index_type: doc type to search.
    :param uid: value matched against the ``uid`` term.
    :param fields: used both as the request's ``fields`` and as the lookup
        key in the hit -- presumably one field name string; confirm with
        callers.
    :return: first element of that field on the first hit; ``None`` when the
        search timed out, found nothing, or raised any exception.
    """
    # Kept from the original: fails fast (TypeError) when ``fields`` is unsized.
    len(fields)

    term_clause = {"term": {"uid": uid}}
    request = {"query": {"bool": {"must": [term_clause]}}, "fields": fields}
    try:
        reply = es.search(index=index_name, doc_type=index_type, body=request)
        if reply['timed_out'] == False and reply['hits']['total'] != 0:
            first_hit = reply['hits']['hits'][0]
            return first_hit['fields'][fields][0]
        return None
    except Exception as e:
        # Failures are reported on stdout and collapsed into None.
        print("Exception : " + str(e))
        return None
Esempio n. 9
0
def getResult(search_id):
    """Return the stored result of a finished keyword-rank task.

    :param search_id: value matched against ``user_rank_task.user_ts``.
    :return: dict with ``keyword``, ``sort_scope``, ``sort_norm``,
        ``start_time``, ``end_time`` and ``result`` (JSON-decoded) when the
        first matching task has ``status == 1``; otherwise ``[]``.  ``[]`` is
        also returned when no task matches (previously an ``IndexError``).
    """
    query = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "user_rank_task.user_ts": search_id
                    }
                }],
                "must_not": [],
                "should": []
            }
        },
        "from": 0,
        "size": 10,
        "sort": [],
        "facets": {}
    }
    hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                     doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                     body=query)['hits']['hits']
    if not hits:
        # Unknown search_id: report "nothing available" like an unfinished task.
        return []

    source = hits[0]['_source']
    if source['status'] != 1:
        return []

    result_obj = dict(
        (name, source[name])
        for name in ('keyword', 'sort_scope', 'sort_norm', 'start_time', 'end_time')
    )
    result_obj['result'] = json.loads(source['result'])
    return result_obj
Esempio n. 10
0
def attribute_pattern_detect(input_dict):
    """Select users for a detect task by attribute and/or pattern conditions.

    With attribute conditions, the portrait index is searched first
    (attribute clauses plus one range clause per non-``count`` filter) and the
    hits are optionally narrowed by ``attribute_filter_pattern``.  Without
    attribute conditions, ``pattern_filter_attribute`` does the selection.
    Progress is reported through ``change_process_proportion`` (30, then 60).

    :param input_dict: must contain ``task_information`` (with ``task_name``)
        and ``query_condition`` (with ``filter``, ``attribute``, ``pattern``).
        It is not modified.
    :return: the selected users, truncated to ``filter['count']`` entries
        when that key is present; the string ``'task is not exist'`` when the
        task is missing; or the falsy progress mark when a progress update
        fails.
    """
    task_name = input_dict['task_information']['task_name']
    if identify_task_exist(task_name) == False:
        return 'task is not exist'

    query_condition_dict = input_dict['query_condition']
    filter_dict = query_condition_dict['filter']
    # Work on a copy: range clauses are appended below and must not leak
    # back into the caller's query_condition.
    attribute_list = list(query_condition_dict['attribute'])
    pattern_list = query_condition_dict['pattern']

    if len(attribute_list) != 0:
        # type1: attribute condition present, optionally narrowed by pattern.
        # step1: search user_portrait by attribute and filter conditions.
        count = MAX_DETECT_COUNT
        for filter_item in filter_dict:
            if filter_item == 'count':
                count = filter_dict[filter_item] * DETECT_COUNT_EXPAND
            else:
                bounds = filter_dict[filter_item]
                attribute_list.append(
                    {'range': {filter_item: {'gte': bounds['gte'], 'lt': bounds['lt']}}})
        try:
            user_portrait_result = es_user_portrait.search(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'query': {'bool': {'should': attribute_list}}, 'size': count},
                _source=False)['hits']['hits']
        except Exception:
            # Best-effort search: a failed query is treated as "no candidates".
            user_portrait_result = []

        # step1.2: report progress.
        process_mark = change_process_proportion(task_name, 30)
        if process_mark == 'task is not exist':
            return 'task is not exist'
        elif process_mark == False:
            return process_mark

        if len(pattern_list) != 0:
            # step2: narrow the candidates by the pattern condition.
            filter_user_result = attribute_filter_pattern(user_portrait_result, pattern_list)
        else:
            # step2: take the ids straight from the portrait hits.
            filter_user_result = [item['_id'] for item in user_portrait_result]
    else:
        # type2: no attribute condition, select by pattern and filter only.
        filter_user_result = pattern_filter_attribute(pattern_list, filter_dict)

    # Both branches report the same progress value before truncation.
    process_mark = change_process_proportion(task_name, 60)
    if process_mark == 'task is not exist':
        return 'task is not exist'
    elif process_mark == False:
        return process_mark

    # step3: truncate to the requested count.  ``count`` is treated as
    # optional above, so a missing key now means "no truncation" (slice to
    # None) and no longer raises KeyError.
    return filter_user_result[:filter_dict.get('count')]
def search_low_number(low_range, index_name=index_destination, index_type=index_destination_doctype):
    """Return the ids of documents whose ``low_number`` exceeds ``low_range``.

    :param low_range: exclusive lower bound for the ``low_number`` field.
    :param index_name: index to search.
    :param index_type: doc type to search.
    :return: list of ``_id`` values for up to 1000 matching documents.
    """
    range_filter = {"range": {"low_number": {"gt": low_range}}}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": range_filter,
            }
        },
        "size": 1000,
    }
    hits = es.search(index=index_name, doc_type=index_type,
                     body=query_body)["hits"]["hits"]
    return [hit['_id'] for hit in hits] if hits else []
Esempio n. 12
0
def export_random_user():
    """Dump merged portrait and profile documents for a random user sample.

    Fetches up to 50000 ids from the portrait index, shuffles them, keeps the
    first 9000, and for each id merges the portrait ``_source`` with the
    profile ``_source`` (profile keys win on conflict).  Ids whose documents
    cannot be fetched are reported and skipped.  The merged list is written
    as JSON to ``random_user.json``.  Returns nothing.
    """
    import random

    query_body = {
        'query': {
            'match_all': {}
        },
        'size': 50000
    }
    hits = es_user_portrait.search(index=portrait_index_name,
                                   doc_type=portrait_index_type,
                                   body=query_body)['hits']['hits']
    id_list = [user['_id'] for user in hits]
    random.shuffle(id_list)
    print('%s %s' % (type(id_list), len(id_list)))
    id_list = id_list[:9000]
    print(len(id_list))

    final_results = []
    for idx, uid in enumerate(id_list):
        try:
            user_bci = es_user_portrait.get(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            id=uid)['_source']
            user_profile = es_user_profile.get(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               id=uid)['_source']
        except Exception:
            # Best-effort export: skip this user but let KeyboardInterrupt
            # and SystemExit through (the old bare ``except`` swallowed them).
            print('not found %s' % (uid,))
            continue
        merged = dict(user_bci)
        merged.update(user_profile)
        final_results.append(merged)
        print('%s over!!' % (idx,))
    print('final len %s' % (len(final_results),))

    # ``with`` guarantees the handle is closed even if serialising fails.
    with open('random_user.json', 'w') as fw:
        fw.write(json.dumps(final_results))
Esempio n. 13
0
def export_date():
    """Dump merged portrait and profile documents for the top-influence users.

    Takes the 1000 portrait documents with the highest ``influence`` and, for
    each id, merges the portrait ``_source`` with the profile ``_source``
    (profile keys win on conflict).  Ids whose documents cannot be fetched
    are reported and skipped.  The merged list is written as JSON to
    ``high_influence_user.json``.  Returns nothing.
    """
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': 1000,
        'sort': {'influence': {'order': 'desc'}}
    }
    hits = es_user_portrait.search(index=portrait_index_name,
                                   doc_type=portrait_index_type,
                                   body=query_body)['hits']['hits']
    id_list = [user['_id'] for user in hits]
    print(len(id_list))

    final_results = []
    for idx, uid in enumerate(id_list):
        print('%s over!!' % (idx,))
        try:
            user_bci = es_user_portrait.get(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            id=uid)['_source']
            user_profile = es_user_profile.get(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               id=uid)['_source']
        except Exception:
            # Best-effort export: skip this user but let KeyboardInterrupt
            # and SystemExit through (the old bare ``except`` swallowed them).
            print('not found %s' % (uid,))
            continue
        merged = dict(user_bci)
        merged.update(user_profile)
        final_results.append(merged)
    print('final len %s' % (len(final_results),))

    # ``with`` guarantees the handle is closed even if serialising fails.
    with open('high_influence_user.json', 'w') as fw:
        fw.write(json.dumps(final_results))
Esempio n. 14
0
def filter_top_influence_user(index_name, domain=None, topic=None, size=1000, influence=0):
    """Return ids of the most influential users, optionally filtered.

    :param index_name: NOTE(review): accepted but not used -- the search
        always targets ``portrait_index_name``.  Left unchanged so existing
        callers behave as before; confirm whether it should be honoured.
    :param domain: optional list of domains; when non-empty, only users whose
        ``domain`` is in the list are kept.
    :param topic: optional list of topics; when non-empty, only users whose
        ``topic_string`` matches one of them are kept.
    :param size: maximum number of ids to return.
    :param influence: inclusive lower bound on the ``influence`` field.
    :return: list of ``_id`` values ordered by ``influence`` descending.
    """
    # ``None`` defaults replace the former mutable ``[]`` defaults; an empty
    # or missing list still means "no filter on this attribute".
    must_clauses = [{"range": {"influence": {"gte": influence}}}]
    if domain:
        must_clauses.append({"terms": {"domain": domain}})
    if topic:
        must_clauses.append({"terms": {"topic_string": topic}})

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": must_clauses
                    }
                }
            }
        },
        "sort": {"influence": {"order": "desc"}},
        "size": size
    }

    search_results = es_user_portrait.search(index=portrait_index_name,
                                             doc_type=portrait_index_type,
                                             body=query_body)["hits"]["hits"]
    uid_list = [item['_id'] for item in search_results]

    print(len(uid_list))
    return uid_list
Esempio n. 15
0
def history_sort(prefix, index_name, index_type, uid_list, time, ischange=False, key_search=False, number=100):
    """Return user ids ordered (descending) by a history metric.

    The sort field is derived from ``prefix``, ``time`` and ``ischange``:

    * ``time == 1``: ``<prefix>day_change`` when ``ischange``; otherwise
      ``bci_day_last`` for prefix ``"bci_"`` or ``sensitive_score_<ts>`` for
      any other prefix, where ``<ts>`` is computed from the current time
      minus ``DAY``.
    * ``time == 7``: ``<prefix>week_change`` or ``<prefix>week_ave``.
    * any other ``time``: ``<prefix>month_change``, or ``<prefix>month_ave``
      (spelled ``senstiive_month_ave`` for prefix ``"sensitive_"``, exactly
      as in the original).

    :param prefix: metric prefix, e.g. ``"bci_"`` or ``"sensitive_"``.
    :param index_name: index to search.
    :param index_type: doc type to search.
    :param uid_list: uids to restrict to; only used when ``key_search``.
    :param time: window selector (1, 7, or anything else for month).
    :param ischange: sort by the change metric instead of the level metric.
    :param key_search: restrict the search to ``uid_list`` when true.
    :param number: maximum number of ids to return.
    :return: list of UTF-8 encoded ``_id`` values in sorted order.
    """
    es = es_user_profile  # whole-network data lives on the "81" ES (per original note)
    ts = datetime2ts(ts2datetime(TIME.time()-DAY))

    if time == 1:
        if ischange:
            sort_field = prefix + "day_change"
        elif prefix == "bci_":
            sort_field = "bci_day_last"
        else:
            sort_field = "sensitive_score_%s" %ts
    elif time == 7:
        sort_field = prefix + ("week_change" if ischange else "week_ave")
    elif ischange:
        sort_field = prefix + "month_change"
    elif prefix == "sensitive_":
        sort_field = "senstiive_month_ave"
    else:
        sort_field = prefix + "month_ave"

    if key_search:
        selector = {"filtered": {"filter": {"terms": {"uid": uid_list}}}}
    else:
        selector = {"match_all": {}}
    query = {
        "query": selector,
        "sort": [{sort_field: {"order": "desc"}}],
        "size": number,
    }

    hits = es.search(index=index_name, doc_type=index_type, body=query,
                     _source=False)['hits']['hits']
    return [hit['_id'].encode("utf-8") for hit in hits]
Esempio n. 16
0
def get_influence_top():
    """Query the 100 ``user_portrait`` documents with the highest influence.

    As visible here the function only issues the search (re-raising any
    failure) and returns nothing; the snippet may be truncated.
    """
    top_query = {
        'query': {'match_all': {}},
        'sort': [{'influence': {'order': 'desc'}}],
        'size': 100,
    }
    try:
        es_result = es.search(index='user_portrait', doc_type='user',
                              body=top_query)['hits']['hits']
    except Exception as e:
        raise e
Esempio n. 17
0
def search_user_task(user_name):
    """Return the keyword-rank tasks submitted by ``user_name``.

    The task index is searched for ``submit_user == str(user_name)``, ordered
    by ``create_time`` descending and limited to ``MAX_ITEMS`` hits.

    :param user_name: submitter to filter on (stringified for the query).
    :return: ``{'flag': True, 'data': [task, ...]}``; each task dict has
        ``submit_user``, ``search_type``, ``keyword`` (JSON-decoded),
        ``sort_scope``, ``sort_norm``, ``start_time``, ``end_time``,
        ``status``, ``create_time`` (via ``ts2date``), ``search_id`` and
        ``number`` (defaults to 100 when not stored).
    """
    requested_fields = [
        "status", "search_type", "keyword", "submit_user", "sort_scope",
        "sort_norm", "start_time", "user_ts", "end_time", "create_time",
        'number'
    ]
    query = {
        "query": {"bool": {"must": [{"term": {"submit_user": str(user_name)}}]}},
        "size": MAX_ITEMS,
        "sort": [{"create_time": {"order": "desc"}}],
        "fields": requested_fields,
    }
    response_hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                              doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                              body=query)['hits']

    return_list = []
    for hit in response_hits['hits']:
        values = hit['fields']

        def first(name):
            # Stored fields come back as lists; take the leading value.
            return values[name][0]

        entry = {}
        entry['submit_user'] = first('submit_user')
        entry['search_type'] = first('search_type')
        entry['keyword'] = json.loads(first('keyword'))
        entry['sort_scope'] = first('sort_scope')
        entry['sort_norm'] = first('sort_norm')
        entry['start_time'] = first('start_time')
        entry['end_time'] = first('end_time')
        entry['status'] = first('status')
        entry['create_time'] = ts2date(first('create_time'))
        entry['search_id'] = first('user_ts')

        stored_number = values.get('number', 0)
        if stored_number:
            entry['number'] = int(stored_number[0])
        else:
            entry['number'] = 100
        return_list.append(entry)

    return {'flag': True, 'data': return_list}
def get_domain_top_user(domain_top):
    """Map each domain to its five most influential users.

    :param domain_top: iterable of sequences whose first element is a domain
        name; the remaining elements are ignored.
    :return: dict ``{domain: [[uid, uname, photo_url], ...]}`` with at most
        five entries per domain, ordered by ``influence`` descending.
        ``uname`` and ``photo_url`` are both ``'unknown'`` when either one
        is missing from the hit.
    """
    top_k = 5
    requested = ['uid', 'uname', 'photo_url']
    result = {}
    for entry in domain_top:
        domain = entry[0]
        users = []
        result[domain] = users
        query_body = {
            'query': {'filtered': {'filter': {'term': {'domain': domain}}}},
            'size': top_k,
            'sort': [{'influence': {'order': 'desc'}}],
        }
        hits = es_user_portrait.search(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body=query_body, _source=False,
                                       fields=requested)['hits']['hits']
        for hit in hits:
            try:
                uname = hit['fields']['uname'][0]
                photo_url = hit['fields']['photo_url'][0]
            except:
                # Catch-all kept from the original: any failure resets both.
                uname = 'unknown'
                photo_url = 'unknown'
            users.append([hit['_id'], uname, photo_url])
    return result
Esempio n. 19
0
def history_sort(prefix,
                 index_name,
                 index_type,
                 uid_list,
                 time,
                 ischange=False,
                 key_search=False):
    """Return user ids ordered (descending) by a history metric.

    The sort field is ``prefix`` plus ``day_change`` for ``time == 1``
    (regardless of ``ischange``), ``week_change``/``week_ave`` for
    ``time == 7``, and ``month_change``/``month_ave`` otherwise.

    :param prefix: metric prefix prepended to the suffix above.
    :param index_name: index to search.
    :param index_type: doc type to search.
    :param uid_list: uids to restrict to; only used when ``key_search``.
    :param time: window selector (1, 7, or anything else for month).
    :param ischange: choose the change metric over the average metric.
    :param key_search: restrict the search to ``uid_list`` when true.
    :return: list of UTF-8 encoded ``_id`` values, at most ``MAX_SIZE``.
    :raises Exception: on any search failure; the message holds the index
        name, doc type and the query with single quotes turned into double
        quotes.
    """
    es = es_user_profile

    if time == 1:
        suffix = "day_change"
    elif time == 7:
        suffix = "week_change" if ischange else "week_ave"
    else:
        suffix = "month_change" if ischange else "month_ave"
    sort_field = prefix + suffix

    ordering = {sort_field: {"order": "desc"}}
    if key_search:
        query = {
            "query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}},
            "sort": ordering,
            "size": MAX_SIZE,
        }
    else:
        query = {"sort": ordering, "size": MAX_SIZE}

    try:
        hits = es.search(index=index_name, doc_type=index_type,
                         body=query)['hits']['hits']
        return [hit['_id'].encode("utf-8") for hit in hits]
    except Exception as e:
        print(e)
        raise Exception(index_name + " " + index_type + " " +
                        str(query).replace("\'", "\""))
Esempio n. 20
0
def sort_task(user, keyword, status, start_time, end_time, submit_time):
    """Search a user's keyword-rank tasks with optional extra filters.

    :param user: value required for ``submit_user``.
    :param keyword: comma-separated keywords; when non-empty, tasks must
        match one of them in ``keyword_string``.
    :param status: required ``status`` value; the value ``2`` disables the
        status filter.
    :param start_time: with ``end_time``, bounds (via ``datetime2ts``) that
        both ``start_time`` and ``end_time`` of a task must fall within.
    :param end_time: see ``start_time``; both must be truthy to apply.
    :param submit_time: when truthy, required ``submit_time`` value.
    :return: list of rows ``[search_type, keyword, start, end, range,
        create_time, status, sort_norm, sort_scope, task_id]`` for up to
        10000 tasks, sorted by ``submit_time`` descending.
    """
    must_clauses = [{"term": {"submit_user": user}}]
    if keyword:
        must_clauses.append({"terms": {"keyword_string": keyword.split(',')}})
    if status != 2:
        must_clauses.append({"term": {"status": status}})
    if start_time and end_time:
        start_ts = datetime2ts(start_time)
        end_ts = datetime2ts(end_time)
        window = {"gte": start_ts, "lte": end_ts}
        must_clauses.append({"range": {"start_time": dict(window)}})
        must_clauses.append({"range": {"end_time": dict(window)}})
    if submit_time:
        must_clauses.append({"term": {"submit_time": submit_time}})

    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "size": 10000,
        "sort": {"submit_time": {"order": "desc"}},
    }
    hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                     doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                     body=query_body)["hits"]["hits"]
    if not hits:
        return []

    rows = []
    for hit in hits:
        source = hit['_source']
        rows.append([
            source['search_type'],
            json.loads(source['keyword']),
            ts2datetime(source['start_time']),
            ts2datetime(source['end_time']),
            source['range'],
            ts2date(source['create_time']),
            source['status'],
            source['sort_norm'],
            source['sort_scope'],
            hit['_id'],  # task_name
        ])
    return rows
def get_tag_history(admin_user, now_date):
    """Return ids of portrait users carrying any tag defined by ``admin_user``.

    The attribute index is searched for documents whose ``user`` equals
    ``admin_user``; every ``attribute_name``/``attribute_value`` combination
    becomes a tag ``"<name>-<value>"``.  The portrait index is then searched
    for documents whose ``"<admin_user>-tag"`` field contains any of them.

    :param admin_user: owner of the attributes and of the tag field.
    :param now_date: converted with ``datetime2ts``; only feeds the date list
        for the currently disabled date filter.
    :return: set of ``_id`` values (at most ``RECOMMEND_IN_AUTO_SIZE``); empty
        when there are no tags or when either search fails.
    """
    now_ts = datetime2ts(now_date)
    # Dates for the disabled ``date`` filter below; currently unused.
    query_date_list = [ts2datetime(now_ts - i * DAY)
                       for i in range(RECOMMEND_IN_AUTO_DATE, 0, -1)]
    attribute_query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            #{'terms': {'date': query_date_list}},
                            {'term': {'user': admin_user}}
                        ]
                    }
                }
            }
        }
    }
    try:
        # Fixed: this was ``es_tag.get(...)``.  A get call without ``id``
        # cannot succeed and the response is read as a search response, so
        # the old code always fell into the except branch and the function
        # always returned an empty set.
        attribute_result = es_tag.search(index=attribute_index_name,
                                         doc_type=attribute_index_type,
                                         body=attribute_query_body)['hits']['hits']
    except Exception:
        attribute_result = []

    tag_query_list = []
    for attribute_item in attribute_result:
        attribute_item_source = attribute_item['_source']
        attribute_name = attribute_item_source['attribute_name']
        # NOTE(review): iterated element by element -- presumably a list of
        # value strings; a plain string would be split into characters.
        attribute_values = attribute_item_source['attribute_value']
        tag_query_list.extend(attribute_name + '-' + attribute_value
                              for attribute_value in attribute_values)

    if not tag_query_list:
        # No tags to match, so skip the portrait query entirely.
        return set()

    submit_user_attribute = admin_user + '-tag'
    portrait_query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {submit_user_attribute: tag_query_list}
                }
            }
        },
        'size': RECOMMEND_IN_AUTO_SIZE
    }
    try:
        portrait_result = es_user_portrait.search(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body=portrait_query_body,
                                                  _source=False)['hits']['hits']
    except Exception:
        portrait_result = []
    return set(item['_id'] for item in portrait_result)
Esempio n. 22
0
def get_max_index(term):
    """Query the portrait document with the largest value of ``term``.

    As visible here the function only issues the search (size 1, sorted by
    ``term`` descending), re-raises any failure and returns nothing; the
    snippet may be truncated.

    :param term: field name to sort by.
    """
    descending = [{term: {'order': 'desc'}}]
    query_body = {'query': {'match_all': {}}, 'size': 1, 'sort': descending}
    try:
        iter_max_value = es_user_portrait.search(index=portrait_index_name,
                                                 doc_type=portrait_index_type,
                                                 body=query_body)['hits']['hits']
    except Exception as e:
        raise e
Esempio n. 23
0
def get_influence_vary_top():
    """Query up to 10000 ``bci`` documents of index ``vary`` by ``vary`` desc.

    As visible here the function only issues the search, re-raises any
    failure and returns nothing; the snippet may be truncated.
    """
    vary_query = {
        'query': {'match_all': {}},
        'size': 10000,
        'sort': [{'vary': {'order': 'desc'}}],
    }
    try:
        es_result = es.search(index='vary', doc_type='bci',
                              body=vary_query)['hits']['hits']
    except Exception as e:
        raise e
def create_task_list():
    """Push every unfinished, active sensing task onto a redis list.

    Steps:
      1. search the task index for ``finish == "0"`` and
         ``processing_status == "1"``;
      2. ``lpush`` one JSON array per task onto the redis list ``task_name``:
         ``[task_name, keywords, stop_time, create_by, now_ts]``.

    ``now_ts`` is the fixed base 1463241600 when ``RUN_TYPE == 0``; otherwise
    the base plus ``int(sys.argv[1])`` hours.  Start/end log lines and the
    number of pushed tasks are printed.  Returns nothing.
    """
    file_path = os.path.join(os.getcwd(), 'task_list.py')

    base_ts = 1463241600
    if RUN_TYPE == 0:
        now_ts = base_ts
    else:
        hour_offset = int(sys.argv[1])
        now_ts = base_ts + 3600 * hour_offset
    print("&".join([file_path, "start", ts2date(now_ts)]))

    active_filter = [
        {"term": {"finish": "0"}},
        {"term": {"processing_status": "1"}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": active_filter}}}}
    }
    hits = es.search(index=index_name, doc_type=task_doc_type,
                     body=query_body)['hits']['hits']

    pushed = 0
    if hits:
        for hit in hits:
            source = hit['_source']
            payload = [
                source['task_name'],
                source['keywords'],
                source['stop_time'],
                source['create_by'],
                now_ts,
            ]
            r.lpush('task_name', json.dumps(payload))
            pushed += 1

    print(pushed)
    print("&".join([file_path, "end", ts2date(time.time())]))
def get_topic_top_user(topic_top, k=5):
    """Return the most influential portrait users for each topic.

    Args:
        topic_top: iterable of sequences; only the first element of each
            (the topic name) is read.
        k: number of users requested per topic. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        dict mapping each topic to a list of ``[uid, uname, photo_url]``
        entries in the order returned by the search (sorted on
        ``influence`` descending). ``uname`` and ``photo_url`` are both
        'unknown' when either one cannot be read from the hit.
    """
    result = {}
    for item in topic_top:
        topic = item[0]
        query_body = {
            'query': {
                'wildcard': {
                    'topic_string': '*' + topic + '*'
                }
            },
            'size': k,
            'sort': [{
                'influence': {
                    'order': 'desc'
                }
            }]
        }
        profile_result = es_user_portrait.search(
            index=portrait_index_name, doc_type=portrait_index_type,
            body=query_body, _source=False,
            fields=['uid', 'uname', 'photo_url'])['hits']['hits']

        users = []
        for profile in profile_result:
            uid = profile['_id']
            try:
                fields = profile['fields']
                uname = fields['uname'][0]
                photo_url = fields['photo_url'][0]
            except (KeyError, IndexError, TypeError):
                # Missing or empty field: fall back for both values, as the
                # old bare ``except`` did, without hiding unrelated errors.
                uname = 'unknown'
                photo_url = 'unknown'
            users.append([uid, uname, photo_url])
        result[topic] = users
    return result
Esempio n. 26
0
def search_user_task(user_name):
    """List the keyword-rank tasks submitted by ``user_name``.

    Returns:
        dict with two keys:
          * 'flag' -- True on success, False when anything in the lookup
            raised;
          * 'data' -- on success a list of dicts (submit_user, search_type,
            keyword, sort_scope, sort_norm, start_time, end_time, status,
            search_id); on failure the caught exception object.
    """
    c_result = {}
    submitter_term = {"term": {"user_rank_task.submit_user": user_name}}
    query = {
        "query": {
            "bool": {
                "must": [submitter_term],
                "must_not": [],
                "should": []
            }
        },
        "from": 0,
        "size": MAX_ITEMS,
        "sort": [],
        "facets": {},
        "fields": [
            "status", "search_type", "keyword", "submit_user", "sort_scope",
            "sort_norm", "start_time", "user_ts", "end_time"
        ]
    }
    # (output key, stored field) pairs, in the order the values are read.
    column_map = (
        ('submit_user', 'submit_user'),
        ('search_type', 'search_type'),
        ('keyword', 'keyword'),
        ('sort_scope', 'sort_scope'),
        ('sort_norm', 'sort_norm'),
        ('start_time', 'start_time'),
        ('end_time', 'end_time'),
        ('status', 'status'),
        ('search_id', 'user_ts'),
    )
    try:
        rows = []
        found = es.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                          doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                          body=query)['hits']
        c_result['flag'] = True
        for hit in found['hits']:
            row = {}
            for out_key, field_name in column_map:
                # each requested field comes back as a list; keep its head
                row[out_key] = hit['fields'][field_name][0]
            rows.append(row)
        c_result['data'] = rows
        return c_result
    except Exception as e1:
        c_result['flag'] = False
        c_result['data'] = e1
        print(e1)
        return c_result
Esempio n. 27
0
def create_task_list(ts):
    """Queue every pending sensing task on the redis list 'task_name'.

    Searches ``index_name`` / ``task_doc_type`` for documents whose
    ``finish`` is "0" and ``processing_status`` is "1" and pushes one JSON
    list per hit:
    [task_name, social_sensors, keywords, sensitive_words, stop_time,
     warning_status, task_type, ts, create_by].

    Args:
        ts: value stored in every pushed task as-is.

    Prints a "start" line, the number of pushed tasks and an "end" line.
    """
    script_path = os.path.join(os.getcwd(), 'task_list.py')
    started_at = str(int(time.time()))
    print("&".join([script_path, "start", started_at]))

    pending_terms = [
        {"term": {"finish": "0"}},
        {"term": {"processing_status": "1"}}
    ]
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": pending_terms
                    }
                }
            }
        }
    }

    hits = es.search(index=index_name, doc_type=task_doc_type, body=query_body)['hits']['hits']

    pushed = 0
    for hit in (hits or []):
        source = hit['_source']
        payload = [
            source['task_name'],                     # task_name
            json.loads(source['social_sensors']),    # social sensors
            json.loads(source['keywords']),          # filter keywords
            json.loads(source['sensitive_words']),   # sensitive words
            source['stop_time'],                     # stop time
            source['warning_status'],                # last step status
            source['task_type'],                     # task type
            ts,
            source['create_by'],
        ]
        r.lpush('task_name', json.dumps(payload))
        pushed += 1

    print(pushed)
    finished_at = str(int(time.time()))
    print("&".join([script_path, "end", finished_at]))
def getResult(search_id):
    """Return the stored result of a keyword-rank task.

    Args:
        search_id: value matched against ``user_rank_task.user_ts``.

    Returns:
        A dict with keyword, sort_scope, sort_norm, start_time, end_time and
        the JSON-decoded ``result`` when the first matching task has
        ``status == 1``; otherwise an empty list. An empty list is also
        returned when no task matches at all.
    """
    query = {"query":{"bool":{"must":[{"term":{"user_rank_task.user_ts":search_id}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"facets":{}}
    hits = es.search(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, body=query)['hits']['hits']
    if not hits:
        # Unknown search_id: answer like the "not finished" case instead of
        # failing with IndexError on hits[0].
        return []

    source = hits[0]['_source']
    if source['status'] != 1:
        return []

    result_obj = {}
    for key in ('keyword', 'sort_scope', 'sort_norm', 'start_time', 'end_time'):
        result_obj[key] = source[key]
    # the ranking itself is stored as a JSON string
    result_obj['result'] = json.loads(source['result'])
    return result_obj
def modify_evaluate_index(filter_from, filter_to, evaluate_index):
    """Fetch the portrait document with the largest ``evaluate_index`` value.

    Args:
        filter_from: not used in the visible body.
        filter_to: not used in the visible body.
        evaluate_index: name of the field the search is sorted on
            (descending).

    Raises:
        Whatever the search raises; the error propagates unchanged.

    NOTE(review): only "step1" is visible and nothing is returned -- the
    definition looks truncated, confirm against the upstream source.
    """
    abnormal_filter_from = 0
    abnormal_filter_to = 0
    #step1: get evaluate_index max value
    query_body = {
        'query': {
            'match_all': {},
        },
        'size': 1,
        'sort': [{evaluate_index: {'order': 'desc'}}]
    }
    # No try/except on purpose: the former ``except Exception, e: raise e``
    # only re-raised, and in Python 2 ``raise e`` discards the original
    # traceback.
    evaluate_index_max = es_user_portrait.search(
        index=portrait_index_name, doc_type=portrait_index_type,
        body=query_body)['hits']['hits']
Esempio n. 30
0
def es_get_userlist_by_all(fieldname, uid, key_search=False):
    """Return user ids from the weibo user index, ordered by ``fieldname``.

    Args:
        fieldname: field used for the descending sort.
        uid: values for the ``uid`` terms filter; only used when
            ``key_search`` is true.
        key_search: when true restrict the search to ``uid``; otherwise
            search with an empty bool query.

    Returns:
        list of the hits' ``_id`` values, each encoded as UTF-8.

    Raises:
        Exception: 'user_list failed!' when the search or the result
            handling fails (the underlying error is printed first).
    """
    ordering = {fieldname: {"order": "desc"}}
    if key_search:
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "terms": {"uid": uid}
                    }
                }
            },
            "sort": ordering,
            "fields": ["uid"],
            "size": MAX_SIZE
        }
    else:
        print("aa")
        query = {
            "query": {
                "bool": {"must": [], "must_not": [], "should": []}
            },
            "sort": ordering,
            "facets": {},
            "fields": ["uid"],
            "size": MAX_SIZE
        }

    try:
        # dump the query with double quotes so it can be pasted as JSON
        print(str(query).replace("\'", "\""))
        client = es_user_profile
        hits = client.search(index=WEIBO_USER_INDEX_NAME,
                             doc_type=WEIBO_USER_INDEX_TYPE,
                             body=query)['hits']['hits']
        return [hit['_id'].encode("utf-8") for hit in hits]
    except Exception as e:
        print(e)
        raise Exception('user_list failed!')
def es_get_userlist_by_all(fieldname, uid, key_search=False):
    """Return user ids from the weibo user index, ordered by ``fieldname``.

    With ``key_search`` the search is limited to the ids in ``uid`` through
    a terms filter; without it an empty bool query is sent. Results are
    sorted on ``fieldname`` descending and capped at ``MAX_SIZE``.

    Returns:
        list of UTF-8 encoded ``_id`` values of the hits.

    Raises:
        Exception: 'user_list failed!' after printing the original error.
    """
    sort = {fieldname: {"order": "desc"}}
    if not key_search:
        print("aa")
        query = {
            "query": {
                "bool": {
                    "must": [],
                    "must_not": [],
                    "should": []
                }
            },
            "sort": sort,
            "facets": {},
            "fields": ["uid"],
            "size": MAX_SIZE
        }
    else:
        uid_filter = {"terms": {"uid": uid}}
        query = {
            "query": {"filtered": {"filter": uid_filter}},
            "sort": sort,
            "fields": ["uid"],
            "size": MAX_SIZE
        }

    try:
        printable_query = str(query).replace("\'", "\"")
        print(printable_query)
        profile_es = es_user_profile
        response = profile_es.search(index=WEIBO_USER_INDEX_NAME,
                                     doc_type=WEIBO_USER_INDEX_TYPE,
                                     body=query)
        found_ids = []
        for hit in response['hits']['hits']:
            found_ids.append(hit['_id'].encode("utf-8"))
        return found_ids
    except Exception as e:
        print(e)
        raise Exception('user_list failed!')
Esempio n. 32
0
def scan_offline_task():
    """Hand every keyword-rank task with ``status == 0`` to the redis queue.

    For each matching task (at most 1000 per call) a JSON list
    [task_id, search_type, pre, during, start_time, keyword, user_ts,
     sort_norm, sort_scope, time, isall, number]
    is pushed onto 'task_user_rank', then the task document is re-indexed
    with ``status`` set to -1.
    """
    pending_query = {
        "query": {
            "bool": {
                "must": [{"term": {"status": 0}}]
            }
        },
        "size": 1000
    }
    hits = es_user_portrait.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   body=pending_query)['hits']['hits']
    print("%s %s" % (USER_RANK_KEYWORD_TASK_INDEX, USER_RANK_KEYWORD_TASK_TYPE))

    for hit in (hits or []):
        task_id = hit['_id']
        task_doc = hit['_source']

        # Fields are read in the same order as before; ``keyword`` is stored
        # as a JSON string and decoded on the way.
        search_type, pre, during, start_time = [
            task_doc[name]
            for name in ('search_type', 'pre', 'during', 'start_time')
        ]
        keyword = json.loads(task_doc['keyword'])
        search_key, number, sort_norm, sort_scope, period, isall = [
            task_doc[name]
            for name in ('user_ts', 'number', 'sort_norm', 'sort_scope',
                         'time', 'isall')
        ]

        print(redis_task)
        payload = [
            task_id, search_type, pre, during, start_time, keyword,
            search_key, sort_norm, sort_scope, period, isall, number
        ]
        redis_task.lpush("task_user_rank", json.dumps(payload))

        # write the task back with status -1
        task_doc['status'] = -1
        es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX,
                               doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                               id=task_id,
                               body=task_doc)
def scan_offlice_task():
    """Run ``key_words_search`` for the tasks whose status term is "0".

    Fetches up to 10 matching task documents through ``es_9200`` and calls
    ``key_words_search`` once per hit with the task's stored parameters,
    passed positionally.
    """
    query = {"query":{"bool":{"must":[{"term":{"user_rank_task.status":"0"}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"facets":{}}
    found = es_9200.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                           doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                           body=query)['hits']
    if not (found['total'] > 0):
        return

    # Stored fields in the positional order key_words_search receives them.
    argument_fields = ('search_type', 'pre', 'during', 'start_time',
                       'keyword', 'user_ts', 'sort_norm', 'sort_scope',
                       'time', 'isall')
    for hit in found['hits']:
        arguments = [hit['_source'][name] for name in argument_fields]
        key_words_search(*arguments)
def get_single_user_portrait(seed_user_dict):
    """Fetch the portrait document of one seed user.

    Args:
        seed_user_dict: dict holding either a 'uid' key (used as document
            id) or, when 'uid' is absent, a 'uname' key (used in a term
            query).

    Returns:
        The ``_source`` dict of the matching portrait document, or ``{}``
        when the lookup fails or nothing matches.

    Raises:
        KeyError: if neither 'uid' nor 'uname' is present.
    """
    if 'uid' in seed_user_dict:
        uid = seed_user_dict['uid']
        try:
            return es_user_portrait.get(index=portrait_index_name,
                                        doc_type=portrait_index_type,
                                        id=uid)['_source']
        except Exception:
            # best effort: a missing document simply means "no portrait"
            return {}

    uname = seed_user_dict['uname']
    query = {'term': {'uname': uname}}
    try:
        # A search response carries its documents under hits.hits; the old
        # code read a non-existent top-level '_source' and misspelled
        # ``query``, so this branch always fell through to {}.
        hits = es_user_portrait.search(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body={'query': {'bool': {'must': query}}})['hits']['hits']
        if not hits:
            return {}
        return hits[0]['_source']
    except Exception:
        return {}
Esempio n. 35
0
def history_sort(prefix, index_name, index_type, uid_list, time, ischange=False, key_search=False):
    """Return document ids from a history index, sorted on a derived field.

    The sort field is ``prefix`` plus:
      * "day_change" when ``time == 1``;
      * "week_change" / "week_ave" when ``time == 7``;
      * "month_change" / "month_ave" for every other ``time``;
    where the "_change" variant is chosen when ``ischange`` is true.

    Args:
        prefix: leading part of the sort field name.
        index_name, index_type: where to search.
        uid_list: ids for the ``uid`` terms filter (only used with
            ``key_search``).
        time: period selector, see above.
        ischange: pick the "_change" field instead of "_ave".
        key_search: restrict the search to ``uid_list``.

    Returns:
        list of UTF-8 encoded ``_id`` values, in sort order.

    Raises:
        Exception: carrying index name, type and the query text when the
            search fails (the underlying error is printed first).
    """
    if time == 1:
        suffix = "day_change"
    else:
        period = "week" if time == 7 else "month"
        suffix = period + ("_change" if ischange else "_ave")
    sort_field = prefix + suffix

    if key_search:
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "terms": {"uid": uid_list}
                    }
                }
            },
            "sort": [{sort_field: {"order": "desc"}}],
            "fields": [],
            "size": MAX_SIZE
        }
    else:
        query = {
            "sort": [{sort_field: {"order": "desc"}}],
            "fields": [],
            "size": MAX_SIZE
        }

    try:
        hits = es.search(index=index_name, doc_type=index_type, body=query)['hits']['hits']
        return [hit['_id'].encode("utf-8") for hit in hits]
    except Exception as e:
        print(e)
        raise Exception(index_name + " " + index_type + " " + str(query).replace("\'", "\""))
def find_domain():
    """Search 'user_portrait_1222' / 'user' for users in three fixed domains.

    Reads the ``uid`` of the first hit.

    NOTE(review): the uid is neither used nor returned in the visible body;
    the definition looks truncated -- confirm against the upstream source.
    """
    domain_filter = {"terms": {"domain": ['媒体', '高校', '法律']}}
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [domain_filter]
                    }
                }
            }
        }
    }
    hits = es.search(index='user_portrait_1222', doc_type='user', body=query_body)['hits']['hits']
    first_source = hits[0]['_source']
    uid = first_source['uid']
Esempio n. 37
0
def get_evaluate_max():
    """Return the largest activeness, importance and influence values.

    For each of the three fields the 'user_portrait' / 'user' index is
    searched with a descending sort on that field and ``size`` 1; the value
    of the single hit is taken as the maximum.

    Returns:
        dict mapping 'activeness', 'importance' and 'influence' to the
        value found in the top document for that field.

    Raises:
        IndexError: if a search returns no hits.
        Any error raised by ``es.search`` propagates unchanged.
    """
    max_result = {}
    index_name = 'user_portrait'
    index_type = 'user'
    for evaluate in ('activeness', 'importance', 'influence'):
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        # No try/except on purpose: the former ``except Exception, e:
        # raise e`` only re-raised and dropped the original traceback.
        result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        max_result[evaluate] = result[0]['_source'][evaluate]
    # The collected maxima used to be built and then dropped.
    return max_result
Esempio n. 38
0
def search_get_portrait():
    """Dump the uids of portraits matching a fixed keyword to a text file.

    Runs a wildcard query on ``keywords_string`` (at most 1000 hits) and
    writes one uid per line to the hard-coded output path, replacing any
    previous content. Search errors propagate to the caller.
    """
    query_body = {
        'query': {
            'wildcard': {'keywords_string': '*' + '文革' + '*'}
        },
        'size': 1000
    }
    result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type,
                                     body=query_body)['hits']['hits']
    # ``with`` closes the file even if a hit lacks 'uid' or a write fails.
    with open('/home/user_portrait_0320/revised_user_portrait/user_portrait/user_portrait/attribute/uid_list_0520.txt', 'w') as f:
        for item in result:
            source = item['_source']
            uid = source['uid']
            f.write("%s\n" % uid)
def key_words_search( search_type , pre , during , start_time , keyword , search_key = '' , sort_norm = '', sort_scope = ''  ,time = 1 , isall = False):
    """Mark a keyword-rank task and collect the uids of matching texts.

    Visible behaviour:
      1. look up the task whose ``user_rank_task.user_ts`` equals
         ``search_key`` and re-index it with ``status`` -1 and a ``result``
         field;
      2. split ``keyword`` on "," and build one prefix clause on
         ``text.text`` per keyword (wrapped in '#' for "hashtag" searches);
      3. starting at ``pre + start_time``, step forward one DAY at a time
         until an existing index is found, decrementing ``during`` per step;
      4. search ``during`` consecutive daily indices and add every hit's
         uid to a set.

    NOTE(review): ``results`` is not assigned anywhere in this function, so
    step 1 depends on a name defined elsewhere (or fails with NameError) --
    confirm. ``sort_norm``, ``sort_scope``, ``time`` and ``isall`` are not
    used in the visible body and ``uid_set`` is never returned; the
    definition looks truncated.

    Raises:
        IndexError: if no task matches ``search_key``.
        Exception: 'user_list failed!' when a daily search fails.
    """
    query = {"query":{"bool":{"must":[{"term":{"user_rank_task.user_ts":search_key}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"facets":{}}
    result = es_9200.search(index = USER_RANK_KEYWORD_TASK_INDEX , doc_type = USER_RANK_KEYWORD_TASK_TYPE , body = query)['hits']['hits']
    search_id = result[0]['_id']
    item = result[0]['_source']
    item['status'] = -1 # task
    # NOTE(review): `results` is undefined in this scope -- verify.
    item['result'] = json.dumps(results)
    es_9200.index(index = USER_RANK_KEYWORD_TASK_INDEX , doc_type=USER_RANK_KEYWORD_TASK_TYPE , id=search_id,  body=item)

    # one prefix clause per comma-separated keyword
    keywords = keyword.split(",")
    should = []
    for key in keywords:
        if search_type == "hashtag":
            should.append({"prefix":{"text.text": "#" +  key + "#"}})
        else:    
            should.append({"prefix":{"text.text":key}})    
    date = start_time 
    index_name = pre + start_time
    # Skip forward day by day until an index exists; every skipped day also
    # shortens the search window.
    # NOTE(review): there is no other exit, so this presumably spins forever
    # when no later index exists -- confirm.
    while not es_9206.indices.exists(index= index_name) :
        new_time = datetime2ts(date) + DAY
        date = ts2datetime(new_time)
        index_name = pre + date
        during -= 1

    
    uid_set = set()
    for i in range(during):
        print index_name
        query = {"query":{"bool":{"must":[],"must_not":[],"should":should}},"size":MAX_ITEMS,"sort":[],"facets":{},"fields":['uid']}
        try :
            temp = es_9206.search(index = index_name , doc_type = 'text' , body = query)
            result = temp['hits']['hits']
            print "Fetch " + str(len(result))
            for item in result :
                uid_set.add(item['fields']['uid'][0].encode("utf-8") )
        except Exception,e:
            print e
            raise  Exception('user_list failed!')        
        # move on to the next day's index
        new_time = datetime2ts(date) + DAY
        date = ts2datetime(new_time)
        index_name = pre + date
        # has no effect on the iteration: `for` rebinds i on every pass
        i += 1
def search_low_number(low_range, index_name=copy_user_portrait, index_type="bci"):
    """Return the ids of documents whose ``low_number`` exceeds ``low_range``.

    Args:
        low_range: exclusive lower bound for ``low_number``.
        index_name: index to search.
        index_type: document type to search.

    Returns:
        list of ``_id`` values of the hits (at most 1000); empty when
        nothing matches.
    """
    range_filter = {"range": {"low_number": {"gt": low_range}}}
    query_body = {
        "query": {"filtered": {"filter": range_filter}},
        "size": 1000
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)["hits"]["hits"]
    return [hit['_id'] for hit in (hits or [])]
Esempio n. 41
0
def scan_offline_task():
    """Hand every keyword-rank task with ``status == 0`` to the redis queue.

    For each matching task (at most 1000 per call) a JSON list
    [task_id, search_type, pre, during, start_time, keyword, user_ts,
     sort_norm, sort_scope, time, isall, number]
    is pushed onto 'task_user_rank', then the task document is re-indexed
    with ``status`` set to -1.
    """
    query = {"query":{"bool":{"must":[{"term":{"status":0}}]}},"size":1000}
    hits = es_user_portrait.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   body=query)['hits']['hits']

    # Stored fields in the order they are read from each task document.
    read_order = ('search_type', 'pre', 'during', 'start_time', 'keyword',
                  'user_ts', 'number', 'sort_norm', 'sort_scope', 'time',
                  'isall')
    for hit in (hits or []):
        task_id = hit['_id']
        task_doc = hit['_source']

        values = {}
        for name in read_order:
            raw = task_doc[name]
            # ``keyword`` is stored as a JSON string
            values[name] = json.loads(raw) if name == 'keyword' else raw

        payload = [
            task_id, values['search_type'], values['pre'], values['during'],
            values['start_time'], values['keyword'], values['user_ts'],
            values['sort_norm'], values['sort_scope'], values['time'],
            values['isall'], values['number']
        ]
        redis_task.lpush("task_user_rank", json.dumps(payload))

        # write the task back with status -1
        task_doc['status'] = -1
        es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX,
                               doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                               id=task_id, body=task_doc)
def es_search(pre, scope, arg, index_name, type_name, time, ischange=False, uid_list=[], key_search=False):
    """Return document ids from ``index_name``, sorted on a derived field.

    The sort field is ``pre`` plus 'day_change' (``time == 1``),
    'week_change' / 'week_ave' (``time == 7``) or 'month_change' /
    'month_ave' (``time == 30``); for any other ``time`` no sort is sent.
    The '_change' variant is chosen when ``ischange`` is true.

    Args:
        pre: leading part of the sort field name.
        scope, arg: when ``arg`` is truthy, a prefix clause ``{scope: arg}``
            is added to the bool query (ignored with ``key_search``).
        index_name, type_name: where to search.
        time: period selector, see above.
        ischange: pick the '_change' field instead of '_ave'.
        uid_list: ids for the ``uid`` terms filter (only with
            ``key_search``); the default list is never modified here.
        key_search: restrict the search to ``uid_list``.

    Returns:
        list of UTF-8 encoded ``_id`` values, in result order.

    Raises:
        Exception: carrying index name, type and the query text when the
            search fails (the underlying error is printed first).
    """
    today = TIME.time()  # not used below

    print(pre)
    print(time)
    if time == 1:
        sort_field = pre + 'day_' + 'change'
    elif time == 7:
        sort_field = pre + 'week_' + ('change' if ischange else 'ave')
    elif time == 30:
        sort_field = pre + 'month_' + ('change' if ischange else 'ave')
    else:
        sort_field = ''
    print(sort_field)

    must = [{"prefix": {scope: arg}}] if arg else []
    sort = [{sort_field: {"order": "desc"}}] if sort_field else []
    print(sort)

    if key_search:
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "terms": {"uid": uid_list}
                    }
                }
            },
            "sort": sort,
            "fields": [],
            "size": MAX_SIZE
        }
    else:
        query = {
            "query": {
                "bool": {
                    "must": must,
                    "must_not": [],
                    "should": []
                }
            },
            "sort": sort,
            "facets": {},
            "fields": ["uid"],
            "size": MAX_SIZE
        }

    try:
        print(index_name)
        print(type_name)
        print(str(query).replace("\'", "\""))
        hits = es.search(index=index_name, doc_type=type_name, body=query)['hits']['hits']
        return [hit['_id'].encode("utf-8") for hit in hits]
    except Exception as e:
        print(e)
        raise Exception(index_name + " " + type_name + " " + str(query).replace("\'", "\""))