コード例 #1
0
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    """Return the top hits of an index sorted by ``sort_index``, with user info.

    Runs a ``match_all`` search sorted descending on ``sort_index`` and then
    looks the hit ids up, via ``mget``, in the "weibo_user" index (profile
    data) and in the "user_portrait" index (membership flag).

    Args:
        index_name: index passed to the search.
        doctype: document type passed to the search.
        top_k: value used as the search ``size``.
        sort_index: field name used in the descending sort clause.

    Returns:
        A list with one row per hit, in hit order:
        ``[rank, photo_url, uid, nick_name, vary, in_portrait]``.
        ``rank`` starts at 1, ``photo_url`` and ``nick_name`` are ``''`` when
        the profile document was not found, and ``in_portrait`` is ``'1'`` or
        ``'0'``. An empty list is returned when the search yields no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_index: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    if not hits:
        # A multi-get request without any ids is rejected by Elasticsearch,
        # so there is nothing to look up and nothing to return.
        return []

    uid_list = [hit['_id'] for hit in hits]
    portrait_docs = es_portrait.mget(index="user_portrait", doc_type="user",
                                     body={"ids": uid_list}, _source=True)['docs']
    profile_docs = es_profile.mget(index="weibo_user", doc_type="user",
                                   body={"ids": uid_list}, _source=True)['docs']

    return_list = []
    # mget answers in request order, so the three sequences line up by position.
    for rank, (hit, profile, portrait) in enumerate(zip(hits, profile_docs, portrait_docs), 1):
        photo_url = ''
        nick_name = ''
        if profile.get('found'):
            profile_source = profile['_source']
            photo_url = profile_source.get('photo_url', '')
            nick_name = profile_source.get('nick_name', '')
        # NOTE(review): the value is always read from 'vary', even when
        # sort_index names another field -- confirm this is intended.
        vary = hit['_source']['vary']
        in_portrait = '1' if portrait.get('found') else '0'
        return_list.append([rank, photo_url, hit.get('_id', ''), nick_name, vary, in_portrait])

    return return_list
コード例 #2
0
def ajax_get_group_detail():
    """Return, as a JSON string, the portrait rows of every user in a group.

    The group document id is ``<user>-<task_name>``, both read from the
    request query string. For each group member found in the portrait index
    one row is produced with the ``SOCIAL_SENSOR_INFO`` fields, in order:

    * ``topic_string`` is split on ``'&'`` and emitted twice (two list cells);
    * ``activeness``, ``importance`` and ``influence`` are scaled with
      ``log10(value / top * 9 + 1) * 100`` where ``top`` comes from
      ``get_top_influence`` (presumably the index-wide maximum -- confirm);
    * every other field is emitted as stored.

    Returns:
        ``json.dumps`` of the list of rows; ``"[]"`` when the group document
        has no source, no members, or no member is found.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []

    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    top_values = {
        "activeness": top_activeness,
        "influence": top_influence,
        "importance": top_importance,
    }

    search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {})
    if not search_result:
        return json.dumps(portrait_detail)

    raw_uid_list = search_result['uid_list']
    try:
        # The list may be stored JSON-encoded ...
        uid_list = json.loads(raw_uid_list)
    except (TypeError, ValueError):
        # ... or already as a plain list, which json.loads cannot parse.
        uid_list = raw_uid_list
    if not uid_list:
        return json.dumps(portrait_detail)

    search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                             body={"ids": uid_list}, fields=SOCIAL_SENSOR_INFO)['docs']
    for doc in search_results:
        if not doc['found']:
            continue
        doc_fields = doc["fields"]
        row = []
        for field_name in SOCIAL_SENSOR_INFO:
            value = doc_fields[field_name][0]
            if field_name == "topic_string":
                topics = value.split('&')
                # Emitted twice on purpose: the original row layout carries
                # the topic list in two consecutive cells.
                row.append(topics)
                row.append(topics)
            elif field_name in top_values:
                row.append(math.log(value / float(top_values[field_name]) * 9 + 1, 10) * 100)
            else:
                row.append(value)
        portrait_detail.append(row)

    return json.dumps(portrait_detail)
コード例 #3
0
ファイル: utils.py プロジェクト: ystone1025/info_consume
def search_history_delete(date):
    """Return, as a JSON string, the portrait rows of users deleted on ``date``.

    The uid list is read from the Redis hash ``decide_delete_list`` under the
    key ``date`` (a JSON-encoded list). Each user found in the portrait index
    yields one row ``[uid, uname, domain, topic, influence, importance,
    activeness]`` where ``topic`` is ``topic_string`` with ``'&'`` replaced by
    ``','`` and the three scores are ``ceil(value / top * 100)`` with ``top``
    taken from ``get_top_influence`` (presumably the index maximum -- confirm).

    Args:
        date: hash key identifying the day to look up.

    Returns:
        ``json.dumps`` of the list of rows; ``"[]"`` when nothing is stored
        for ``date``.
    """
    fields = ['uid', 'uname', 'domain', 'topic_string', 'influence', 'importance', 'activeness']

    # Read the hash once: a second hget costs a round trip and may return
    # None if the entry disappears in between.
    raw_uid_list = r_out.hget("decide_delete_list", date)
    if not raw_uid_list:
        return json.dumps([])
    history_uid_list = json.loads(raw_uid_list)
    if not history_uid_list:
        return json.dumps([])

    # Only fetched once there is something to normalise.
    top_values = {
        "influence": get_top_influence("influence"),
        "activeness": get_top_influence("activeness"),
        "importance": get_top_influence("importance"),
    }
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": history_uid_list}, _source=True)['docs']

    return_list = []
    for doc in detail:
        if not doc.get('found'):
            # No '_source' to render for a user missing from the index.
            continue
        source = doc['_source']
        detail_info = []
        for item in fields:
            value = source[item]
            if item == "topic_string":
                detail_info.append(','.join(value.split("&")))
            elif item in top_values:
                detail_info.append(math.ceil(value / float(top_values[item]) * 100))
            else:
                detail_info.append(value)
        return_list.append(detail_info)

    return json.dumps(return_list)
コード例 #4
0
def compare_user_portrait(uid_list):
    """Fetch the portrait attributes of several users for comparison.

    Documents are read with ``mget`` from index ``user_portrait_1222``, type
    ``user``.

    Args:
        uid_list: ids passed to ``mget``.

    Returns:
        A dict keyed by uid. Users without a ``_source`` map to ``{}``. All
        others map to a dict with the stored scalar attributes, the decoded
        ``topic`` and ``keywords``, and four best-effort fields that fall back
        to an empty value when missing or undecodable: ``psycho_status``,
        ``psycho_feature``, ``hashtag_dict`` (``{}``) and ``activity_geo``
        (``[]``; otherwise the last tab-separated segment of the two
        highest-valued keys of ``activity_geo_dict``).

    Raises:
        KeyError / ValueError / TypeError: when a mandatory attribute
        (e.g. ``uname``, ``topic``, ``keywords``) is missing or not valid JSON.
    """
    user_portrait_result = {}
    index_name = 'user_portrait_1222'
    index_type = 'user'
    user_result = es.mget(index=index_name,
                          doc_type=index_type,
                          body={'ids': uid_list})['docs']
    for doc in user_result:
        uid = doc['_id']
        user_portrait_result[uid] = {}
        source = doc.get('_source')
        if source is None:
            # User not in the index: keep the empty placeholder and move on
            # instead of reusing data left over from a previous iteration.
            continue

        psycho_status = _load_json_field(source, 'psycho_status', {})
        psycho_feature = _load_json_field(source, 'psycho_feature', {})
        hashtag_dict = _load_json_field(source, 'hashtag_dict', {})

        try:
            activity_geo_dict = json.loads(source['activity_geo_dict'])
            top_geo = sorted(activity_geo_dict.items(),
                             key=lambda pair: pair[1],
                             reverse=True)[:2]
            # Keys are tab-separated paths; only the last segment is kept.
            activity_list = [geo.split('\t')[-1] for geo, _value in top_geo]
        except (KeyError, TypeError, ValueError, AttributeError):
            activity_list = []

        user_portrait_result[uid] = {
            'uname': source['uname'],
            'gender': source['gender'],
            'location': source['location'],
            'importance': source['importance'],
            'activeness': source['activeness'],
            'influence': source['influence'],
            'fansnum': source['fansnum'],
            'statusnum': source['statusnum'],
            'friendsnum': source['friendsnum'],
            'domain': source['domain'],
            'topic': json.loads(source['topic']),
            'keywords': json.loads(source['keywords']),
            'psycho_status': psycho_status,
            'psycho_feature': psycho_feature,
            'activity_geo': activity_list,
            'hashtag_dict': hashtag_dict,
        }

    return user_portrait_result


def _load_json_field(source, key, default):
    """Decode the JSON string stored under ``key``; return ``default`` on failure."""
    try:
        return json.loads(source[key])
    except (KeyError, TypeError, ValueError):
        return default
コード例 #5
0
def show_out_uid(fields):
    """List the portrait rows of users recommended for removal.

    Uids are collected from every value of the Redis hash
    ``recommend_delete_list`` (each a JSON-encoded list), de-duplicated and
    looked up in the portrait index. Users whose ``uid`` is contained in
    ``all_delete_uid()`` and users missing from the index are left out.

    Args:
        fields: names of the columns to emit per user. ``"topic"`` is rendered
            from ``topic_string`` with ``'&'`` replaced by ``','``;
            ``"influence"``, ``"importance"`` and ``"activeness"`` are emitted
            as ``ceil(value / top * 100)`` with ``top`` from
            ``get_top_influence`` (presumably the index maximum -- confirm);
            any other name is read from the document source as is.

    Returns:
        A list of rows (one list per user); an empty list when nobody is
        recommended.
    """
    out_list = []
    # hgetall already carries the values; re-reading each key with hget would
    # cost one round trip per key and can return None on a concurrent delete.
    for raw_uids in r_out.hgetall("recommend_delete_list").values():
        out_list.extend(json.loads(raw_uids))
    if not out_list:
        return out_list  # no one is recommended to out

    top_values = {
        "influence": get_top_influence("influence"),
        "activeness": get_top_influence("activeness"),
        "importance": get_top_influence("importance"),
    }
    out_list = list(set(out_list))
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        if not doc.get('found'):
            # Nothing to render for a uid that is not in the index.
            continue
        source = doc['_source']
        if source['uid'] in filter_uid:
            continue
        detail_info = []
        for item in fields:
            if item == "topic":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in top_values:
                detail_info.append(math.ceil(source[item] / float(top_values[item]) * 100))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list
コード例 #6
0
ファイル: utils.py プロジェクト: pengyuange/user_portrait
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """Return one day of a group's weibo posts filtered by sentiment.

    Args:
        task_name: id of the group document holding ``uid_list``.
        start_ts: start of the window; posts with
            ``start_ts <= timestamp < start_ts + DAY`` are searched in the
            index named ``flow_text_index_name_pre + ts2datetime(start_ts)``.
        sentiment: sentiment value to match; the value ``'2'`` matches any of
            ``SENTIMENT_SECOND`` instead.

    Returns:
        ``'task name invalid'`` when the group lookup fails or is empty,
        ``'task uid list null'`` when the group has no uids, otherwise a list
        of dicts with keys ``uid``, ``uname``, ``ip``, ``geo``, ``text``,
        ``timestamp`` and ``sentiment`` ordered by ascending timestamp.
        ``uname`` falls back to the uid when the user has no portrait entry;
        ``geo`` has ``'&'`` replaced by tabs and is ``''`` when unavailable.
    """
    # step1: get the uid list of the task
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: build the uid -> uname mapping (best effort)
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list},
                                                     _source=False, fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    for item in user_portrait_result:
        if item.get('found'):
            uid2uname[item['_id']] = item['fields']['uname'][0]

    # step3: build the query for the day starting at start_ts
    flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(start_ts))
    if sentiment != '2':
        sentiment_clause = {'term': {'sentiment': sentiment}}
    else:
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    query_body = [
        {'terms': {'uid': uid_list}},
        sentiment_clause,
        {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}},
    ]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query_body}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_result = []

    # step4: shape the hits
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        uid = source['uid']
        try:
            geo = '\t'.join(source['geo'].split('&'))
        except (KeyError, AttributeError, TypeError):
            geo = ''
        weibo_list.append({
            'uid': uid,
            # The mapping is incomplete when the portrait lookup failed or the
            # user is not in the portrait index; show the uid instead of
            # raising KeyError.
            'uname': uid2uname.get(uid, uid),
            'ip': source['ip'],
            'geo': geo,
            'text': source['text'],
            'timestamp': source['timestamp'],
            'sentiment': source['sentiment'],
        })

    return weibo_list
コード例 #7
0
def get_vary_detail_info(vary_detail_dict, uid_list):
    """Add user names and readable dates to the per-pattern user records.

    Args:
        vary_detail_dict: maps a pattern name to a list of records whose first
            three items are ``uid``, start timestamp and end timestamp.
        uid_list: ids looked up in the portrait index to resolve user names.

    Returns:
        A dict with the same keys; each value is a list of
        ``[uid, uname, start_date, end_date]`` where the dates are
        ``ts2datetime(int(timestamp))`` and ``uname`` falls back to the uid
        when no name could be resolved.
    """
    results = {}
    # get uname (best effort: an unavailable index only costs the names)
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        if portrait_item.get('found'):
            uname_dict[portrait_item['_id']] = portrait_item['_source']['uname']

    # get new vary detail information
    for vary_pattern, user_info_list in vary_detail_dict.items():
        new_pattern_list = []
        for user_item in user_info_list:
            uid = user_item[0]
            # .get covers users that were not found, uids absent from
            # uid_list, and the case where the lookup above failed entirely.
            uname = uname_dict.get(uid, uid)
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list

    return results
コード例 #8
0
ファイル: utils.py プロジェクト: taozhiiq/user_portrait
def search_history_delete(date):
    """Return, as a JSON string, the portrait rows of users deleted on ``date``.

    The uid list is read from the Redis hash ``decide_delete_list`` (a
    JSON-encoded list per day). Each user found in index ``user_portrait``
    yields one row ``[uid, uname, domain, topic, influence, importance,
    activeness]`` where ``topic`` is ``topic_string`` with ``'&'`` replaced by
    ``','``.

    Args:
        date: hash key of the day to look up; when falsy, today's local date
            formatted as ``%Y%m%d`` is used.

    Returns:
        ``json.dumps`` of the list of rows; ``"[]"`` when nothing is stored
        for that day.
    """
    now_date = date if date else time.strftime('%Y%m%d', time.localtime())
    fields = ['uid', 'uname', 'domain', 'topic_string', 'influence', 'importance', 'activeness']

    # Read the hash once: a second hget costs a round trip and may return
    # None if the entry disappears in between.
    raw_uid_list = r_out.hget("decide_delete_list", now_date)
    if not raw_uid_list:
        return json.dumps([])
    history_uid_list = json.loads(raw_uid_list)
    if not history_uid_list:
        return json.dumps([])

    detail = es.mget(index="user_portrait", doc_type="user",
                     body={"ids": history_uid_list}, _source=True)['docs']
    return_list = []
    for doc in detail:
        if not doc.get('found'):
            # No '_source' to render for a user missing from the index.
            continue
        source = doc['_source']
        detail_info = []
        for item in fields:
            if item == "topic_string":
                detail_info.append(','.join(source[item].split("&")))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return json.dumps(return_list)
コード例 #9
0
def get_group_list(task_name, submit_user):
    """List the members of a group with normalised importance and influence.

    The group document id is ``<submit_user>-<task_name>``; its ``uid_list``
    is looked up in the portrait index.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task.

    Returns:
        A list of ``[uid, uname, gender, location, importance, influence]``
        rows. The two scores are ``log10(value / max * 9 + 1) * 100`` with
        ``max`` taken from ``get_evaluate_max()``. A user whose data is
        missing or cannot be normalised yields ``[uid, '', '', '', '', '']``.
        An empty list is returned when the group document cannot be read.
    """
    results = []
    task_id = submit_user + '-' + task_name
    try:
        es_results = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                         id=task_id)['_source']
    except Exception:
        return results
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(index=portrait_index_name,
                                                    doc_type=portrait_index_type,
                                                    body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()

    def normalise(source, metric):
        # float() keeps Python 2 from truncating an int / int ratio to 0.
        ratio = float(source[metric]) / evaluate_max[metric]
        return math.log(ratio * 9 + 1, 10) * 100

    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            row = [uid, source['uname'], source['gender'], source['location'],
                   normalise(source, 'importance'), normalise(source, 'influence')]
        except (KeyError, TypeError, ValueError, ZeroDivisionError):
            # Missing document/field, null value, or a ratio log() rejects.
            row = [uid, '', '', '', '', '']
        results.append(row)
    return results
コード例 #10
0
def get_group_member_name(task_name, submit_user):
    """Return the id and display name of every member of a group.

    The group document id is ``<submit_user>-<task_name>``; its ``uid_list``
    is looked up in the portrait index. The sizes of the uid list and of the
    lookup result are printed to stdout.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task.

    Returns:
        A list of ``{'ID': uid, 'name': uname}`` dicts, ``name`` being
        ``'unknown'`` for users not found in the portrait index. An empty list
        is returned when either lookup fails.
    """
    results = []
    task_id = submit_user + '-' + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_id)['_source']
    except Exception:
        return results
    uid_list = group_result['uid_list']
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3.
    print(len(uid_list))
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list})['docs']
    except Exception:
        return results
    print(len(user_portrait_result))
    for item in user_portrait_result:
        if item.get('found'):
            uname = item['_source']['uname']
        else:
            uname = 'unknown'
        results.append({'ID': item['_id'], 'name': uname})

    return results
コード例 #11
0
ファイル: utils.py プロジェクト: taozhiiq/user_portrait
def show_out_uid(fields):
    """List the portrait rows of users recommended for removal.

    Uids are collected from every value of the Redis hash
    ``recommend_delete_list`` (each a JSON-encoded list) and looked up in
    index ``user_portrait``. Users whose ``uid`` is contained in
    ``all_delete_uid()`` and users missing from the index are left out.

    Args:
        fields: names of the columns to emit per user. ``"topic"`` is rendered
            from ``topic_string`` with ``'&'`` replaced by ``','``; any other
            name is read from the document source as is.

    Returns:
        A list of rows (one list per user); an empty list when nobody is
        recommended.
    """
    out_list = []
    # hgetall already carries the values; re-reading each key with hget would
    # cost one round trip per key and can return None on a concurrent delete.
    for raw_uids in r_out.hgetall("recommend_delete_list").values():
        out_list.extend(json.loads(raw_uids))
    if not out_list:
        return out_list  # no one is recommended to out

    detail = es.mget(index="user_portrait", doc_type="user",
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        if not doc.get('found'):
            # Nothing to render for a uid that is not in the index.
            continue
        source = doc['_source']
        if source['uid'] in filter_uid:
            continue
        detail_info = []
        for item in fields:
            if item == "topic":
                detail_info.append(','.join(source['topic_string'].split("&")))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list
コード例 #12
0
def search_history_delete(date):
    """Return, as a JSON string, the portrait rows of users deleted on ``date``.

    The uid list is read from the Redis hash ``decide_delete_list`` under the
    key ``date`` (a JSON-encoded list). Each user found in the portrait index
    yields one row ``[uid, uname, domain, topic, influence, importance,
    activeness]`` where ``topic`` is ``topic_string`` with ``'&'`` replaced by
    ``','`` and the three scores are ``ceil(value / top * 100)`` with ``top``
    taken from ``get_top_influence`` (presumably the index maximum -- confirm).

    Args:
        date: hash key identifying the day to look up.

    Returns:
        ``json.dumps`` of the list of rows; ``"[]"`` when nothing is stored
        for ``date``.
    """
    fields = ['uid', 'uname', 'domain', 'topic_string', 'influence', 'importance', 'activeness']
    score_fields = ("influence", "activeness", "importance")

    # A single hget is enough; fetching the value a second time adds a round
    # trip and can yield None if the entry is removed in between.
    stored = r_out.hget("decide_delete_list", date)
    history_uid_list = json.loads(stored) if stored else []
    if not history_uid_list:
        return json.dumps([])

    # Looked up only when there are rows to normalise.
    top_values = dict((name, float(get_top_influence(name))) for name in score_fields)
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": history_uid_list}, _source=True)['docs']

    return_list = []
    for doc in detail:
        if not doc.get('found'):
            # A user missing from the index has no '_source' to render.
            continue
        source = doc['_source']
        detail_info = []
        for item in fields:
            value = source[item]
            if item == "topic_string":
                detail_info.append(','.join(value.split("&")))
            elif item in top_values:
                detail_info.append(math.ceil(value / top_values[item] * 100))
            else:
                detail_info.append(value)
        return_list.append(detail_info)

    return json.dumps(return_list)
コード例 #13
0
def submit_identify_in_uname(input_data):
    """Record a user-submitted list of nick names as recommendation entries.

    ``input_data`` supplies ``date``, ``user`` and ``upload_data`` (nick names
    separated by newlines). Nick names are resolved to uids through the
    profile index; uids already present in the portrait index or among the
    keys of the Redis hash ``compute`` are dropped. Every remaining uid is
    written to the ``submit_recomment_<date>`` hash and to the submitter's own
    ``recomment_<user>_<date>`` hash.

    Returns:
        ``'uname list valid'`` when no nick name resolves to a uid
        (NOTE(review): the text reads "valid" although nothing matched; it is
        kept verbatim because callers may compare against it),
        ``'uname list all in'`` when every uid is already known or queued,
        otherwise ``True``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    uname_list = input_data['upload_data'].split('\n')

    # step1: resolve nick names to uids
    name_query = {'query': {'terms': {'nick_name': uname_list}}}
    profile_hits = es_user_profile.search(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body=name_query,
                                          _source=False)['hits']['hits']
    uid_list = [hit['_id'] for hit in profile_hits]
    if not uid_list:
        return 'uname list valid'

    # step2.1: keep only uids that are absent from the portrait index
    portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                          doc_type=portrait_index_type,
                                          body={'ids': uid_list})['docs']
    new_uid_list = []
    for portrait_doc in portrait_docs:
        if portrait_doc['found'] == False:
            new_uid_list.append(portrait_doc['_id'])
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: drop uids that are already listed in the 'compute' hash
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return 'uname list all in'

    # step3: save the submission
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    influence_keys = set(r.hkeys(hashname_influence))
    sensitive_keys = set(r.hkeys(hashname_sensitive))
    auto_recomment_set = influence_keys | sensitive_keys

    for uid in in_uid_list:
        if uid not in auto_recomment_set:
            entry = {'system': '0', 'operation': submit_user}
        else:
            # Merge the submitter into the '&'-joined list already stored.
            entry = json.loads(r.hget(hashname_submit, uid))
            recommenders = entry['operation'].split('&')
            recommenders.append(str(submit_user))
            entry['operation'] = '&'.join(list(set(recommenders)))
        r.hset(hashname_submit, uid, json.dumps(entry))
        r.hset(submit_user_recomment, uid, '0')
    return True
コード例 #14
0
def get_vary_detail_info(vary_detail_dict, uid_list):
    """Add user names and readable dates to the per-pattern user records.

    Args:
        vary_detail_dict: maps a pattern name to a list of records whose first
            three items are ``uid``, start timestamp and end timestamp.
        uid_list: ids looked up in the portrait index to resolve user names.

    Returns:
        A dict with the same keys; each value is a list of
        ``[uid, uname, start_date, end_date]`` where the dates are
        ``ts2datetime(int(timestamp))`` and ``uname`` falls back to the uid
        when no name could be resolved.
    """
    # get uname; a failed lookup only means every name falls back to the uid
    try:
        portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                              doc_type=portrait_index_type,
                                              body={'ids': uid_list})['docs']
    except Exception:
        portrait_docs = []
    uname_dict = dict(
        (doc['_id'], doc['_source']['uname'])
        for doc in portrait_docs
        if doc.get('found')
    )

    # get new vary detail information
    results = {}
    for vary_pattern in vary_detail_dict:
        new_pattern_list = []
        for user_item in vary_detail_dict[vary_pattern]:
            uid = user_item[0]
            new_pattern_list.append([
                uid,
                # Plain indexing raised KeyError whenever the lookup above
                # failed or the uid was not part of uid_list.
                uname_dict.get(uid, uid),
                ts2datetime(int(user_item[1])),
                ts2datetime(int(user_item[2])),
            ])
        results[vary_pattern] = new_pattern_list

    return results
コード例 #15
0
ファイル: utils.py プロジェクト: taozhiiq/user_portrait
def get_group_list(task_name):
    """List the members of a group with normalised importance and influence.

    The group document is read by id ``task_name``; its ``uid_list`` is looked
    up in index ``user_portrait``.

    Args:
        task_name: id of the group document.

    Returns:
        A list of ``[uid, uname, gender, location, importance, influence]``
        rows. The two scores are ``log10(value / max * 9 + 1) * 100`` with
        ``max`` taken from ``get_evaluate_max()``. A user whose data is
        missing or cannot be normalised yields the one-element row ``[uid]``.
        An empty list is returned when the group document cannot be read.
    """
    results = []
    try:
        es_results = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        return results
    uid_list = es_results['uid_list']
    user_portrait_attribute = es.mget(index='user_portrait', doc_type='user',
                                      body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()

    def normalise(source, metric):
        # float() keeps Python 2 from truncating an int / int ratio to 0.
        ratio = float(source[metric]) / evaluate_max[metric]
        return math.log(ratio * 9 + 1, 10) * 100

    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            row = [uid, source['uname'], source['gender'], source['location'],
                   normalise(source, 'importance'), normalise(source, 'influence')]
        except (KeyError, TypeError, ValueError, ZeroDivisionError):
            # Missing document/field, null value, or a ratio log() rejects.
            row = [uid]
        results.append(row)
    return results
コード例 #16
0
ファイル: utils.py プロジェクト: huxiaoqian/user_portrait
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """Return one day of a group's weibo posts filtered by sentiment.

    Args:
        task_name: id of the group document holding ``uid_list``.
        start_ts: start of the window; posts with
            ``start_ts <= timestamp < start_ts + DAY`` are searched in the
            index named ``flow_text_index_name_pre + ts2datetime(start_ts)``.
        sentiment: sentiment value to match; the value ``'2'`` matches any of
            ``SENTIMENT_SECOND`` instead.

    Returns:
        ``'task name invalid'`` when the group lookup fails or is empty,
        ``'task uid list null'`` when the group has no uids, otherwise a list
        of dicts with keys ``uid``, ``uname``, ``ip``, ``geo``, ``text``,
        ``timestamp`` and ``sentiment`` ordered by ascending timestamp.
        ``uname`` falls back to the uid when the user has no portrait entry;
        ``geo`` has ``'&'`` replaced by tabs and is ``''`` when unavailable.
    """
    # step1: get the uid list of the task
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: resolve user names; failure here must not lose the posts
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                     doc_type=portrait_index_type,
                                                     body={'ids': uid_list},
                                                     _source=False, fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    uid2uname = dict(
        (item['_id'], item['fields']['uname'][0])
        for item in user_portrait_result
        if item.get('found')
    )

    # step3: build the bool/must clauses for the requested day
    time_clause = {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}}
    if sentiment == '2':
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    else:
        sentiment_clause = {'term': {'sentiment': sentiment}}
    query_body = [{'terms': {'uid': uid_list}}, sentiment_clause, time_clause]
    flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(start_ts))
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query_body}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_result = []

    # step4: shape the hits
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # Indexing uid2uname directly raised KeyError for any author without
        # a resolved name (lookup failed, or user not in the portrait index).
        weibo['uname'] = uid2uname.get(weibo['uid'], weibo['uid'])
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except (KeyError, AttributeError, TypeError):
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)

    return weibo_list
コード例 #17
0
def ajax_get_group_detail():
    """Return, as a JSON string, the portrait rows of every user in a group.

    The group document id is ``<user>-<task_name>``, both read from the
    request query string. For each group member found in the portrait index
    one row is produced with the ``SOCIAL_SENSOR_INFO`` fields, in order:

    * ``topic_string`` is split on ``'&'`` and emitted twice (two list cells);
    * ``activeness``, ``importance`` and ``influence`` are emitted as
      ``ceil(value / top * 100)`` where ``top`` comes from
      ``get_top_influence`` (presumably the index-wide maximum -- confirm);
    * every other field is emitted as stored.

    Returns:
        ``json.dumps`` of the list of rows; ``"[]"`` when the group document
        has no source, no members, or no member is found.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []

    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    top_values = {
        "activeness": top_activeness,
        "influence": top_influence,
        "importance": top_importance,
    }

    search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {})
    if not search_result:
        return json.dumps(portrait_detail)

    raw_uid_list = search_result['uid_list']
    try:
        # The list may be stored JSON-encoded ...
        uid_list = json.loads(raw_uid_list)
    except (TypeError, ValueError):
        # ... or already as a plain list, which json.loads cannot parse.
        uid_list = raw_uid_list
    if not uid_list:
        return json.dumps(portrait_detail)

    search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                             body={"ids": uid_list}, fields=SOCIAL_SENSOR_INFO)['docs']
    for doc in search_results:
        if not doc['found']:
            continue
        doc_fields = doc["fields"]
        row = []
        for field_name in SOCIAL_SENSOR_INFO:
            value = doc_fields[field_name][0]
            if field_name == "topic_string":
                topics = value.split('&')
                # Emitted twice on purpose: the original row layout carries
                # the topic list in two consecutive cells.
                row.append(topics)
                row.append(topics)
            elif field_name in top_values:
                row.append(math.ceil(value / float(top_values[field_name]) * 100))
            else:
                row.append(value)
        portrait_detail.append(row)

    return json.dumps(portrait_detail)
コード例 #18
0
def get_group_list(task_name, submit_user):
    """List the members of a group with four normalised evaluation scores.

    The group document id is ``<submit_user>-<task_name>``; its ``uid_list``
    is looked up in the portrait index.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task.

    Returns:
        A list of ``[uid, uname, gender, location, importance, influence,
        activeness, sensitive]`` rows. Each score is
        ``log10(value / max * 9 + 1) * 100`` with ``max`` taken from
        ``get_evaluate_max()``. A user whose data is missing or cannot be
        normalised yields ``uid`` followed by seven empty strings. An empty
        list is returned when the group document cannot be read.
    """
    metrics = ('importance', 'influence', 'activeness', 'sensitive')
    results = []
    task_id = submit_user + '-' + task_name
    try:
        es_results = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                         id=task_id)['_source']
    except Exception:
        return results
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(index=portrait_index_name,
                                                    doc_type=portrait_index_type,
                                                    body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()

    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            row = [uid, source['uname'], source['gender'], source['location']]
            for metric in metrics:
                # float() keeps Python 2 from truncating an int / int ratio to 0.
                ratio = float(source[metric]) / evaluate_max[metric]
                row.append(math.log(ratio * 9 + 1, 10) * 100)
        except (KeyError, TypeError, ValueError, ZeroDivisionError):
            # Missing document/field, null value, or a ratio log() rejects.
            row = [uid, '', '', '', '', '', '', '']
        results.append(row)
    return results
コード例 #19
0
def compare_user_portrait_new(uid_list):
    """Collect the portrait attributes of several users for comparison.

    Args:
        uid_list: ids looked up in the portrait index and passed to
            ``get_psycho_status``.

    Returns:
        ``'uid_list not exist'`` when the lookup fails, returns nothing, or
        any user is not found. Otherwise a dict keyed by uid whose values hold
        ``uname``, ``location``, ``domain``, the integer scores
        ``importance`` / ``influence`` / ``activeness``
        (``int(log10(value / max * 9 + 1) * 100)`` with ``max`` from
        ``get_evaluate_max()``), ``topic`` (``topic_string`` split on
        ``'&'``), ``activity_geo`` (the two keys present in most of the last
        seven entries of ``activity_geo_dict``), the decoded ``keywords`` and
        ``hashtag`` and the user's ``psycho_status``.
    """
    try:
        user_portrait_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                       body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    if not user_portrait_result:
        return 'uid_list not exist'
    # get max evaluate
    max_result = get_evaluate_max()
    user_result = {}
    # get user psycho status from flow_text
    user_psycho_status_result = get_psycho_status(uid_list)
    # iter to get user attr
    for item in user_portrait_result:
        if not item.get('found'):
            return 'uid_list not exist'
        uid = item['_id']
        source = item['_source']
        attrs = {
            'uname': source['uname'],
            'location': source['location'],
            'domain': source['domain'],
            'topic': source['topic_string'].split('&'),
            'keywords': json.loads(source['keywords']),
            'hashtag': json.loads(source['hashtag_dict']),
            'psycho_status': user_psycho_status_result[uid],
        }
        # attr: evaluate index
        for metric in ('importance', 'influence', 'activeness'):
            # float() keeps Python 2 from truncating an int / int ratio to 0.
            ratio = float(source[metric]) / max_result[metric]
            attrs[metric] = int(math.log(ratio * 9 + 1, 10) * 100)
        # attr: activity geo -- count in how many of the last seven entries
        # each key appears, then keep the two most frequent keys
        week_geo_result = {}
        for day_geo_dict in json.loads(source['activity_geo_dict'])[-7:]:
            for geo_item in day_geo_dict:
                week_geo_result[geo_item] = week_geo_result.get(geo_item, 0) + 1
        sort_week_geo_result = sorted(week_geo_result.items(), key=lambda pair: pair[1], reverse=True)
        attrs['activity_geo'] = [geo for geo, _count in sort_week_geo_result[:2]]
        user_result[uid] = attrs

    return user_result
コード例 #20
0
ファイル: utils.py プロジェクト: huxiaoqian/user_portrait
def compare_user_portrait_new(uid_list):
    """Collect comparable portrait attributes for every uid in ``uid_list``.

    Returns a dict mapping uid -> attribute dict with the keys ``uname``,
    ``location``, ``importance``, ``influence``, ``activeness``, ``domain``,
    ``topic``, ``activity_geo``, ``keywords``, ``hashtag`` and
    ``psycho_status``.

    Returns the string ``'uid_list not exist'`` when the portrait lookup
    fails or comes back empty, or as soon as one uid has no portrait
    document.
    """
    try:
        portrait_docs = es.mget(index=portrait_index_name,
                                doc_type=portrait_index_type,
                                body={'ids': uid_list})['docs']
    except:
        portrait_docs = []
    if portrait_docs == []:
        return 'uid_list not exist'

    # Maxima used as the denominator when scaling the evaluation indexes.
    max_result = get_evaluate_max()
    # Psycho status for the whole uid list, fetched in one call.
    user_psycho_status_result = get_psycho_status(uid_list)

    user_result = {}
    for doc in portrait_docs:
        if doc['found'] != True:
            return 'uid_list not exist'
        uid = doc['_id']
        attrs = user_result[uid] = {}
        source = doc['_source']

        attrs['uname'] = source['uname']
        attrs['location'] = source['location']

        # Evaluation indexes: log10(value / max * 9 + 1), stored as an
        # integer percentage.
        for metric in ('importance', 'influence', 'activeness'):
            scaled = math.log(source[metric] / max_result[metric] * 9 + 1, 10)
            attrs[metric] = int(scaled * 100)

        attrs['domain'] = source['domain']
        attrs['topic'] = source['topic_string'].split('&')

        # Activity geo: for the last seven entries, count in how many of
        # them each geo key appears and keep the two most frequent keys.
        recent_days = json.loads(source['activity_geo_dict'])[-7:]
        geo_counts = {}
        for day_geo in recent_days:
            for geo in day_geo:
                geo_counts[geo] = geo_counts.get(geo, 0) + 1
        ranked_geo = sorted(geo_counts.items(), key=lambda pair: pair[1],
                            reverse=True)
        attrs['activity_geo'] = [geo for geo, _ in ranked_geo[:2]]

        attrs['keywords'] = json.loads(source['keywords'])
        attrs['hashtag'] = json.loads(source['hashtag_dict'])
        attrs['psycho_status'] = user_psycho_status_result[uid]

    return user_result
コード例 #21
0
def search_portrait_user(es,
                         number,
                         active_index,
                         active_type,
                         portrait_index,
                         portrait_type,
                         field="user_index"):
    """Return the first ``number`` ranked users that have a portrait.

    Pages through ``active_index`` 100 rows at a time via ``search_k``
    (requested with ``field``), keeps the rows whose user exists in the
    portrait index and emits one list per kept user:
    ``[rank, photo_url, uid, nick_name, row[field], "1"]``.
    ``photo_url`` and ``nick_name`` stay ``''`` when the user has no
    profile document.

    Returns the string ``"no active_index exist"`` when ``active_index``
    does not exist. Returns fewer than ``number`` rows when the ranking is
    exhausted first.
    """
    if not es.indices.exists(index=active_index):
        return "no active_index exist"

    wanted = int(number)
    # Rows fetched for "vary" carry the user id under "uid", all others
    # under "user".
    uid_key = 'uid' if field == "vary" else 'user'
    page_size = 100

    return_list = []
    start = 0
    rank = 1
    while True:
        # search_k is assumed to return a list of row dicts -- TODO confirm.
        user_list = search_k(es, active_index, active_type, start, field,
                             page_size)
        if not user_list:
            # No more rows to rank: stop here instead of requesting empty
            # pages forever.
            return return_list
        start += page_size
        search_list = [row.get(uid_key, '0') for row in user_list]

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        # mget answers in request order, so the position lines up with
        # user_list and profile_result (no list.index() lookup needed).
        for position, portrait in enumerate(search_result):
            if not portrait["found"]:
                continue
            profile = profile_result[position]
            info = ['', '', '', '', '', '']
            info[0] = rank
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[2] = portrait.get('_id', '')
            info[4] = user_list[position][field]
            info[5] = "1"
            return_list.append(info)
            rank += 1

            if len(return_list) >= wanted:
                return return_list
コード例 #22
0
def search_portrait_user_in_activity(es, number, active_index, active_type, portrait_index, portrait_type, field="user_index"):
    """Return the first ``number`` ranked users that have a portrait.

    Pages through ``active_index`` 100 rows at a time via ``search_k``
    (requested with ``field``) and keeps the rows whose user exists in the
    portrait index. Each kept user yields
    ``[rank, photo_url, uid, nick_name, row['user_index'], "1"]``; when
    ``field`` is one of the two origin-weibo burst averages, the row's
    ``field`` value and every ``key_list`` value are appended as well.

    Returns the string ``"no active_index exist"`` when ``active_index``
    does not exist. Returns fewer than ``number`` rows when the ranking is
    exhausted first.
    """
    if not es.indices.exists(index=active_index):
        return "no active_index exist"

    # Extra columns appended for the burst rankings (built once, not per page).
    # NOTE(review): the 8th and 16th entries repeat the *retweeted* burst
    # keys where the naming pattern suggests
    # "origin_weibo_comment_brust_average" and
    # "retweeted_weibo_comment_brust_average". Left unchanged because the
    # column layout is consumed outside this function -- confirm.
    key_list = ["origin_weibo_retweeted_total_number", "origin_weibo_retweeted_average_number", "origin_weibo_retweeted_top_number", "origin_weibo_retweeted_brust_average",
                "origin_weibo_comment_total_number", "origin_weibo_comment_average_number", "origin_weibo_comment_top_number", "origin_weibo_retweeted_brust_average",
                "retweeted_weibo_retweeted_total_number", "retweeted_weibo_retweeted_average_number", "retweeted_weibo_retweeted_top_number", "retweeted_weibo_retweeted_brust_average",
                "retweeted_weibo_comment_total_number", "retweeted_weibo_comment_average_number", "retweeted_weibo_comment_top_number", "retweeted_weibo_retweeted_brust_average"]
    burst_fields = ('origin_weibo_retweeted_brust_average',
                    'origin_weibo_comment_brust_average')

    wanted = int(number)
    # Rows fetched for "vary" carry the user id under "uid", all others
    # under "user".
    uid_key = 'uid' if field == "vary" else 'user'
    page_size = 100

    return_list = []
    start = 0
    rank = 1
    while True:
        # search_k is assumed to return a list of row dicts -- TODO confirm.
        user_list = search_k(es, active_index, active_type, start, field, page_size)
        if not user_list:
            # No more rows to rank: stop here instead of requesting empty
            # pages forever.
            return return_list
        start += page_size
        search_list = [row.get(uid_key, '0') for row in user_list]

        search_result = es_portrait.mget(index=portrait_index, doc_type=portrait_type, body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

        # mget answers in request order, so the position lines up with
        # user_list and profile_result (no list.index() lookup needed).
        for position, portrait in enumerate(search_result):
            if not portrait["found"]:
                continue
            activity = user_list[position]
            profile = profile_result[position]
            info = ['', '', '', '', '', '']
            info[0] = rank
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[2] = portrait.get('_id', '')
            info[4] = activity['user_index']
            info[5] = "1"
            if field in burst_fields:
                info.append(activity[field])
                info.extend([activity[key] for key in key_list])
            return_list.append(info)
            rank += 1

            if len(return_list) >= wanted:
                return return_list
コード例 #23
0
def compare_user_portrait(uid_list):
    """Return the portrait attributes of each uid for side-by-side display.

    Looks the uids up in the ``user_portrait`` index and returns a dict
    mapping uid -> attribute dict. A uid whose document carries no
    ``_source`` maps to an empty dict. JSON-encoded optional fields
    (``psycho_status``, ``psycho_feature``, ``hashtag_dict``,
    ``activity_geo_dict``) fall back to an empty value when missing or
    unparsable; the remaining fields are required and raise ``KeyError`` /
    ``ValueError`` as before when absent or malformed.
    """
    index_name = 'user_portrait'
    index_type = 'user'
    user_result = es.mget(index=index_name, doc_type=index_type, body={'ids': uid_list})['docs']

    user_portrait_result = {}
    for item in user_result:
        uid = item['_id']
        user_portrait_result[uid] = {}
        source = item.get('_source')
        if source is None:
            # No stored portrait: keep the empty placeholder. (The original
            # used the no-op expression `next` here and then read a stale
            # or unbound `source`.)
            continue

        psycho_status = _load_json_field(source, 'psycho_status', {})
        psycho_feature = _load_json_field(source, 'psycho_feature', {})
        hashtag_dict = _load_json_field(source, 'hashtag_dict', {})

        # Two highest-valued geo keys; each key is tab-separated and only
        # its last segment is kept.
        try:
            activity_geo_dict = json.loads(source['activity_geo_dict'])
            top_geo = sorted(activity_geo_dict.items(), key=lambda x: x[1],
                             reverse=True)[:2]
            activity_list = [geo.split('\t')[-1] for geo, _ in top_geo]
        except (KeyError, TypeError, ValueError, AttributeError):
            activity_list = []

        user_portrait_result[uid] = {
                'uname': source['uname'],
                'gender': source['gender'],
                'location': source['location'],
                'importance': source['importance'],
                'activeness': source['activeness'],
                'influence': source['influence'],
                'fansnum': source['fansnum'],
                'statusnum': source['statusnum'],
                'friendsnum': source['friendsnum'],
                'domain': source['domain'],
                'topic': json.loads(source['topic']),
                'keywords': json.loads(source['keywords']),
                'psycho_status': psycho_status,
                'psycho_feature': psycho_feature,
                'activity_geo': activity_list,
                'hashtag_dict': hashtag_dict
                }

    return user_portrait_result


def _load_json_field(source, key, default):
    """Return ``json.loads(source[key])``, or ``default`` if missing/invalid."""
    try:
        return json.loads(source[key])
    except (KeyError, TypeError, ValueError):
        return default
コード例 #24
0
def submit_identify_in_uname(input_data):
    """Submit users, given by nickname, for portrait computation.

    ``input_data`` supplies ``date``, ``user`` (the submitter),
    ``operation_type`` and ``upload_data`` (one nickname per line).

    Returns one of:
      * ``(False, 'invalid user info', invalid_names)`` when some nickname
        has no profile document;
      * ``(False, 'all user in')`` when every user already has a portrait
        or is already listed in the ``compute`` hash;
      * ``(True, invalid_names, already_in_uids, submitted_uids)``
        otherwise. The Redis hashes are only written when
        ``operation_type == 'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # One nickname per line; anything after a carriage return is dropped.
    uname_list = [line.split('\r')[0] for line in upload_data.split('\n')]

    # step1: resolve nicknames to uids through the profile index
    name_query = {'query': {'terms': {'nick_name': uname_list}}}
    profile_hits = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body=name_query, _source=False, fields=['nick_name'])['hits']['hits']
    uid_list = []
    valid_uname_list = []
    for hit in profile_hits:
        uid_list.append(hit['_id'])
        valid_uname_list.append(hit['fields']['nick_name'][0])
    invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    if invalid_user_list:
        return False, 'invalid user info', invalid_user_list

    # step2.1: split the uids by whether a portrait already exists
    portrait_docs = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if doc['found'] == False]
    have_in_user_list = [doc['_id'] for doc in portrait_docs if doc['found'] == True]
    if not new_uid_list:
        return False, 'all user in'

    # step2.2: drop the uids already listed in the 'compute' hash
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return False, 'all user in'

    # step3: record the submission
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))

    should_store = operation_type == 'submit'
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Present in the influence/sensitive hashes: add this submitter
            # to the stored '&'-joined operator list, de-duplicated.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            operators = tmp['operation'].split('&') + [str(submit_user)]
            tmp['operation'] = '&'.join(list(set(operators)))
        else:
            tmp = {'system':'0', 'operation': submit_user}
        if should_store:
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_user_list, have_in_user_list, final_submit_user_list
コード例 #25
0
def show_keywords_rank(task_id, sort_type, count):
    """Return the stored ranking of a keyword network task, with user info.

    Loads the task document ``task_id``, takes the ``sort_type`` entry of
    its JSON ``results`` (pairs of ``(uid, value)``) and returns one row
    per uid, in ranking order:
    ``[uid, nick_name, user_location, value, user_fansnum,
    weibo_month_sum, portrait_flag]`` where ``portrait_flag`` is ``"1"``
    when the user has a portrait and ``"0"`` otherwise. Missing profile or
    history documents leave their columns as ``''``.

    ``count`` is accepted for interface compatibility; it has never limited
    the result and is no longer overwritten inside the function.

    Returns ``{}`` when the task document cannot be fetched.
    """
    try:
        task_found = es_network_task.get(index=network_keywords_index_name,
                doc_type=network_keywords_index_type, id=task_id)['_source']
    except Exception:
        return {}

    sort_results = json.loads(task_found['results'])[sort_type]
    uid_list = []
    sort_list = []
    for source_uid, sort_value in sort_results:
        uid_list.append(source_uid)
        sort_list.append(sort_value)

    results = []
    if not uid_list:
        return results

    # Background (profile) information. mget answers in request order, so
    # the position identifies the matching sort value directly.
    profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(profile_result):
        if item['found']:
            source = item['_source']
            row = [source['uid'], source['nick_name'], source['user_location']]
        else:
            row = [item['_id'], '', '']
        row.append(sort_list[position])
        results.append(row)

    history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(history_result):
        if item['found']:
            source = item['_source']
            results[position].extend([source['user_fansnum'], source['weibo_month_sum']])
        else:
            results[position].extend(['', ''])

    portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(portrait_result):
        results[position].append("1" if item['found'] else "0")

    return results
コード例 #26
0
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Query the ``top_k`` documents of ``index_name`` ordered by ``sort_order``.

    With ``top=True`` only the ``sort_order`` value of the first hit is
    returned. Otherwise one list per hit is returned, starting with
    ``[rank, photo_url, uid, nick_name]`` and followed by:
      * the sort value and a portrait flag for ``user_index`` and the two
        origin-weibo burst averages;
      * the sort value, the URL built by ``weiboinfo2url`` for the top
        weibo, and a portrait flag for the two ``*_top_number`` orders;
      * nothing for any other ``sort_order``.
    The portrait flag is ``"1"`` when the user has a portrait, else ``"0"``.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}]
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    if top:
        return hits[0]['_source'][sort_order]

    uid_list = [hit['_id'] for hit in hits]
    profile_result = es_profile.mget(index="weibo_user",doc_type="user", body={"ids":uid_list}, _source=True)['docs']
    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids":uid_list}, _source=True)['docs']

    value_only_orders = ["user_index","origin_weibo_retweeted_brust_average","origin_weibo_comment_brust_average"]
    # sort order -> _source key holding the mid of the corresponding weibo
    top_weibo_mid_key = {
        "origin_weibo_retweeted_top_number": 'origin_weibo_top_retweeted_id',
        "origin_weibo_comment_top_number": 'origin_weibo_top_comment_id',
    }

    result = []
    for position, hit in enumerate(hits):
        photo_url = ''
        nick_name = ''
        profile = profile_result[position]
        if profile['found']:
            photo_url = profile['_source'].get('photo_url','')
            nick_name = profile['_source'].get('nick_name','')
        uid = hit.get('_id','')
        # rank is 1-based
        info = [position + 1, photo_url, uid, nick_name]

        if sort_order in value_only_orders:
            info.append(hit['_source'][sort_order])
            info.append("1" if portrait_result[position]['found'] else "0")
        elif sort_order in top_weibo_mid_key:
            info.append(hit['_source'][sort_order])
            mid = hit['_source'][top_weibo_mid_key[sort_order]]
            info.append(weiboinfo2url(uid, mid))
            info.append("1" if portrait_result[position]['found'] else "0")

        result.append(info)

    return result
コード例 #27
0
def show_daily_rank(period, sort_type, count):
    """Return the top ``count`` users of the network index for one ranking.

    The sort field is ``rank_<sort_type>_<period>`` when ``sort_type``
    contains an underscore, else ``<sort_type>_<period>``. Hits that do not
    carry the sort field are skipped. Each remaining user yields
    ``[uid, nick_name, statusnum, user_location, fansnum, value,
    portrait_flag]`` where ``portrait_flag`` is ``"1"`` when the user has a
    portrait and ``"0"`` otherwise; users without a profile document get
    ``''`` for the profile columns.

    A failing search is treated as "no hits" and yields ``[]``.
    """
    index_name = 'user_portrait_network'
    index_type = 'network'
    if '_' in sort_type:
        sort_field = 'rank_' + sort_type + '_' + str(period)
    else:
        sort_field = sort_type + '_' + str(period)   # e.g. pr_0
    query_body = {
        'sort': [{sort_field: {'order': 'desc'}}],
        'size': count
        }

    try:
        search_results = es_network_task.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    except Exception:
        search_results = []

    uid_list = []
    sort_list = []
    for hit in search_results:
        source = hit['_source']
        if sort_field in source:
            uid_list.append(source['uid'])
            sort_list.append(source[sort_field])

    results = []
    if not uid_list:
        return results

    # Background (profile) information. mget answers in request order, so
    # the position identifies the matching sort value directly.
    profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(profile_result):
        if item['found']:
            source = item['_source']
            row = [source['uid'], source['nick_name'], source['statusnum'],
                   source['user_location'], source['fansnum']]
        else:
            row = [item['_id'], '', '', '', '']
        row.append(sort_list[position])
        results.append(row)

    portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(portrait_result):
        results[position].append("1" if item['found'] else "0")

    return results
コード例 #28
0
ファイル: views.py プロジェクト: huxiaoqian/user_portrait
def ajax_get_task_detail_info():
    """Return the sensing task named in the request as a JSON string.

    Reads ``task_name`` from the request arguments, loads the task document
    and decodes its JSON-encoded fields (``social_sensors``, ``keywords``,
    ``sensitive_words``, ``history_status``). ``history_status`` is reduced
    to the latest entry plus the earlier entries whose last element is
    non-zero, sorted by first element, descending.
    ``social_sensors_portrait`` holds one row per sensor that has a
    portrait, built from ``SOCIAL_SENSOR_INFO`` and sorted by column 5,
    descending.
    """
    task_name = request.args.get('task_name','') # task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=task_name)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    task_detail['keywords'] = json.loads(task_detail['keywords'])
    task_detail["sensitive_words"] = json.loads(task_detail["sensitive_words"])

    history_status = json.loads(task_detail['history_status'])
    if history_status:
        kept = [history_status[-1]]
        kept.extend([entry for entry in history_status[:-1] if int(entry[-1]) != 0])
        history_status = sorted(kept, key=lambda x: x[0], reverse=True)
    task_detail['history_status'] = history_status

    portrait_detail = []
    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": task_detail["social_sensors"]})['docs']
        # Top value per evaluation field, fetched at most once per request
        # instead of once per sensor. Assumes get_top_influence returns the
        # same value for the duration of the request.
        top_values = {}
        for item in search_results:
            if not item['found']:
                continue
            source = item["_source"]
            row = []
            for field in SOCIAL_SENSOR_INFO:
                if field == "topic_string":
                    row.append(source[field].split('&'))
                elif field in ("influence", "importance", "activeness"):
                    if field not in top_values:
                        top_values[field] = get_top_influence(field)
                    # float() keeps the ratio from truncating to 0 under
                    # Python 2 integer division.
                    score = math.log(float(source[field]) / top_values[field] * 9 + 1, 10) * 100
                    row.append(score if score else 0)
                else:
                    row.append(source[field])
            portrait_detail.append(row)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail, key=lambda x: x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail

    return json.dumps(task_detail)
コード例 #29
0
ファイル: utils.py プロジェクト: ystone1025/info_consume
def submit_identify_in_url(input_data):
    """Submit users, given by profile URL, for portrait computation.

    ``input_data`` supplies ``date``, ``user`` (the submitter),
    ``operation_type`` and ``upload_data`` (one URL per line). The uid is
    taken as the last 10 characters of the fifth '/'-separated URL segment.

    Returns one of:
      * ``(False, 'invalid user info', bad_lines)`` when some line has
        fewer than five '/'-separated segments;
      * ``(False, 'all user in')`` when every uid already has a portrait or
        is already listed in the ``compute`` hash;
      * ``(True, bad_lines, already_in_uids, submitted_uids)`` otherwise.
        The Redis hashes are only written when
        ``operation_type == 'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # step1: extract one uid per uploaded line
    uid_list = []
    invalid_uid_list = []
    for raw_line in upload_data.split('\n'):
        url_item = raw_line.split('\r')[0]
        # e.g. 'http://weibo.com/p/1002065727942146/album?.....'
        segments = url_item.split('/')
        if len(segments) > 4:
            uid_list.append(segments[4][-10:])
        else:
            invalid_uid_list.append(url_item)
    if invalid_uid_list:
        return False, 'invalid user info', invalid_uid_list

    # step2.1: split the uids by whether a portrait already exists
    portrait_docs = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=True)['docs']
    new_uid_list = [doc['_id'] for doc in portrait_docs if doc['found'] == False]
    have_in_uid_list = [doc['_id'] for doc in portrait_docs if doc['found'] == True]

    # step2.2: drop the uids already listed in the 'compute' hash
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return False, 'all user in'

    # step3: record the submission
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))

    should_store = operation_type == 'submit'
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Present in the influence/sensitive hashes: add this submitter
            # to the stored '&'-joined operator list, de-duplicated.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            operators = tmp['operation'].split('&') + [str(submit_user)]
            tmp['operation'] = '&'.join(list(set(operators)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        if should_store:
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
コード例 #30
0
def submit_identify_in_url(input_data):
    """Register users identified by uploaded profile URLs for computation.

    Reads ``date``, ``user``, ``operation_type`` and ``upload_data`` (one
    URL per line) from ``input_data``. Each uid is the last 10 characters
    of the fifth '/'-separated segment of its URL.

    Return value:
      * ``(False, 'invalid user info', bad_lines)`` if any line has fewer
        than five '/'-separated segments;
      * ``(False, 'all user in')`` if no uid is left after removing those
        with a portrait and those in the ``compute`` hash;
      * ``(True, bad_lines, uids_with_portrait, submitted_uids)``
        otherwise. Redis is written only for
        ``operation_type == 'submit'``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']

    # step1: get uid list from the uploaded urls
    url_list = [line.split('\r')[0] for line in upload_data.split('\n')]
    uid_list = []
    invalid_uid_list = []
    for url_item in url_list:
        # url_item = 'http://weibo.com/p/1002065727942146/album?.....'
        url_parts = url_item.split('/')
        if len(url_parts) < 5:
            invalid_uid_list.append(url_item)
            continue
        uid_list.append(url_parts[4][-10:])
    if len(invalid_uid_list) > 0:
        return False, 'invalid user info', invalid_uid_list

    # step2.1: which uids already have a portrait
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, _source=True)['docs']
    new_uid_list = []
    have_in_uid_list = []
    for exist_item in exist_portrait_result:
        if exist_item['found'] == False:
            new_uid_list.append(exist_item['_id'])
        elif exist_item['found'] == True:
            have_in_uid_list.append(exist_item['_id'])

    # step2.2: which of the new uids are not yet in the 'compute' hash
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return False, 'all user in'

    # step3: save
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    influence_keys = set(r.hkeys(hashname_influence))
    sensitive_keys = set(r.hkeys(hashname_sensitive))
    auto_recomment_set = influence_keys | sensitive_keys

    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item not in auto_recomment_set:
            tmp = {'system': '0', 'operation': submit_user}
        else:
            # Merge this submitter into the stored '&'-joined operator list.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return True, invalid_uid_list, have_in_uid_list, final_submit_user_list
コード例 #31
0
ファイル: utils.py プロジェクト: huxiaoqian/user_portrait
def get_activity_weibo(task_name, start_ts):
    """Return the weibo posted by a group within one time segment.

    Looks up the uid list stored for ``task_name`` and searches the
    flow-text index of ``start_ts``'s date for weibo by those uids with
    ``start_ts <= timestamp < start_ts + FOUR_HOUR``, sorted by timestamp.
    Each result is a dict with ``timestamp`` (formatted by ``ts2date``),
    ``ip``, ``text`` and ``geo`` (tab-joined, ``''`` when empty).

    Returns the string ``'task name invalid'`` when the group document
    cannot be fetched and ``'task uid list null'`` when it has no uids.
    A failing weibo search yields ``[]``.
    """
    results = []
    #step1: get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'
    #step2: get uid2uname
    # NOTE(review): uid2uname is built but never read below -- either add
    # the name to each weibo or drop this lookup.
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    for item in user_portrait_result:
        # Only users with a portrait have a name. The original assigned
        # `uname` outside this check, which raised NameError for a leading
        # miss and reused the previous user's name for later ones.
        if item['found']:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: search time_segment weibo
    time_segment = FOUR_HOUR
    end_ts = start_ts + time_segment
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'bool': {'must': query}}, 'sort': 'timestamp', 'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_es_result = []
    for item in flow_text_es_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        weibo['geo'] = '\t'.join(source['geo']) if source['geo'] else ''
        results.append(weibo)

    return results
コード例 #32
0
ファイル: utils.py プロジェクト: pengyuange/user_portrait
def get_activity_weibo(task_name, start_ts):
    """Return the weibo a group posted in the segment starting at ``start_ts``.

    The group's uid list is read from the group document ``task_name``;
    the flow-text index for ``start_ts``'s date is then searched for weibo
    by those uids with ``start_ts <= timestamp < start_ts + FOUR_HOUR``,
    sorted by timestamp. Every hit becomes a dict with ``timestamp``
    (formatted by ``ts2date``), ``ip``, ``text`` and ``geo`` (tab-joined,
    ``''`` when empty).

    Returns the string ``'task name invalid'`` when the group document
    cannot be fetched and ``'task uid list null'`` when it has no uids.
    A failing weibo search yields ``[]``.
    """
    #step1: get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    #step2: get uid2uname
    # NOTE(review): uid2uname is built but never read below -- either add
    # the name to each weibo or drop this lookup.
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    uid2uname = {}
    for item in user_portrait_result:
        if not item['found']:
            # No portrait, hence no name. The original fell through and
            # stored an unbound (NameError) or stale `uname` here.
            continue
        uid2uname[item['_id']] = item['fields']['uname'][0]

    #step3: search time_segment weibo
    end_ts = start_ts + FOUR_HOUR
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'bool': {'must': query}}, 'sort': 'timestamp', 'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_es_result = []

    results = []
    for item in flow_text_es_result:
        source = item['_source']
        geo = source['geo']
        results.append({
            'timestamp': ts2date(source['timestamp']),
            'ip': source['ip'],
            'text': source['text'],
            'geo': '\t'.join(geo) if geo else '',
        })

    return results
コード例 #33
0
def show_all_out():
    """Return portrait details of every uid in ``decide_delete_list`` as JSON.

    Each value of the Redis hash ``decide_delete_list`` is decoded as a
    JSON list of uids (undecodable values are ignored); the de-duplicated
    uids are then looked up in the portrait index.

    Returns:
        A JSON string encoding a list of rows. A found user yields
        ``[uid, uname, domain, topic_string, influence, importance,
        activeness]`` where ``topic_string`` has ``'&'`` replaced by
        ``','`` and the three scores are ``ceil(value / top * 100)``.
        A missing user yields ``[uid, [], [], [], [], [], []]``.
    """
    delete_dict = r_out.hgetall('decide_delete_list')
    recommend_out_list = []
    # hgetall already returned every value, so decode those directly
    # instead of issuing one more hget per field.
    for raw_value in delete_dict.values():
        try:
            temp = json.loads(raw_value)
        except Exception:
            temp = []
        recommend_out_list.extend(temp)
    recommend_out_list = list(set(recommend_out_list))

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    # score fields are reported relative to the current top value
    top_values = {
        "influence": top_influence,
        "importance": top_importance,
        "activeness": top_activeness,
    }
    return_list = []
    fields = [
        'uid', 'uname', 'domain', 'topic_string', 'influence', 'importance',
        'activeness'
    ]
    if recommend_out_list:
        detail = es.mget(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body={"ids": recommend_out_list},
                         _source=True)['docs']
        for doc in detail:
            if doc['found']:
                detail_info = []
                for item in fields:
                    value = doc['_source'][item]
                    if item == "topic_string":
                        detail_info.append(','.join(value.split('&')))
                    elif item in top_values:
                        detail_info.append(
                            math.ceil(value / float(top_values[item]) * 100))
                    else:
                        detail_info.append(value)
            else:
                detail_info = [doc['_id'], [], [], [], [], [], []]

            return_list.append(detail_info)

    return json.dumps(return_list)
コード例 #34
0
def get_social_inter_content(uid1, uid2, type_mark):
    """Collect the weibo two users directed at each other over seven days.

    The reference day comes from the current time when ``RUN_TYPE == 1``
    and from ``RUN_TEST_TIME`` otherwise. The seven daily flow-text
    indices before that day are searched, oldest first.

    Args:
        uid1: uid of the first user; converted with ``int()`` when matched
            against ``directed_uid``.
        uid2: uid of the second user, handled like ``uid1``.
        type_mark: when equal to ``'out'``, weibo from ``uid2`` directed at
            ``uid1`` are included in addition to those from ``uid1``
            directed at ``uid2``.

    Returns:
        A list of dicts with the keys ``timestamp``, ``ip``, ``geo``,
        ``text``, ``uid``, ``uname``, ``directed_uid`` and
        ``directed_uname``. Names that cannot be resolved are
        ``'unknown'``.
    """
    weibo_list = []
    now_ts = int(time.time())
    # run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    # uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': [uid1, uid2]}, _source=False,
            fields=['uid', 'uname'])['docs']
    except Exception:
        portrait_result = []

    for item in portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
        else:
            uid2uname[item['_id']] = 'unknown'
    # the relation query does not depend on the day, so build it once
    query = [{'bool': {'must': [{'term': {'uid': uid1}},
                                {'term': {'directed_uid': int(uid2)}}]}}]
    if type_mark == 'out':
        query.append({'bool': {'must': [{'term': {'uid': uid2}},
                                        {'term': {'directed_uid': int(uid1)}}]}})
    # iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(iter_date_ts))
        try:
            flow_text_result = es_flow_text.search(
                index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'bool': {'should': query}},
                      'sort': [{'timestamp': {'order': 'asc'}}],
                      'size': MAX_VALUE})['hits']['hits']
        except Exception:
            # best effort: a day whose index cannot be searched is skipped
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            directed_uid = str(source['directed_uid'])
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] = source['uid']
            # The name map is empty when the portrait lookup failed; fall
            # back to the same placeholder used for missing portraits
            # instead of raising KeyError.
            weibo['uname'] = uid2uname.get(source['uid'], 'unknown')
            weibo['directed_uid'] = directed_uid
            weibo['directed_uname'] = uid2uname.get(directed_uid, 'unknown')
            weibo_list.append(weibo)

    return weibo_list
コード例 #35
0
def get_social_inter_content(uid1, uid2, type_mark):
    """Collect the weibo two users directed at each other over seven days.

    The reference day comes from the current time when ``RUN_TYPE == 1``
    and from ``RUN_TEST_TIME`` otherwise. The seven daily flow-text
    indices before that day are searched, oldest first.

    Args:
        uid1: uid of the first user; converted with ``int()`` when matched
            against ``directed_uid``.
        uid2: uid of the second user, handled like ``uid1``.
        type_mark: when equal to ``'out'``, weibo from ``uid2`` directed at
            ``uid1`` are included in addition to those from ``uid1``
            directed at ``uid2``.

    Returns:
        A list of dicts with the keys ``timestamp``, ``ip``, ``geo``,
        ``text``, ``uid``, ``uname``, ``directed_uid`` and
        ``directed_uname``. Names that cannot be resolved are
        ``'unknown'``.
    """
    weibo_list = []
    now_ts = int(time.time())
    # run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    # uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': [uid1, uid2]}, _source=False,
            fields=['uid', 'uname'])['docs']
    except Exception:
        portrait_result = []

    for item in portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
        else:
            uid2uname[item['_id']] = 'unknown'
    # the relation query does not depend on the day, so build it once
    query = [{'bool': {'must': [{'term': {'uid': uid1}},
                                {'term': {'directed_uid': int(uid2)}}]}}]
    if type_mark == 'out':
        query.append({'bool': {'must': [{'term': {'uid': uid2}},
                                        {'term': {'directed_uid': int(uid1)}}]}})
    # iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(iter_date_ts))
        try:
            flow_text_result = es_flow_text.search(
                index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'bool': {'should': query}},
                      'sort': [{'timestamp': {'order': 'asc'}}],
                      'size': MAX_VALUE})['hits']['hits']
        except Exception:
            # best effort: a day whose index cannot be searched is skipped
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            directed_uid = str(source['directed_uid'])
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] = source['uid']
            # The name map is empty when the portrait lookup failed; fall
            # back to the same placeholder used for missing portraits
            # instead of raising KeyError.
            weibo['uname'] = uid2uname.get(source['uid'], 'unknown')
            weibo['directed_uid'] = directed_uid
            weibo['directed_uname'] = uid2uname.get(directed_uid, 'unknown')
            weibo_list.append(weibo)

    return weibo_list
コード例 #36
0
def search_max_single_field(field, index_name, doctype, top_k=3):
    """Rank portrait users by a single "top weibo" counter.

    Candidates are fetched 100 at a time through ``search_k`` (presumably
    ordered by ``field``; confirm against that helper) and only users
    that have a portrait document are ranked.

    Args:
        field: counter to rank by, e.g.
            ``"origin_weibo_retweeted_top_number"`` or
            ``"origin_weibo_comment_top_number"``. A name containing
            ``'retweeted'`` selects the retweet columns, anything else the
            comment columns.
        index_name: index passed to ``search_k``.
        doctype: document type passed to ``search_k``.
        top_k: number of rows wanted.

    Returns:
        A list of rows ``[rank, photo_url, uid, nick_name, top_number,
        weibo_url, '1']``. Fewer than ``top_k`` rows are returned when the
        candidates run out.
    """
    if 'retweeted' in field:
        mid_key = 'origin_weibo_top_retweeted_id'
        number_key = 'origin_weibo_retweeted_top_number'
    else:
        mid_key = 'origin_weibo_top_comment_id'
        number_key = 'origin_weibo_comment_top_number'

    return_list = []
    rank = 1
    start = 0
    page_size = 100

    while True:
        user_list = search_k(es, index_name, doctype, start, field, page_size)
        # Without this guard the loop never ends (or fails on an empty
        # mget) once the candidates are exhausted.
        if not user_list:
            break
        start += page_size
        search_list = [item.get('user', '0') for item in user_list]  # uid list

        search_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                         body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                         body={"ids": search_list}, _source=True)["docs"]

        for i, portrait in enumerate(search_result):
            if not portrait['found']:
                continue
            info = ['', '', '', '', '', '', '1']
            info[0] = rank
            info[2] = portrait.get('_id', '')

            if profile_result[i]['found']:
                info[1] = profile_result[i]['_source'].get('photo_url', '')
                info[3] = profile_result[i]['_source'].get('nick_name', '')

            info[5] = weiboinfo2url(info[2], user_list[i][mid_key])
            info[4] = user_list[i][number_key]

            rank += 1
            return_list.append(info)

            if rank >= int(top_k) + 1:
                return return_list

    return return_list
コード例 #37
0
def search_tag(es, number, active_index, active_type, portrait_index,
               portrait_type, tag):
    """Rank active users whose portrait ``domain`` contains ``tag``.

    Candidates are fetched 10000 at a time through ``search_k`` on the
    ``"user_index"`` field. Scanning stops once ``number`` rows are
    collected, more than 100000 portrait documents were inspected, or the
    candidates run out.

    Args:
        es: client handed to ``search_k``.
        number: number of rows wanted.
        active_index: index passed to ``search_k``.
        active_type: document type passed to ``search_k``.
        portrait_index: index of the portrait documents.
        portrait_type: document type of the portrait documents.
        tag: value looked for inside each portrait's ``domain`` field.

    Returns:
        A list of rows ``[rank, photo_url, uid, nick_name, user_index,
        activeness, importance]``.
    """
    return_list = []
    count_s = 0
    start = 0
    rank = 1
    page_size = 10000

    while True:
        user_list = search_k(es, active_index, active_type, start,
                             "user_index", page_size)
        # Nothing left to scan: stop instead of looping forever (or
        # failing on an empty mget).
        if not user_list:
            return return_list
        start += page_size
        search_list = [item.get('user', '0') for item in user_list]  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        # enumerate gives the real position; list.index() was quadratic
        # and returned the first *equal* document instead.
        for index, item in enumerate(search_result):
            count_s += 1
            if not (item['found'] and tag in item['_source']['domain']):
                continue
            info = ['', '', '', '', '', '', '']
            info[0] = rank

            if profile_result[index]['found']:
                info[1] = profile_result[index]['_source'].get(
                    'photo_url', '')
                info[3] = profile_result[index]['_source'].get(
                    'nick_name', '')
            info[2] = item.get('_id', '')
            info[4] = user_list[index]['user_index']
            info[5] = item['_source'].get('activeness', '')
            info[6] = item['_source'].get('importance', '')

            rank += 1
            return_list.append(info)

            if rank >= int(number) + 1:
                return return_list

        if count_s > 100000:
            return return_list
コード例 #38
0
def delete_group_results(task_name, submit_user):
    """Delete a group task and strip its tag from the members' portraits.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task; the document id is
            ``submit_user + '-' + task_name``.

    Returns:
        ``True`` when the group document was deleted, ``False`` when the
        group could not be loaded or the delete call failed.
    """
    import json  # local import: only needed to decode a serialized uid_list

    task_id = submit_user + '-' + task_name
    # step1: get group uid list
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id)['_source']
    except Exception:
        return False
    uid_list = group_result['uid_list']
    # Other readers of this index decode uid_list with json.loads, so
    # accept a JSON-encoded list as well as a plain one.
    try:
        uid_list = json.loads(uid_list)
    except (TypeError, ValueError):
        pass
    # step2: update group_tag in user_portrait
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    bulk_action = []
    for item in user_portrait_result:
        if item['found'] != True:
            continue
        source = item.get('_source') or {}
        group_tag = source.get('group') or ''
        # NOTE(review): the second literal looks like an e-mail-obfuscation
        # placeholder left by a scraper; confirm the intended tag value.
        new_group_tag = '&'.join(
            tag for tag in group_tag.split('&')
            if tag != task_id and tag != '[email protected]')
        bulk_action.extend([{'update': {'_id': item['_id']}},
                            {'doc': {'group': new_group_tag}}])
    if bulk_action:
        print('bulk_action: %s' % (bulk_action,))
        es_user_portrait.bulk(bulk_action,
                              index=portrait_index_name,
                              doc_type=portrait_index_type)
    # step3: delete group results in group_manage
    # NOTE(review): step1 reads through es_group_result/group_index_name
    # while the delete goes through es/index_name; confirm both point at
    # the same document.
    try:
        print('yes delete')
        es.delete(index=index_name, doc_type=index_type, id=task_id)
    except Exception:
        return False
    return True
コード例 #39
0
def show_detect_result(task_name, submit_user):
    """Return the evaluation scores of every user in a detect task.

    Args:
        task_name: name of the detect task.
        submit_user: user who submitted the task; the document id is
            ``submit_user + '-' + task_name``.

    Returns:
        A list of rows ``[uid, uname, activeness, importance, influence]``
        sorted by the influence column in descending order, or the string
        ``'task name is not exist'`` when the task document cannot be
        read. Each score is ``log10(value / max * 9 + 1) * 100``; users
        without a portrait carry the placeholder string in every column
        after the uid.
    """
    user_result = []
    # step1:identify the task name id exist
    task_id = submit_user + '-' + task_name
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    # step2:get uid list
    uid_list = json.loads(task_exist_result['uid_list'])
    # step3:get user evaluation information---uid/uname/activeness/importance/influence
    evaluate_max = None  # fetched once, on the first user that needs it
    iter_count = 0
    uid_count = len(uid_list)
    while iter_count < uid_count:
        iter_user_list = uid_list[iter_count:iter_count + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': iter_user_list}, _source=True)['docs']
        except Exception:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                if evaluate_max is None:
                    evaluate_max = get_evaluate_max()
                # float() keeps integer-valued scores from being
                # floor-divided under Python 2.
                scores = [
                    math.log(source[key] / float(evaluate_max[key]) * 9 + 1,
                             10) * 100
                    for key in ('activeness', 'importance', 'influence')
                ]
                user_result.append([uid, source['uname']] + scores)
            else:
                user_result.append([uid, u'未知', u'未知', u'未知', u'未知'])
        iter_count += DETECT_ITER_COUNT
    # NOTE(review): placeholder rows hold a string in the sort column, so
    # their position relies on Python 2 mixed-type ordering.
    sort_user_result = sorted(user_result, key=lambda x: x[4], reverse=True)

    return sort_user_result
コード例 #40
0
def identify_user_out(input_uid_list):
    """Split uids by whether they have a portrait document.

    Args:
        input_uid_list: uids to check, looked up in batches of
            ``DETECT_ITER_COUNT``.

    Returns:
        A tuple ``(in_user_list, out_user_rows)``. ``in_user_list`` holds
        the uids found in the portrait index. ``out_user_rows`` holds one
        ``[uid, uname, fansnum, statusnum, friendsnum]`` row per uid that
        was not found, filled from the profile index (or with placeholder
        strings when the profile is missing too) and sorted by the
        ``fansnum`` column in descending order.
    """
    in_user_list = []
    out_user_list = []
    print('identify user out')
    # get user list who is out user_portrait
    for offset in range(0, len(input_uid_list), DETECT_ITER_COUNT):
        batch = input_uid_list[offset:offset + DETECT_ITER_COUNT]
        try:
            docs = es_user_portrait.mget(index=portrait_index_name,
                                         doc_type=portrait_index_type,
                                         body={'ids': batch},
                                         _source=False)['docs']
        except:
            docs = []
        for doc in docs:
            target = in_user_list if doc['found'] == True else out_user_list
            target.append(doc['_id'])
    print('get out user portrait information')
    # get user profile information for out user_portrait
    out_user_result = []
    for offset in range(0, len(out_user_list), DETECT_ITER_COUNT):
        batch = out_user_list[offset:offset + DETECT_ITER_COUNT]
        try:
            docs = es_user_profile.mget(index=profile_index_name,
                                        doc_type=profile_index_type,
                                        body={'ids': batch},
                                        _source=True)['docs']
        except:
            docs = []
        for doc in docs:
            if doc['found'] == True:
                profile = doc['_source']
                row = [doc['_id'], profile['nick_name'], profile['fansnum'],
                       profile['statusnum'], profile['friendsnum']]
            else:
                row = [doc['_id']] + [u'未知'] * 4
            out_user_result.append(row)

    out_user_result.sort(key=lambda row: row[2], reverse=True)

    return in_user_list, out_user_result
コード例 #41
0
def get_sensor_detail(task_name, ts, user):
    """Return the portrait rows of a sensing task's social sensors.

    Args:
        task_name: name of the sensing task.
        ts: not used by this function.
        user: owner of the task; the document id is
            ``user + "-" + task_name``.

    Returns:
        One row per sensor found in the portrait index, with one entry per
        name in ``SOCIAL_SENSOR_INFO``: ``topic_string`` is split on
        ``'&'``, the three scores are ``log10(value / top * 9 + 1) * 100``
        and every other field is passed through. Rows are sorted by their
        sixth entry in descending order. An empty list is returned when
        the task has no sensors.
    """
    task_id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=task_id)["_source"]
    social_sensors = json.loads(task_detail['social_sensors'])

    # top values are looked up in this order before the sensors are checked
    top_values = {}
    for metric in ("importance", "influence", "activeness"):
        top_values[metric] = get_top_influence(metric)

    if not social_sensors:
        return []

    docs = es.mget(index=portrait_index_name,
                   doc_type=portrait_index_type,
                   body={"ids": social_sensors},
                   fields=SOCIAL_SENSOR_INFO)['docs']
    rows = []
    for doc in docs:
        if not doc['found']:
            continue
        row = []
        for name in SOCIAL_SENSOR_INFO:
            value = doc['fields'][name][0]
            if name == "topic_string":
                row.append(value.split('&'))
            elif name in top_values:
                row.append(
                    math.log(value / float(top_values[name]) * 9 + 1, 10) * 100)
            else:
                row.append(value)
        rows.append(row)

    rows.sort(key=lambda row: row[5], reverse=True)
    return rows
コード例 #42
0
def identify_user_out(input_uid_list):
    """Split uids by whether they have a portrait document.

    Args:
        input_uid_list: uids to check, looked up in batches of
            ``DETECT_ITER_COUNT``.

    Returns:
        A tuple ``(in_user_list, out_user_rows)``. ``in_user_list`` holds
        the uids found in the portrait index. ``out_user_rows`` holds one
        ``[uid, uname, fansnum, statusnum, friendsnum]`` row per uid that
        was not found, filled from the profile index (or with placeholder
        strings when the profile is missing too) and sorted by the
        ``fansnum`` column in descending order.
    """
    found_uids = []
    missing_uids = []
    print('identify user out')
    # get user list who is out user_portrait
    for begin in range(0, len(input_uid_list), DETECT_ITER_COUNT):
        chunk = input_uid_list[begin:begin + DETECT_ITER_COUNT]
        try:
            portrait_docs = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': chunk}, _source=False)['docs']
        except:
            portrait_docs = []
        for portrait in portrait_docs:
            if portrait['found'] == True:
                found_uids.append(portrait['_id'])
            else:
                missing_uids.append(portrait['_id'])
    print('get out user portrait information')
    # get user profile information for out user_portrait
    profile_rows = []
    for begin in range(0, len(missing_uids), DETECT_ITER_COUNT):
        chunk = missing_uids[begin:begin + DETECT_ITER_COUNT]
        try:
            profile_docs = es_user_profile.mget(
                index=profile_index_name, doc_type=profile_index_type,
                body={'ids': chunk}, _source=True)['docs']
        except:
            profile_docs = []
        for profile_doc in profile_docs:
            if profile_doc['found'] == True:
                info = profile_doc['_source']
                profile_rows.append([profile_doc['_id'], info['nick_name'],
                                     info['fansnum'], info['statusnum'],
                                     info['friendsnum']])
            else:
                profile_rows.append(
                    [profile_doc['_id'], u'未知', u'未知', u'未知', u'未知'])

    profile_rows.sort(key=lambda row: row[2], reverse=True)

    return found_uids, profile_rows
コード例 #43
0
def delete_group_results(task_name, submit_user):
    """Delete a group task and strip its tag from the members' portraits.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task; the document id is
            ``submit_user + '-' + task_name``.

    Returns:
        ``True`` when the group document was deleted, ``False`` when the
        group could not be loaded or the delete call failed.
    """
    import json  # local import: only needed to decode a serialized uid_list

    task_id = submit_user + '-' + task_name
    # step1: get group uid list
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id)['_source']
    except Exception:
        return False
    uid_list = group_result['uid_list']
    # Other readers of this index decode uid_list with json.loads, so
    # accept a JSON-encoded list as well as a plain one.
    try:
        uid_list = json.loads(uid_list)
    except (TypeError, ValueError):
        pass
    # step2: update group_tag in user_portrait
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    bulk_action = []
    for item in user_portrait_result:
        if item['found'] != True:
            continue
        source = item.get('_source') or {}
        group_tag = source.get('group') or ''
        # NOTE(review): the second literal looks like an e-mail-obfuscation
        # placeholder left by a scraper; confirm the intended tag value.
        new_group_tag = '&'.join(
            tag for tag in group_tag.split('&')
            if tag != task_id and tag != '[email protected]')
        bulk_action.extend([{'update': {'_id': item['_id']}},
                            {'doc': {'group': new_group_tag}}])
    if bulk_action:
        print('bulk_action: %s' % (bulk_action,))
        es_user_portrait.bulk(bulk_action, index=portrait_index_name,
                              doc_type=portrait_index_type)
    # step3: delete group results in group_manage
    # NOTE(review): step1 reads through es_group_result/group_index_name
    # while the delete goes through es/index_name; confirm both point at
    # the same document.
    try:
        print('yes delete')
        es.delete(index=index_name, doc_type=index_type, id=task_id)
    except Exception:
        return False
    return True
コード例 #44
0
def show_vary_detail(task_name, submit_user, vary_pattern):
    """Return the users and periods recorded for one geo vary pattern.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task; the document id is
            ``submit_user + '-' + task_name``.
        vary_pattern: pattern whose parts are separated by ``'-'``; the
            parts are re-joined with ``'&'`` to form the lookup key.

    Returns:
        A list of ``[uid, uname, start_date, end_date]`` rows (empty when
        the pattern has no entry), or one of the strings
        ``'group task is not exist'``, ``'group task is not completed'``
        and ``'vary detail geo none'``.
    """
    task_id = submit_user + '-' + task_name
    # identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name,
                                     doc_type=group_index_type,
                                     id=task_id)['_source']
    except Exception:
        return 'group task is not exist'
    # identify the task status=1
    if source['status'] != 1:
        return 'group task is not completed'
    # get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except Exception:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    # get vary_detail
    # The lookup used an undefined name; the key built from the request
    # pattern is what has to index the detail dict.
    vary_pattern_key = '&'.join(vary_pattern.split('-'))
    uid_ts_list = vary_detail_geo.get(vary_pattern_key, [])
    uid_list = [item[0] for item in uid_ts_list]
    # get user name
    user_portrait_result = []
    if uid_list:
        try:
            user_portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': uid_list})['docs']
        except Exception:
            user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid
    # get vary detail
    new_detail = []
    for vary_item in uid_ts_list:
        uid = vary_item[0]
        # fall back to the uid when the name lookup failed altogether
        uname = uname_dict.get(uid, uid)
        start_date = ts2datetime(vary_item[1])
        end_date = ts2datetime(vary_item[2])
        new_detail.append([uid, uname, start_date, end_date])

    return new_detail
コード例 #45
0
def show_vary_detail(task_name, submit_user, vary_pattern):
    """Return the users and periods recorded for one geo vary pattern.

    Args:
        task_name: name of the group task.
        submit_user: user who submitted the task; the document id is
            ``submit_user + '-' + task_name``.
        vary_pattern: pattern whose parts are separated by ``'-'``; the
            parts are re-joined with ``'&'`` to form the lookup key.

    Returns:
        A list of ``[uid, uname, start_date, end_date]`` rows (empty when
        the pattern has no entry), or one of the strings
        ``'group task is not exist'``, ``'group task is not completed'``
        and ``'vary detail geo none'``.
    """
    task_id = submit_user + '-' + task_name
    # identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name,
                                     doc_type=group_index_type,
                                     id=task_id)['_source']
    except Exception:
        return 'group task is not exist'
    # identify the task status=1
    if source['status'] != 1:
        return 'group task is not completed'
    # get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except Exception:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    # get vary_detail
    # The lookup used an undefined name; the key built from the request
    # pattern is what has to index the detail dict.
    vary_pattern_key = '&'.join(vary_pattern.split('-'))
    uid_ts_list = vary_detail_geo.get(vary_pattern_key, [])
    uid_list = [item[0] for item in uid_ts_list]
    # get user name
    user_portrait_result = []
    if uid_list:
        try:
            user_portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': uid_list})['docs']
        except Exception:
            user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid
    # get vary detail
    new_detail = []
    for vary_item in uid_ts_list:
        uid = vary_item[0]
        # fall back to the uid when the name lookup failed altogether
        uname = uname_dict.get(uid, uid)
        start_date = ts2datetime(vary_item[1])
        end_date = ts2datetime(vary_item[2])
        new_detail.append([uid, uname, start_date, end_date])

    return new_detail
コード例 #46
0
ファイル: views.py プロジェクト: ystone1025/info_consume
def ajax_get_task_detail_info():
    """Return a sensing task's details as a JSON string.

    Reads ``task_name`` (default ``''``) and ``user`` (default
    ``'admin'``) from the request arguments and loads the task document
    with id ``user + "-" + task_name``.

    In the returned document ``social_sensors`` is JSON-decoded,
    ``history_status`` is JSON-decoded and sorted in descending order
    (or ``[]`` when empty), and ``social_sensors_portrait`` holds one row
    per sensor found in the portrait index. Each row follows
    ``SOCIAL_SENSOR_INFO`` with ``topic_string`` split on ``'&'``, and
    rows are sorted by their sixth entry in descending order.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', 'admin')
    task_id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=task_id)['_source']

    sensors = json.loads(task_detail["social_sensors"])
    task_detail["social_sensors"] = sensors

    history_status = json.loads(task_detail['history_status'])
    if history_status:
        task_detail['history_status'] = sorted(history_status, reverse=True)
    else:
        task_detail['history_status'] = []

    portrait_detail = []
    if sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": sensors})['docs']
        for doc in search_results or []:
            if not doc['found']:
                continue
            row = []
            for name in SOCIAL_SENSOR_INFO:
                value = doc["_source"][name]
                row.append(value.split('&') if name == "topic_string" else value)
            portrait_detail.append(row)
        if portrait_detail:
            portrait_detail.sort(key=lambda row: row[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail

    return json.dumps(task_detail)
コード例 #47
0
def portrait_user_vary(es, number, active_index, active_type, portrait_index, portrait_type, field="vary"):
    """Rank users that have a portrait by their ``vary`` value.

    Candidates are fetched 100 at a time through ``search_k``. Scanning
    stops once ``number`` rows are collected, more than 10000 portrait
    documents were inspected, the candidates run out, or a
    ``RequestError`` is raised.

    Args:
        es: client used for the index check and handed to ``search_k``.
        number: number of rows wanted.
        active_index: index that must exist and is passed to ``search_k``.
        active_type: document type passed to ``search_k``.
        portrait_index: accepted but not used; see the note below.
        portrait_type: accepted but not used; see the note below.
        field: field name passed to ``search_k``.

    Returns:
        A list of rows ``[rank, photo_url, uid, nick_name, vary, '1']``,
        or the string ``"no active_index exist"`` when ``active_index``
        does not exist.
    """
    return_list = []
    if not es.indices.exists(index=active_index):
        return "no active_index exist"

    count_c = 0
    start = 0
    rank = 1
    page_size = 100
    try:
        while True:
            user_list = search_k(es, active_index, active_type, start, field, page_size)
            # Stop cleanly once the candidates are exhausted instead of
            # relying on the empty mget to fail.
            if not user_list:
                break
            start += page_size
            # obtain uid, notice "uid" or "user"
            search_list = [item.get('uid', '0') for item in user_list]
            # NOTE(review): portrait_index / portrait_type are ignored in
            # favour of these hard-coded names; confirm that is intended.
            search_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                             body={"ids": search_list}, _source=True)["docs"]
            profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                             body={"ids": search_list}, _source=True)["docs"]

            # enumerate gives the real position; list.index() was
            # quadratic and returned the first *equal* document instead.
            for index, item in enumerate(search_result):
                count_c += 1
                if not item["found"]:
                    continue
                info = ['', '', '', '', '', '1']
                info[0] = rank

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get('photo_url', '')
                    info[3] = profile_result[index]['_source'].get('nick_name', '')
                info[2] = item.get('_id', '')
                info[4] = user_list[index]['vary']
                return_list.append(info)
                rank += 1
                if rank == int(number) + 1:
                    return return_list

            if count_c > 10000:
                break
    except RequestError:
        print("timeout")

    return return_list
コード例 #48
0
ファイル: utils.py プロジェクト: huxiaoqian/user_portrait
def get_user_tag(uid_list):
    """Map each uid to the ``"key:value"`` strings of its extra attributes.

    Args:
        uid_list: uids to look up in the portrait index.

    Returns:
        A dict from uid to a list of ``key + ':' + value`` strings, one
        per portrait attribute whose key is not in
        ``identify_attribute_list``. Documents without ``_source`` map to
        an empty list.
    """
    docs = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                   body={'ids': uid_list})['docs']
    tags_by_uid = {}
    for doc in docs:
        doc_id = doc['_id']
        try:
            attributes = doc['_source']
        except:
            attributes = {}
        tags_by_uid[doc_id] = [
            key + ':' + attributes[key]
            for key in attributes
            if key not in identify_attribute_list
        ]

    return tags_by_uid
コード例 #49
0
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    """Return the ``top_k`` documents of an index ranked by ``sort_index``.

    The hits are sorted in descending order of ``sort_index`` and enriched
    with profile data (photo, nick name) and a flag telling whether the
    user exists in the portrait index.

    Args:
        index_name: index to search.
        doctype: document type to search.
        top_k: maximum number of hits to request.
        sort_index: field used for the descending sort. The value reported
            in each row is always read from the hit's ``vary`` field.

    Returns:
        list of rows ``[rank, photo_url, uid, nick_name, vary, in_portrait]``
        where ``rank`` starts at 1 and ``in_portrait`` is ``'1'`` or ``'0'``.
        ``photo_url`` and ``nick_name`` are ``''`` when the profile document
        was not found.
    """
    search_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_index: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=doctype,
                     body=search_body)['hits']['hits']
    hit_ids = [hit['_id'] for hit in hits]

    # Both lookups return one entry per requested id, in request order.
    portrait_docs = es_portrait.mget(index="user_portrait", doc_type="user",
                                     body={"ids": hit_ids},
                                     _source=True)['docs']
    profile_docs = es_profile.mget(index="weibo_user", doc_type="user",
                                   body={"ids": hit_ids},
                                   _source=True)['docs']

    rows = []
    for position, hit in enumerate(hits):
        photo_url = ''
        nick_name = ''
        profile_doc = profile_docs[position]
        if profile_doc['found']:
            profile_source = profile_doc['_source']
            photo_url = profile_source.get('photo_url', '')
            nick_name = profile_source.get('nick_name', '')
        uid = hit.get('_id', '')
        vary_value = hit['_source']['vary']
        in_portrait = '1' if portrait_docs[position]['found'] else '0'
        rows.append([position + 1, photo_url, uid, nick_name, vary_value, in_portrait])

    return rows
コード例 #50
0
def get_user_tag(uid_list):
    """Build "key:value" tag strings from each user's non-identity fields.

    Looks ``uid_list`` up in the portrait index. Every ``_source`` field
    whose name is absent from ``identify_attribute_list`` becomes one
    ``"<field>:<value>"`` entry for that user.

    Args:
        uid_list: list of user ids to look up.

    Returns:
        dict keyed by the returned document ``_id``; each value is the list
        of tag strings (empty when the document has no ``_source``). An
        empty ``uid_list`` returns ``{}`` without touching Elasticsearch.
    """
    result = {}
    if not uid_list:
        # A multi-get with no ids is refused by Elasticsearch.
        return result
    user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': uid_list})['docs']
    for user_item in user_result:
        uid = user_item['_id']
        # Entries without '_source' (presumably not-found documents)
        # contribute no tags.
        source = user_item.get('_source', {})
        tags = []
        for key in source:
            if key in identify_attribute_list:
                continue
            # '%s' formatting accepts non-string values, unlike key+':'+value.
            tags.append('%s:%s' % (key, source[key]))
        result[uid] = tags

    return result
コード例 #51
0
def submit_identify_in_uname(input_data):
    """Record an upload of nick names as recommend-in submissions in Redis.

    The nick names are resolved to uids through the profile index. Uids that
    already exist in the portrait index, or that are already keys of the
    Redis ``compute`` hash, are dropped. Every remaining uid is written to
    the day's submit hash and to the submitting user's own hash.

    Args:
        input_data: dict with the keys ``date`` (string, part of the Redis
            hash names), ``user`` (the submitting user) and ``upload_data``
            (newline-separated nick names).

    Returns:
        ``'uname list valid'`` when no nick name matches a profile document,
        ``'uname list all in'`` when every matched uid is already in the
        portrait index or in ``compute``, otherwise ``True``.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list = upload_data.split('\n')
    # step1: get uid list from uname
    profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query': {'terms': {'nick_name': uname_list}}}, _source=False)['hits']['hits']
    uid_list = [profile_item['_id'] for profile_item in profile_exist_result]
    if not uid_list:
        # NOTE(review): the message reads like it should say "invalid"; it is
        # left untouched because callers may compare against this string.
        return 'uname list valid'
    # step2: filter user not in user_portrait and compute
    # step2.1: identify in user_portrait
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if not exist_item['found']]
    if not new_uid_list:
        return 'uname list all in'
    # step2.2: identify in compute
    # hkeys returns a list; it must be a set before the set difference.
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return 'uname list all in'
    # step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Merge the submitting user into the existing record.
            # NOTE(review): assumes every uid in the influence/sensitive
            # hashes also has an entry in the submit hash; hget returns None
            # otherwise and json.loads would fail. Confirm with the writer.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        # Key both writes by the uid being processed, not by the variable
        # left over from the profile loop above.
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
コード例 #52
0
def get_user_tag(uid_list, submit_user):
    """Return the tags that ``submit_user`` attached to each user.

    The tags are read from the ``"<submit_user>-tag"`` field of each user
    document. That field is split on ``'&'`` and every ``'-'`` inside an
    entry is replaced by ``':'``.

    Args:
        uid_list: list of user ids to look up.
        submit_user: name of the user whose tag field is read.

    Returns:
        dict mapping each returned document ``_id`` to a list of tag
        strings; the list is empty when the document has no ``_source`` or
        the tag field is missing or empty. An empty ``uid_list`` returns
        ``{}`` without querying.
    """
    result = {}
    if not uid_list:
        # Elasticsearch rejects a multi-get request that names no documents.
        return result
    user_result = es.mget(index=user_index_name, doc_type=user_index_type, body={'ids': uid_list})['docs']
    submit_user_tag = submit_user + '-tag'
    for user_item in user_result:
        # Entries without '_source' (presumably not-found documents) simply
        # have no tags.
        source = user_item.get('_source', {})
        submit_user_attribute = source.get(submit_user_tag, '')
        if submit_user_attribute:
            tags = [item.replace('-', ':') for item in submit_user_attribute.split('&')]
        else:
            tags = []
        result[user_item['_id']] = tags

    return result
コード例 #53
0
def filter_in_uid(input_dict):
    """Keep the uids of ``input_dict`` that exist in the portrait index.

    The uids are looked up in batches of ``FILTER_ITER_COUNT``. The lookup
    is best-effort: a batch whose request fails contributes no rows.

    Args:
        input_dict: dict keyed by uid. The value is carried through
            unchanged as the last element of the uid's row.

    Returns:
        list of ``[uid, uname, photo_url, input_dict[uid]]`` rows, one per
        document reported as found.
    """
    # list() instead of .keys() so the slice below works on any dict view.
    input_uid = list(input_dict)
    in_portrait_result = []
    for iter_count in range(0, len(input_uid), FILTER_ITER_COUNT):
        iter_user_list = input_uid[iter_count: iter_count + FILTER_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                    body={'ids': iter_user_list}, _source=False, fields=['photo_url', 'uname'])['docs']
        except Exception:
            # Skip the failed batch. The rows must be rebuilt per batch:
            # reusing the previous batch's rows here would duplicate them
            # (or hit an unbound name on the very first batch).
            continue
        in_portrait_result.extend([
            [item['_id'], item['fields']['uname'][0], item['fields']['photo_url'][0], input_dict[item['_id']]]
            for item in portrait_result
            if item['found']
        ])

    return in_portrait_result
コード例 #54
0
def filter_in_uid(input_dict):
    """Return rows for the uids of ``input_dict`` found in the portrait index.

    Uids are queried ``FILTER_ITER_COUNT`` at a time. A batch whose request
    raises is skipped, so the result is best-effort.

    Args:
        input_dict: dict keyed by uid; each value ends up as the last
            element of that uid's row.

    Returns:
        list of ``[uid, uname, photo_url, input_dict[uid]]`` rows for the
        documents reported as found.
    """
    # Materialise the keys so they can be sliced into batches.
    input_uid = list(input_dict)
    all_count = len(input_uid)
    in_portrait_result = []
    iter_count = 0
    while iter_count < all_count:
        iter_user_list = input_uid[iter_count: iter_count + FILTER_ITER_COUNT]
        iter_count += FILTER_ITER_COUNT
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                    body={'ids': iter_user_list}, _source=False, fields=['photo_url', 'uname'])['docs']
        except Exception:
            portrait_result = []
        # Start from an empty list for every batch so that a failed or empty
        # batch neither raises on an unbound name nor re-adds earlier rows.
        iter_in_portrait = []
        for item in portrait_result:
            if item['found']:
                iter_in_portrait.append([item['_id'], item['fields']['uname'][0], item['fields']['photo_url'][0], input_dict[item['_id']]])
        in_portrait_result.extend(iter_in_portrait)

    return in_portrait_result
コード例 #55
0
def submit_identify_in_uid(input_data):
    """Record an upload of uids as recommend-in submissions in Redis.

    Each uploaded line contributes its first 10 characters as a uid; lines
    shorter than 10 characters are ignored. Uids already present in the
    portrait index, or already keys of the Redis ``compute`` hash, are
    dropped. Every remaining uid is written to the day's submit hash and to
    the submitting user's own hash.

    Args:
        input_data: dict with the keys ``date`` (string, part of the Redis
            hash names), ``user`` (the submitting user) and ``upload_data``
            (newline-separated lines starting with a uid).

    Returns:
        ``True`` once all remaining uids have been written.
    """
    date = input_data['date']
    submit_user = input_data['user']
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    uid_list = [line[:10] for line in upload_data.split('\n') if len(line) >= 10]
    # identify the uids that exist neither in user_portrait nor in compute
    # step1: filter in user_portrait
    # NOTE(review): an upload without any 10-character line leaves uid_list
    # empty and is still sent to mget; confirm the desired result for it.
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                                  doc_type=portrait_index_type,
                                                  body={'ids': uid_list},
                                                  _source=False)['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result
                    if not exist_item['found']]
    # step2: filter in compute
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # Merge the submitting user into the existing record, reading it
            # from the same hash that is written below.
            # NOTE(review): assumes the submit hash already holds an entry
            # for every uid of the influence/sensitive hashes; confirm.
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
コード例 #56
0
ファイル: utils.py プロジェクト: ystone1025/info_consume
def get_user_tag(uid_list, submit_user):
    """Look up the tags a given submitter attached to each user.

    For every returned user document the ``"<submit_user>-tag"`` field is
    split on ``'&'`` and each entry has its ``'-'`` characters replaced by
    ``':'``.

    Args:
        uid_list: list of user ids to look up.
        submit_user: name of the user whose tag field is read.

    Returns:
        dict keyed by the returned document ``_id``. Each value is the list
        of tag strings, empty when the document has no ``_source`` or no
        non-empty tag field. An empty ``uid_list`` returns ``{}`` without
        querying.
    """
    result = {}
    if not uid_list:
        # A multi-get with no ids is refused by Elasticsearch.
        return result
    user_result = es.mget(index=user_index_name,
                          doc_type=user_index_type,
                          body={'ids': uid_list})['docs']
    submit_user_tag = submit_user + '-tag'
    for user_item in user_result:
        uid = user_item['_id']
        result[uid] = []
        # Entries without '_source' (presumably not-found documents) keep
        # the empty list.
        source = user_item.get('_source', {})
        submit_user_attribute = source.get(submit_user_tag, '')
        if not submit_user_attribute:
            continue
        for item in submit_user_attribute.split('&'):
            result[uid].append(item.replace('-', ':'))

    return result
コード例 #57
0
ファイル: utils.py プロジェクト: ystone1025/info_consume
def show_out_uid():
    """List the portrait details of every user recommended for removal.

    The uids come from the JSON lists stored as values of the Redis hash
    ``recommend_delete_list``. Uids returned by ``all_delete_uid()`` are
    left out of the result.

    Returns:
        list of rows, one per remaining user, holding the values of
        ``uid, uname, location, statusnum, fansnum, domain, topic_string,
        importance, influence, activeness, sensitive`` in that order.
        ``topic_string`` has its ``'&'`` separators replaced by ``','``.
        The four score fields are rescaled to
        ``log10(value / top * 9 + 1) * 100`` using the matching value from
        ``get_top_influence``. The list is empty when nothing is
        recommended.
    """
    fields = ["uid", "uname", "location", "statusnum", "fansnum", "domain", "topic_string", "importance", "influence", "activeness", "sensitive"]
    out_list = []
    # hgetall already delivers every value of the hash, so decode those
    # instead of issuing one more hget per key.
    recommend_dict = r_out.hgetall("recommend_delete_list")
    for uid_json in recommend_dict.values():
        out_list.extend(json.loads(uid_json))
    if not out_list:
        return out_list  # no one is recommended to out

    # Score field -> value used as the denominator of the rescaling.
    top_values = {
        "influence": get_top_influence("influence"),
        "activeness": get_top_influence("activeness"),
        "importance": get_top_influence("importance"),
        "sensitive": get_top_influence("sensitive"),
    }
    out_list = list(set(out_list))
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()
    return_list = []
    for doc in detail:
        if doc['_id'] in filter_uid:
            continue
        # NOTE(review): a uid without a portrait document has no '_source'
        # and raises KeyError here; confirm whether it should be skipped.
        source = doc['_source']
        detail_info = []
        for item in fields:
            if item == "topic_string":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in top_values:
                detail_info.append(math.log(source[item] / float(top_values[item]) * 9 + 1, 10) * 100)
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list