Example #1
File: utils.py Project: SwoJa/ruman
def get_sort(uid, fe):
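    # Rank a user's weekly-average BCI: 'in_score'/'in_top' within topic `fe`,
    # 'all_score'/'all_top' against the whole bci_history index.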
    result = {}
    try:
        u_bci = es.get(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, id=uid, fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        result['in_score'] = u_bci
    except:
        u_bci = 0  # fall back so the range query below still works
        result['in_score'] = ""
    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[
                            {'range':{'bci_week_ave':{'gte':u_bci}}},
                            {'term':{'topic_string':fe}}]
                    }
                }
            }
        }
    }
    in_top_response = es.search(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, body=query_body)
    result['in_top'] = in_top_response['hits']['total']
    print 'essearch'
    print in_top_response

    try:
        u_bci = es.get(index='bci_history', doc_type='bci', id=uid, fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        bci_max = get_max_value(es_user_profile, "bci_history", "bci")
        result['all_score'] = math.log(u_bci / float(bci_max) * 9 + 1, 10) * 100
    except:
        u_bci = 0  # fall back so the count query below still runs
        result['all_score'] = ""
    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[
                            {'range':{'bci_week_ave':{'gte':u_bci}}}]
                    }
                }
            }
        }
    }
    result['all_top'] = es.search(index='bci_history', doc_type='bci',body=query_body)['hits']['total']

    
    return json.dumps(result)
Example #2
File: utils.py Project: SwoJa/ruman
def get_person_value(uid):
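    # Score a user's overall value from verification type, account age,
    # follower count and influence, each normalized and weighted.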
    # verification type
    try:
        value_static = es_bci_history.get(index=bci_history_index_name, doc_type=bci_history_index_type, id=uid)
        value_inf = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)
        static = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type, id=uid)
    except:
        return 'no'
    fans_max = es_bci_history.search(index = bci_history_index_name,doc_type = bci_history_index_type,body={'query':{'match_all':{}},'sort':{'user_fansnum':{'order':'desc'}},'size':1})['hits']['hits'][0]['_source']['user_fansnum']
    print 'max:',fans_max
    if not static['found']:
        return 'no'
    else:
        static = static['_source']
    #print "static",static
    try:
        ver_calue = verified_value[static['verified_type']]
    except:
        ver_calue = 0
    # account age in years (create_at is a unix timestamp; 31536000 s = 365 days)
    times = math.ceil((time.time() - int(static['create_at'])) / 31536000)
    # follower count, log-scaled against the maximum fansnum
    fans_value = math.log(float(value_static['_source']['user_fansnum']) / float(fans_max) * 9 + 1, 10)
    influence_max = es_user_portrait.search(index = portrait_index_name,doc_type = portrait_index_type,body={'query':{'match_all':{}},'sort':{'influence':{'order':'desc'}},'size':1})['hits']['hits'][0]['_source']['influence']
    influence_value = float(value_inf['_source']['influence'])/float(influence_max)
    final = (ver_value * 0.1 + times * 0.05 + fans_value + influence_value * 1.2) * 30
    print ver_value, times, fans_value, influence_value
    return final
Example #3
File: utils.py Project: lvleilei/screen
def get_group_user_track(uid):
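    # Rebuild a day-by-day location track: for each day's geo dict, keep the
    # most frequent location as a [date, geo] pair.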
    results = []
    #step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(
        portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(),
                               key=lambda x: x[1],
                               reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY

    return results
Example #4
File: views.py Project: SwoJa/ruman
def ajax_get_group_detail():
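    # Build a portrait row for every uid in the stored group, log-scaling the
    # activeness/importance/influence fields to 0-100 against their maxima.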
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list}, fields=SOCIAL_SENSOR_INFO)['docs']
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["fields"][iter_item][0].split('&'))
                            temp.append(item["fields"][iter_item][0].split('&'))
                        elif iter_item == "activeness":
                            temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                        elif iter_item == "importance":
                            temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                        elif iter_item == "influence":
                            temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                        else:
                            temp.append(item["fields"][iter_item][0])
                    portrait_detail.append(temp)

    return json.dumps(portrait_detail)
Example #5
File: utils.py Project: SwoJa/ruman
def get_text_detail(task_name, ts, text_type, user, order, size=100):
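    # Dispatch on text_type: 0/1/7 fetch origin-weibo detail, 2-6 fetch
    # retweet/comment detail filtered by message_type or sentiment.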
    results = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    social_sensors = json.loads(task_detail["social_sensors"])
    
    if int(text_type) == 0: # hot original weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 1)

    elif int(text_type) == 1: # hot retweeted weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 2)

    elif int(text_type) == 2: # ordinary retweets
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 3)

    elif int(text_type) == 3: # ordinary comments
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 2)

    elif int(text_type) == 4: # positive weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "1")

    elif int(text_type) == 5: # neutral weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "0")

    elif int(text_type) == 6: # negative weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", ["2", "3", "4", "5", "6"])
    elif int(text_type) == 7: # sensitive weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 3)

    else:
        print "error"
    print '******************'
    return results
Example #6
File: search.py Project: SwoJa/ruman
def search_identify_uid(uid):
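    # Return 1 if the uid exists in the user-portrait index, else 0.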
    result = 0
    try:
        user_dict = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)
        result = 1
    except:
        result = 0
    return result
Example #7
File: views.py Project: SwoJa/ruman
def ajax_stop_task():
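    # Stop a sensing task: set its processing_status to '0' and reindex it.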
    task_name = request.args.get('task_name','') # must
    user = request.args.get('user', '')
    if task_name and user:
        _id = user + "-" + task_name
        task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
        #task_detail["finish"] = finish_signal
        task_detail['processing_status'] = '0'
        es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)
        return json.dumps(['1'])
    else:
        return json.dumps([])
Example #8
def new_get_sensitive_words(uid):
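    # Return the user's sensitive words sorted by weight, highest first.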
    try:
        user_portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid)['_source']
    except:
        user_portrait_result = {}
    if user_portrait_result:
        try:
            sensitive_dict = json.loads(user_portrait_result['sensitive_dict'])
        except:
            sensitive_dict = {}
    else:
        sensitive_dict = {}
    sort_sensitive_dict = sorted(sensitive_dict.items(), key=lambda x:x[1], reverse=True)
    
    return sort_sensitive_dict
Example #9
File: description.py Project: SwoJa/ruman
def conclusion_on_activeness(uid):
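    # Average the user's recent daily activeness values and map the mean to a
    # canned conclusion from activeness_conclusion_dict.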
    # test
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    try:
        influ_result = es.get(index=index_name, doc_type=index_type,
                              id=uid)['_source']
    except:
        influ_result = {}
        result = activeness_conclusion_dict['0']
        return result

    # generate time series---keys
    now_ts = time.time()
    now_ts = datetime2ts('2013-09-12')
    activeness_set = set()
    for i in range(N):
        ts = ts2datetime(now_ts - i * 3600 * 24)
        activeness_set.add(pre_activeness + ts)

    # separate the activeness keys from the influence keys
    keys_set = set(influ_result.keys())
    activeness_keys = keys_set & activeness_set

    if activeness_keys:
        activeness_value = []
        for key in activeness_keys:
            activeness_value.append(influ_result[key])
        mean, std_var = level(activeness_value)
        if mean < activeness_level[0]:
            result = activeness_conclusion_dict['1']
        elif mean >= activeness_level[0] and mean < activeness_level[1]:
            result = activeness_conclusion_dict['2']
        elif mean >= activeness_level[1] and mean < activeness_level[2]:
            result = activeness_conclusion_dict["3"]
        elif mean >= activeness_level[2] and mean < activeness_level[3]:
            result = activeness_conclusion_dict["4"]
        else:
            result = activeness_conclusion_dict["5"]
    else:
        result = activeness_conclusion_dict['0']  # no activeness keys recorded

    return result
Example #10
File: views.py Project: SwoJa/ruman
def ajax_get_task_detail_info():
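    # Assemble the full task record: parsed sensors, history timestamps
    # (newest first) and a portrait row per sensor, sorted by field x[5].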
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    #task_detail['keywords'] = json.loads(task_detail['keywords'])
    #task_detail["sensitive_words"]= json.loads(task_detail["sensitive_words"])
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        task_detail['history_status'] = sorted(history_status, reverse=True)
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []

    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail

    return json.dumps(task_detail)
Example #11
File: views.py Project: SwoJa/ruman
def ajax_get_clustering_topic():
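    # Return up to five clustering topics for a snapshot, dropping
    # single-character words.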
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', '')
    ts = int(request.args.get('ts', '')) # timestamp: 123456789
    topic_list = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    #burst_reason = task_detail['burst_reason']
    burst_reason = 1
    filter_list = []
    if burst_reason:
        topic_list = task_detail.get("clustering_topic", [])
        if topic_list:
            topic_list = json.loads(topic_list)
            for item in topic_list:
                tmp = []
                for word in item:
                    if len(word) > 1:
                        tmp.append(word)
                filter_list.append(tmp)

    return json.dumps(filter_list[:5])
Example #12
File: views.py Project: SwoJa/ruman
def ajax_revise_task():
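    # Update a task's stop_time and/or restart it (finish == 0), then
    # reindex; returns ['1'] on success and [] otherwise.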
    task_name = request.args.get('task_name','') # must
    finish = request.args.get("finish", "10")
    stop_time = request.args.get('stop_time', '') # timestamp
    user = request.args.get('user', '')

    _id = user + '-' + task_name
    now_ts = time.time()
    if stop_time and float(stop_time) < now_ts:  # stop_time arrives as a string
        return json.dumps([])

    if task_name and user:
        task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
        if stop_time:
            task_detail['stop_time'] = stop_time
        if int(finish) == 0:
            task_detail['finish'] = finish
            task_detail['processing_status'] = "1" # 重启时将处理状态改为
        if stop_time or int(finish) == 0:
            es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)
            return json.dumps(['1'])
    return json.dumps([])
Example #13
File: utils.py Project: SwoJa/ruman
def get_group_user_track(uid):
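    # (Same day-by-day location track builder as Example #3.)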
    results = []
    #step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x:x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY

    return results
Example #14
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
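    # Fetch retweets/comments of the task's weibo within the time window,
    # filtered by message_type or sentiment, joined with profile info.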
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task,
                                       doc_type=_id,
                                       id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])

    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "range": {
                                "timestamp": {
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }
                        }, {
                            "terms": {
                                "root_mid": mid_list
                            }
                        }]
                    }
                }
            }
        },
        "sort": {
            "timestamp": {
                "order": "desc"
            }
        },
        "size": 100
    }

    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append(
            {"term": {
                text_type: type_value
            }})
    if text_type == "sentiment":
        #if isinstance(type_value, str):
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append(
                {"term": {
                    text_type: type_value
                }})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append(
                {"terms": {
                    text_type: type_value
                }})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. query the weibo text
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []
    # 2. gather profile info for each weibo
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append(item['uid'])
                temp.append("")
            temp.append(item["text"])
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            temp.append(item["message_type"])
            results.append(temp)

    return results
Example #15
File: description.py Project: SwoJa/ruman
def conclusion_on_influence(uid):
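    # Return [activeness%, rank, influence%, rank, importance%, rank, total]:
    # percentages are scaled to the current maxima, ranks count users above.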
    # test
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    total_number = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type)["count"]

    try:
        influ_result = es.get(index=index_name, doc_type=index_type, id=uid)['_source']
    except:
        influ_result = {}
        result = [0, 0, 0, 0, 0, 0, total_number] # activeness%, rank, influence%, rank, importance%, rank, total
        return result

    aver_activeness = influ_result.get("aver_activeness", 0)
    aver_influence = influ_result.get("aver_influence", 0)
    aver_importance = influ_result.get('aver_importance', 0)
    influence_query_body = {
        "query":{
            "match_all": {}
        },
        "sort": {"aver_influence": {"order": "desc"}},
        "size": 1
    }
    top_influence = es.search(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=influence_query_body)['hits']['hits'][0]['sort'][0]

    importance_query_body = {
        "query":{
            "match_all": {}
        },
        "sort": {"aver_importance": {"order": "desc"}},
        "size": 1
    }
    top_importance = es.search(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=importance_query_body)['hits']['hits'][0]['sort'][0]

    activeness_query_body = {
        "query":{
            "match_all": {}
        },
        "sort": {"aver_activeness": {"order": "desc"}},
        "size": 1
    }
    top_activeness = es.search(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=activeness_query_body)['hits']['hits'][0]['sort'][0]

    influence_query_body = {
        "query": {
            "filtered":{
                "filter": {
                    "range": {
                        "aver_influence": {
                            "gt": aver_influence
                        }
                    }
                }
            }
        }
    }

    activeness_query_body = {
        "query": {
            "filtered":{
                "filter": {
                    "range": {
                        "aver_activeness": {
                            "gt": aver_activeness
                        }
                    }
                }
            }
        }
    }

    importance_query_body = {
        "query": {
            "filtered":{
                "filter": {
                    "range": {
                        "aver_importance": {
                            "gt": aver_importance
                        }
                    }
                }
            }
        }
    }

    influence_count = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=influence_query_body)['count']
    activeness_count = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=activeness_query_body)['count']
    importance_count = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=importance_query_body)['count']

    result = [int(aver_activeness*100.0/top_activeness), activeness_count, int(aver_influence*100.0/top_influence), influence_count, int(aver_importance*100.0/top_importance), importance_count, total_number]
    return result
Example #16
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
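    # Load the snapshot's origin/retweeted/sensitive weibo, join text and
    # profile info, rank them, then fold near-duplicate posts together.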
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task,
                                       doc_type=_id,
                                       id=ts)['_source']
    print '37', index_sensing_task, _id
    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    tmp_duplicate_dict = dict()
    for k, v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]

    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()
    print len(mid_list)
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        },
        "size": 1000,
        "sort": {
            "timestamp": {
                "order": "desc"
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    print es_text
    exist_es = es_text.indices.exists(index_name)
    print exist_es
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict()  # mid -> weibo source
    portrait_dict = dict()  # uid -> profile info
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {
                        "nick_name": item["fields"]["nick_name"][0],
                        "photo_url": item["fields"]["photo_url"][0]
                    }
                else:
                    portrait_dict[item['_id']] = {
                        "nick_name": item['_id'],
                        "photo_url": ""
                    }

        if order == "total":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[1],
                                 reverse=True)[:10]
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[2],
                                 reverse=True)[:10]
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[3],
                                 reverse=True)[:10]
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        results_dict = dict()
        mid_index_dict = dict()
        for item in sorted_list:  # size
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, keywords_string, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                if message_type == 1:
                    temp.append(1)
                elif message_type == 2:
                    temp.append(3)
                else:
                    temp.append(iter_text['message_type'])
                # extract keywords from the text
                f_key = get_weibo_single(iter_text['text'])
                temp.append(
                    sorted(f_key.iteritems(), key=lambda x: x[1],
                           reverse=True))

                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                temp.append(iter_text['timestamp'])
                temp.append(mid_value[mid])
                temp.append(mid)

                results.append(temp)
            count_n += 1

        results = sorted(results,
                         key=operator.itemgetter(-4, -2, -6),
                         reverse=True)  # sensitive flag, topic value, retweet count
        sort_results = []
        count = 0
        for item in results:
            sort_results.append([item])
            mid_index_dict[item[-1]] = count
            count += 1

        if tmp_duplicate_dict:
            remove_list = []
            value_list = tmp_duplicate_dict.values()  # [[mid, mid], ]
            for item in value_list:
                tmp = []
                for mid in item:
                    if mid in mid_index_dict:  # index 0 is a valid position
                        tmp.append(mid_index_dict[mid])
                if len(tmp) > 1:
                    tmp_min = min(tmp)
                else:
                    continue
                tmp.remove(tmp_min)
                for iter_count in tmp:
                    sort_results[tmp_min].extend(sort_results[iter_count])
                    remove_list.append(sort_results[iter_count])
            if remove_list:
                for item in remove_list:
                    sort_results.remove(item)

    return sort_results
Example #17
def get_task_detail_2(task_name, ts, user):
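    # Aggregate a task's snapshots: portrait rows for its sensors plus weibo
    # counts and user sets accumulated over the history timestamps.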
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0 # counter

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")

    if social_sensors:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":social_sensors}, fields=SOCIAL_SENSOR_INFO)['docs']
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    elif iter_item == "activeness":
                        temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                    elif iter_item == "importance":
                        temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                    elif iter_item == "influence":
                        temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
        portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)

    time_series = [] # timestamps
    #positive_sentiment_list = [] # sentiment series
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = [] # weibo counts per snapshot
    retweeted_weibo_list = []
    #retweeted_weibo_count = [] # how often others retweeted them
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = [] # burst timestamps
    important_user_set = set() # important users
    out_portrait_users = set() # users not yet in the portrait index

    ts = int(ts)
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0]) # 到目前为止的所有的时间戳

    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"]) # real
            retweeted_weibo_list.append(item['retweeted_weibo_number']) # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list) # not in the portrait database
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)

            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # tally the burst reasons and draw the corresponding conclusions
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])

    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])

    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # heat of each user
    """

    # fetch personal information for the important users
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    social_sensor_set = set(social_sensors)
    user_detail_info = [] #
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                if not uname or uname == "未知": # "未知" means "unknown"
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                if item['fields']['uid'][0] in social_sensor_set:
                    temp.append(1)
                else:
                    temp.append(0)
                user_detail_info.append(temp)
    # sort
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)
    else:
        user_detail_info = []

    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
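        # e.g. 'bci_20161205': the daily BCI index for the day before ts (index naming assumed)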
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci", body={"ids":out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    #temp.append(item['_source']['fansnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index/float(top_influence)*9+1, 10)*100)
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)
    print len(out_user_detail_info)
    if len(out_user_detail_info):
        print "sort"
        out_user_detail_info = sorted(out_user_detail_info, key=lambda x:x[4], reverse=True)


    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list # burst times and their causes
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #results['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count
    results['social_sensors_detail'] = portrait_detail

    return results
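
The 0-100 log scaling repeated throughout these examples can be factored into a
standalone helper. A minimal sketch (the helper name and the sample numbers are
illustrative, not taken from the source project):

import math

def log_scale(value, top_value):
    # Map a raw score onto 0..100 with a log10 curve:
    # value == 0 gives 0, value == top_value gives 100.
    return math.log(value / float(top_value) * 9 + 1, 10) * 100

# e.g. an influence of 500 against a global maximum of 5000:
# log10(0.1 * 9 + 1) * 100 = log10(1.9) * 100, roughly 27.9
print(log_scale(500, 5000))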
Example #29
0
File: utils.py Project: SwoJa/ruman
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        }
    }
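    # Note: the 'filtered' query form is Elasticsearch 1.x/2.x syntax;
    # later versions replaced it with a bool query carrying a filter clause.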

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # text content
    portrait_dict = dict() # profile background info
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # the _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x:x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x:x[-1], reverse=True)

    return results
Example #30
0
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id,
                         id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict()  # text content
    portrait_dict = dict()  # profile background info
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # the _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {
                        "nick_name": item["fields"]["nick_name"][0],
                        "photo_url": item["fields"]["photo_url"][0]
                    }
                else:
                    portrait_dict[item['_id']] = {
                        "nick_name": item['_id'],
                        "photo_url": ""
                    }

        if order == "total":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[1],
                                 reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[2],
                                 reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[3],
                                 reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x: x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x: x[-1], reverse=True)

    return results
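
A hypothetical call, assuming a sensing task named 'example_task' exists for
user 'admin' at the given timestamp (all three values are invented):

# Each returned row is:
# [uid, nick_name, photo_url, text, sentiment, date, geo, message_type,
#  retweeted, comment, sensitive]
rows = get_sensitive_text_detail('example_task', 1480003200, 'admin', 'retweeted')
for row in rows[:10]:
    print("%s retweeted %s times" % (row[1], row[8]))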
Example #31
0
File: utils.py Project: lvleilei/screen
def get_sort(uid, fe):
    result = {}
    try:
        u_bci = es.get(index=BCI_INDEX_NAME,
                       doc_type=BCI_INDEX_TYPE,
                       id=uid,
                       fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        #u_bci = es.get(index='user_portrait_1222', doc_type='user', id=uid,fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        result['in_score'] = u_bci
    except:
        result['in_score'] = ""
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'range': {
                                'bci_week_ave': {
                                    'gte': u_bci
                                }
                            }
                        }, {
                            'term': {
                                'topic_string': fe
                            }
                        }]
                    }
                }
            }
        }
    }
    result['in_top'] = es.search(index=BCI_INDEX_NAME,
                                 doc_type=BCI_INDEX_TYPE,
                                 body=query_body)['hits']['total']
    # print 'essearch'
    # print es.search(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE,body=query_body)

    try:
        u_bci = es.get(index='bci_history',
                       doc_type='bci',
                       id=uid,
                       fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        #u_bci = es.get(index='user_portrait_1222', doc_type='user', id=uid,fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        # print "trymax"
        bci_max = get_max_value(es_user_profile, "bci_history", "bci")
        # print "max",bci_max
        result['all_score'] = math.log(u_bci / float(bci_max) * 9 + 1,
                                       10) * 100
    except:
        result['all_score'] = ""
        result['all_top'] = ""
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'range': {
                                'bci_week_ave': {
                                    'gte': u_bci
                                }
                            }
                        }]
                    }
                }
            }
        }
    }
    result['all_top'] = es.search(index='bci_history',
                                  doc_type='bci',
                                  body=query_body)['hits']['total']

    #result = es.search(index='user_portrait_1222', doc_type='user',body=query_body)
    # return json.dumps([result['hits']['total'],u_bci])
    return json.dumps(result)
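
get_sort returns a JSON string; a hypothetical caller (the uid and the topic
are invented) might decode it like this:

import json

ranking = json.loads(get_sort('1234567890', 'politics'))
# in_score / in_top: the user's weekly BCI and how many users on the same
# topic score at least as high; all_score / all_top: the log-scaled score
# and the same count across the whole bci_history index.
print("topic rank %s, overall rank %s" % (ranking['in_top'], ranking['all_top']))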
Example #32
0
def new_get_user_portrait(uid, admin_user):
    results = {}
    print 'jln ',es_user_portrait,portrait_index_name
    try:
        user_portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid)['_source']
    except:
        user_portrait_result = {}
    if not user_portrait_result:
        results['tag_remark'] = {}
        results['attention_information'] = {}
        results['tendency'] = {}
        results['group_tag'] = []
    else:
        print 'step1'
        #step1: get attention_information
        #sensitive words
        try:
            sensitive_words_dict = json.loads(user_portrait_result['sensitive_dict'])
        except:
            sensitive_words_dict = {}
        sort_sensitive_words = sorted(sensitive_words_dict.items(), key=lambda x:x[1], reverse=True)
        results['attention_information'] = {'sensitive_dict': sort_sensitive_words}
        #keywords
        try:
            #sort_keywords = json.loads(user_portrait_result['keywords'])
            keywords_list = json.loads(user_portrait_result['keywords'])
        except:
            #sort_keywords = []
            keywords_list = {}
        keywords_dict = dict()
        for item in keywords_list:
            keywords_dict[item[0]] = item[1]
        filter_word_dict = keyword_filter(keywords_dict)
        sort_keywords = sorted(filter_word_dict.items(), key=lambda x:x[1], reverse=True)
        results['attention_information']['keywords'] = sort_keywords
        #hashtag
        try:
            hashtag_dict = json.loads(user_portrait_result['hashtag_dict'])
        except:
            hashtag_dict = {}
        sort_hashtag = sorted(hashtag_dict.items(), key=lambda x:x[1], reverse=True)
        results['attention_information']['hashtag'] = sort_hashtag
        #step2: get tendency_information
        results['tendency'] = {'domain':user_portrait_result['domain']}
        results['tendency']['topic'] = user_portrait_result['topic_string'].split('&')[0]
        #add school information
        
        results['tendency']['is_school'] = user_portrait_result['is_school']
        results['tendency']['school'] = user_portrait_result['school_string']
        
        results['tendency']['character_sentiment'] = user_portrait_result['character_sentiment']
        results['tendency']['character_text'] = user_portrait_result['character_text']
        #step3: get tag_information
        #tag
        try:
            admin_tag = user_portrait_result[admin_user + '-tag']
        except:
            admin_tag = {}
        if not admin_tag:
            results['tag_remark'] = {'tag': []}
        else:
            tag_list = admin_tag.split('&')
            results['tag_remark'] = {'tag': tag_list}
        #remark
        try:
            remark = user_portrait_result['remark']
        except:
            remark = ''
        results['tag_remark']['remark'] = remark
        #step4: get group_tag information
        results['group_tag'] = []
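        # 'group' is assumed to hold '&'-joined entries shaped like
        # '<admin_user>-<tag>'; only this admin's tags are kept below.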
        try:
            group_tag = user_portrait_result['group']
        except:
            group_tag = ''
        if group_tag:
            group_tag_list = group_tag.split('&')
            for group_tag in group_tag_list:
                group_tag_item_list = group_tag.split('-')
                if group_tag_item_list[0] == admin_user:
                    results['group_tag'].append(group_tag_item_list[1])

    return results
Example #33
0
File: description.py Project: SwoJa/ruman
def conclusion_on_influence(uid):
    # test
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    total_number = es.count(index=copy_portrait_index_name,
                            doc_type=copy_portrait_index_type)["count"]

    try:
        influ_result = es.get(index=index_name, doc_type=index_type,
                              id=uid)['_source']
    except:
        influ_result = {}
        # [activeness_pct, rank, influence_pct, rank, importance_pct, rank, total]
        result = [0, 0, 0, 0, 0, 0, total_number]
        return result

    aver_activeness = influ_result.get("aver_activeness", 0)
    aver_influence = influ_result.get("aver_influence", 0)
    aver_importance = influ_result.get('aver_importance', 0)
    influence_query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            "aver_influence": {
                "order": "desc"
            }
        },
        "size": 1
    }
    top_influence = es.search(
        index=copy_portrait_index_name,
        doc_type=copy_portrait_index_type,
        body=influence_query_body)['hits']['hits'][0]['sort'][0]

    importance_query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            "aver_importance": {
                "order": "desc"
            }
        },
        "size": 1
    }
    top_importance = es.search(
        index=copy_portrait_index_name,
        doc_type=copy_portrait_index_type,
        body=importance_query_body)['hits']['hits'][0]['sort'][0]

    activeness_query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            "aver_activeness": {
                "order": "desc"
            }
        },
        "size": 1
    }
    top_activeness = es.search(
        index=copy_portrait_index_name,
        doc_type=copy_portrait_index_type,
        body=activeness_query_body)['hits']['hits'][0]['sort'][0]

    influence_query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "range": {
                        "aver_influence": {
                            "gt": aver_influence
                        }
                    }
                }
            }
        }
    }

    activeness_query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "range": {
                        "aver_activeness": {
                            "gt": aver_activeness
                        }
                    }
                }
            }
        }
    }

    importance_query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "range": {
                        "aver_importance": {
                            "gt": aver_importance
                        }
                    }
                }
            }
        }
    }

    influence_count = es.count(index=copy_portrait_index_name,
                               doc_type=copy_portrait_index_type,
                               body=influence_query_body)['count']
    activeness_count = es.count(index=copy_portrait_index_name,
                                doc_type=copy_portrait_index_type,
                                body=activeness_query_body)['count']
    importance_count = es.count(index=copy_portrait_index_name,
                                doc_type=copy_portrait_index_type,
                                body=importance_query_body)['count']

    result = [
        int(aver_activeness * 100.0 / top_activeness), activeness_count,
        int(aver_influence * 100.0 / top_influence), influence_count,
        int(aver_importance * 100.0 / top_importance), importance_count,
        total_number
    ]
    return result
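
The returned list packs three percentage/rank pairs plus the population size;
a hypothetical reading (the labels and the uid are invented):

res = conclusion_on_influence('1234567890')
labels = ['activeness_pct', 'activeness_above', 'influence_pct',
          'influence_above', 'importance_pct', 'importance_above', 'total_users']
# each *_above value counts the users whose average score is strictly higher
summary = dict(zip(labels, res))
print(summary)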