def ajax_revise_task():
    task_name = request.args.get('task_name', '')  # must
    finish = request.args.get("finish", "10")
    stop_time = request.args.get('stop_time', '')  # timestamp
    user = request.args.get('user', '')

    #now_ts = datetime2ts("2013-09-06")
    _id = user + '-' + task_name
    now_ts = time.time()
    if stop_time and stop_time < now_ts:
        return json.dumps([])

    if task_name and user:
        task_detail = es.get(index=index_manage_sensing_task,
                             doc_type=task_doc_type,
                             id=_id)['_source']
        if stop_time:
            task_detail['stop_time'] = stop_time
        if int(finish) == 0:
            task_detail['finish'] = finish
            task_detail['processing_status'] = "1"  # 重启时将处理状态改为
        if stop_time or int(finish) == 0:
            es.index(index=index_manage_sensing_task,
                     doc_type=task_doc_type,
                     id=_id,
                     body=task_detail)
            return json.dumps(['1'])
    return json.dumps([])
def get_text_detail(task_name, ts, text_type, user, order, size=100):
    results = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]

    if int(text_type) == 0: # 热门原创微博
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 1)

    elif int(text_type) == 1: # 热门转发微博
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 2)

    elif int(text_type) == 2: # 普通转发微博
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 3)

    elif int(text_type) == 3: # 普通评论微博
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 2)

    elif int(text_type) == 4: # 积极微博
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "1")

    elif int(text_type) == 5: # 中性微博
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "0")

    elif int(text_type) == 6: # 消极微博
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", ["2", "3", "4", "5", "6"])
    elif int(text_type) == 7: # 敏感微博
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 3)

    else:
        print "error"

    return results
Beispiel #3
0
def get_group_user_track(uid):
    results = []
    #step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(index=portrait_index_name, doc_type=portrait_index_type,\
                id=uid, _source=False, fields=['activity_geo_dict'])
    except:
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(
        portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    #step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(),
                               key=lambda x: x[1],
                               reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY

    return results
def ajax_get_group_detail():
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    search_result = es.get(index=index_group_manage, doc_type=doc_type_group, id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list}, fields=SOCIAL_SENSOR_INFO)['docs']
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["fields"][iter_item][0].split('&'))
                            temp.append(item["fields"][iter_item][0].split('&'))
                        elif iter_item == "activeness":
                            temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                        elif iter_item == "importance":
                            temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                        elif iter_item == "influence":
                            temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                        else:
                            temp.append(item["fields"][iter_item][0])
                    portrait_detail.append(temp)

    return json.dumps(portrait_detail)
def get_group_user_track(uid):
    results = []
    # step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(
            index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=False, fields=["activity_geo_dict"]
        )
    except:
        portrait_result = {}
    if portrait_result == {}:
        return "uid is not in user_portrait"
    activity_geo_dict = json.loads(portrait_result["fields"]["activity_geo_dict"][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    # step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x: x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ""])
        start_ts = start_ts + DAY

    return results
def ajax_stop_task():
    task_name = request.args.get('task_name','') # must
    user = request.args.get('user', '')
    if task_name and user:
        _id = user + "-" + task_name
        task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
        #task_detail["finish"] = finish_signal
        task_detail['processing_status'] = '0'
        es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)
        return json.dumps(['1'])
    else:
        return json.dumps([])
def ajax_get_group_detail():
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    portrait_detail = []
    top_activeness = get_top_influence("activeness")
    top_influence = get_top_influence("influence")
    top_importance = get_top_influence("importance")
    search_result = es.get(index=index_group_manage,
                           doc_type=doc_type_group,
                           id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            search_results = es.mget(index=portrait_index_name,
                                     doc_type=portrait_index_type,
                                     body={"ids": uid_list},
                                     fields=SOCIAL_SENSOR_INFO)['docs']
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(
                                item["fields"][iter_item][0].split('&'))
                            temp.append(
                                item["fields"][iter_item][0].split('&'))
                        elif iter_item == "activeness":
                            temp.append(
                                math.log(
                                    item['fields']['activeness'][0] /
                                    float(top_activeness) * 9 + 1, 10) * 100)
                        elif iter_item == "importance":
                            temp.append(
                                math.log(
                                    item['fields']['importance'][0] /
                                    float(top_importance) * 9 + 1, 10) * 100)
                        elif iter_item == "influence":
                            temp.append(
                                math.log(
                                    item['fields']['influence'][0] /
                                    float(top_influence) * 9 + 1, 10) * 100)
                        else:
                            temp.append(item["fields"][iter_item][0])
                    portrait_detail.append(temp)

    return json.dumps(portrait_detail)
def ajax_stop_task():
    task_name = request.args.get('task_name', '')  # must
    user = request.args.get('user', '')
    if task_name and user:
        _id = user + "-" + task_name
        task_detail = es.get(index=index_manage_sensing_task,
                             doc_type=task_doc_type,
                             id=_id)['_source']
        #task_detail["finish"] = finish_signal
        task_detail['processing_status'] = '0'
        es.index(index=index_manage_sensing_task,
                 doc_type=task_doc_type,
                 id=_id,
                 body=task_detail)
        return json.dumps(['1'])
    else:
        return json.dumps([])
def ajax_get_task_detail_info():
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    #task_detail['keywords'] = json.loads(task_detail['keywords'])
    #task_detail["sensitive_words"]= json.loads(task_detail["sensitive_words"])
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        temp_list = []
        """
        temp_list.append(history_status[-1])
        print history_status
        for item in history_status[:-1]:
            temp_list.append(item)
        """
        sorted_list = sorted(history_status, key=lambda x:x, reverse=True)
        task_detail['history_status'] = sorted_list
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []

    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail

    return json.dumps(task_detail)
def ajax_get_task_detail_info():
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=_id)['_source']
    history_status = json.loads(task_detail['history_status'])
    if history_status:
        temp_list = []
        """
        temp_list.append(history_status[-1])
        print history_status
        for item in history_status[:-1]:
            temp_list.append(item)
        """
        sorted_list = sorted(history_status, key=lambda x: x, reverse=True)
        task_detail['history_status'] = sorted_list
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []
    """
    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids": task_detail["social_sensors"]})['docs']
        if search_results:
            for item in search_results:
                temp = []
                if item['found']:
                    for iter_item in SOCIAL_SENSOR_INFO:
                        if iter_item == "topic_string":
                            temp.append(item["_source"][iter_item].split('&'))
                        else:
                            temp.append(item["_source"][iter_item])
                    portrait_detail.append(temp)
        if portrait_detail:
            portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)
    task_detail['social_sensors_portrait'] = portrait_detail
    """

    return json.dumps(task_detail)
Beispiel #11
0
def get_text_detail(task_name, ts, text_type, user, order, size=100):
    results = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=_id)["_source"]

    if int(text_type) == 0:  # 热门原创微博
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 1)

    elif int(text_type) == 1:  # 热门转发微博
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 2)

    elif int(text_type) == 2:  # 普通转发微博
        results = get_retweet_weibo_detail(ts, user, task_name, size,
                                           "message_type", 3)

    elif int(text_type) == 3:  # 普通评论微博
        results = get_retweet_weibo_detail(ts, user, task_name, size,
                                           "message_type", 2)

    elif int(text_type) == 4:  # 积极微博
        results = get_retweet_weibo_detail(ts, user, task_name, size,
                                           "sentiment", "1")

    elif int(text_type) == 5:  # 中性微博
        results = get_retweet_weibo_detail(ts, user, task_name, size,
                                           "sentiment", "0")

    elif int(text_type) == 6:  # 消极微博
        results = get_retweet_weibo_detail(ts, user, task_name, size,
                                           "sentiment",
                                           ["2", "3", "4", "5", "6"])
    elif int(text_type) == 7:  # 敏感微博
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 3)

    else:
        print "error"

    return results
def ajax_get_clustering_topic():
    task_name = request.args.get('task_name','') # task_name
    user = request.args.get('user', '')
    ts = int(request.args.get('ts', '')) # timestamp: 123456789
    topic_list = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    #burst_reason = task_detail['burst_reason']
    burst_reason = 1
    filter_list = []
    if burst_reason:
        topic_list = task_detail.get("clustering_topic", [])
        if topic_list:
            topic_list = json.loads(topic_list)
            for item in topic_list:
                tmp = []
                for word in item:
                    if len(word) > 1:
                        tmp.append(word)
                filter_list.append(tmp)

    return json.dumps(filter_list[:5])
def ajax_get_clustering_topic():
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    ts = int(request.args.get('ts', ''))  # timestamp: 123456789
    topic_list = []
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id,
                         id=ts)['_source']
    #burst_reason = task_detail['burst_reason']
    burst_reason = 1
    filter_list = []
    if burst_reason:
        topic_list = task_detail.get("clustering_topic", [])
        if topic_list:
            topic_list = json.loads(topic_list)
            for item in topic_list:
                tmp = []
                for word in item:
                    if len(word) > 1:
                        tmp.append(word)
                filter_list.append(tmp)

    return json.dumps(filter_list[:5])
def ajax_revise_task():
    task_name = request.args.get('task_name','') # must
    finish = request.args.get("finish", "10")
    stop_time = request.args.get('stop_time', '') # timestamp
    user = request.args.get('user', '')

    #now_ts = datetime2ts("2013-09-06")
    _id = user + '-' + task_name
    now_ts = time.time()
    if stop_time and stop_time < now_ts:
        return json.dumps([])

    if task_name and user:
        task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)['_source']
        if stop_time:
            task_detail['stop_time'] = stop_time
        if int(finish) == 0:
            task_detail['finish'] = finish
            task_detail['processing_status'] = "1" # 重启时将处理状态改为
        if stop_time or int(finish) == 0:
            es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)
            return json.dumps(['1'])
    return json.dumps([])
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # 文本信息
    portrait_dict = dict() # 背景信息
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id是mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x:x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x:x[-1], reverse=True)

    return results
Beispiel #16
0
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id,
                         id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict()  # 文本信息
    portrait_dict = dict()  # 背景信息
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id是mid
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {
                        "nick_name": item["fields"]["nick_name"][0],
                        "photo_url": item["fields"]["photo_url"][0]
                    }
                else:
                    portrait_dict[item['_id']] = {
                        "nick_name": item['_id'],
                        "photo_url": ""
                    }

        if order == "total":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[1],
                                 reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[2],
                                 reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[3],
                                 reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x: x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x: x[-1], reverse=True)

    return results
def imagine(uid, query_fields_dict,index_name=portrait_index_name, doctype=portrait_index_type):
    default_setting_dict = query_fields_dict
    print query_fields_dict

    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']

    # tag
    tag_dict = dict()
    tag_dict_value = 0
    if "tag" in query_fields_dict:
        tag_dict_value = query_fields_dict["tag"]
        query_fields_dict.pop("tag")
        for key,value in personal_info.iteritems():
            if "tag-" in key:
                tag_dict[key] = value
    print tag_dict, tag_dict_value

    # size
    sort_size = query_fields_dict["size"]
    query_fields_dict.pop("size")

    keys_list = []
    for k, v in query_fields_dict.iteritems():
        if v:
            keys_list.append(k) #需要进行关联的键

    search_dict = {} # 检索的属性字典
    iter_list = []
    tag_attri_vaule = []

    # 对搜索的键值进行过滤,去掉无用的键
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key] or not query_fields_dict[iter_key]:
                query_fields_dict.pop(iter_key)
                continue
            else:
                iter_list.append(iter_key)
                temp = personal_info[iter_key]
                search_dict[iter_key] = temp.split('&')

            """
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key,''):
                tag_attri_vaule.append(iter_key+"-"+tag_dict[iter_key])
            """

    if len(iter_list) == 0 and len(tag_dict) == 0:
        return []

    query_body = {
        'query':{
            'function_score':{
                'query':{
                    'bool':{
                        'must':[
                        ]
                    }
                }
            }
        }
    }

    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body["size"] = sort_size+100

    for (k,v) in query_fields_dict.items():
        temp = {}
        temp_list = []
        if k in personal_info and v != 0:
            for iter_key in search_dict[k]:
                temp_list.append({'wildcard':{k:{'wildcard':'*'+iter_key+'*', 'boost': v}}})

            query_body['query']['function_score']['query']['bool']['must'].append({'bool':{'should':temp_list}})

    if tag_dict and tag_dict_value:
        temp_list = []
        for k,v in tag_dict.iteritems():
            temp = {"term":{k:v}}
            temp_list.append(temp)
        query_body['query']['function_score']['query']['bool']['must'].append({'bool':{'should':temp_list}})

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid','uname', 'activeness','importance', 'influence', 'sensitive']
    evaluate_index_list = ['activeness', 'importance', 'influence', 'sensitive']
    return_list = []
    count = 0

    if len(result) > 1 and result:
        if result[0]['_id'] != uid:
            top_score = result[0]['_score']
        else:
            top_score = result[1]['_score']

    #get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()
    for item in result:
        if uid == item['_id']:
            score = item['_score']
            continue
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                value = item['_source'][field]
                normal_value = math.log(value / float(evaluate_max_dict[field] )* 9 + 1, 10) * 100
            else:
                normal_value = item['_source'][field]
                if not normal_value:
                    normal_value = item['_id']
            info.append(normal_value)
        info.append(item['_score']/float(top_score)*100)
        return_list.append(info)
        count += 1

        if count == sort_size:
            break

    return_list.append(number)

    temp_list = []
    for field in field_list:
        if field in evaluate_index_list:
            value = personal_info[field]
            normal_value = math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        else:
            normal_value = personal_info[field]
        temp_list.append(normal_value)

    results = []
    results.append(temp_list)
    results.extend(return_list)


    return results
def show_important_users(task_name):
    return_results = dict() # 返回字典
    top_influence = get_top_influence()
    task_detail = es.get(index=index_manage_social_task, doc_type=task_doc_type, id=task_name)["_source"]
    portrait_detail = []
    important_user_set = set() # 重要人物列表
    history_status = json.loads(task_detail['history_status'])
    start_time = int(task_detail['create_at'])
    stop_time = int(task_detail['stop_time'])
    time_series = []
    keywords_list = json.loads(task_detail['keywords'])
    return_results['keywords'] = keywords_list
    return_results['remark'] = task_detail['remark']
    social_sensors = json.loads(task_detail['social_sensors'])
    for item in history_status:
        time_series.append(item[0])

    # return social sensors details
    if social_sensors:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":social_sensors},fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])["docs"]
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    return_results['social_sensors_detail'] = portrait_detail

    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=task_name, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            temp_user_list = json.loads(item['important_users'])
            important_user_set = important_user_set | set(temp_user_list)

    top_importance = get_top_influence('importance')
    top_activeness = get_top_influence('activeness')
    top_influence = get_top_influence('influence')
    important_uid_list = list(important_user_set)
    user_detail_info = [] #
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list},fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence','activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time, keywords_list)
                temp.append(hot_count)
                importance = math.log(item['fields']['importance'][0]/top_importance*9+1, 10)*100
                temp.append("%.2f" %importance)
                temp.append(math.log(item['fields']['influence'][0]/top_influence*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/top_activeness*9+1, 10)*100)
                user_detail_info.append(temp)


    return_results['group_list'] = user_detail_info
    return return_results
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])

    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }},
                            {"terms": {"root_mid": mid_list}}
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        "size": 100
    }

    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append({"term":{text_type: type_value}})
    if text_type == "sentiment":
        #if isinstance(type_value, str):
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append({"term":{text_type: type_value}})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append({"terms":{text_type: type_value}})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. 查询微博
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    #print search_results
    # 2. 获取微博相关信息
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append(item['uid'])
                temp.append("")
            temp.append(item["text"])
            #print item['text']
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            temp.append(item["message_type"])
            results.append(temp)

    return results
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']

    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    tmp_duplicate_dict = dict()
    for k,v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]

        

    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        },
        "size": 1000,
        "sort": {"timestamp": {"order": "desc"}}
    }


    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # 文本信息
    portrait_dict = dict() # 背景信息
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id是mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        results_dict = dict()
        mid_index_dict = dict()
        for item in sorted_list: # size
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                if message_type == 1:
                    temp.append(1)
                elif message_type == 2:
                    temp.append(3)
                else:
                    temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                temp.append(iter_text['timestamp'])
                temp.append(mid_value[mid])
                temp.append(mid)
                results.append(temp)
            count_n += 1

        results = sorted(results, key=operator.itemgetter(-4, -2, -6), reverse=True) # -4 -2 -3
        sort_results = []
        count = 0
        for item in results:
            sort_results.append([item])
            mid_index_dict[item[-1]] = count
            count += 1

        
        if tmp_duplicate_dict:
            remove_list = []
            value_list = tmp_duplicate_dict.values() # [[mid, mid], ]
            for item in value_list:
                tmp = []
                for mid in item:
                    if mid_index_dict.get(mid, 0):
                        tmp.append(mid_index_dict[mid])
                if len(tmp) > 1:
                    tmp_min = min(tmp)
                else:
                    continue
                tmp.remove(tmp_min)
                for iter_count in tmp:
                    sort_results[tmp_min].extend(sort_results[iter_count])
                    remove_list.append(sort_results[iter_count])
            if remove_list:
                for item in remove_list:
                    sort_results.remove(item)
        

    return sort_results
def show_important_users(task_name):
    return_results = dict()  # 返回字典
    top_influence = get_top_influence()
    task_detail = es.get(index=index_manage_social_task,
                         doc_type=task_doc_type,
                         id=task_name)["_source"]
    portrait_detail = []
    important_user_set = set()  # 重要人物列表
    history_status = json.loads(task_detail['history_status'])
    start_time = int(task_detail['create_at'])
    stop_time = int(task_detail['stop_time'])
    time_series = []
    keywords_list = json.loads(task_detail['keywords'])
    return_results['keywords'] = keywords_list
    return_results['remark'] = task_detail['remark']
    social_sensors = json.loads(task_detail['social_sensors'])
    for item in history_status:
        time_series.append(item[0])

    # return social sensors details
    if social_sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": social_sensors},
                                 fields=[
                                     'uid', 'uname', 'domain', 'topic_string',
                                     "photo_url", 'importance', 'influence',
                                     'activeness'
                                 ])["docs"]
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    return_results['social_sensors_detail'] = portrait_detail

    if time_series:
        flow_detail = es.mget(index=index_sensing_task,
                              doc_type=task_name,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            temp_user_list = json.loads(item['important_users'])
            important_user_set = important_user_set | set(temp_user_list)

    top_importance = get_top_influence('importance')
    top_activeness = get_top_influence('activeness')
    top_influence = get_top_influence('influence')
    important_uid_list = list(important_user_set)
    user_detail_info = []  #
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name,
                               doc_type=portrait_index_type,
                               body={"ids": important_uid_list},
                               fields=[
                                   'uid', 'uname', 'domain', 'topic_string',
                                   "photo_url", 'importance', 'influence',
                                   'activeness'
                               ])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                if int(item['fields']['importance']
                       [0]) < IMPORTANT_USER_THRESHOULD:
                    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                hot_count = count_hot_uid(item['fields']['uid'][0], start_time,
                                          stop_time, keywords_list)
                temp.append(hot_count)
                importance = math.log(
                    item['fields']['importance'][0] / top_importance * 9 + 1,
                    10) * 100
                temp.append("%.2f" % importance)
                temp.append(
                    math.log(
                        item['fields']['influence'][0] / top_influence * 9 + 1,
                        10) * 100)
                temp.append(
                    math.log(
                        item['fields']['activeness'][0] / top_activeness * 9 +
                        1, 10) * 100)
                user_detail_info.append(temp)

    return_results['group_list'] = user_detail_info
    return return_results
Beispiel #22
0
def get_task_detail_2(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0 # 计数

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")

    time_series = [] # 时间
    #positive_sentiment_list = [] # 情绪列表
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = [] # 微博列表
    retweeted_weibo_list = []
    #retweeted_weibo_count = [] # 别人转发他的数量
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = [] # 爆发时间列表
    important_user_set = set() # 重要人物列表
    out_portrait_users = set() # 未入库

    ts = int(ts)
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0]) # 到目前为止的所有的时间戳

    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"]) # real
            retweeted_weibo_list.append(item['retweeted_weibo_number']) # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list) # 未入库
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)

            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # 统计爆发原因,下相应的结论
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])

    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])

    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # 每个用户的热度
    """

    # 获取重要用户的个人信息
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    user_detail_info = [] #
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                if not uname or uname == "未知":
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                user_detail_info.append(temp)
    # 排序
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)
    else:
        user_detail_info = []

    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci", body={"ids":out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    uid = item['_source']['uid']
                    temp = get_user_profile([uid], ['nick_name', 'user_location', 'statusnum', 'fansnum'])[0]
                else:
                    temp = [item['_id'], item['_id'], '', '', '']

                """
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    temp.append(item['_source']['statusnum'])
                    temp.append(item['_source']['friendsnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                    temp.append('--')
                    temp.append('--')
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index/float(top_influence)*9+1, 10)*100)
                else:
                    temp.append(0)
                """
                count += 1
                out_user_detail_info.append(temp)

    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list # 爆发时间点,以及爆发原因
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #esults['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count

    return results
Beispiel #23
0
def imagine(uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    default_setting_dict = query_fields_dict
    print query_fields_dict

    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # tag
    tag_dict = dict()
    tag_dict_value = 0
    if "tag" in query_fields_dict:
        tag_dict_value = query_fields_dict["tag"]
        query_fields_dict.pop("tag")
        for key, value in personal_info.iteritems():
            if "tag-" in key:
                tag_dict[key] = value
    print tag_dict, tag_dict_value

    # size
    sort_size = query_fields_dict["size"]
    query_fields_dict.pop("size")

    keys_list = []
    for k, v in query_fields_dict.iteritems():
        if v:
            keys_list.append(k)  #需要进行关联的键

    search_dict = {}  # 检索的属性字典
    iter_list = []
    tag_attri_vaule = []

    # 对搜索的键值进行过滤,去掉无用的键
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key] or not query_fields_dict[iter_key]:
                query_fields_dict.pop(iter_key)
                continue
            else:
                iter_list.append(iter_key)
                temp = personal_info[iter_key]
                search_dict[iter_key] = temp.split('&')
            """
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key,''):
                tag_attri_vaule.append(iter_key+"-"+tag_dict[iter_key])
            """

    if len(iter_list) == 0 and len(tag_dict) == 0:
        return []

    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': []
                    }
                }
            }
        }
    }

    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']
    query_body["size"] = sort_size + 100

    for (k, v) in query_fields_dict.items():
        temp = {}
        temp_list = []
        if k in personal_info and v != 0:
            for iter_key in search_dict[k]:
                temp_list.append({
                    'wildcard': {
                        k: {
                            'wildcard': '*' + iter_key + '*',
                            'boost': v
                        }
                    }
                })

            query_body['query']['function_score']['query']['bool'][
                'must'].append({'bool': {
                    'should': temp_list
                }})

    if tag_dict and tag_dict_value:
        temp_list = []
        for k, v in tag_dict.iteritems():
            temp = {"term": {k: v}}
            temp_list.append(temp)
        query_body['query']['function_score']['query']['bool']['must'].append(
            {'bool': {
                'should': temp_list
            }})

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = [
        'uid', 'uname', 'activeness', 'importance', 'influence', 'sensitive'
    ]
    evaluate_index_list = [
        'activeness', 'importance', 'influence', 'sensitive'
    ]
    return_list = []
    count = 0

    if len(result) > 1 and result:
        if result[0]['_id'] != uid:
            top_score = result[0]['_score']
        else:
            top_score = result[1]['_score']

    #get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()
    for item in result:
        if uid == item['_id']:
            score = item['_score']
            continue
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                value = item['_source'][field]
                normal_value = math.log(
                    value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
            else:
                normal_value = item['_source'][field]
                if not normal_value:
                    normal_value = item['_id']
            info.append(normal_value)
        info.append(item['_score'] / float(top_score) * 100)
        return_list.append(info)
        count += 1

        if count == sort_size:
            break

    return_list.append(number)

    temp_list = []
    for field in field_list:
        if field in evaluate_index_list:
            value = personal_info[field]
            normal_value = math.log(
                value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        else:
            normal_value = personal_info[field]
        temp_list.append(normal_value)

    results = []
    results.append(temp_list)
    results.extend(return_list)

    return results