def get_vary_detail_info(vary_detail_dict, uid_list):
    results = {}
    #get uname
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                            body={'ids':uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found']==True:
            uname = portrait_item['_source']['uname']
            uname_dict[uid] = uname
        else:
            uname_dict[uid] = uid

    #get new vary detail information
    for vary_pattern in vary_detail_dict:
        user_info_list = vary_detail_dict[vary_pattern]
        new_pattern_list = []
        for user_item in user_info_list:
            uid = user_item[0]
            uname= uname_dict[uid]
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list

    return results
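#a minimal usage sketch (not from the original source): vary_detail_dict is
#assumed to map a vary-pattern key to [uid, start_ts, end_ts] items, as the
#loop above implies; the uid and timestamps below are hypothetical.
#vary_detail_dict = {'beijing&shanghai': [['1234567890', 1378000000, 1378600000]]}
#detail = get_vary_detail_info(vary_detail_dict, ['1234567890'])
##detail: {'beijing&shanghai': [['1234567890', uname, start_date, end_date]]}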
def recommentation_in_auto(search_date, submit_user):
    results = []
    #run type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time() - DAY)
    else:
        now_date = ts2datetime(datetime2ts(RUN_TEST_TIME) - DAY)
    recomment_hash_name = 'recomment_' + now_date + '_auto'
    recomment_influence_hash_name = 'recomment_' + now_date + '_influence'
    recomment_sensitive_hash_name = 'recomment_' + now_date + '_sensitive'
    recomment_compute_hash_name = 'compute'
    #step1: get auto
    auto_result = r.hget(recomment_hash_name, 'auto')
    if auto_result:
        auto_user_list = json.loads(auto_result)
    else:
        auto_user_list = []
    #step2: get admin user result
    admin_result = r.hget(recomment_hash_name, submit_user)
    if admin_result:
        admin_user_list = json.loads(admin_result)
    else:
        admin_user_list = []
    #step3: get union user and filter compute/influence/sensitive
    union_user_auto_set = set(auto_user_list) | set(admin_user_list)
    influence_user = set(r.hkeys(recomment_influence_hash_name))
    sensitive_user = set(r.hkeys(recomment_sensitive_hash_name))
    compute_user = set(r.hkeys(recomment_compute_hash_name))
    filter_union_user = union_user_auto_set - (influence_user | sensitive_user | compute_user)
    auto_user_list = list(filter_union_user)
    #step4: get user detail
    results = get_user_detail(now_date, auto_user_list, 'show_in', 'auto')
    return results
def get_geo_track(uid):
    date_results = [] # {'2013-09-01':[(geo1, count1),(geo2, count2)], '2013-09-02'...}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test
    now_date = '2013-09-08'
    ts = datetime2ts(now_date)
    city_list = []
    city_set = set()
    for i in range(7, 0, -1):
        timestamp = ts - i*24*3600
        #print 'timestamp:', ts2datetime(timestamp)
        ip_dict = dict()
        results = r_cluster.hget('ip_'+str(timestamp), uid)
        date = ts2datetime(timestamp)
        date_key = '-'.join(date.split('-')[1:])
        if results:
            ip_dict = json.loads(results)
            geo_dict = ip_dict2geo(ip_dict)
            city_list.extend(geo_dict.keys())
            sort_geo_dict = sorted(geo_dict.items(), key=lambda x:x[1], reverse=True)
            date_results.append([date_key, sort_geo_dict[:2]])
        else:
            date_results.append([date_key, []])

    print 'results:', date_results
    city_set = set(city_list)
    geo_conclusion = get_geo_conclusion(uid, city_set)
    return [date_results, geo_conclusion]
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    tmp_ts = str(datetime2ts(date) - DAY)
    sensitive_string = "sensitive_score_" + tmp_ts
    query_sensitive_body = {
        "query":{
            "match_all":{}
        },
        "size":1,
        "sort":{sensitive_string:{"order":"desc"}}
    }
    try:
        top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
        top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
    except Exception, reason:
        print Exception, reason
        top_sensitive = 400
def count_hot_uid(uid, start_time, stop_time, keywords_list):
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte":start_time,
                                    "lt": stop_time
                                }
                            }},
                            {"term": {"root_uid": uid}}
                        ]
                    }
                }
#                "query":{
#                    "bool":{
#                        "should":[
#                        ]
#                    }
#                }
            }
        }
    }

    if keywords_list:
        query_body['query']['filtered']['filter']['bool']['must'].append({"terms": {"keywords_string": keywords_list}})
        #for word in keywords_list:
            #query_body['query']['filtered']['query']['bool']['should'].append({'wildcard':{"text": "*"+word+"*"}})

    count = 0
    datetime = ts2datetime(float(stop_time))
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        count = es_text.count(index=index_name, doc_type=flow_text_index_type, body=query_body)["count"]
    else:
        count = 0

    datetime_1 = ts2datetime(float(start_time))
    if datetime_1 == datetime:
        pass
    else:
        ts = float(stop_time)
        while 1:
            ts = ts-day_time
            datetime = ts2datetime(ts)
            index_name = flow_text_index_name_pre + datetime
            exist_es = es_text.indices.exists(index_name)
            if exist_es:
                count += es_text.count(index=index_name, doc_type=flow_text_index_type, body=query_body)["count"]
            if datetime_1 == datetime:
                break

    return count
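#a hedged usage sketch for count_hot_uid: count traffic whose root_uid is the
#given user over a two-day window, optionally filtered by keywords. The uid
#and keyword below are hypothetical placeholders.
#start_time = datetime2ts('2013-09-06')
#stop_time = datetime2ts('2013-09-08')
#hot_count = count_hot_uid('1234567890', start_time, stop_time, ['keyword1'])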
def get_recommentation(submit_user):
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i*DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci", body={'ids':submit_user_recomment}, _source=True)['docs']
            user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':submit_user_recomment}, _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            for j in range(len(submit_user_recomment)):
                uid = submit_user_recomment[j]
                bci_dict = user_bci_result[j]
                profile_dict = user_profile_result[j]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                result.append([iter_date, uid, uname, location, fansnum, statusnum, influence, in_portrait])

    return result    
def count_hot_uid(uid, start_time, stop_time):
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte":start_time,
                                    "lt": stop_time
                                }
                            }},
                            {"term": {"root_uid": uid}}
                        ]
                    }
                }
#                "query":{
#                    "bool":{
#                        "should":[
#                        ]
#                    }
#                }
            }
        }
    }


    count = 0
    datetime = ts2datetime(float(stop_time))
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        count = es_text.count(index=index_name, doc_type=flow_text_index_type, body=query_body)["count"]
    else:
        count = 0

    datetime_1 = ts2datetime(float(start_time))
    if datetime_1 == datetime:
        pass
    else:
        ts = float(stop_time)
        while 1:
            ts = ts-day_time
            datetime = ts2datetime(ts)
            index_name = flow_text_index_name_pre + datetime
            exist_es = es_text.indices.exists(index_name)
            if exist_es:
                count += es_text.count(index=index_name, doc_type=flow_text_index_type, body=query_body)["count"]
            if datetime_1 == datetime:
                break

    return count
def get_psycho_status(uid_list):
    results = {}
    uid_sentiment_dict = {}
    #time for es_flow_text
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #run_type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0, WEEK):
        iter_date_ts = start_date_ts + DAY * i
        flow_text_index_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + flow_text_index_date
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size': MAX_VALUE}, _source=False,  fields=['uid', 'sentiment'])['hits']['hits']
        except:
            flow_text_exist = []
        for flow_text_item in flow_text_exist:
            uid = flow_text_item['fields']['uid'][0]
            sentiment = flow_text_item['fields']['sentiment'][0]
            if uid in uid_sentiment_dict:
                try:
                    uid_sentiment_dict[uid][str(sentiment)] += 1
                except:
                    uid_sentiment_dict[uid][str(sentiment)] = 1
            else:
                uid_sentiment_dict[uid] = {str(sentiment): 1}
    #compute first and second psycho_status
    for uid in uid_list:
        results[uid] = {'first':{}, 'second':{}}
        try:
            user_sentiment_result = uid_sentiment_dict[uid]
        except:
            user_sentiment_result = {}
        all_count = sum(user_sentiment_result.values())
        #compute second level sentiment---negative type sentiment
        second_sentiment_count_list = [user_sentiment_result[item] for item in user_sentiment_result if item in SENTIMENT_SECOND]
        second_sentiment_all_count = sum(second_sentiment_count_list)
        for sentiment_item in SENTIMENT_SECOND:
            try:
                results[uid]['second'][sentiment_item] = float(user_sentiment_result[sentiment_item]) / all_count
            except:
                results[uid]['second'][sentiment_item] = 0
        #compute first level sentiment---middle, postive, negative
        user_sentiment_result['7'] = second_sentiment_all_count
        for sentiment_item in SENTIMENT_FIRST:
            try:
                sentiment_ratio = float(user_sentiment_result[sentiment_item]) / all_count
            except:
                sentiment_ratio = 0
            results[uid]['first'][sentiment_item] = sentiment_ratio

    return results
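#a worked sketch of the ratios above, assuming SENTIMENT_SECOND holds the
#negative sub-type labels (e.g. '2'..'6') and SENTIMENT_FIRST is the top
#level ('0' neutral, '1' positive, '7' aggregated negative): with counts
#{'0': 5, '1': 3, '2': 2}, all_count = 10, so results[uid]['second']['2'] = 0.2
#and, after user_sentiment_result['7'] is set to the negative sum 2,
#results[uid]['first'] = {'0': 0.5, '1': 0.3, '7': 0.2}.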
def get_social_inter_content(uid1, uid2, type_mark):
    weibo_list = []
    #get two type relation about uid1 and uid2
    #search weibo list
    now_ts = int(time.time())
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    #uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                                body={'ids': [uid1, uid2]}, _source=False, fields=['uid', 'uname'])['docs']
    except:
        portrait_result = []
    
    for item in portrait_result:
        uid = item['_id']
        if item['found'] == True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = 'unknown'
    #iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i*DAY
        iter_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        query = []
        query.append({'bool':{'must':[{'term':{'uid':uid1}}, {'term':{'directed_uid': int(uid2)}}]}})
        if type_mark=='out':
            query.append({'bool':{'must':[{'term':{'uid':uid2}}, {'term':{'directed_uid': int(uid1)}}]}})
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query': {'bool':{'should': query}}, 'sort':[{'timestamp':{'order': 'asc'}}], 'size':MAX_VALUE})['hits']['hits']
        except:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] =  source['uid']
            weibo['uname'] = uid2uname[weibo['uid']]
            weibo['directed_uid'] = str(source['directed_uid'])
            weibo['directed_uname'] = uid2uname[str(source['directed_uid'])]
            weibo_list.append(weibo)

    return weibo_list
def query_mid_list(ts, keywords_list, time_segment, social_sensors=[]):
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"range": {
                                "timestamp": {
                                    "gte": ts - time_segment,
                                    "lt": ts
                                 }
                            }},
                            {"terms": {"keywords_string": keywords_list}}
                        ]
                    }
                }
            }
        },
        "size": 10000
    }

    if social_sensors:
        query_body['query']['filtered']['filter']['bool']['must'].append({"terms": {"uid": social_sensors}})

    datetime = ts2datetime(ts)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body, fields=["root_mid"])["hits"]["hits"]
    else:
        search_results = []
    origin_mid_list = set() # all related weibo mid list
    if search_results:
        for item in search_results:
            #if item.get("fields", ""):
            #    origin_mid_list.append(item["fields"]["root_mid"][0])
            #else:
            origin_mid_list.add(item["_id"])

    datetime_1 = ts2datetime(ts-time_segment)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_bool = es_text.indices.exists(index_name_1)
    if datetime != datetime_1 and exist_bool:
        search_results_1 = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body, fields=['root_mid'])["hits"]["hits"]
        if search_results_1:
            for item in search_results_1:
                #if item.get("fields", ""):
                #    origin_mid_list.append(item["fields"]["root_mid"][0])
                #else:
                origin_mid_list.add(item["_id"])

    return list(origin_mid_list)
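#a minimal usage sketch for query_mid_list, assuming a one-hour segment;
#the keywords and sensor uid are hypothetical placeholders.
#ts = datetime2ts('2013-09-07')
#mid_list = query_mid_list(ts, ['keyword1', 'keyword2'], 3600)
#sensor_mids = query_mid_list(ts, ['keyword1'], 3600, social_sensors=['1234567890'])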
def query_hot_mid(ts, keywords_list, text_type, size=100):
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte":ts - time_interval,
                                    "lt": ts
                                }
                            }},
                            {"terms": {"keywords_string": keywords_list}},
                            {"term": {"message_type": "0"}}
                        ]
                    }
                }
            }
        },
        "aggs":{
            "all_interests":{
                "terms":{"field": "root_mid", "size": size}
            }
        }
    }

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_bool_1 = es_text.indices.exists(index_name_1)
    print datetime, datetime_1
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["aggregations"]["all_interests"]["buckets"]
    elif datetime != datetime_1 and exist_bool_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["aggregations"]["all_interests"]["buckets"]
    else:
        search_results = []

    hot_mid_list = []
    if search_results:
        for item in search_results:
            print item
            temp = []
            temp.append(item['key'])
            temp.append(item['doc_count'])
            hot_mid_list.append(temp)

    #print hot_mid_list

    return hot_mid_list
def new_get_user_weibo(uid, sort_type):
    results = []
    weibo_list = []
    now_date = ts2datetime(time.time())
    #run_type
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    #step1:get user name
    try:
        user_profile_result = es_user_profile.get(index=profile_index_name, doc_type=profile_index_type,\
                id=uid, _source=False, fields=['nick_name'])
    except:
        user_profile_result = {}
    if user_profile_result:
        uname = user_profile_result['fields']['nick_name'][0]
    else:
        uname = ''
    #step2:get user weibo
    for i in range(7, 0, -1):
        iter_date = ts2datetime(datetime2ts(now_date) - i * DAY)
        index_name = flow_text_index_name_pre + iter_date
        try:
            weibo_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'term': {'uid': uid}}}}, 'sort':sort_type, 'size':100})['hits']['hits']
        except:
            weibo_result = []
        if weibo_result:
            weibo_list.extend(weibo_result)
    if weibo_list:
        print 'weibo_list:', weibo_list[0]
    sort_weibo_list = sorted(weibo_list, key=lambda x:x['_source'][sort_type], reverse=True)[:100]
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        text = source['text']
        ip = source['geo']
        timestamp = source['timestamp']
        date = ts2date(timestamp)
        sentiment = source['sentiment']
        #run_type
        if RUN_TYPE == 1:
            retweet_count = source['retweet_count']
            comment_count = source['comment_count']
            sensitive_score = source['sensitive']
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        city = ip2city(ip)
        results.append([mid, uid, text, ip, city,timestamp, date, retweet_count, comment_count, sensitive_score])

    return results
def new_get_user_location(uid):
    results = {}
    now_date = ts2datetime(time.time())
    now_date_ts = datetime2ts(now_date)
    #run type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME) - DAY
        now_date = ts2datetime(now_date_ts)
    #now ip
    try:
        ip_time_string = r_cluster.hget('new_ip_'+str(now_date_ts), uid)
    except Exception, e:
        raise e
def read_flow_text_sentiment(uid_list):
    """
        读取用户微博(返回结果有微博情绪标签):
        输入数据:uid_list(字符串型列表)
        输出数据:word_dict(用户分词结果字典),weibo_list(用户微博列表)
        word_dict示例:{uid1:{'w1':f1,'w2':f2...}...}
        weibo_list示例:[[uid1,text1,s1,ts1],[uid2,text2,s2,ts2],...](每一条记录对应四个值:uid、text、sentiment、timestamp)
    """
    word_dict = dict()  # 词频字典
    weibo_list = []  # 微博列表
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    now_date_ts = datetime2ts("2013-09-08")
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0, WEEK):
        iter_date_ts = start_date_ts + DAY * i
        flow_text_index_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + flow_text_index_date
        print flow_text_index_name
        try:
            flow_text_exist = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body={"query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}}, "size": MAX_VALUE},
                _source=False,
                fields=["text", "uid", "sentiment", "keywords_dict", "timestamp"],
            )["hits"]["hits"]
        except:
            flow_text_exist = []

        for flow_text_item in flow_text_exist:
            uid = flow_text_item["fields"]["uid"][0].encode("utf-8")
            text = flow_text_item["fields"]["text"][0].encode("utf-8")
            sentiment = int(flow_text_item["fields"]["sentiment"][0])
            ts = flow_text_item["fields"]["timestamp"][0]
            keywords_dict = json.loads(flow_text_item["fields"]["keywords_dict"][0])
            keywords_dict = json.dumps(keywords_dict, encoding="UTF-8", ensure_ascii=False)
            keywords_dict = eval(keywords_dict)

            if word_dict.has_key(uid):
                item_dict = Counter(word_dict[uid])
                keywords_dict = Counter(keywords_dict)
                item_dict = dict(item_dict + keywords_dict)
                word_dict[uid] = item_dict
            else:
                word_dict[uid] = keywords_dict

            weibo_list.append([uid, text, sentiment, ts])

    return word_dict, weibo_list
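#a hedged usage sketch for read_flow_text_sentiment; the uids are
#hypothetical placeholders.
#word_dict, weibo_list = read_flow_text_sentiment(['1234567890', '2345678901'])
##word_dict: {'1234567890': {'w1': 3, 'w2': 1}, ...}
##weibo_list: [['1234567890', 'text...', 0, 1378608000], ...]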
def show_vary_detail(task_name, submit_user, vary_pattern):
    results = []
    task_id = submit_user + '-' + task_name
    #identify the task_id exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        return 'group task does not exist'
    #identify the task status=1
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #get vary detail geo
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    #get vary_detail
    vary_pattern_list = vary_pattern.split('-')
    vary_pattern_key = '&'.join(vary_pattern_list)
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [item[0] for item in uid_ts_list]
    #get user name
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                body={'ids':uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found']==True:
            uname = portrait_item['_source']['uname']
            uname_dict[uid] = uname
        else:
            uname_dict[uid] = uid
    #get vary detail
    new_detail = []
    for vary_item in uid_ts_list:
        uname = uname_dict[vary_item[0]]
        start_date = ts2datetime(vary_item[1])
        end_date = ts2datetime(vary_item[2])
        new_detail.append([vary_item[0], uname, start_date, end_date])
    
    return new_detail
def get_repost_weibo(mid, weibo_timestamp):
    repost_result = []
    index_date = ts2datetime(weibo_timestamp)
    index_name = flow_text_index_name_pre + index_date
    query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'root_mid': mid}},
                        {'range':{'timestamp':{'gte': weibo_timestamp}}},
                        {'term':{'message_type': 2}}
                        ]
                    }
                }
            }
    try:
        flow_text_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                body=query_body)['hits']['hits']
    except:
        flow_text_result = []
    repost_uid_list = [item['_source']['uid'] for item in flow_text_result]
    repost_user_info_dict = get_user_profile_weibo(repost_uid_list)
    statuses = []
    for item in flow_text_result:
        item_source = item['_source']
        item_source['user'] = repost_user_info_dict[item_source['uid']]
        statuses.append(item_source)
    
    return statuses
def search_sentiment_all_portrait(start_date, end_date, time_segment):
    sentiment_ts_count_dict = {}
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)
    search_date_list = []
    domain_list = domain_en2ch_dict.keys()
    for i in range(start_ts, end_ts + DAY, DAY):
        iter_date = ts2datetime(i)
        search_date_list.append(iter_date)
    for sentiment in sentiment_type_list:
        sentiment_ts_count_dict[sentiment] = []
        for date_item in search_date_list:
            ts_count_result_list = []
            for domain in domain_list:
                iter_r_name = r_domain_sentiment_pre + date_item + '_' + sentiment + '_' + domain
                #get ts_count_dict in one day
                ts_count_result = R_DOMAIN_SENTIMENT.hgetall(iter_r_name)
                ts_count_result_list.append(ts_count_result)
            #union all domain to get all portrait
            all_ts_count_result = union_dict(ts_count_result_list)
            #get x and y list by timesegment
            new_ts_count_dict = get_new_ts_count_dict(all_ts_count_result, time_segment, date_item)
            sort_new_ts_count = sorted(new_ts_count_dict.items(), key=lambda x:x[0])
            sentiment_ts_count_dict[sentiment].extend(sort_new_ts_count)
    return sentiment_ts_count_dict
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    db_number = ((date_ts - r_beigin_ts) / (DAY * 7)) % 2 + 1
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
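#a worked sketch of the alternation above: the db number flips between 1 and 2
#every 7 days after r_beigin_ts, e.g. (date_ts - r_beigin_ts) == 13 * DAY
#gives (13 / 7) % 2 + 1 == 2, while a date in the first week gives 1.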
def get_user_geo(uid):
    result = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = dict()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    ts = datetime2ts(now_date)
    #test
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster.hget('ip_'+str(ts), uid)
        if results:
            ip_dict = json.loads(results)
            for ip in ip_dict:
                try:
                    user_ip_result[ip] += ip_dict[ip]
                except:
                    user_ip_result[ip] = ip_dict[ip]
    #print 'user_ip_result:', user_ip_result
    user_geo_dict = ip2geo(user_ip_result)
    user_geo_result = sorted(user_geo_dict.items(), key=lambda x:x[1], reverse=True)

    return user_geo_result
def get_user_geo(uid):
    result = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = dict()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #run_type
    if RUN_TYPE == 1:
        ts = datetime2ts(now_date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1, 8):
        ts = ts - 3600*24
        results = r_cluster.hget('new_ip_'+str(ts), uid)
        if results:
            ip_dict = json.loads(results)
            for ip in ip_dict:
                ip_count = len(ip_dict[ip].split('&'))
                try:
                    user_ip_result[ip] += ip_count
                except:
                    user_ip_result[ip] = ip_count
    user_geo_dict = ip2geo(user_ip_result)
    user_geo_result = sorted(user_geo_dict.items(), key=lambda x:x[1], reverse=True)

    return user_geo_result
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    weibo_list = []
    #step1:get task_name uid
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                        id=task_name, _source=False, fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                        body={'ids':uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        uid = item['_id']
        if item['found']==True:
            uname = item['fields']['uname'][0]
            uid2uname[uid] = uname
    #step3: iter date to search weibo
    weibo_list = []
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    #step4: get query_body
    if sentiment != '2':
        query_body = [{'terms': {'uid': uid_list}}, {'term':{'sentiment': sentiment}}, \
                {'range':{'timestamp':{'gte':start_ts, 'lt': start_ts+DAY}}}]
    else:
        query_body = [{'terms':{'uid':uid_list}}, {'terms':{'sentiment': SENTIMENT_SECOND}},\
                {'range':{'timestamp':{'gte':start_ts, 'lt':start_ts+DAY}}}]
    try:
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                body={'query':{'bool':{'must': query_body}}, 'sort': [{'timestamp':{'order':'asc'}}], 'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_result = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        weibo['uname'] = uid2uname[weibo['uid']]
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except:
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)

    return weibo_list
def new_get_activeness_trend(uid, time_segment):
    results = {}
    try:
        activeness_history = ES_COPY_USER_PORTRAIT.get(index=COPY_USER_PORTRAIT_ACTIVENESS, doc_type=COPY_USER_PORTRAIT_ACTIVENESS_TYPE,\
                id=uid)['_source']
    except:
        activeness_history = {}
    if activeness_history:
        results = get_evaluate_trend(activeness_history, 'activeness')
    else:
        results = {}
    #deal results for situation---server power off
    new_time_list = []
    new_count_list = []
    new_results = {}
    now_time_ts = time.time()
    now_date_ts  = datetime2ts(ts2datetime(now_time_ts))
    for i in range(time_segment, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        try:
            date_count = results[iter_date_ts]
        except:
            date_count = 0
        new_time_list.append(iter_date_ts)
        new_count_list.append(date_count)
    new_results = {'timeline': new_time_list, 'evaluate_index': new_count_list}
    return new_results
def get_user_weibo(uid):
    result = []
    #used for testing
    datestr = '2013-09-02'
    end_ts = datetime2ts(datestr)
    #real way to get datestr and ts_segment
    '''
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    ts_segment = (int((now_ts - now_date_ts) / 3600)) % 24
    end_ts = now_date_ts + ts_segment * 3600
    '''
    file_list = set(os.listdir(DEFAULT_LEVELDBPATH))
    for i in range(24*7, 0, -1):
        ts = end_ts - i * 3600
        datestr = ts2datetime(ts)
        ts_segment = (int((ts - datetime2ts(datestr)) / 3600)) % 24 + 1
        leveldb_folder = datestr + str(ts_segment)
        
        if leveldb_folder in file_list:
            leveldb_bucket = dynamic_leveldb(leveldb_folder)
            try:
                user_weibo = leveldb_bucket.Get(uid)
                weibo_list = json.loads(user_weibo)
                result.extend(weibo_list)
            except:
                pass
            

    return result
def search_mention(now_ts, uid):
    date = ts2datetime(now_ts)
    ts = datetime2ts(date)
    #print 'at date-ts:', ts
    stat_results = dict()
    results = dict()
    for i in range(1,8):
        ts = ts - 24 * 3600
        try:
            result_string = r_cluster.hget('at_' + str(ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for at_uid in result_dict:
            try:
                stat_results[at_uid] += result_dict[at_uid]
            except:
                stat_results[at_uid] = result_dict[at_uid]
    
    for at_uid in stat_results:
        # search uname for at_uid (lookup currently disabled)
        '''
        uname = search_uid2uname(at_uid)
        if not uname:
        '''
        uname = ''
        count = stat_results[at_uid]
        results[at_uid] = [uname, count]
    if results:
        sort_results = sorted(results.items(), key=lambda x:x[1][1], reverse=True)
        return [sort_results[:20], len(results)]
    else:
        return [None, 0]
def search_location(now_ts, uid):
    date = ts2datetime(now_ts)
    #print 'date:', date
    ts = datetime2ts(date)
    #print 'date-ts:', ts
    stat_results = dict()
    results = dict()
    for i in range(1, 8):
        ts = ts - 24 * 3600
        #print 'for-ts:', ts
        result_string = r_cluster.hget('ip_' + str(ts), str(uid))
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for ip in result_dict:
            try:
                stat_results[ip] += result_dict[ip]
            except:
                stat_results[ip] = result_dict[ip]
    for ip in stat_results:
        city = ip2city(ip)
        if city:
            try:
                results[city][ip] = stat_results[ip]
            except:
                results[city] = {ip: stat_results[ip]}
                

    description = active_geo_description(results)
    results['description'] = description
    #print 'location results:', results
    return results
def submit_attribute(attribute_name, attribute_value, submit_user, submit_date):
    print "-----------submit_user---------"
    print submit_user
    status = False
    id_attribute = '-'.join([submit_user,attribute_name])
    print 'id_attribute:', id_attribute
    #maybe we should verify here that the user is allowed to submit attributes
    try:
        attribute_exist = es_tag.get(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute)['_source']
    except:
        attribute_exist = {}
    #make sure the attribute id is new and not in identify_attribute_list
    if attribute_exist == {} and id_attribute not in identify_attribute_list:
        input_data = dict()
        now_ts = time.time()
        date = ts2datetime(now_ts)
        input_data['attribute_name'] = attribute_name
        input_data['attribute_value'] = '&'.join(attribute_value.split(','))
        input_data['user'] = submit_user
        input_data['date'] = submit_date
        es_tag.index(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute, body=input_data)
        status = True
        #put mappings to es_user_portrait
        submit_user_tag = str(submit_user) + "-tag"
        exist_field = es.indices.get_field_mapping(index=user_index_name, doc_type=user_index_type, field=submit_user_tag)
        if not exist_field:
            es.indices.put_mapping(index=user_index_name, doc_type=user_index_type, \
                body={'properties':{submit_user_tag:{'type':'string', 'analyzer':'my_analyzer'}}}, ignore=400)
    return status
def get_influence_content(uid, timestamp_from, timestamp_to):
    weibo_list = []
    #split timestamp range to new_range_dict_list
    from_date_ts = datetime2ts(ts2datetime(timestamp_from))
    to_date_ts = datetime2ts(ts2datetime(timestamp_to))
    new_range_dict_list = []
    if from_date_ts != to_date_ts:
        iter_date_ts = from_date_ts
        while iter_date_ts < to_date_ts:
            iter_next_date_ts = iter_date_ts + DAY
            new_range_dict_list.append({'range':{'timestamp':{'gte':iter_date_ts, 'lt':iter_next_date_ts}}})
            iter_date_ts = iter_next_date_ts
        if new_range_dict_list[0]['range']['timestamp']['gte'] < timestamp_from:
            new_range_dict_list[0]['range']['timestamp']['gte'] = timestamp_from
        if new_range_dict_list[-1]['range']['timestamp']['lt'] > timestamp_to:
            new_range_dict_list[-1]['range']['timestamp']['lt'] = timestamp_to
    else:
        new_range_dict_list = [{'range':{'timestamp':{'gte':timestamp_from, 'lt':timestamp_to}}}]
    #iter date to search flow_text
    iter_result = []
    for range_item in new_range_dict_list:
        range_from_ts = range_item['range']['timestamp']['gte']
        range_from_date = ts2datetime(range_from_ts)
        flow_text_index_name = flow_text_index_name_pre + range_from_date
        query = []
        query.append({'term':{'uid':uid}})
        query.append(range_item)
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body={'query':{'bool':{'must': query}}, 'sort':[{'timestamp':'asc'}]})['hits']['hits']
        except:
            flow_text_exist = []
        iter_result.extend(flow_text_exist)
    # get weibo list
    for item in iter_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        else:
            weibo['geo'] = ''
        weibo_list.append(weibo)
        
    return weibo_list
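#a minimal usage sketch for get_influence_content with a range spanning two
#flow_text day indexes (the uid and timestamps are hypothetical):
#timestamp_from = datetime2ts('2013-09-06') + 12 * 3600
#timestamp_to = datetime2ts('2013-09-07') + 12 * 3600
#weibo_list = get_influence_content('1234567890', timestamp_from, timestamp_to)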
def search_sentiment_all_keywords_task(submit_date, keywords_string, submit_user, start_date, end_date, status):
    results = []
    query_list = []
    if submit_date:
        submit_ts_start = datetime2ts(submit_date)
        submit_ts_end = submit_ts_start + DAY
        query_list.append({'range': {'submit_ts': {'gte': submit_ts_start, 'lt':submit_ts_end}}})
    if keywords_string:
        keywords_list = keywords_string.split(',')
        query_list.append({'terms':{'query_keywords': keywords_list}})
    if submit_user:
        query_list.append({'term': {'submit_user': submit_user}})
    if start_date:
        start_s_ts = datetime2ts(start_date)
        if end_date:
            start_e_ts = datetime2ts(end_date)
        else:
            start_e_ts = start_s_ts + DAY * 30
        start_date_nest_body_list = [ts2datetime(ts) for ts in range(start_s_ts, start_e_ts + DAY, DAY)]
        query_list.append({'terms':{'start_date': start_date_nest_body_list}})
    if end_date:
        end_e_ts = datetime2ts(end_date)
        if start_date:
            end_s_ts = datetime2ts(start_date)
        else:
            end_s_ts = end_e_ts - DAY * 30
        end_date_nest_body_list = [ts2datetime(ts) for ts in range(end_s_ts, end_e_ts + DAY, DAY)]
        query_list.append({'terms': {'end_date': end_date_nest_body_list}})
    if status:
        query_list.append({'term': {'status': status}})
    try:
        task_results = es_sentiment_task.search(index=sentiment_keywords_index_name, \
                doc_type=sentiment_keywords_index_type, body={'query':{'bool':{'must':query_list}}})['hits']['hits']
    except:
        task_results = []
    for task_item in task_results:
        task_source = task_item['_source']
        task_id = task_source['task_id']
        start_date = task_source['start_date']
        end_date = task_source['end_date']
        keywords = task_source['query_keywords']
        submit_ts = ts2date(task_source['submit_ts'])
        status = task_source['status']
        segment = task_source['segment']
        results.append([task_id, start_date, end_date, keywords, submit_ts, status, segment])

    return results
def get_user_detail(date, input_result, status):
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name'] 
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            results.append([uid, uname, location, fansnum, statusnum, influence])
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            results.append([uid, uname, location, fansnum, statusnum, influence, in_status])

    return results
def get_db_num(timestamp):
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    db_number = 2 - (((date_ts - begin_ts) / (DAY * 7))) % 2
    #run_type
    if RUN_TYPE == 0:
        db_number = 1
    return db_number
def search_retweet_network_keywords(task_id, uid):
    results = {}
    task_results = es_network_task.get(index=network_keywords_index_name, \
                doc_type=network_keywords_index_type, id=task_id)['_source']

    start_date = task_results['start_date']
    start_ts = datetime2ts(start_date)
    end_date = task_results['end_date']
    end_ts = datetime2ts(end_date)
    iter_date_ts = start_ts
    to_date_ts = end_ts
    iter_query_date_list = [] # ['2013-09-01', '2013-09-02']
    while iter_date_ts <= to_date_ts:
        iter_date = ts2datetime(iter_date_ts)
        iter_query_date_list.append(iter_date)
        iter_date_ts += DAY
    #step2: get iter search flow_text_index_name
    #step2.1: get search keywords list
    query_must_list = []
    keyword_nest_body_list = []
    keywords_string = task_results['query_keywords']
    keywords_list = keywords_string.split('&')
    for keywords_item in keywords_list:
        keyword_nest_body_list.append({'wildcard': {'text': '*' + keywords_item + '*'}})
    query_must_list.append({'bool': {'should': keyword_nest_body_list}})
    network_results = {}
    retweet_query = list(query_must_list)
    be_retweet_query = list(query_must_list)
    #retweet
    retweet_query.append({'term': {'uid': uid}})
    item_results = {}
    for iter_date in iter_query_date_list:
        flow_text_index_name = flow_text_index_name_pre + iter_date
        query_body = {
            'query':{
                'bool':{
                    'must':retweet_query
                }
            },
            'size': 100
        }
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body=query_body)['hits']['hits']
        for item in flow_text_result:
            source = item['_source']
            source_uid = source['directed_uid']
            try:
                item_results[source_uid] += 1
            except:
                item_results[source_uid] = 1
    results = retweet_dict2results(uid, item_results)
    network_results['retweet'] = results
    #be_retweet
    be_retweet_query.append({'term': {'directed_uid': uid}})
    item_results = {}
    for iter_date in iter_query_date_list:
        flow_text_index_name = flow_text_index_name_pre + iter_date
        query_body = {
            'query':{
                'bool':{
                    'must':be_retweet_query
                }
            },
            'size': 100
        }
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,\
                    body=query_body)['hits']['hits']
        for item in flow_text_result:
            source = item['_source']
            source_uid = source['directed_uid']
            try:
                item_results[source_uid] += 1
            except:
                item_results[source_uid] = 1
    results = retweet_dict2results(uid, item_results)
    network_results['be_retweet'] = results
    return network_results 
def get_final_submit_user_info(uid_list):
    final_results = []
    try:
        profile_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': uid_list})['docs']
    except:
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(
            index=bci_history_index_name,
            doc_type=bci_history_index_type,
            body={'ids': uid_list})['docs']
    except:
        bci_history_results = []
    #get bci_history max value
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': [{
            bci_key: {
                'order': 'desc'
            }
        }],
        'size': 1
    }
    try:
        bci_max_result = es_bci_history.search(index=bci_history_index_name,
                                               doc_type=bci_history_index_type,
                                               body=query_body,
                                               _source=False,
                                               fields=[bci_key])['hits']['hits']
    except:
        bci_max_result = {}
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    iter_count = 0
    for uid in uid_list:
        try:
            profile_item = profile_results[iter_count]
        except:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except:
            bci_history_item = {}
        if profile_item and profile_item['found'] == True:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item['found'] == True:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                normal_bci = math.log(bci / bci_max_value * 9 + 1, 10) * 100
            except:
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append(
            [uid, uname, location, fansnum, statusnum, normal_bci])
        iter_count += 1

    return final_results
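#a worked sketch of the normalization above: math.log(bci / bci_max_value * 9
#+ 1, 10) * 100 maps bci == 0 to 0 and bci == bci_max_value to 100 (log10(10)
#* 100), putting every user on a 0-100 scale relative to the day's maximum
#(this assumes float bci values; pure ints would truncate under Python 2
#division).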
def get_task_detail_2(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail['remark']
    portrait_detail = []
    count = 0  # counter

    if social_sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": social_sensors},
                                 fields=SOCIAL_SENSOR_INFO)['docs']
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
        portrait_detail = sorted(portrait_detail,
                                 key=lambda x: x[5],
                                 reverse=True)

    time_series = []  # timestamps
    positive_sentiment_list = []  # sentiment counts per timestamp
    neutral_sentiment_list = []
    negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = []  # weibo counts per timestamp
    retweeted_weibo_list = []
    retweeted_weibo_count = []  # times the user's weibo were retweeted by others
    comment_weibo_count = []
    total_number_count = []
    burst_time_list = []  # burst timestamps
    important_user_set = set()  # important users
    out_portrait_users = set()  # users not in the portrait library

    ts = int(ts)
    for item in history_status:
        if int(item[0]) <= ts:
            time_series.append(item[0])  # all timestamps up to ts

    # get detail task information from es
    if time_series:
        #print time_series
        flow_detail = es.mget(index=index_sensing_task,
                              doc_type=_id,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            sentiment_distribution = json.loads(item["sentiment_distribution"])
            positive_sentiment_list.append(int(sentiment_distribution['1']))
            negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
                    +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"])  # real
            retweeted_weibo_list.append(item['retweeted_weibo_number'])  # real
            all_weibo_list.append(item["origin_weibo_number"] +
                                  item['retweeted_weibo_number'])
            retweeted_weibo_count.append(item['retweeted_weibo_count'])
            comment_weibo_count.append(item['comment_weibo_count'])
            total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(
                temp_important_user_list)  # 未入库
            important_user_set = important_user_set | set(
                temp_important_user_list)
            out_portrait_users = out_portrait_users | set(
                temp_out_portrait_users)

            burst_reason = item.get("burst_reason", "")
            if burst_reason:
                burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # count burst reasons and draw the corresponding conclusions
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append(
                    [ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append(
                    [ts2date_min(item[0]), negetive_sentiment_list[item[1]]])

            if tmp_common == 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])

    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # per-user heat

    # fetch profile information for the important users
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    user_detail_info = []  #
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name,
                               doc_type=portrait_index_type,
                               body={"ids": important_uid_list},
                               fields=[
                                   'uid', 'uname', 'domain', 'topic_string',
                                   "photo_url", 'importance', 'influence',
                                   'activeness'
                               ])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(
                    math.ceil(item['fields']['importance'][0] /
                              float(top_importance) * 100))
                temp.append(
                    math.ceil(item['fields']['influence'][0] /
                              float(top_influence) * 100))
                temp.append(
                    math.ceil(item['fields']['activeness'][0] /
                              float(top_activeness) * 100))
                user_detail_info.append(temp)
    # sort by influence
    user_detail_info = sorted(user_detail_info,
                              key=lambda x: x[6],
                              reverse=True)

    if out_portrait_users_list:
        profile_results = es_profile.mget(
            index=profile_index_name,
            doc_type=profile_index_type,
            body={"ids": out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts - DAY).replace('-', '')
        influence_results = es.mget(index=bci_index,
                                    doc_type="bci",
                                    body={"ids":
                                          out_portrait_users_list})['docs']
        top_influence = get_top_all_influence("influence", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    temp.append(item['_source']['fansnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend(['', ''])
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['_source']['user_index']
                    temp.append(
                        math.ceil(user_index / float(top_influence) * 100))
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)

    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    results['burst_time'] = burst_time_list  # burst timestamps and burst reasons
    results['time_series'] = revise_time_series
    results['positive_sentiment_list'] = positive_sentiment_list
    results['negetive_sentiment_list'] = negetive_sentiment_list
    results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    results['comment_weibo_count'] = comment_weibo_count
    results['retweeted_weibo_count'] = retweeted_weibo_count
    results['total_number_list'] = total_number_count
    results['social_sensors_detail'] = portrait_detail

    return results