def scan_reducer():
    """Drain the queued uid batches and bulk-index each user's ip record.

    Every item popped from the redis list ``ip_uid_list_0520`` is a
    JSON-encoded list of uids.  For each batch the matching values are read
    from the redis hash ``pre_ip + str(ts)`` and written to the
    Elasticsearch index ``ip_index_pre + date`` (doc type ``ip_doc_type``),
    using the uid as document id.  The loop stops when the list is empty
    and then prints the total number of uids handled.

    When RUN_TYPE is truthy the day is derived from ``time.time() - DAY``;
    otherwise the fixed date 2016-05-14 is used.  Returns None.
    """
    if RUN_TYPE:
        # time.time has to be *called*; subtracting DAY from the function
        # object itself raises TypeError.
        ts = datetime2ts(ts2datetime(time.time() - DAY))
    else:
        ts = datetime2ts('2016-05-14')
    date = ts2datetime(ts)
    hash_name = pre_ip + str(ts)
    index_name = ip_index_pre + date
    # mapping() is defined elsewhere; presumably it prepares the index
    # before documents are written -- confirm against its definition.
    mapping(index_name, ip_doc_type)
    count = 0
    tb = time.time()

    while True:
        tmp_list = redis_ip.rpop('ip_uid_list_0520')
        if not tmp_list:
            # Queue drained: report the total and stop.
            print(count)
            break
        uid_list = json.loads(tmp_list)
        if not uid_list:
            # Nothing to look up or index for an empty batch.
            continue
        ip_values = redis_ip.hmget(hash_name, uid_list)
        bulk_action = []
        # hmget returns one value per requested uid, in request order.
        for uid, ip_value in zip(uid_list, ip_values):
            bulk_action.extend([
                {'index': {'_id': uid}},
                {'uid': uid, 'ip_dict': ip_value},
            ])
        es_cluster.bulk(bulk_action, index=index_name, doc_type=ip_doc_type)
        count += len(uid_list)
        te = time.time()
        if RUN_TYPE == 0:
            # Per-batch timing is only reported when RUN_TYPE is 0.
            print('%s sec scan %s count user' % (te - tb, count))
        tb = te
def scan_reducer():
    """Drain the queued uid batches and bulk-index each user's ip record.

    Every item popped from the redis list ``ip_uid_list`` is a JSON-encoded
    list of uids.  For each batch the matching values are read from the
    redis hash ``pre_ip + str(ts)`` and written to the Elasticsearch index
    ``ip_index_pre + date`` (doc type ``ip_doc_type``), using the uid as
    document id.  The loop stops when the list is empty and then prints the
    total number of uids handled.

    When RUN_TYPE is truthy the date is derived from ``time.time() - DAY``;
    otherwise the fixed date 2013-09-01 is used.  Returns None.
    """
    if RUN_TYPE:
        # time.time has to be *called*; subtracting DAY from the function
        # object itself raises TypeError.  The clock is read once so that
        # the date and the timestamp always describe the same day.
        date = ts2datetime(time.time() - DAY)
    else:
        date = '2013-09-01'
    ts = datetime2ts(date)
    hash_name = pre_ip + str(ts)
    index_name = ip_index_pre + date
    # mapping() is defined elsewhere; presumably it prepares the index
    # before documents are written -- confirm against its definition.
    mapping(index_name, ip_doc_type)
    count = 0
    tb = time.time()

    while True:
        tmp_list = redis_ip.rpop('ip_uid_list')
        if not tmp_list:
            # Queue drained: report the total and stop.
            print(count)
            break
        uid_list = json.loads(tmp_list)
        if not uid_list:
            # Nothing to look up or index for an empty batch.
            continue
        ip_values = redis_ip.hmget(hash_name, uid_list)
        bulk_action = []
        # hmget returns one value per requested uid, in request order.
        for uid, ip_value in zip(uid_list, ip_values):
            bulk_action.extend([
                {'index': {'_id': uid}},
                {'uid': uid, 'ip_dict': ip_value},
            ])
        es_cluster.bulk(bulk_action, index=index_name, doc_type=ip_doc_type)
        count += len(uid_list)
        te = time.time()
        if RUN_TYPE == 0:
            # Per-batch timing is only reported when RUN_TYPE is 0.
            print('%s sec scan %s count user' % (te - tb, count))
        tb = te
# Example #3
def get_school(uid_list):
    """Summarise which schools each user's recent ips map to.

    For each of the WEEK days before today's date timestamp, the redis hash
    ``'ip_' + str(ts)`` is read for every uid.  Each stored value is a JSON
    object keyed by ip whose values are '&'-separated strings; the number
    of '&'-separated entries is added to the count of the school that
    ``ip2school(ip)`` returns (ips with a falsy result are ignored).

    Args:
        uid_list: sequence of uids to look up.

    Returns:
        dict mapping each uid to a dict with
        ``is_school`` ('1' if any school was found, else '0'),
        ``school_string`` ('&'-joined school names) and
        ``school_dict`` (JSON text of school -> count).
        An empty ``uid_list`` yields ``{}`` without touching redis.
    """
    if not uid_list:
        return {}

    now_date_ts = datetime2ts(ts2datetime(time.time()))
    # Give every uid an entry up front so the summary loop below never
    # depends on the day loop having run.
    school_results = {uid: {} for uid in uid_list}

    for days_back in range(WEEK, 0, -1):
        ts = now_date_ts - DAY * days_back
        ip_results = redis_ip.hmget('ip_' + str(ts), uid_list)
        # hmget returns one value per requested uid, in request order.
        for uid, ip_item in zip(uid_list, ip_results):
            if not ip_item:
                continue
            school_counts = school_results[uid]
            uid_ip_dict = json.loads(ip_item)
            for ip in uid_ip_dict:
                ip_count = len(uid_ip_dict[ip].split('&'))
                school = ip2school(ip)
                if school:
                    school_counts[school] = school_counts.get(school, 0) + ip_count

    results = {}
    for uid in uid_list:
        school_dict = school_results[uid]
        results[uid] = {
            'is_school': '1' if school_dict else '0',
            'school_string': '&'.join(school_dict.keys()),
            'school_dict': json.dumps(school_dict),
        }
    return results
def scan_reducer():
    """Drain the queued uid batches and bulk-index each user's ip record.

    Every item popped from the redis list ``ip_uid_list_0520`` is a
    JSON-encoded list of uids.  For each batch the matching values are read
    from the redis hash ``pre_ip + str(ts)`` and written to the
    Elasticsearch index ``ip_index_pre + date`` (doc type ``ip_doc_type``),
    using the uid as document id.  The loop stops when the list is empty
    and then prints the total number of uids handled.

    When RUN_TYPE is truthy the day is derived from ``time.time() - DAY``;
    otherwise the fixed date 2016-05-14 is used.  Returns None.
    """
    if RUN_TYPE:
        # time.time has to be *called*; subtracting DAY from the function
        # object itself raises TypeError.
        ts = datetime2ts(ts2datetime(time.time() - DAY))
    else:
        ts = datetime2ts("2016-05-14")
    date = ts2datetime(ts)
    hash_name = pre_ip + str(ts)
    index_name = ip_index_pre + date
    # mapping() is defined elsewhere; presumably it prepares the index
    # before documents are written -- confirm against its definition.
    mapping(index_name, ip_doc_type)
    count = 0
    tb = time.time()

    while True:
        tmp_list = redis_ip.rpop("ip_uid_list_0520")
        if not tmp_list:
            # Queue drained: report the total and stop.
            print(count)
            break
        uid_list = json.loads(tmp_list)
        if not uid_list:
            # Nothing to look up or index for an empty batch.
            continue
        ip_values = redis_ip.hmget(hash_name, uid_list)
        bulk_action = []
        # hmget returns one value per requested uid, in request order.
        for uid, ip_value in zip(uid_list, ip_values):
            bulk_action.extend([
                {"index": {"_id": uid}},
                {"uid": uid, "ip_dict": ip_value},
            ])
        es_cluster.bulk(bulk_action, index=index_name, doc_type=ip_doc_type)
        count += len(uid_list)
        te = time.time()
        if RUN_TYPE == 0:
            # Per-batch timing is only reported when RUN_TYPE is 0.
            print("%s sec scan %s count user" % (te - tb, count))
        tb = te
# Example #5
def get_flow_information(uid_list):
    # 前七天的数据, 不能用于每天更新
    lenth = len(uid_list)
    results = {}
    iter_results = {}
    result_dict = {}
    if RUN_TYPE:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)  # date: 2013-09-01
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    start_ts = ts - 8 * 3600 * 24
    for i in range(1, 8):
        ts = start_ts + i * 3600 * 24
        date = ts2datetime(ts)
        print "date:", date
        uid_day_geo = {}
        sensitive_uid_day_geo = {}
        flow_index_name = flow_text_index_name_pre + str(date)
        # hashtag
        print uid_list
        hashtag_results = redis_cluster.hmget('hashtag_' + str(ts), uid_list)
        sensitive_hashtag = redis_cluster.hmget('sensitive_hashtag_' + str(ts),
                                                uid_list)
        # sensitive_words
        sensitive_results = redis_cluster.hmget('sensitive_' + str(ts),
                                                uid_list)
        # ip
        if WORK_TYPE == 0:
            ip_index_name = ip_index_pre + str(date)
            sensitive_ip_index_name = sen_ip_index_pre + str(date)
            #activity_index_name = act_index_pre + str(date)
            #sensitive_activity_index_name = sen_act_index_pre + str(date)
            exist_bool = es_cluster.indices.exists(index=ip_index_name)
            sensitive_exist_bool = es_cluster.indices.exists(
                index=sensitive_ip_index_name)
            #activity_exist_bool = es_cluster.indices.exists(index=activity_index_name)
            #sensitive_activity_exist_bool = es_cluster.indices.exists(index=sensitive_activity_index_name)
            if exist_bool:
                ip_results = es_cluster.mget(index=ip_index_name,
                                             doc_type="ip",
                                             body={"ids": uid_list})["docs"]
            else:
                ip_results = [dict()] * lenth
            if sensitive_exist_bool:
                sensitive_ip_results = es_cluster.mget(
                    index=sensitive_ip_index_name,
                    doc_type="sensitive_ip",
                    body={"ids": uid_list})["docs"]
            else:
                sensitive_ip_results = [dict()] * lenth
            """
            if activity_exist_bool:
                activity_results = es_cluster.mget(index=activity_index_name, doc_type="activity", body={"ids":uid_list})["docs"]
            else:
                activity_results = [dict()]*lenth
            if sensitive_activity_exist_bool:
                sensitive_activity_results = es_cluster.mget(index=sensitive_activity_index_name, doc_type="sensitive_activity", body={"ids":uid_list})["docs"]
            else:
                sensitive_activity_results = [dict()]*lenth
            """
        else:
            ip_results = redis_ip.hmget('ip_' + str(ts), uid_list)
            sensitive_ip_results = redis_ip.hmget('sensitive_ip_' + str(ts),
                                                  uid_list)
            #activity_results = redis_activity.hmget('activity_'+str(date), uid_list)
            #sensitive_activity_results = redis_activity.hmget('sensitive_activity_'+str(date), uid_list)

        for j in range(0, len(uid_list)):
            uid = uid_list[j]
            if uid not in iter_results:
                iter_results[uid] = {'hashtag':{}, 'sensitive_hashtag':{}, 'geo':{}, "sensitive_geo":{},'geo_track':[],'keywords':{}, \
                        'sensitive_words':{}, "sensitive_geo_track":[],'ip': [], 'sensitive_ip':[]}
            # sensitive words
            if sensitive_results[j]:
                sensitive_words_results = json.loads(sensitive_results[j])
                for sensitive_word in sensitive_words_results:
                    try:
                        iter_results[uid]["sensitive_words"][
                            sensitive_word] += sensitive_words_results[
                                sensitive_word]
                    except:
                        iter_results[uid]["sensitive_words"][
                            sensitive_word] = sensitive_words_results[
                                sensitive_word]
            #print "sensitive_words:", iter_results[uid]["sensitive_words"]

            if hashtag_results[j]:
                hashtag_dict = json.loads(hashtag_results[j])
                for hashtag in hashtag_dict:
                    try:
                        iter_results[uid]['hashtag'][hashtag] += hashtag_dict[
                            hashtag]
                    except:
                        iter_results[uid]['hashtag'][hashtag] = hashtag_dict[
                            hashtag]
            #print "hashtag: ", iter_results[uid]['hashtag']

            if sensitive_hashtag[j]:
                sensitive_hashtag_dict = json.loads(sensitive_hashtag[j])
                for hashtag in sensitive_hashtag_dict:
                    try:
                        iter_results[uid]['sensitive_hashtag'][
                            hashtag] += sensitive_hashtag_dict[hashtag]
                    except:
                        iter_results[uid]['sensitive_hashtag'][
                            hashtag] = sensitive_hashtag_dict[hashtag]
            #print "sensitive_hashtag:", iter_results[uid]['sensitive_hashtag']

            uid_day_geo[uid] = {}
            sensitive_uid_day_geo[uid] = {}
            if WORK_TYPE == 0:  # es
                if ip_results[j]:
                    if ip_results[j]['found']:
                        detail_item = ip_results[j]['_source']
                        ip_dict = json.loads(detail_item['ip_dict'])
                    else:
                        ip_dict = {}
                else:
                    ip_dict = {}
            else:
                if ip_results[j]:
                    ip_dict = json.loads(ip_results[j])
                else:
                    ip_dict = {}
            if ip_dict:
                #iter_results[uid]['ip'].append(ip_dict)
                geo_dict = ip2geo(ip_dict)
                for geo, count in geo_dict.iteritems():
                    try:
                        iter_results[uid]['geo'][geo] += count
                    except:
                        iter_results[uid]['geo'][geo] = count
                    try:
                        uid_day_geo[uid][geo] += count
                    except:
                        uid_day_geo[uid][geo] = count
            #iter_results[uid]['ip'].append(ip_dict)
            iter_results[uid]['geo_track'].append(uid_day_geo[uid])
            #print "ip:", iter_results[uid]['ip'], iter_results[uid]['geo_track']

            if WORK_TYPE == 0:
                if sensitive_ip_results[j]:
                    if sensitive_ip_results[j]['found']:
                        detail_item = sensitive_ip_results[j]['_source']
                        sensitive_ip_dict = json.loads(
                            detail_item['sensitive_ip_dict'])
                    else:
                        sensitive_ip_dict = dict()
                else:
                    sensitive_ip_dict = dict()
            else:
                if sensitive_ip_results[j]:
                    sensitive_ip_dict = json.loads(sensitive_ip_results[j])
                else:
                    sensitive_ip_dict = dict()
            if sensitive_ip_dict:
                sensitive_geo_dict = ip2geo(sensitive_ip_dict)
                #iter_results[uid]['sensitive_ip'].append(sensitive_ip_dict)
                for geo, count in sensitive_geo_dict.iteritems():
                    try:
                        iter_results[uid]['sensitive_geo'][geo] += count
                    except:
                        iter_results[uid]['sensitive_geo'][geo] = count
                    try:
                        sensitive_uid_day_geo[uid][geo] += count
                    except:
                        sensitive_uid_day_geo[uid][geo] = count
            #iter_results[uid]['sensitive_ip'].append(sensitive_ip_dict)
            iter_results[uid]['sensitive_geo_track'].append(
                sensitive_uid_day_geo[uid])
            #print "sensitive_ip:", iter_results[uid]['sensitive_ip'], iter_results[uid]['sensitive_geo_track']

        # compute keywords
        flow_text_exist = es_flow_text.indices.exists(index=flow_index_name)
        if flow_text_exist:
            text_results = es_flow_text.search(index=flow_index_name, doc_type=flow_text_index_type,\
                   body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size':MAX_VALUE},_source=False, fields=['uid', 'keywords_dict'])['hits']['hits']
        else:
            text_results = {}
        for item in text_results:
            uid = item['fields']['uid'][0]
            uid_keywords_dict = json.loads(item['fields']['keywords_dict'][0])
            for keywords in uid_keywords_dict:
                try:
                    iter_results[uid]['keywords'][
                        keywords] += uid_keywords_dict[keywords]
                except:
                    iter_results[uid]['keywords'][
                        keywords] = uid_keywords_dict[keywords]
        #print "keywords:", iter_results[uid]['keywords']

    for uid in uid_list:
        results[uid] = {}
        # hashtag
        hashtag_dict = iter_results[uid]['hashtag']
        results[uid]['hashtag_dict'] = json.dumps(hashtag_dict)
        results[uid]['hashtag_string'] = '&'.join(hashtag_dict.keys())
        # sensitive hashtag
        sensitive_hashtag_dict = iter_results[uid]['sensitive_hashtag']
        results[uid]['sensitive_hashtag_dict'] = json.dumps(
            sensitive_hashtag_dict)
        results[uid]['sensitive_hashtag_string'] = '&'.join(
            sensitive_hashtag_dict.keys())
        # sensitive_words
        sensitive_word_dict = iter_results[uid]['sensitive_words']
        results[uid]['sensitive_words_dict'] = json.dumps(sensitive_word_dict)
        results[uid]['sensitive_words_string'] = '&'.join(
            sensitive_word_dict.keys())
        sensitive_score = 0
        for k, v in sensitive_word_dict.iteritems():
            tmp = r_sensitive.hget('sensitive_words', k)
            if tmp:
                tmp_stage = json.loads(tmp)
                sensitive_score += sensitive_score_dict[str(tmp_stage[0])] * v
        results[uid]['sensitive'] = sensitive_score
        # geo
        geo_dict = iter_results[uid]['geo']
        geo_track_list = iter_results[uid]['geo_track']
        results[uid]['activity_geo_dict'] = json.dumps(geo_track_list)
        geo_dict_keys = geo_dict.keys()
        results[uid]['activity_geo'] = '&'.join(
            ['&'.join(item.split('\t')) for item in geo_dict_keys])
        results[uid]['activity_geo_aggs'] = '&'.join(
            [item.split('\t')[-1] for item in geo_dict_keys])
        sensitive_geo_dict = iter_results[uid]['sensitive_geo']
        sensitive_geo_track_list = iter_results[uid]['sensitive_geo_track']
        results[uid]['sensitive_activity_geo_dict'] = json.dumps(
            sensitive_geo_track_list)
        sensitive_geo_dict_keys = sensitive_geo_dict.keys()
        results[uid]['sensitive_activity_geo'] = '&'.join(
            ['&'.join(item.split('\t')) for item in sensitive_geo_dict_keys])
        results[uid]['sensitive_activity_geo_aggs'] = '&'.join(
            [item.split('\t')[-1] for item in sensitive_geo_dict_keys])

        keywords_dict = iter_results[uid]['keywords']
        keywords_top50 = sorted(keywords_dict.items(),
                                key=lambda x: x[1],
                                reverse=True)[:50]
        keywords_top50_string = '&'.join(
            [keyword_item[0] for keyword_item in keywords_top50])
        results[uid]['keywords_dict'] = json.dumps(keywords_top50)
        results[uid]['keywords_string'] = keywords_top50_string

    return results
def get_flow_information(uid_list):
    # 前七天的数据, 不能用于每天更新
    lenth = len(uid_list)
    results = {}
    iter_results = {}
    result_dict = {}
    if RUN_TYPE:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)  # date: 2013-09-01
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    start_ts = ts - 8 * 3600 * 24
    for i in range(1, 8):
        ts = start_ts + i * 3600 * 24
        date = ts2datetime(ts)
        print "date:", date
        uid_day_geo = {}
        sensitive_uid_day_geo = {}
        flow_index_name = flow_text_index_name_pre + str(date)
        # hashtag
        print uid_list
        hashtag_results = redis_cluster.hmget("hashtag_" + str(ts), uid_list)
        sensitive_hashtag = redis_cluster.hmget("sensitive_hashtag_" + str(ts), uid_list)
        # sensitive_words
        sensitive_results = redis_cluster.hmget("sensitive_" + str(ts), uid_list)
        # ip
        if WORK_TYPE == 0:
            ip_index_name = ip_index_pre + str(date)
            sensitive_ip_index_name = sen_ip_index_pre + str(date)
            # activity_index_name = act_index_pre + str(date)
            # sensitive_activity_index_name = sen_act_index_pre + str(date)
            exist_bool = es_cluster.indices.exists(index=ip_index_name)
            sensitive_exist_bool = es_cluster.indices.exists(index=sensitive_ip_index_name)
            # activity_exist_bool = es_cluster.indices.exists(index=activity_index_name)
            # sensitive_activity_exist_bool = es_cluster.indices.exists(index=sensitive_activity_index_name)
            if exist_bool:
                ip_results = es_cluster.mget(index=ip_index_name, doc_type="ip", body={"ids": uid_list})["docs"]
            else:
                ip_results = [dict()] * lenth
            if sensitive_exist_bool:
                sensitive_ip_results = es_cluster.mget(
                    index=sensitive_ip_index_name, doc_type="sensitive_ip", body={"ids": uid_list}
                )["docs"]
            else:
                sensitive_ip_results = [dict()] * lenth
            """
            if activity_exist_bool:
                activity_results = es_cluster.mget(index=activity_index_name, doc_type="activity", body={"ids":uid_list})["docs"]
            else:
                activity_results = [dict()]*lenth
            if sensitive_activity_exist_bool:
                sensitive_activity_results = es_cluster.mget(index=sensitive_activity_index_name, doc_type="sensitive_activity", body={"ids":uid_list})["docs"]
            else:
                sensitive_activity_results = [dict()]*lenth
            """
        else:
            ip_results = redis_ip.hmget("ip_" + str(ts), uid_list)
            sensitive_ip_results = redis_ip.hmget("sensitive_ip_" + str(ts), uid_list)
            # activity_results = redis_activity.hmget('activity_'+str(date), uid_list)
            # sensitive_activity_results = redis_activity.hmget('sensitive_activity_'+str(date), uid_list)

        for j in range(0, len(uid_list)):
            uid = uid_list[j]
            if uid not in iter_results:
                iter_results[uid] = {
                    "hashtag": {},
                    "sensitive_hashtag": {},
                    "geo": {},
                    "sensitive_geo": {},
                    "geo_track": [],
                    "keywords": {},
                    "sensitive_words": {},
                    "sensitive_geo_track": [],
                    "ip": [],
                    "sensitive_ip": [],
                }
            # sensitive words
            if sensitive_results[j]:
                sensitive_words_results = json.loads(sensitive_results[j])
                for sensitive_word in sensitive_words_results:
                    try:
                        iter_results[uid]["sensitive_words"][sensitive_word] += sensitive_words_results[sensitive_word]
                    except:
                        iter_results[uid]["sensitive_words"][sensitive_word] = sensitive_words_results[sensitive_word]
            # print "sensitive_words:", iter_results[uid]["sensitive_words"]

            if hashtag_results[j]:
                hashtag_dict = json.loads(hashtag_results[j])
                for hashtag in hashtag_dict:
                    try:
                        iter_results[uid]["hashtag"][hashtag] += hashtag_dict[hashtag]
                    except:
                        iter_results[uid]["hashtag"][hashtag] = hashtag_dict[hashtag]
            # print "hashtag: ", iter_results[uid]['hashtag']

            if sensitive_hashtag[j]:
                sensitive_hashtag_dict = json.loads(sensitive_hashtag[j])
                for hashtag in sensitive_hashtag_dict:
                    try:
                        iter_results[uid]["sensitive_hashtag"][hashtag] += sensitive_hashtag_dict[hashtag]
                    except:
                        iter_results[uid]["sensitive_hashtag"][hashtag] = sensitive_hashtag_dict[hashtag]
            # print "sensitive_hashtag:", iter_results[uid]['sensitive_hashtag']

            uid_day_geo[uid] = {}
            sensitive_uid_day_geo[uid] = {}
            if WORK_TYPE == 0:  # es
                if ip_results[j]:
                    if ip_results[j]["found"]:
                        detail_item = ip_results[j]["_source"]
                        ip_dict = json.loads(detail_item["ip_dict"])
                    else:
                        ip_dict = {}
                else:
                    ip_dict = {}
            else:
                if ip_results[j]:
                    ip_dict = json.loads(ip_results[j])
                else:
                    ip_dict = {}
            if ip_dict:
                # iter_results[uid]['ip'].append(ip_dict)
                geo_dict = ip2geo(ip_dict)
                for geo, count in geo_dict.iteritems():
                    try:
                        iter_results[uid]["geo"][geo] += count
                    except:
                        iter_results[uid]["geo"][geo] = count
                    try:
                        uid_day_geo[uid][geo] += count
                    except:
                        uid_day_geo[uid][geo] = count
            # iter_results[uid]['ip'].append(ip_dict)
            iter_results[uid]["geo_track"].append(uid_day_geo[uid])
            # print "ip:", iter_results[uid]['ip'], iter_results[uid]['geo_track']

            if WORK_TYPE == 0:
                if sensitive_ip_results[j]:
                    if sensitive_ip_results[j]["found"]:
                        detail_item = sensitive_ip_results[j]["_source"]
                        sensitive_ip_dict = json.loads(detail_item["sensitive_ip_dict"])
                    else:
                        sensitive_ip_dict = dict()
                else:
                    sensitive_ip_dict = dict()
            else:
                if sensitive_ip_results[j]:
                    sensitive_ip_dict = json.loads(sensitive_ip_results[j])
                else:
                    sensitive_ip_dict = dict()
            if sensitive_ip_dict:
                sensitive_geo_dict = ip2geo(sensitive_ip_dict)
                # iter_results[uid]['sensitive_ip'].append(sensitive_ip_dict)
                for geo, count in sensitive_geo_dict.iteritems():
                    try:
                        iter_results[uid]["sensitive_geo"][geo] += count
                    except:
                        iter_results[uid]["sensitive_geo"][geo] = count
                    try:
                        sensitive_uid_day_geo[uid][geo] += count
                    except:
                        sensitive_uid_day_geo[uid][geo] = count
            # iter_results[uid]['sensitive_ip'].append(sensitive_ip_dict)
            iter_results[uid]["sensitive_geo_track"].append(sensitive_uid_day_geo[uid])
            # print "sensitive_ip:", iter_results[uid]['sensitive_ip'], iter_results[uid]['sensitive_geo_track']

        # compute keywords
        flow_text_exist = es_flow_text.indices.exists(index=flow_index_name)
        if flow_text_exist:
            text_results = es_flow_text.search(
                index=flow_index_name,
                doc_type=flow_text_index_type,
                body={"query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}}, "size": MAX_VALUE},
                _source=False,
                fields=["uid", "keywords_dict"],
            )["hits"]["hits"]
        else:
            text_results = {}
        for item in text_results:
            uid = item["fields"]["uid"][0]
            uid_keywords_dict = json.loads(item["fields"]["keywords_dict"][0])
            for keywords in uid_keywords_dict:
                try:
                    iter_results[uid]["keywords"][keywords] += uid_keywords_dict[keywords]
                except:
                    iter_results[uid]["keywords"][keywords] = uid_keywords_dict[keywords]
        # print "keywords:", iter_results[uid]['keywords']

    for uid in uid_list:
        results[uid] = {}
        # hashtag
        hashtag_dict = iter_results[uid]["hashtag"]
        results[uid]["hashtag_dict"] = json.dumps(hashtag_dict)
        results[uid]["hashtag_string"] = "&".join(hashtag_dict.keys())
        # sensitive hashtag
        sensitive_hashtag_dict = iter_results[uid]["sensitive_hashtag"]
        results[uid]["sensitive_hashtag_dict"] = json.dumps(sensitive_hashtag_dict)
        results[uid]["sensitive_hashtag_string"] = "&".join(sensitive_hashtag_dict.keys())
        # sensitive_words
        sensitive_word_dict = iter_results[uid]["sensitive_words"]
        results[uid]["sensitive_words_dict"] = json.dumps(sensitive_word_dict)
        results[uid]["sensitive_words_string"] = "&".join(sensitive_word_dict.keys())
        sensitive_score = 0
        for k, v in sensitive_word_dict.iteritems():
            tmp = r_sensitive.hget("sensitive_words", k)
            if tmp:
                tmp_stage = json.loads(tmp)
                sensitive_score += sensitive_score_dict[str(tmp_stage[0])] * v
        results[uid]["sensitive"] = sensitive_score
        # geo
        geo_dict = iter_results[uid]["geo"]
        geo_track_list = iter_results[uid]["geo_track"]
        results[uid]["activity_geo_dict"] = json.dumps(geo_track_list)
        geo_dict_keys = geo_dict.keys()
        results[uid]["activity_geo"] = "&".join(["&".join(item.split("\t")) for item in geo_dict_keys])
        results[uid]["activity_geo_aggs"] = "&".join([item.split("\t")[-1] for item in geo_dict_keys])
        sensitive_geo_dict = iter_results[uid]["sensitive_geo"]
        sensitive_geo_track_list = iter_results[uid]["sensitive_geo_track"]
        results[uid]["sensitive_activity_geo_dict"] = json.dumps(sensitive_geo_track_list)
        sensitive_geo_dict_keys = sensitive_geo_dict.keys()
        results[uid]["sensitive_activity_geo"] = "&".join(
            ["&".join(item.split("\t")) for item in sensitive_geo_dict_keys]
        )
        results[uid]["sensitive_activity_geo_aggs"] = "&".join(
            [item.split("\t")[-1] for item in sensitive_geo_dict_keys]
        )

        keywords_dict = iter_results[uid]["keywords"]
        keywords_top50 = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)[:50]
        keywords_top50_string = "&".join([keyword_item[0] for keyword_item in keywords_top50])
        results[uid]["keywords_dict"] = json.dumps(keywords_top50)
        results[uid]["keywords_string"] = keywords_top50_string

    return results