def get_user_sensitive_words(uid):
    user_sensitive_words_dict = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)  # 2015-09-22
    ts = datetime2ts(now_date)

    #test
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        results = r_cluster.hget('sensitive_' + str(date), uid)
        if results:
            sensitive_words_dict = json.loads(results)
            for word in sensitive_words_dict:
                if word in user_sensitive_words_dict:
                    user_sensitive_words_dict[word] += sensitive_words_dict[word]
                else:
                    user_sensitive_words_dict[word] = sensitive_words_dict[word]
    sort_sensitive_words_dict = sorted(user_sensitive_words_dict.items(),
                                       key=lambda x: x[1],
                                       reverse=True)

    return sort_sensitive_words_dict
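# Note: the day-by-day accumulation above is the classic counter-merge idiom.
# A minimal standalone sketch of the same aggregation using collections.Counter
# (the daily dicts below are hypothetical decoded redis values, not real data):
from collections import Counter

def merge_daily_counts(daily_dicts):
    # Sum word -> count mappings across days, then sort by count descending,
    # mirroring the loop in get_user_sensitive_words.
    total = Counter()
    for day_dict in daily_dicts:
        total.update(day_dict)
    return sorted(total.items(), key=lambda x: x[1], reverse=True)

# merge_daily_counts([{'a': 2, 'b': 1}, {'a': 1}, {'b': 3}])
# -> [('b', 4), ('a', 3)]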
def get_user_geo(uid):
    results = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = {}  # ordinary ip
    user_sensitive_ip_result = {}  # sensitive ip
    now_ts = time.time()
    now_date = ts2datetime(now_ts)  # 2015-09-22
    ts = datetime2ts(now_date)

    #test
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        results = r_cluster.hget('ip_' + str(date), uid)
        sensitive_results = r_cluster.hget('sensitive_ip' + str(date), uid)
        if results:
            ip_results = json.loads(results)
            for ip in ip_results:
                if ip in user_ip_result:
                    user_ip_result[ip] += ip_results[ip]
                else:
                    user_ip_result[ip] = ip_results[ip]

        if sensitive_results:
            sensitive_ip_results = json.loads(sensitive_results)
            for ip in sensitive_ip_results:
                if ip in user_sensitive_ip_result:
                    user_sensitive_ip_result[ip] += sensitive_ip_results[ip]
                else:
                    user_sensitive_ip_result[ip] = sensitive_ip_results[ip]

    ordinary_key_set = set(user_ip_result.keys())
    sensitive_key_set = set(user_sensitive_ip_result.keys())
    for key in sensitive_key_set:
        if key in ordinary_key_set:
            user_ip_result[key] += user_sensitive_ip_result[key]
        else:
            user_ip_result[key] = user_sensitive_ip_result[key]

    user_geo_dict = ip2geo(user_ip_result)
    sorted_user_geo_dict = sorted(user_geo_dict.items(),
                                  key=lambda x: x[1],
                                  reverse=True)
    sensitive_user_geo_dict = ip2geo(user_sensitive_ip_result)
    sorted_sensitive_user_geo_dict = sorted(sensitive_user_geo_dict.items(),
                                            key=lambda x: x[1],
                                            reverse=True)

    return_list = [sorted_user_geo_dict, sorted_sensitive_user_geo_dict]  # total and sensitive
    return return_list
def get_influence_content(uid, timestamp_from, timestamp_to):
    weibo_list = []
    # split timestamp range to new_range_dict_list
    from_date_ts = datetime2ts(ts2datetime(timestamp_from))
    to_date_ts = datetime2ts(ts2datetime(timestamp_to))
    new_range_dict_list = []
    if from_date_ts != to_date_ts:
        iter_date_ts = from_date_ts
        while iter_date_ts < to_date_ts:
            iter_next_date_ts = iter_date_ts + DAY
            new_range_dict_list.append({"range": {"timestamp": {"gte": iter_date_ts, "lt": iter_next_date_ts}}})
            iter_date_ts = iter_next_date_ts
        if new_range_dict_list[0]["range"]["timestamp"]["gte"] < timestamp_from:
            new_range_dict_list[0]["range"]["timestamp"]["gte"] = timestamp_from
        if new_range_dict_list[-1]["range"]["timestamp"]["lt"] > timestamp_to:
            new_range_dict_list[-1]["range"]["timestamp"]["lt"] = timestamp_to
    else:
        new_range_dict_list = [{"range": {"timestamp": {"gte": timestamp_from, "lt": timestamp_to}}}]
    # iter date to search flow_text
    iter_result = []
    for range_item in new_range_dict_list:
        range_from_ts = range_item["range"]["timestamp"]["gte"]
        range_from_date = ts2datetime(range_from_ts)
        flow_text_index_name = flow_text_index_name_pre + range_from_date
        query = []
        query.append({"term": {"uid": uid}})
        query.append(range_item)
        try:
            flow_text_exist = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body={"query": {"bool": {"must": query}}, "sort": [{"timestamp": "asc"}]},
            )["hits"]["hits"]
        except:
            flow_text_exist = []
        iter_result.extend(flow_text_exist)
    # get weibo list
    for item in iter_result:
        source = item["_source"]
        weibo = {}
        weibo["timestamp"] = ts2date(source["timestamp"])
        weibo["ip"] = source["ip"]
        weibo["text"] = source["text"]
        if source["geo"]:
            weibo["geo"] = "\t".join(source["geo"].split("&"))
        else:
            weibo["geo"] = ""
        weibo_list.append(weibo)

    return weibo_list
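# Note: the range-splitting block above clips a [timestamp_from, timestamp_to)
# interval into one ES range clause per day. A self-contained sketch of the
# same bucketing, flooring to UTC midnight for simplicity (the original floors
# via ts2datetime/datetime2ts, i.e. in the configured local timezone):
DAY = 24 * 3600  # matches the DAY constant used elsewhere in this code

def split_range_by_day(ts_from, ts_to):
    # One {"range": ...} clause per day touched by [ts_from, ts_to),
    # with the first and last buckets clipped to the input bounds.
    day_start = ts_from - ts_from % DAY
    clauses = []
    while day_start < ts_to:
        gte = max(day_start, ts_from)
        lt = min(day_start + DAY, ts_to)
        clauses.append({"range": {"timestamp": {"gte": gte, "lt": lt}}})
        day_start += DAY
    return clauses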
def get_network(task_exist):
    task_name = task_exist['task_name']
    submit_date = task_exist['submit_date']
    submit_ts = date2ts(submit_date)

    time_segment = 24*3600
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    #test
    now_date_ts = datetime2ts('2013-09-07')
    iter_date_ts = now_date_ts
    iter_count = 1
    date_list = []
    top_list_dict = {}
    while True:
        if iter_count >= 8 or iter_date_ts < submit_ts:
            break
        iter_date = ts2datetime(iter_date_ts)
        date_list.append(iter_date)
        key = 'inner_' + str(iter_date)
        try:
            task_date_result = es.get(index=monitor_index_name, doc_type=task_name, id=key)['_source']
        except:
            task_date_result = {}
        #print 'task_name, key, task_date_result:', task_name, key, task_date_result
        iter_field = ['top1', 'top2', 'top3', 'top4', 'top5']
        for field in iter_field:
            if field not in task_date_result:
                continue
            user_count_item = json.loads(task_date_result[field])
            uid = user_count_item[0]
            uname = uid2uname(uid)
            count = user_count_item[1]
            try:
                top_list_dict[field].append([uid, uname, count])
            except:
                top_list_dict[field] = [[uid, uname, count]]
        
        iter_date_ts -= time_segment
        # get inner-retweet group from es---field: inner_graph
        '''
        try:
            inner_graph = json.loads(task_date_result['inner_graph'])
        except:
            inner_graph = {}
        '''

    abnormal_index = compute_inner_polarization(top_list_dict)
    
    return [date_list, top_list_dict, abnormal_index]
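# Note: the try/except-KeyError append in get_network is a common
# pre-defaultdict idiom; an equivalent sketch with collections.defaultdict:
from collections import defaultdict

def collect_top_lists(rows):
    # rows: iterable of (field, uid, uname, count) tuples (hypothetical input)
    top_list_dict = defaultdict(list)
    for field, uid, uname, count in rows:
        top_list_dict[field].append([uid, uname, count])
    return top_list_dict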
def ajax_upload_track_file():
    results = {}
    upload_data = request.form['upload_data']
    task_name = request.form['task_name']
    state = request.form['state']
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    time_segment = int((now_ts - now_date_ts) / 900) + 1
    trans_ts = now_date_ts + time_segment * 900
    line_list = upload_data.split('\n')
    input_data = {}
    #submit task and start time is 15min multiple
    input_data['submit_date'] = trans_ts
    input_data['task_name'] = task_name
    uid_list = []
    for line in line_list:
        uid = line[:10]
        if len(uid) == 10:
            uid_list.append(uid)
    input_data['uid_list'] = uid_list
    input_data['status'] = 1  # status marks whether the track task is running: 1 running, 0 ended
    input_data['count'] = len(uid_list)
    status = submit_track_task(input_data)
    return json.dumps(status)
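# Note: the submit-time computation above rounds now_ts up to the next
# 15-minute boundary after local midnight. The same arithmetic in isolation:
def next_quarter_hour(now_ts, day_start_ts):
    # Elapsed 900-second segments since midnight, plus one, gives the
    # next 15-minute boundary (as used for input_data['submit_date']).
    segment = int((now_ts - day_start_ts) / 900) + 1
    return day_start_ts + segment * 900

assert next_quarter_hour(600, 0) == 900   # 00:10 rounds up to 00:15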
def search_mention(uid, sensitive):
    date = ts2datetime(time.time()).replace('-','')
    stat_results = dict()
    results = dict()
    test_ts = time.time()
    test_ts = datetime2ts('2013-09-07')
    for i in range(0,7):
        ts = test_ts -i*24*3600
        date = ts2datetime(ts).replace('-', '')
        if not sensitive:
            at_temp = r_cluster.hget('at_' + str(date), str(uid))
        else:
            at_temp = r_cluster.hget('sensitive_at_' + str(date), str(uid))
        if not at_temp:
            continue
        else:
            result_dict = json.loads(at_temp)
        for at_uid in result_dict:
            if at_uid in stat_results:
                stat_results[at_uid] += result_dict[at_uid]
            else:
                stat_results[at_uid] = result_dict[at_uid]
    if not stat_results:
        return [None, 0]

    in_status = identify_uid_list_in(stat_results.keys())
    for at_uid in stat_results:
        if at_uid in in_status:
            results[at_uid] = [stat_results[at_uid], '1']
        else:
            results[at_uid] = [stat_results[at_uid], '0']

    sorted_results = sorted(results.items(), key=lambda x:x[1][0], reverse=True)
    return [sorted_results[0:20], len(results)]
def influence_distribute():

    row = [0, 200, 500, 700, 900, 1100, 10000]
    result = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') # test
    ts = ts - 8*3600*24
    for j in range(7):
        detail = []
        ts += 3600*24
        date = ts2datetime(ts).replace('-', '')
        for i in range(6):
            low_limit = row[i]
            upper_limit = row[i+1]
            query_body = {
                "query": {
                    "filtered": {
                        "filter": {
                            "range": {
                                date: {
                                    "gte": low_limit,
                                    "lt": upper_limit
                                }
                            }
                        }
                    }
                }
            }
            number = es.count(index='copy_sensitive_user_portrait', doc_type="user", body=query_body)['count']
            detail.append(number)
        result.append(detail)
    return [row, result]
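# Note: the "filtered" query above is Elasticsearch 1.x DSL (removed in ES 5).
# On newer clusters the equivalent count body wraps the range in a bool
# filter; a sketch, assuming the same per-date field layout:
def influence_range_query(date_field, low, high):
    return {
        "query": {
            "bool": {
                "filter": {
                    "range": {date_field: {"gte": low, "lt": high}}
                }
            }
        }
    }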
def get_user_sensitive_words(uid):
    user_sensitive_words_dict = {}
    if RUN_TYPE:
        now_ts = time.time()
        now_date = ts2datetime(now_ts) # 2015-09-22
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    #test
    #ts = datetime2ts('2013-09-08')
    for i in range(1,8):
        ts = ts - 3600*24
        date = ts2datetime(ts).replace('-','')
        results = r_cluster.hget('sensitive_'+str(ts), uid)
        if results:
            sensitive_words_dict = json.loads(results)
            for word in sensitive_words_dict:
                if word in user_sensitive_words_dict:
                    user_sensitive_words_dict[word] += sensitive_words_dict[word]
                else:
                    user_sensitive_words_dict[word] = sensitive_words_dict[word]
    sort_sensitive_words_dict = sorted(user_sensitive_words_dict.items(), key=lambda x:x[1], reverse=True)

    return sort_sensitive_words_dict
def get_group_user_track(uid):
    results = []
    # step1:get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(
            index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=False, fields=["activity_geo_dict"]
        )
    except:
        portrait_result = {}
    if portrait_result == {}:
        return "uid is not in user_portrait"
    activity_geo_dict = json.loads(portrait_result["fields"]["activity_geo_dict"][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    # step2: iter date to get month track
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x: x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ""])
        start_ts = start_ts + DAY

    return results
def end_track_task(task_name):
    status = 0
    try:
        task_exist = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except:
        return 'task name does not exist'
    task_status = task_exist['status']
    if task_status == '0':
        return 'task has already ended'
    else:
        task_exist['status'] = 0
        # set the end time, aligned to the next 15-minute boundary
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        now_date_ts = datetime2ts(now_date)
        time_segment = int((now_ts - now_date_ts) / 900) + 1
        end_ts = now_date_ts + time_segment * 900
        end_date = ts2date(end_ts)
        task_exist['end_date'] = end_date
        task_user = task_exist['uid_list']
        status = change_user_count(task_user)
        if status == 0:
            return 'failed to change user task count'
        else:
            es.index(index=index_name, doc_type=index_type, id=task_name, body=task_exist)
            status = delete_task_redis(task_name)
            if status == 0:
                return 'failed to delete task from redis'
            else:
                return 'successfully changed status to end'
def get_user_hashtag(uid):
    user_hashtag_dict = {}
    sensitive_user_hashtag_dict = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)  # 2015-09-22
    ts = datetime2ts(now_date)

    #test
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        results = r_cluster.hget('hashtag_' + str(date), uid)
        sensitive_results = r_cluster.hget('sensitive_hashtag_' + str(date), uid)
        if results:
            hashtag_dict = json.loads(results)
            for hashtag in hashtag_dict:
                if hashtag in user_hashtag_dict:
                    user_hashtag_dict[hashtag] += hashtag_dict[hashtag]
                else:
                    user_hashtag_dict[hashtag] = hashtag_dict[hashtag]
        if sensitive_results:
            sensitive_hashtag_dict = json.loads(sensitive_results)
            for hashtag in sensitive_hashtag_dict:
                if hashtag in sensitive_user_hashtag_dict:
                    sensitive_user_hashtag_dict[hashtag] += sensitive_hashtag_dict[hashtag]
                else:
                    sensitive_user_hashtag_dict[hashtag] = sensitive_hashtag_dict[hashtag]
    ordinary_key_set = set(user_hashtag_dict.keys())
    sensitive_key_set = set(sensitive_user_hashtag_dict.keys())
    for key in sensitive_key_set:
        if key in ordinary_key_set:
            user_hashtag_dict[key] += sensitive_user_hashtag_dict[key]
        else:
            user_hashtag_dict[key] = sensitive_user_hashtag_dict[key]

    sort_hashtag_dict = sorted(user_hashtag_dict.items(),
                               key=lambda x: x[1],
                               reverse=True)
    sort_sensitive_dict = sorted(sensitive_user_hashtag_dict.items(),
                                 key=lambda x: x[1],
                                 reverse=True)
    return [sort_hashtag_dict, sort_sensitive_dict]
def get_text_index(date):
    now_ts = datetime2ts(date)
    index_list = []
    for i in range(7):
        ts = now_ts - i*DAY
        tmp_index = pre_text_index + ts2datetime(ts)
        index_list.append(tmp_index)

    return index_list
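# Note: a self-contained equivalent of get_text_index using the stdlib,
# with 'flow_text_' standing in for the (unknown here) pre_text_index prefix:
from datetime import datetime, timedelta

def text_index_list(date_str, prefix='flow_text_', days=7):
    # Walk back `days` calendar days from date_str, one index name per day.
    end = datetime.strptime(date_str, '%Y-%m-%d')
    return [prefix + (end - timedelta(days=i)).strftime('%Y-%m-%d')
            for i in range(days)]

# text_index_list('2013-09-08')[:2]
# -> ['flow_text_2013-09-08', 'flow_text_2013-09-07']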
def ajax_ip():
    uid = request.args.get('uid', '')
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts('2013-09-02')
    result = search_ip(now_ts, uid)
    if not result:
        result = {}
    return json.dumps(result)
def ajax_activity_day():
    results = {}
    uid = str(request.args.get('uid', ''))
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts("2013-09-01")
    results = search_activity(now_ts, uid)
    if not results:
        results = {}
    return json.dumps(results)
def ajax_location():
    uid = request.args.get('uid', '')
    uid = str(uid)
    time_type = request.args.get('time_type', '')  # type = day; week; month
    #run_type
    if RUN_TYPE == 1:
        now_ts = time.time()
    else:
        now_ts = datetime2ts("2013-09-01")
    results = search_location(now_ts, uid, time_type)

    return json.dumps(results)
def search_detect_task(task_name, submit_date, state, process, detect_type, submit_user):
    results = []
    query = [{'match':{'task_type': 'detect'}}]
    condition_num = 0
    if task_name:
        task_name_list = task_name.split(' ')
        for item in task_name_list:
            query.append({'wildcard':{'task_name': '*'+item+'*'}})
            condition_num += 1
    if submit_date:
        submit_date_ts = datetime2ts(submit_date)
        submit_date_from = submit_date_ts
        submit_date_to = submit_date_ts + DAY
        query.append({'range':{'submit_date':{'gte':submit_date_from, 'lt':submit_date_to}}})
        condition_num += 1
    if state:
        state_list = state.split(' ')
        for item in state_list:
            query.append({'wildcard':{'state': '*'+item+'*'}})
            condition_num += 1
    if process:
        query.append({'range':{'detect_process':{'from': int(process), 'to': MAX_PROCESS}}})
        condition_num += 1
    if detect_type:
        
        detect_type_list = detect_type.split(',')
        nest_body_list = []
        for type_item in detect_type_list:
            nest_body_list.append({'wildcard':{'detect_type': '*'+type_item+'*'}})
        query.append({'bool':{'should': nest_body_list}})
        
        condition_num += 1
    if submit_user:
        query.append({'wildcard':{'submit_user': '*'+submit_user+'*'}})
        condition_num += 1
    try:
        search_result = es_group_result.search(index=group_index_name, doc_type=group_index_type, \
                    body={'query':{'bool': {'must': query}}, 'sort':[{'submit_date': {'order': 'desc'}}], 'size':MAX_VALUE})['hits']['hits']
    except:
        search_result = []
    #get group information table
    for group_item in search_result:
        source = group_item['_source']
        task_name = source['task_name']
        submit_date = ts2datetime(int(source['submit_date']))
        submit_user = source['submit_user']
        detect_type = source['detect_type']
        state = source['state']
        process = source['detect_process']

        results.append([task_name, submit_user, submit_date, detect_type, state, process])
        
    return results
def lastest_identify_in():
    results = dict()
    now_ts = time.time()
    now_ts = datetime2ts('2013-09-08')
    for i in range(1,8):
        ts = now_ts - i * 3600 *24
        date = ts2datetime(ts).replace('-','')
        words_dict = r.hgetall('history_in_'+date)
        for item in words_dict:
            results[item] = json.loads(words_dict[item])

    return results
def show_in_history(date):
    print date
    results = []
    sensitive_uid_list = []
    influence_uid_list = []
    sen_iden_in_name = "identify_in_sensitive_" + str(date)
    inf_iden_in_name = "identify_in_influence_" + str(date)
    man_iden_in_name = "identify_in_manual_" + str(date)
    sen_iden_in_results = r.hgetall(sen_iden_in_name)
    inf_iden_in_results = r.hgetall(inf_iden_in_name)
    man_iden_in_results = r.hgetall(man_iden_in_name)
    sensitive_uid_list = sen_iden_in_results.keys()
    influence_uid_list = inf_iden_in_results.keys()
    manual_uid_list = man_iden_in_results.keys()
    #compute_results = r.hgetall('compute')
    results = []
    work_date = ts2datetime(datetime2ts(date) - DAY)

    if sensitive_uid_list:
        sensitive_results = get_sensitive_user_detail(sensitive_uid_list,
                                                      work_date, 1)
    else:
        sensitive_results = []
    for item in sensitive_results:
        uid = item[0]
        status = sen_iden_in_results[uid]
        item.append(status)
        results.append(item)

    if influence_uid_list:
        influence_results = get_sensitive_user_detail(influence_uid_list,
                                                      work_date, 0)
    else:
        influence_results = []
    for item in influence_results:
        uid = item[0]
        status = inf_iden_in_results[uid]
        item.append(status)
        results.append(item)

    if manual_uid_list:
        manual_results = get_sensitive_user_detail(manual_uid_list, work_date,
                                                   0)
    else:
        manual_results = []
    for item in manual_results:
        uid = item[0]
        status = man_iden_in_results[uid]
        item.append(status)
        results.append(item)

    sorted_results = sorted(results, key=lambda x: x[5], reverse=True)
    return sorted_results
def get_sensitive_user_detail(uid_list, date, sensitive):
    es_cluster = es_user_profile
    ts = datetime2ts(date)
    results = []
    index_name = pre_influence_index + str(date).replace('-', '')  # index_name: 20130901
    user_bci_results = es_bci.mget(index=index_name,
                                   doc_type='bci',
                                   body={'ids': uid_list},
                                   _source=False,
                                   fields=['user_index'])['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user",
                                                doc_type="user",
                                                body={"ids": uid_list},
                                                _source=True)['docs']
    top_influence_value = get_top_value("user_index", es_bci, index_name, "bci")
    for i in range(0, len(uid_list)):
        personal_info = [''] * 6
        uid = uid_list[i]
        personal_info[0] = uid_list[i]
        personal_info[1] = uid_list[i]
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            uname = profile_dict['nick_name']
            if uname:
                personal_info[1] = uname
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            try:
                tmp_bci = user_bci_results[i]['fields']['user_index'][0]
                influence = math.log(tmp_bci / float(top_influence_value) * 9 + 1, 10) * 100
                personal_info[5] = influence
            except:
                personal_info[5] = 0
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = redis_cluster.hget('sensitive_' + str(ts),
                                                 str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(sensitive_dict.keys())
            else:
                personal_info.append([])
        else:
            personal_info.append([])
        results.append(personal_info)
    return results
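# Note: the BCI normalization above maps a raw user_index onto a 0-100 log
# scale: influence = log10(bci / top * 9 + 1) * 100, so bci == top gives
# exactly 100 and bci == 0 gives 0. A quick standalone check:
import math

def normalize_influence(bci, top):
    # log10(9x + 1) maps x in [0, 1] onto [0, 1]; scale to [0, 100]
    return math.log(bci / float(top) * 9 + 1, 10) * 100

assert abs(normalize_influence(0, 1000) - 0.0) < 1e-9
assert abs(normalize_influence(1000, 1000) - 100.0) < 1e-9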
def ajax_full_text_search():
    if RUN_TYPE:
        ts = time.time()
    else:
        ts = datetime2ts("2013-09-02")
    now_date = ts2datetime(ts)
    start_time = request.args.get("start_time", now_date)  # 2013-09-01
    end_time = request.args.get("end_time", now_date)
    uid = request.args.get("uid", "")
    size = request.args.get("number", 100)
    keywords = request.args.get("keywords", "")  # 逗号分隔

    results = full_text_search(keywords, uid, start_time, end_time, size)

    return json.dumps(results)
def recommend_in_sensitive(date):
    sensitive_name = "recomment_" + str(date) + "_sensitive"
    compute_name = "compute"
    re_sen_set = r.hkeys(sensitive_name) # recommended sensitive users
    iden_in_set = r.hkeys(compute_name) # users already stored
    if not re_sen_set:
        return [] # no data for that day
    uid_list = list(set(re_sen_set) - set(iden_in_set))
    sensitive = 1
    work_date = ts2datetime(datetime2ts(date)-DAY)
    if uid_list:
        results = get_sensitive_user_detail(uid_list, work_date, sensitive)
    else:
        results = []
    return results
def identify_in(date, words_list):
    # identify_in date and words_list(include level and category, [word, level, category])
    # date is date when new words were recommended
    ts = time.time()
    ts = datetime2ts('2013-09-07')
    time_list = []
    for i in range(7):
        now_ts = int(ts) - i * 24 * 3600
        now_date = ts2datetime(now_ts).replace('-', '')
        time_list.append(now_date)
    for item in words_list:
        r.hset('sensitive_words', item[0], json.dumps([item[1], item[2]]))
        r.hset('history_in_' + date, item[0], json.dumps([item[1], item[2]]))
        for day in time_list:
            r.hdel('recommend_sensitive_words_' + day, item[0])
    return '1'
def recommend_in_top_influence(date):
    influence_name = "recomment_" + date + "_influence"
    identify_in_name = "compute"
    re_inf_set = r.hkeys(influence_name)
    iden_in_set = r.hkeys(identify_in_name)  # users already stored

    if not re_inf_set:
        return []
    else:
        uid_list = list(set(re_inf_set) - set(iden_in_set))
    sensitive = 0
    work_date = ts2datetime(datetime2ts(date) - DAY)
    if uid_list:
        results = get_sensitive_user_detail(uid_list, work_date, sensitive)
    else:
        results = []
    return results
def ajax_show_sensitive_history_in():
    results = []
    now_date = ts2datetime(time.time())
    date = request.args.get('date', now_date) # in date:2013-09-01
    if str(date) == "all":
        ts = time.time()
        now_ts = datetime2ts(now_date)
        for i in range(7):
            ts = now_ts - i*24*3600
            date = ts2datetime(ts)
            temp = show_in_history(date, 1)
            results.extend(temp)
    else:
        results = show_in_history(date, 1) # history in, include status
    if results:
        return json.dumps(results)
    else:
        return json.dumps([])
def ajax_show_influence_history_in():
    results = []
    now_date = ts2datetime(time.time())
    date = request.args.get('date', now_date)
    if str(date) == "all":
        ts = time.time()
        now_ts = datetime2ts('2013-09-07')
        for i in range(7):
            ts = now_ts - i*24*3600
            date = ts2datetime(ts)
            date = str(date).replace('-', '')
            temp = show_in_history(date, 0)
            results.extend(temp)
    else:
        date = str(date).replace('-','')
        results = show_in_history(date, 0) # history in, include status
    if results:
        return json.dumps(results)
    else:
        return json.dumps([])
def get_inner_top_weibo(task_name, date, uid):
    result = []
    # step1: identify the task exist
    # step2: search weibo from monitor_user_text by condition: task_user, date
    task_exist = identify_task(task_name)
    if not task_exist:
        return 'the task does not exist'
    task_user = task_exist['uid_list']
    if uid not in task_user:
        return 'the user does not exist'
    end_ts = datetime2ts(date)
    time_segment = 24*3600
    start_ts = end_ts - time_segment
    query_body = []
    #term search: uid
    query_body.append({'term': {'uid': uid}})
    #range search: date-24*3600, date
    query_body.append({'range':{'timestamp': {'from': start_ts, 'to': end_ts}}})
    try:
        weibo_result = es.search(index=text_index_name, doc_type=text_index_type, \
                body={'query':{'bool':{'must': query_body}}, 'sort':[{'timestamp':{'order':'asc'}}], 'size':10000})['hits']['hits']
    except Exception, e:
        raise e
def get_user_geo(uid):
    results = []
    user_geo_result = {}
    user_ip_dict = {}
    user_ip_result = {}  # ordinary ip
    user_sensitive_ip_result = {}  # sensitive ip
    if RUN_TYPE:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)  # 2015-09-22
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts)
        if WORK_TYPE == 0:
            index_name = ip_index_pre + str(date)
            sensitive_index_name = sen_ip_index_pre + str(date)
            exist_bool = es_cluster.indices.exists(index=index_name)
            sensitive_exist_bool = es_cluster.indices.exists(
                index=sensitive_index_name)
            if exist_bool:
                try:
                    tmp_ip_result = es_cluster.get(index=index_name,
                                                   doc_type="ip",
                                                   id=uid)['_source']
                    results = tmp_ip_result['ip_dict']
                except:
                    results = dict()
            else:
                results = dict()
            if sensitive_exist_bool:
                try:
                    tmp_sensitive_ip_result = es_cluster.get(
                        index=sensitive_index_name,
                        doc_type="sensitive_ip",
                        id=uid)['_source']
                    sensitive_results = tmp_sensitive_ip_result[
                        'sensitive_ip_dict']
                except:
                    sensitive_results = dict()
            else:
                sensitive_results = dict()
        else:
            results = redis_ip.hget('ip_' + str(ts), uid)
            sensitive_results = redis_ip.hget('sensitive_ip' + str(ts), uid)
        if results:
            ip_results = json.loads(results)
            for ip in ip_results:
                if ip in user_ip_result:
                    user_ip_result[ip] += ip_results[ip]
                else:
                    user_ip_result[ip] = ip_results[ip]

        if sensitive_results:
            sensitive_ip_results = json.loads(sensitive_results)
            for ip in sensitive_ip_results:
                if ip in user_sensitive_ip_result:
                    user_sensitive_ip_result[ip] += sensitive_ip_results[ip]
                else:
                    user_sensitive_ip_result[ip] = sensitive_ip_results[ip]

    ordinary_key_set = set(user_ip_result.keys())
    sensitive_key_set = set(user_sensitive_ip_result.keys())
    for key in sensitive_key_set:
        if key in ordinary_key_set:
            user_ip_result[key] += user_sensitive_ip_result[key]
        else:
            user_ip_result[key] = user_sensitive_ip_result[key]

    user_geo_dict = ip2geo(user_ip_result)
    sorted_user_geo_dict = sorted(user_geo_dict.items(),
                                  key=lambda x: x[1],
                                  reverse=True)
    sensitive_user_geo_dict = ip2geo(user_sensitive_ip_result)
    sorted_sensitive_user_geo_dict = sorted(sensitive_user_geo_dict.items(),
                                            key=lambda x: x[1],
                                            reverse=True)

    return_list = [sorted_user_geo_dict, sorted_sensitive_user_geo_dict]  # total and sensitive
    return return_list
def ajax_event_detect():
    results = {}
    query_dict = {}  # {'attribute': attribute_query_list, 'event': event_query_list, 'filter': filter_dict}
    input_dict = {}  # {'task_information': task_information_dict, 'query_dict': query_dict}
    attribute_query_list = []
    event_query_list = []
    query_condition_num = 0
    #step1: get attribute query dict
    for item in DETECT_EVENT_ATTRIBUTE:
        item_value_string = request.args.get(item, '')
        if item_value_string != '':
            item_value_list = item_value_string.split(',')
            nest_body_list = []
            nest_body_list.append({'terms': {item: item_value_list}})
            query_condition_num += 1
            attribute_query_list.extend(nest_body_list)

    for item in DETECT_EVENT_SELECT_ATTRIBUTE:
        item_value_string = request.args.get(item, '')
        if item_value_string != '':
            attribute_query_list.append({"term": {item: item_value_string}})
            query_condition_num += 1

    query_dict['attribute'] = attribute_query_list
    #step2: get event query dict
    #step2.1: get event fuzz item
    for item in DETECT_TEXT_FUZZ_ITEM:
        item_value_string = request.args.get(item, '')
        item_value_list = item_value_string.split(' ')
        nest_body_list = []
        if item_value_string != '':
            for item_value in item_value_list:
                nest_body_list.append(
                    {'wildcard': {
                        item: '*' + item_value + '*'
                    }})
            event_query_list.append({'bool': {'should': nest_body_list}})
            query_condition_num += 1

    #step2.2: get event range item
    for item in DETECT_EVENT_TEXT_RANGE_ITEM:
        now_time = int(time.time())
        now_date_ts = datetime2ts(ts2datetime(now_time))
        item_value_from = request.args.get(item + '_from', now_date_ts - DAY)
        item_value_to = request.args.get(item + '_to', now_date_ts)
        if item_value_from != '' and item_value_to != '':
            if int(item_value_from) > int(item_value_to):
                return 'invalid input for range'
            else:
                query_condition_num += 1
                event_query_list.append({
                    'range': {
                        item: {
                            'gte': int(item_value_from),
                            'lt': int(item_value_to)
                        }
                    }
                })
        else:
            return 'invalid input for range'
    query_dict['event'] = event_query_list
    #identify the query condition at least 1
    if query_condition_num < 1:
        return 'invalid input for query'
    #step3: get filter dict
    filter_dict = {}
    for filter_item in DETECT_QUERY_FILTER:
        if filter_item == 'count':
            filter_item_value = request.args.get(filter_item,
                                                 DETECT_DEFAULT_COUNT)
            filter_item_value = int(filter_item_value)
        else:
            filter_item_from = request.args.get(filter_item + '_from',
                                                DETECT_FILTER_VALUE_FROM)
            filter_item_to = request.args.get(filter_item + '_to',
                                              DETECT_FILTER_VALUE_TO)
            if int(filter_item_from) > int(filter_item_to) or (
                    not filter_item_from) or (not filter_item_to):
                return 'invalid input for filter'
            filter_item_value = {
                'gte': int(filter_item_from),
                'lt': int(filter_item_to)
            }
        filter_dict[filter_item] = filter_item_value
    if filter_dict['count'] == 0:
        return 'invalid input for count'
    query_dict['filter'] = filter_dict
    #step4: get task information dict
    task_information_dict = {}
    task_information_dict['task_name'] = request.args.get('task_name', '')
    task_information_dict['submit_date'] = int(time.time())
    task_information_dict['state'] = request.args.get('state', '')
    task_information_dict['submit_user'] = request.args.get(
        'submit_user', 'admin')
    task_information_dict['task_id'] = task_information_dict[
        'submit_user'] + task_information_dict['task_name']
    task_information_dict['task_type'] = 'detect'
    task_information_dict['detect_type'] = 'event'
    task_information_dict['detect_process'] = 0

    #step5: save to es and redis
    input_dict['task_information'] = task_information_dict
    input_dict['query_condition'] = query_dict

    status = save_detect_event_task(input_dict)

    return json.dumps(status)
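# Note: the DETECT_TEXT_FUZZ_ITEM loop above ORs one wildcard clause per
# whitespace-separated keyword via bool/should. The same construction in
# isolation (field name and keywords are illustrative):
def fuzzy_should_clause(field, keywords):
    return {'bool': {'should': [
        {'wildcard': {field: '*' + kw + '*'}} for kw in keywords
    ]}}

# fuzzy_should_clause('text', ['storm', 'flood'])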
def ajax_event_detect():
    results = {}
    query_dict = {} # {'attribute':attribute_query_list, 'event':event_query_list, 'filter':filter_dict}
    input_dict = {} # {'task_information':task_information_dict, 'query_dict': query_dict}
    attribute_query_list = []
    event_query_list = []
    query_condition_num = 0
    #step1: get attribtue query dict
    for item in DETECT_EVENT_ATTRIBUTE:
        item_value_string = request.args.get(item, '')
        if item_value_string != '':
            item_value_list = item_value_string.split(',')
            nest_body_list = []
            nest_body_list.append({'terms': {item: item_value_list}})
            query_condition_num += 1
            attribute_query_list.extend(nest_body_list)

    for item in DETECT_EVENT_SELECT_ATTRIBUTE:
        item_value_string = request.args.get(item, '')
        if item_value_string != '':
            attribute_query_list.append({"term": {item: item_value_string}})
            query_condition_num += 1

    query_dict['attribute']  = attribute_query_list
    #step2: get event query dict
    #step2.1: get event fuzz item
    for item in DETECT_TEXT_FUZZ_ITEM:
        item_value_string = request.args.get(item, '')
        item_value_list = item_value_string.split(' ')
        nest_body_list = []
        if item_value_string != '':
            for item_value in item_value_list:
                nest_body_list.append({'wildcard':{item: '*'+item_value+'*'}})
            event_query_list.append({'bool':{'should':nest_body_list}})
            query_condition_num += 1

    #step2.2: get event range item
    for item in DETECT_EVENT_TEXT_RANGE_ITEM:
        now_time = int(time.time())
        now_date_ts = datetime2ts(ts2datetime(now_time))
        item_value_from = request.args.get(item+'_from', now_date_ts - DAY)
        item_value_to = request.args.get(item+'_to', now_date_ts)
        if item_value_from != '' and item_value_to != '':
            if int(item_value_from) > int(item_value_to):
                return 'invalid input for range'
            else:
                query_condition_num += 1
                event_query_list.append({'range':{item: {'gte': int(item_value_from), 'lt':int(item_value_to)}}})
        else:
            return 'invalid input for range'
    query_dict['event'] =  event_query_list
    #identify the query condition at least 1
    if query_condition_num < 1:
        return 'invalid input for query'
    #step3: get filter dict
    filter_dict = {}
    for filter_item in DETECT_QUERY_FILTER:
        if filter_item == 'count':
            filter_item_value = request.args.get(filter_item, DETECT_DEFAULT_COUNT)
            filter_item_value = int(filter_item_value)
        else:
            filter_item_from = request.args.get(filter_item+'_from', DETECT_FILTER_VALUE_FROM)
            filter_item_to = request.args.get(filter_item+'_to', DETECT_FILTER_VALUE_TO)
            if (not filter_item_from) or (not filter_item_to) or int(filter_item_from) > int(filter_item_to):
                return 'invalid input for filter'
            filter_item_value = {'gte': int(filter_item_from), 'lt': int(filter_item_to)}
        filter_dict[filter_item] = filter_item_value
    if filter_dict['count'] == 0:
        return 'invalid input for count'
    query_dict['filter'] = filter_dict
    #step4: get task information dict
    task_information_dict = {}
    task_information_dict['task_name'] = request.args.get('task_name', '')
    task_information_dict['submit_date'] = int(time.time())
    task_information_dict['state'] = request.args.get('state', '')
    task_information_dict['submit_user'] = request.args.get('submit_user', 'admin')
    task_information_dict['task_id'] = task_information_dict['submit_user'] + task_information_dict['task_name']
    task_information_dict['task_type'] = 'detect'
    task_information_dict['detect_type'] = 'event'
    task_information_dict['detect_process'] = 0

    #step5: save to es and redis
    input_dict['task_information'] = task_information_dict
    input_dict['query_condition'] = query_dict

    status = save_detect_event_task(input_dict)
    
    return json.dumps(status)
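A minimal sketch (not from the original module) of the input_dict that ajax_event_detect hands to save_detect_event_task; the field names follow the code above, while every concrete value is illustrative only:

example_input_dict = {
    'task_information': {
        'task_name': 'demo_task',
        'submit_date': 1378569600,       # int(time.time()) at submit time
        'state': '',
        'submit_user': 'admin',
        'task_id': 'admindemo_task',     # submit_user + task_name
        'task_type': 'detect',
        'detect_type': 'event',
        'detect_process': 0,
    },
    'query_condition': {
        'attribute': [{'terms': {'domain': ['media']}}],
        'event': [{'bool': {'should': [{'wildcard': {'text': '*keyword*'}}]}}],
        'filter': {'count': 100},
    },
}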
def search_task(task_name, submit_user, submit_date, state, status):
    results = []
    # query = [{"term": {"submit_user": submit_user}}]
    query = []
    if task_name:
        task_name_list = task_name.split(" ")
        for item in task_name_list:
            query.append({"wildcard": {"task_name": "*" + item + "*"}})
    if submit_date:
        submit_date_ts = datetime2ts(submit_date)
        submit_date_start = submit_date_ts
        submit_date_end = submit_date_ts + DAY
        query.append({"range": {"submit_date": {"gte": submit_date_start, "lt": submit_date_end}}})
    if state:
        state_list = state.split(" ")
        for item in state_list:
            query.append({"wildcard": {"state": "*" + item + "*"}})
    if status:
        query.append({"match": {"status": status}})
    query.append({"term": {"task_type": "analysis"}})
    source = es_group_result.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={"query": {"bool": {"must": query}}, "sort": [{"count": {"order": "desc"}}], "size": MAX_VALUE},
    )

    try:
        task_dict_list = source["hits"]["hits"]
    except:
        return None
    print "step yes"
    result = []
    for task_dict in task_dict_list:
        try:
            state = task_dict["_source"]["state"]
        except:
            state = ""
        try:
            status = task_dict["_source"]["status"]
        except:
            status = 0
        result.append(
            [
                task_dict["_source"]["task_name"],
                task_dict["_source"]["submit_date"],
                task_dict["_source"]["count"],
                state,
                status,
            ]
        )

    return result
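A hedged usage sketch for search_task; it assumes the ES client and group index settings imported elsewhere in this module are configured, and the argument values are placeholders:

tasks = search_task('monitor', 'admin', '2013-09-07', '', 0)
if tasks:
    # each row: [task_name, submit_date, count, state, status]
    for task_name, submit_date, count, state, status in tasks:
        print task_name, submit_date, count, state, status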
def influenced_people(uid, mid, influence_style, date, default_number=20):
    # uid: the user whose influence is examined
    # mid: which weibo; for a retweeted weibo we look up its root_mid
    # influence_style: retweeted (0) or comment (1)
    date1 = ts2datetime(datetime2ts(date)).replace('-', '')
    index_name = pre_index + date1
    index_flow_text = pre_text_index + date
    text_result = es.get(index=index_flow_text, doc_type=flow_text_index_type, id=mid)["_source"]
    temp_mid = text_result.get("root_mid", '')  # check whether this weibo is original
    if temp_mid:
        mid_type = 1  # not an original weibo (a retweet)
    else:
        mid_type = 0  # original weibo
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                        ]
                    }
                }
            }
        },
        "size": 30000
    }
    if RUN_TYPE:
        query_body["sort"] = {"user_fansnum":{"order":"desc"}}

    if int(mid_type) == 0:
        if int(influence_style) == 0: # origin weibo, all retweeted people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"root_uid": uid}}, {"term": {"message_type": 3}}, {"term": {"root_mid": mid}}])
        else: # commented people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 2}}, {"term": {"root_mid": mid}}])
    else:
        if int(influence_style) == 0: # origin weibo, all retweeted people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 3}}, {"term": {"root_mid": temp_mid}}])
        else: # commented people
            query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 2}}, {"term": {"root_mid": temp_mid}}])
    search_results = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_body, _source=False, fields=["uid"], timeout=30)["hits"]["hits"]
    results = []  # uid_list
    if search_results:
        for item in search_results:
            if int(item["fields"]["uid"][0]) != int(uid):
                results.append(item["fields"]["uid"][0])
        results = list(set(results))

    bci_index = "bci_" + date.replace('-','')

    if results:
        portrait_results = es_user_portrait.mget(index=user_portrait, doc_type=portrait_index_type, body={"ids": results}, fields=["domain", "topic_string", "activity_geo_dict","importance", "influence"])["docs"]
        bci_results = es_cluster.mget(index=bci_index, doc_type='bci', body={"ids":results}, fields=['user_index'])['docs']
    else:
        portrait_results = {}
        bci_results = {}


    in_portrait = []
    out_portrait = []
    in_portrait_info = []
    retweeted_domain = {}
    retweeted_topic = {}
    retweeted_geo = {}
    average_influence = 0
    total_influence = 0
    count = 0

    if bci_results:
        total_influence = 0
        for item in bci_results:
            if item['found']:
                total_influence += item['fields']['user_index'][0]
    try:
        average_influence = total_influence/len(results)
    except:
        average_influence = 0

    if portrait_results:
        for item in portrait_results:
            if item["found"]:
                temp = []
                count += 1
                temp.append(item['_id'])
                temp.append(item["fields"]["importance"][0])
                in_portrait.append(temp)
                temp_domain = item["fields"]["domain"][0].split('&')
                temp_topic = item["fields"]["topic_string"][0].split('&')
                temp_geo = json.loads(item["fields"]["activity_geo_dict"][0])[-1].keys()
                #total_influence += item["fields"]["influence"][0]
                retweeted_domain = aggregation(temp_domain, retweeted_domain)
                retweeted_topic = aggregation(temp_topic, retweeted_topic)
                retweeted_geo = aggregation(temp_geo, retweeted_geo)
            else:
                out_portrait.append(item['_id'])
        retweeted_domain = proportion(retweeted_domain)
        retweeted_topic = proportion(retweeted_topic)
        retweeted_geo = proportion(retweeted_geo)
        #try:
        #    average_influence = total_influence/count
        #except:
        #    average_influence = 0
    sorted_retweeted_domain = sorted(retweeted_domain.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_topic = sorted(retweeted_topic.items(),key=lambda x:x[1], reverse=True)
    sorted_retweeted_geo = sorted(retweeted_geo.items(), key=lambda x:x[1], reverse=True)

    retweeted_results = dict()
    retweeted_results["domian"] = sorted_retweeted_domain[:5]
    retweeted_results["topic"] = sorted_retweeted_topic[:5]
    retweeted_results["geo"] = sorted_retweeted_geo[:5]
    retweeted_results["influence"] = average_influence
    in_portrait = sorted(in_portrait, key=lambda x:x[1], reverse=True)


    temp_list = []
    for item in in_portrait:
        temp_list.append(item[0])
    retweeted_results['in_portrait_number'] = len(temp_list)
    retweeted_results['out_portrait_number'] = len(out_portrait)
    in_portrait_url = get_user_url(temp_list[:default_number])
    out_portrait_url = get_user_url(out_portrait[:default_number])

    return_results = dict()
    return_results["influence_users"] = [in_portrait_url, out_portrait_url]
    return_results["influence_distribution"] = retweeted_results

    return return_results
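A hedged usage sketch for influenced_people; uid and mid are placeholders, and influence_style selects retweeters (0) or commenters (1) per the branches above:

influence = influenced_people('1234567890', 'example_mid', 0, '2013-09-07')
in_portrait_url, out_portrait_url = influence['influence_users']
distribution = influence['influence_distribution']  # top-5 domain/topic/geo plus average influence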
def get_social_inter_content(uid1, uid2, type_mark):
    weibo_list = []
    # get two type relation about uid1 and uid2
    # search weibo list
    now_ts = int(time.time())
    # run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    # uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": [uid1, uid2]},
            _source=False,
            fields=["uid", "uname"],
        )["docs"]
    except:
        portrait_result = []

    for item in portrait_result:
        uid = item["_id"]
        if item["found"] == True:
            uname = item["fields"]["uname"][0]
            uid2uname[uid] = uname
        else:
            uid2uname[uid] = "unknown"
    # iter date to search weibo list
    for i in range(7, 0, -1):
        iter_date_ts = now_date_ts - i * DAY
        iter_date = ts2datetime(iter_date_ts)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        query = []
        query.append({"bool": {"must": [{"term": {"uid": uid1}}, {"term": {"directed_uid": int(uid2)}}]}})
        if type_mark == "out":
            query.append({"bool": {"must": [{"term": {"uid": uid2}}, {"term": {"directed_uid": int(uid1)}}]}})
        try:
            flow_text_result = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body={
                    "query": {"bool": {"should": query}},
                    "sort": [{"timestamp": {"order": "asc"}}],
                    "size": MAX_VALUE,
                },
            )["hits"]["hits"]
        except:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text["_source"]
            weibo = {}
            weibo["timestamp"] = source["timestamp"]
            weibo["ip"] = source["ip"]
            weibo["geo"] = source["geo"]
            weibo["text"] = "\t".join(source["text"].split("&"))
            weibo["uid"] = source["uid"]
            weibo["uname"] = uid2uname[weibo["uid"]]
            weibo["directed_uid"] = str(source["directed_uid"])
            weibo["directed_uname"] = uid2uname[str(source["directed_uid"])]
            weibo_list.append(weibo)

    return weibo_list
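A hedged usage sketch for get_social_inter_content; the uids are placeholders, and type_mark 'out' adds the reverse direction (uid2 -> uid1), as the second bool clause above shows:

weibo_list = get_social_inter_content('111111', '222222', 'out')
for weibo in weibo_list:
    print weibo['timestamp'], weibo['uname'], '->', weibo['directed_uname']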
Example #52
def search_full_text(uid, date):
    result = []
    ts = datetime2ts(date)
    next_ts = ts + 24 * 3600
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "term": {
                                "uid": uid
                            }
                        }, {
                            "range": {
                                "timestamp": {
                                    "gte": ts,
                                    "lt": next_ts
                                }
                            }
                        }]
                    }
                }
            }
        },
        "size": 200
    }

    search_results = es.search(index='sensitive_user_text',
                               doc_type="user",
                               body=query_body)['hits']['hits']
    for item in search_results:
        detail = []
        source = item['_source']
        detail.append(source['sensitive'])
        detail.append(source['message_type'])
        ts = source['timestamp']
        re_time = time.strftime('%H:%M:%S', time.localtime(float(ts)))
        detail.append(re_time)
        geo_string = source['geo']
        geo_list = geo_string.split('\t')
        if len(geo_list) >= 3:
            geo = '\t'.join(geo_list[-2:])
        else:
            geo = geo_string
        detail.append(geo)
        detail.append(source['text'])
        date = date.replace('-', '')
        mid = source['mid']
        try:
            weibo_bci = es.get(index=date, doc_type='bci', id=uid)['_source']
        except:
            weibo_bci = {}
        retweeted_number = 0
        comment_number = 0
        if source['sensitive']:
            if int(source['message_type']) == 1:
                if weibo_bci:
                    if weibo_bci.get('s_origin_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['s_origin_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('s_origin_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['s_origin_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            elif int(source['message_type']) == 2:
                if weibo_bci:
                    if weibo_bci.get('s_retweeted_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['s_retweeted_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('s_retweeted_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['s_retweeted_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            else:
                pass
        else:
            if int(source['message_type']) == 1:
                if weibo_bci:
                    if weibo_bci.get('origin_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['origin_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('origin_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['origin_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            elif int(source['message_type']) == 2:
                if weibo_bci:
                    if weibo_bci.get('retweeted_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['retweeted_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('retweeted_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['retweeted_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            else:
                pass
        detail.append(retweeted_number)
        detail.append(comment_number)
        result.append(detail)

    return result
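Each row that search_full_text returns is, in order, [sensitive, message_type, time, geo, text, retweeted_number, comment_number]. A hedged usage sketch with placeholder arguments:

rows = search_full_text('1234567890', '2013-09-07')
for row in rows:
    print row[2], row[4][:30], 'retweeted:', row[5], 'comments:', row[6]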
Example #53
def search_attribute_portrait(uid):
    return_results = {}
    index_name = "sensitive_user_portrait"
    index_type = "user"

    try:
        search_result = es.get(index=index_name, doc_type=index_type, id=uid)
    except:
        return None
    results = search_result['_source']
    #return_results = results
    user_sensitive = user_type(uid)
    if user_sensitive:
        #return_results.update(sensitive_attribute(uid))
        return_results['user_type'] = 1
        return_results['sensitive'] = 1
    else:
        return_results['user_type'] = 0
        return_results['sensitive'] = 0

    if results['photo_url'] == 0:
        results['photo_url'] = 'unknown'
    if results['location'] == 0:
        results['location'] = 'unknown'
    return_results['photo_url'] = results['photo_url']
    return_results['uid'] = results['uid']
    return_results['uname'] = results['uname']
    if return_results['uname'] == 0:
        return_results['uname'] = 'unknown'
    return_results['location'] = results['location']
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    keyword_list = []
    if results['keywords']:
        keywords_dict = json.loads(results['keywords'])
        sort_word_list = sorted(keywords_dict.items(),
                                key=lambda x: x[1],
                                reverse=True)
        return_results['keywords'] = sort_word_list
    else:
        return_results['keywords'] = []

    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    if results['ip'] and results['geo_activity']:
        ip_dict = json.loads(results['ip'])
        geo_dict = json.loads(results['geo_activity'])
        geo_description = active_geo_description(ip_dict, geo_dict)
        return_results['geo_description'] = geo_description
    else:
        return_results['geo_description'] = ''

    geo_top = []
    temp_geo = {}

    if results['geo_activity']:
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            ts = time.time()
            ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
            for i in range(7):
                ts = ts + 24 * 3600
                date = ts2datetime(ts).replace('-', '')
                if geo_dict.has_key(date):
                    pass
                else:
                    geo_dict[date] = {}
        activity_geo_list = sorted(geo_dict.items(),
                                   key=lambda x: x[0],
                                   reverse=False)
        geo_list = geo_dict.values()
        for k, v in activity_geo_list:
            sort_v = sorted(v.items(), key=lambda x: x[1], reverse=True)
            top_geo = [item[0] for item in sort_v]
            geo_top.append([k, top_geo[0:2]])
            for iter_key in v.keys():
                if temp_geo.has_key(iter_key):
                    temp_geo[iter_key] += v[iter_key]
                else:
                    temp_geo[iter_key] = v[iter_key]
        sort_geo_dict = sorted(temp_geo.items(),
                               key=lambda x: x[1],
                               reverse=True)
        return_results['top_activity_geo'] = sort_geo_dict
        return_results['activity_geo_distribute'] = geo_top
    else:
        return_results['top_activity_geo'] = []
        return_results['activity_geo_distribute'] = geo_top

    hashtag_dict = get_user_hashtag(uid)[0]
    return_results['hashtag'] = hashtag_dict
    '''
    emotion_result = {}
    emotion_conclusion_dict = {}
    if results['emotion_words']:
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                if word_type=='126' or word_type=='127':
                    emotion_conclusion_dict[word_type] = word_dict
                sort_word_dict = sorted(word_dict.items(), key=lambda x:x[1], reverse=True)
                word_list = sort_word_dict[:5]
            except:
                results['emotion_words'] = emotion_result
            emotion_result[emotion_mark_dict[word_type]] = word_list
    return_results['emotion_words'] = emotion_result
    '''

    # topic
    if results['topic']:
        topic_dict = json.loads(results['topic'])
        sort_topic_dict = sorted(topic_dict.items(),
                                 key=lambda x: x[1],
                                 reverse=True)
        return_results['topic'] = sort_topic_dict[:5]
    else:
        return_results['topic'] = []

    # domain
    if results['domain']:
        domain_string = results['domain']
        domain_list = domain_string.split('_')
        return_results['domain'] = domain_list
    else:
        return_results['domain'] = []
    '''
    # emoticon
    if results['emotion']:
        emotion_dict = json.loads(results['emotion'])
        sort_emotion_dict = sorted(emotion_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['emotion'] = sort_emotion_dict[:5]
    else:
        return_results['emotion'] = []
    '''

    # on_line pattern
    if results['online_pattern']:
        online_pattern_dict = json.loads(results['online_pattern'])
        sort_online_pattern_dict = sorted(online_pattern_dict.items(),
                                          key=lambda x: x[1],
                                          reverse=True)
        return_results['online_pattern'] = sort_online_pattern_dict[:5]
    else:
        return_results['online_pattern'] = []
    '''
    #psycho_feature
    if results['psycho_feature']:
        psycho_feature_list = results['psycho_feature'].split('_')
        return_results['psycho_feature'] = psycho_feature_list
    else:
        return_results['psycho_feature'] = []
    '''

    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user',
                                             doc_type='user',
                                             id=uid)
        self_state = profile_result['_source'].get('description', '')
        return_results['description'] = self_state
    except:
        return_results['description'] = ''
    if results['importance']:
        query_body = {
            'query': {
                'range': {
                    'importance': {
                        'from': results['importance'],
                        'to': 100000
                    }
                }
            }
        }
        importance_rank = es.count(index='sensitive_user_portrait',
                                   doc_type='user',
                                   body=query_body)
        if importance_rank['_shards']['successful'] != 0:
            return_results['importance_rank'] = importance_rank['count']
        else:
            return_results['importance_rank'] = 0
    else:
        return_results['importance_rank'] = 0
    return_results['importance'] = results['importance']

    if results['activeness']:
        query_body = {
            'query': {
                'range': {
                    'activeness': {
                        'from': results['activeness'],
                        'to': 10000
                    }
                }
            }
        }
        activeness_rank = es.count(index='sensitive_user_portrait',
                                   doc_type='user',
                                   body=query_body)
        if activeness_rank['_shards']['successful'] != 0:
            return_results['activeness_rank'] = activeness_rank['count']
        else:
            return_results['activeness_rank'] = 0
    else:
        return_results['activeness_rank'] = 0
    return_results['activeness'] = results['activeness']

    if results['influence']:
        query_body = {
            'query': {
                'range': {
                    'influence': {
                        'from': results['influence'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait',
                                  doc_type='user',
                                  body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['influence_rank'] = influence_rank['count']
        else:
            return_results['influence_rank'] = 0
    else:
        return_results['influence_rank'] = 0
    return_results['influence'] = results['influence']

    if results['sensitive']:
        query_body = {
            'query': {
                'range': {
                    'sensitive': {
                        'from': results['sensitive'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait',
                                  doc_type='user',
                                  body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['sensitive_rank'] = influence_rank['count']
        else:
            return_results['sensitive_rank'] = 0
    else:
        return_results['sensitive_rank'] = 0
    return_results['sensitive'] = results['sensitive']

    query_body = {'query': {"match_all": {}}}
    all_count = es.count(index='sensitive_user_portrait',
                         doc_type='user',
                         body=query_body)
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print 'es_sensitive_user_portrait error'
        return_results['all_count'] = 0

    # link
    link_ratio = results['link']
    return_results['link'] = link_ratio

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    for i in range(1, 8):
        date = ts2datetime(ts + i * 24 * 3600).replace('-', '')
        detail = [0] * 10
        try:
            item = es.get(index=date, doc_type='bci', id=uid)['_source']
            '''
            if return_results['utype']:
                detail[0] = item.get('s_origin_weibo_number', 0)
                detail[1] = item.get('s_retweeted_weibo_number', 0)
                detail[2] = item.get('s_origin_weibo_retweeted_total_number', 0) + item.get('s_retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get('s_origin_weibo_comment_total_number', 0) + item.get('s_retweeted_weibo_comment_total_number', 0)
            else:
            '''
            if 1:
                detail[0] = item.get('origin_weibo_number', 0)
                detail[1] = item.get('retweeted_weibo_number', 0)
                detail[2] = item.get(
                    'origin_weibo_retweeted_total_number', 0) + item.get(
                        'retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get(
                    'origin_weibo_comment_total_number', 0) + item.get(
                        'retweeted_weibo_comment_total_number', 0)
                retweeted_id = item.get('origin_weibo_top_retweeted_id', '0')
                detail[4] = retweeted_id
                if retweeted_id:
                    try:
                        detail[5] = es.get(index='sensitive_user_text',
                                           doc_type='user',
                                           id=retweeted_id)['_source']['text']
                    except:
                        detail[5] = ''
                else:
                    detail[5] = ''
                detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
                detail[7] = item.get('origin_weibo_top_comment_id', '0')
                if detail[7]:
                    try:
                        detail[8] = es.get(index='sensitive_user_text',
                                           doc_type='user',
                                           id=detail[7])['_source']['text']
                    except:
                        detail[8] = ''
                else:
                    detail[8] = ''
                detail[9] = item.get('origin_weibo_comment_top_number', 0)
                attention_number = detail[2] + detail[3]
                attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            influence_value.append([date, item['user_index']])
            influence_detail.append([date, detail])
            attention_value.append(attention)
        except:
            influence_value.append([date, 0])
            influence_detail.append([date, detail])
            attention_value.append(0)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value

    return return_results
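The attention degree computed above squashes the retweet-plus-comment count through a logistic curve into [0, 1): attention = 2 / (1 + exp(-0.005 * n)) - 1. A quick standalone check of the mapping:

import math

for n in (0, 100, 1000, 10000):
    print n, round(2 / (1 + math.exp(-0.005 * n)) - 1, 3)
# 0 -> 0.0, 100 -> 0.245, 1000 -> 0.987, 10000 -> 1.0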
Example #54
from flask import Blueprint, request
import json

from search import sensitive_search_mention, sensitive_search_attention, sensitive_search_follower, \
                   sensitive_search_be_comment, sensitive_search_bidirect_interaction
from search import delete_action, search_identify_uid, get_activeness_trend
from search import get_activity_weibo, search_comment, search_be_comment, sensitive_search_comment
from search import search_bidirect_interaction, search_preference_attribute, search_sentiment_trend
from search import search_sentiment_weibo, get_influence_trend, search_remark, edit_remark, search_user_group
from sensitive_user_portrait.search_user_profile import es_get_source
from sensitive_user_portrait.global_utils import es_user_portrait as es
from sensitive_user_portrait.parameter import SOCIAL_DEFAULT_COUNT, SENTIMENT_TREND_DEFAULT_TYPE
from sensitive_user_portrait.parameter import DEFAULT_SENTIMENT, DAY
from sensitive_user_portrait.parameter import RUN_TYPE, RUN_TEST_TIME, WORK_TYPE
from sensitive_user_portrait.time_utils import ts2datetime, datetime2ts
from personal_influence import get_user_influence, influenced_detail, influenced_people, \
                               influenced_user_detail, statistics_influence_people, tag_vector, \
                               comment_on_influence, detail_weibo_influence, influence_summary

# use to test 13-09-08
test_time = datetime2ts(RUN_TEST_TIME)

# custom_attribute
attribute_index_name = 'custom_attribute'
attribute_index_type = 'attribute'

mod = Blueprint('attribute', __name__, url_prefix='/attribute')


@mod.route('/search_user_group/')
def ajax_search_user_group():
    uid = request.args.get('uid', '')
    results = search_user_group(uid)

    return json.dumps(results)
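A hedged sketch of wiring this blueprint into an application; the Flask app construction is an assumption, only the registration call itself follows from the Blueprint definition above:

from flask import Flask

app = Flask(__name__)
app.register_blueprint(mod)  # exposes /attribute/search_user_group/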
Example #55
def get_user_trend(uid):
    activity_dict = {}
    if RUN_TYPE:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)  # 2015-09-22
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    #test
    #ts = datetime2ts('2013-09-08')
    timestamp = ts
    return_results = dict()
    return_sensitive_results = {}
    for i in range(1, 8):
        ts = timestamp - 24 * 3600 * i
        date = ts2datetime(ts)
        if WORK_TYPE == 0:
            index_name = act_index_pre + date
            sensitive_index_name = sen_act_index_pre + date
            exist_bool = es_cluster.indices.exists(index=index_name)
            sensitive_exist_bool = es_cluster.indices.exists(
                index=sensitive_index_name)
            if exist_bool:
                try:
                    tmp_act_result = es_cluster.get(index=index_name,
                                                    doc_type="activity",
                                                    id=uid)['_source']
                    results = tmp_act_result['activity_dict']
                except:
                    results = dict()
            else:
                results = dict()
            if sensitive_exist_bool:
                try:
                    tmp_sensitive_act_result = es_cluster.get(
                        index=sensitive_index_name,
                        doc_type="sensitive_activity",
                        id=uid)['_source']
                    sensitive_results = tmp_sensitive_act_result[
                        'sensitive_activity_dict']
                except:
                    sensitive_results = dict()
            else:
                sensitive_results = dict()
        else:
            results = redis_activity.hget('activity_' + str(ts), uid)
            sensitive_results = redis_activity.hget(
                'sensitive_activity_' + str(ts), uid)
        if results:
            result_dict = json.loads(results)
            for key in result_dict.keys():
                return_results[int(key) * 900 + ts] = result_dict[key]
        if sensitive_results:
            sensitive_result_dict = json.loads(sensitive_results)
            for key in sensitive_result_dict.keys():
                return_sensitive_results[int(key) * 900 +
                                         ts] = sensitive_result_dict[key]

    trend_dict = {}
    for i in range(1, 8):
        ts = timestamp - i * 24 * 3600
        for j in range(0, 6):
            base_time = ts + j * 900 * 16
            num = 0
            for k in range(16):
                seg_time = base_time + k * 900
                if seg_time in return_results:
                    num += return_results[seg_time]
            trend_dict[base_time] = num

    sensitive_trend_dict = {}
    for i in range(1, 8):
        ts = timestamp - i * 24 * 3600
        for j in range(0, 6):
            base_time = ts + j * 900 * 16
            num = 0
            for k in range(16):
                seg_time = base_time + k * 900
                if seg_time in return_sensitive_results:
                    num += return_sensitive_results[seg_time]
            sensitive_trend_dict[base_time] = num

    ordinary_key_set = set(trend_dict.keys())
    sensitive_key_set = set(sensitive_trend_dict.keys())
    for key in sensitive_key_set:
        if key in ordinary_key_set:
            trend_dict[key] += sensitive_trend_dict[key]
        else:
            trend_dict[key] = sensitive_trend_dict[key]

    sorted_dict = sorted(trend_dict.items(), key=lambda x: x[0], reverse=False)
    sorted_sensitive_dict = sorted(sensitive_trend_dict.items(),
                                   key=lambda x: x[0],
                                   reverse=False)
    return [sorted_dict, sorted_sensitive_dict]  # total and sensitive
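get_user_trend keys activity by 900-second (15-minute) slots and then sums each day into 6 blocks of 16 slots (4 hours each). A standalone sketch of that binning with synthetic counts; the timestamp is a placeholder:

day_start = 1378569600  # some day at 00:00
slot_counts = dict((day_start + k * 900, 1) for k in range(96))  # one action per 15-min slot
trend = {}
for j in range(6):  # six 4-hour blocks per day
    base_time = day_start + j * 900 * 16
    num = 0
    for k in range(16):
        num += slot_counts.get(base_time + k * 900, 0)
    trend[base_time] = num
print sorted(trend.items())  # every block sums to 16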
def full_text_search(keywords, uid, start_time, end_time, size):
    results = []
    uid_list = []
    user_profile_list = []
    query_body = {
        "query": {
            "filtered":{
                "filter":{
                    "bool": {
                        "must": []
                    }
                }
            }
        },
        "size":size,
        "sort":{"timestamp":{"order": 'desc'}}
    }

    if RUN_TYPE:
        query_body["sort"] = {"user_fansnum":{"order": 'desc'}}

    if uid:
        query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"term":{"uid":uid}})

    if keywords:
        keywords_list = keywords.split(',')
        for word in keywords_list:
            query_body["query"]["filtered"]["filter"]["bool"]["must"].append({'wildcard':{'text':{'wildcard':'*'+word+'*'}}})

    index_list = []
    exist_bool = es_flow_text.indices.exists(index="flow_text_"+end_time)
    if start_time:
        start_ts = datetime2ts(start_time)
        end_ts = datetime2ts(end_time)
        ts = end_ts
        while 1:
            index_name = "flow_text_"+ts2datetime(ts)
            exist_bool = es_flow_text.indices.exists(index=index_name)
            if exist_bool:
                index_list.append(index_name)
            if ts == start_ts:
                break
            else:
                ts -= 3600*24

    # no usable flow_text index in the requested range
    if not index_list:
        return [], []

    search_results = es_flow_text.search(index=index_list, doc_type="text", body=query_body)["hits"]["hits"]
    for item in search_results:
        uid_list.append(item['_source']['uid'])
    history_max = get_history_max()
    personal_field = ["nick_name", "fansnum", "statusnum","user_location"]
    user_info = get_user_profile(uid_list, personal_field)
    bci_results = ES_CLUSTER_FLOW1.mget(index="bci_history", doc_type="bci", body={"ids":uid_list}, _source=False, fields=["bci_day_last"])["docs"]
    sensitive_results = es_sensitive_history.mget(index="sensitive_history", doc_type="sensitive", body={"ids":uid_list}, _source=False, fields=["last_value"])["docs"]

    count = 0
    for item in search_results:
        item = item['_source']
        uid_list.append(item['uid'])
        iter_item = []
        iter_item.append(item['uid'])
        iter_item.append(user_info[count][1])
        iter_item.append(item['text'])
        iter_item.append(ts2date(item['timestamp']))
        iter_item.append(item['geo'])
        if item.get("sensitive_words_string", ''):
            iter_item.append(item['sensitive_words_string'].split('&'))
        else:
            iter_item.append([])
        iter_item.append(item.get('retweeted', 0))
        iter_item.append(item.get('comment', 0))
        count += 1
        results.append(iter_item)

    user_set = set()
    count = 0
    for item in user_info:
        if item[0] in user_set:
            continue
        else:
            user_set.add(item[0])
        if bci_results[count]["found"]:
            bci_value = bci_results[count]["fields"]["bci_day_last"][0]
            item.append(normalize_index(bci_value, history_max["max_bci"]))
        else:
            item.append(0)
        if sensitive_results[count]["found"]:
            sensitive_value = sensitive_results[count]['fields']['last_value'][0]
            item.append(normalize_index(sensitive_value, history_max["max_sensitive"]))
        else:
            item.append(0)
        user_profile_list.append(item)

    return results, user_profile_list
Example #57
def full_text_search(keywords, uid, start_time, end_time, size):
    results = []
    uid_list = []
    user_profile_list = []
    query_body = {
        "query": {
                    "bool": {
                        "must": []
                    }
        },
        "size":size,
        "sort":{"timestamp":{"order": 'desc'}}
    }

    if RUN_TYPE:
        query_body["sort"] = {"user_fansnum":{"order": 'desc'}}

    if uid:
        query_body["query"]["bool"]["must"].append({"term":{"uid":uid}})

    if keywords:
        keywords_list = keywords.split(',')
        for word in keywords_list:
            query_body["query"]["bool"]["must"].append({'wildcard':{'text':{'wildcard':'*'+word+'*'}}})

    index_list = []
    exist_bool = es_flow_text.indices.exists(index="flow_text_"+end_time)
    if start_time:
        start_ts = datetime2ts(start_time)
        end_ts = datetime2ts(end_time)
        ts = end_ts
        while 1:
            index_name = "flow_text_"+ts2datetime(ts)
            exist_bool = es_flow_text.indices.exists(index=index_name)
            if exist_bool:
                index_list.append(index_name)
            if ts == start_ts:
                break
            else:
                ts -= 3600*24

    # no usable flow_text index in the requested range
    if not index_list:
        return [[], []]

    search_results = es_flow_text.search(index=index_list, doc_type="text", body=query_body)["hits"]["hits"]
    for item in search_results:
        uid_list.append(item['_source']['uid'])
    user_info = []
    if uid_list:
        history_max = get_history_max()
        personal_field = ["nick_name", "fansnum", "statusnum","user_location"]
        user_info = get_user_profile(uid_list, personal_field)
        bci_results = ES_CLUSTER_FLOW1.mget(index="bci_history", doc_type="bci", body={"ids":uid_list}, _source=False, fields=["bci_day_last"])["docs"]
        in_portrait = es_user_portrait.mget(index="sensitive_user_portrait", doc_type="user", body={"ids":uid_list}, _source=False)["docs"]
        sensitive_results = es_sensitive_history.mget(index="sensitive_history", doc_type="sensitive", body={"ids":uid_list}, _source=False, fields=["last_value"])["docs"]
    print "len search: ", len(search_results)

    count = 0
    # uid uname text date geo sensitive_words retweeted comment
    for item in search_results:
        item = item['_source']
        uid_list.append(item['uid'])
        iter_item = []
        iter_item.append(item['uid'])
        iter_item.append(user_info[count][1])
        iter_item.append(item['text'])
        iter_item.append(ts2date(item['timestamp']))
        iter_item.append(item['geo'])
        if item.get("sensitive_words_string", ''):
            iter_item.append(item['sensitive_words_string'].split('&'))
        else:
            iter_item.append([])
        iter_item.append(item.get('retweeted', 0))
        iter_item.append(item.get('comment', 0))
        count += 1
        results.append(iter_item)

    user_set = set()
    count = 0
    # uid "nick_name", "fansnum", "statusnum","user_location", bci, sensitive
    for item in user_info:
        if item[0] in user_set:
            continue
        else:
            user_set.add(item[0])
        if bci_results[count]["found"]:
            if bci_results[count].has_key("fields"):
                bci_value = bci_results[count]["fields"]["bci_day_last"][0]
            else:
                bci_value = 0
            item.append(normalize_index(bci_value, history_max["max_bci"]))
        else:
            item.append(0)
        if sensitive_results[count]["found"]:
            if sensitive_results[count].has_key("fields"):
                sensitive_value = sensitive_results[count]['fields']['last_value'][0]
            else:
                sensitive_value = 0
            item.append(normalize_index(sensitive_value, history_max["max_sensitive"]))
        else:
            item.append(0)
        if in_portrait[count]["found"]:
            item.append("1")
        else:
            item.append("0")
        user_profile_list.append(item)

    return results, user_profile_list
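A hedged usage sketch for this revision of full_text_search; the dates and keywords are placeholders, and the second return value carries per-user rows ending with the normalized bci, sensitive value and an in-portrait flag:

weibo_rows, user_rows = full_text_search('keyword1,keyword2', '', '2013-09-01', '2013-09-07', 100)
for uid, uname, text, date, geo, words, retweeted, comment in weibo_rows:
    print uid, uname, date, words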
Example #58
def sensitive_attribute(uid, date):
    results = {}
    portrait = {}
    utype = user_type(uid)
    if not utype:
        results['utype'] = 0
        return results
    results['utype'] = 1

    results['uid'] = uid
    portrait_result = es.get(index='sensitive_user_portrait',
                             doc_type='user',
                             id=uid)['_source']
    results['uname'] = portrait_result['uname']
    if portrait_result['uname'] == 0:
        results['uname'] = 'unknown'
    if portrait_result['photo_url'] == 0:
        portrait_result['photo_url'] = 'unknown'
    if portrait_result['location'] == 0:
        portrait_result['location'] = 'unknown'
    results['photo_url'] = portrait_result['photo_url']

    # sensitive weibo number statistics
    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # test
    influence_results = []
    try:
        influence_results = es.get(index=date, doc_type='bci',
                                   id=uid)['_source']
        results['sensitive_origin_weibo_number'] = influence_results.get(
            's_origin_weibo_number', 0)
        results['sensitive_retweeted_weibo_number'] = influence_results.get(
            's_retweeted_weibo_number', 0)
        results['sensitive_comment_weibo_number'] = int(
            influence_results.get('s_comment_weibo_number', 0))
        results[
            'sensitive_retweeted_weibo_retweeted_total_number'] = influence_results.get(
                's_retweeted_weibo_retweeted_total_number', 0)
        results[
            'sensitive_origin_weibo_retweeted_total_number'] = influence_results.get(
                's_origin_weibo_retweeted_total_number', 0)
        results[
            'sensitive_origin_weibo_comment_total_number'] = influence_results.get(
                's_origin_weibo_comment_total_number', 0)
        results[
            'sensitive_retweeted_weibo_comment_total_number'] = influence_results.get(
                's_retweeted_weibo_comment_total_number', 0)
    except:
        results['sensitive_origin_weibo_number'] = 0
        results['sensitive_retweeted_weibo_number'] = 0
        results['sensitive_comment_weibo_number'] = 0
        results['sensitive_origin_weibo_retweeted_total_number'] = 0
        results['sensitive_origin_weibo_comment_total_number'] = 0
        results['sensitive_retweeted_weibo_retweeted_total_number'] = 0
        results['sensitive_retweeted_weibo_comment_total_number'] = 0

    try:
        item = es.get(index=date, doc_type='bci', id=uid)['_source']
    except:
        item = {}
    results['origin_weibo_total_number'] = item.get(
        'origin_weibo_number', 0) + results['sensitive_origin_weibo_number']
    results['retweeted_weibo_total_number'] = item.get(
        'retweeted_weibo_number',
        0) + results['sensitive_retweeted_weibo_number']
    results['comment_weibo_total_number'] = int(
        item.get('comment_weibo_number', 0)) + int(
            results['sensitive_comment_weibo_number'])
    results['origin_weibo_retweeted_total_number'] = item.get(
        'origin_weibo_retweeted_total_number',
        0) + results['sensitive_origin_weibo_retweeted_total_number']
    results['origin_weibo_comment_total_number'] = item.get(
        'origin_weibo_comment_total_number',
        0) + results['sensitive_origin_weibo_comment_total_number']
    results['retweeted_weibo_retweeted_total_number'] = item.get(
        'retweeted_weibo_retweeted_total_number',
        0) + results['sensitive_retweeted_weibo_retweeted_total_number']
    results['retweeted_weibo_comment_total_number'] = item.get(
        'retweeted_weibo_comment_total_number',
        0) + results['sensitive_retweeted_weibo_comment_total_number']

    results['sensitive_text'] = sort_sensitive_text(uid)

    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    results['negetive_index'] = float(emotion_number[2]) / (
        emotion_number[2] + emotion_number[1] + emotion_number[0])
    results['negetive_influence'] = float(emotion_number[1]) / (
        emotion_number[2] + emotion_number[1] + emotion_number[0])
    sentiment_dict = sentiment_trend[1]
    return_sentiment = dict()
    return_sentiment['positive'] = []
    return_sentiment['neutral'] = []
    return_sentiment['negetive'] = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    for i in range(1, 8):
        ts = ts + 24 * 3600
        date = ts2datetime(ts).replace('-', '')
        temp = sentiment_dict.get(date, {})
        return_sentiment['positive'].append([temp.get('positive', 0), date])
        return_sentiment['negetive'].append([temp.get('negetive', 0), date])
        return_sentiment['neutral'].append([temp.get('neutral', 0), date])
    results['sentiment_trend'] = return_sentiment

    if 1:
        portrait_results = es.get(index="sensitive_user_portrait",
                                  doc_type='user',
                                  id=uid)['_source']
        results['politics_trend'] = portrait_results['politics_trend']
        results['domain'] = portrait_results['domain']
        results['sensitive'] = portrait_results['sensitive']
        temp_hashtag = portrait_results['sensitive_hashtag_dict']
        temp_sensitive_words = portrait_results['sensitive_words_dict']
        temp_sensitive_geo = portrait_results['sensitive_geo_activity']
        if temp_sensitive_geo:
            sensitive_geo_dict = json.loads(temp_sensitive_geo)
            if len(sensitive_geo_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
                for i in range(7):
                    ts = ts + 24 * 3600
                    date = ts2datetime(ts).replace('-', '')
                    if sensitive_geo_dict.has_key(date):
                        pass
                    else:
                        sensitive_geo_dict[date] = {}
            sorted_sensitive_geo = sorted(sensitive_geo_dict.items(),
                                          key=lambda x: x[0],
                                          reverse=False)
            sensitive_geo_list = []
            for k, v in sorted_sensitive_geo:
                temp_list = []
                sorted_geo = sorted(v.items(),
                                    key=lambda x: x[1],
                                    reverse=True)[0:2]
                # print sorted_geo
                temp_list.extend([k, sorted_geo])
                sensitive_geo_list.append(temp_list)
            results['sensitive_geo_distribute'] = sensitive_geo_list
        if temp_hashtag:
            hashtag_dict = json.loads(
                portrait_results['sensitive_hashtag_dict'])
            if len(hashtag_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
                for i in range(7):
                    ts = ts + 24 * 3600
                    date = ts2datetime(ts).replace('-', '')
                    if hashtag_dict.has_key(date):
                        hashtag_dict_detail = hashtag_dict[date]
                        hashtag_dict[date] = sorted(
                            hashtag_dict_detail.items(),
                            key=lambda x: x[1],
                            reverse=True)
                    else:
                        hashtag_dict[date] = {}
            results['sensitive_hashtag_description'] = hashtag_description(
                hashtag_dict)
        else:
            hashtag_dict = {}
        if temp_sensitive_words:
            sensitive_words_dict = json.loads(temp_sensitive_words)
            if len(sensitive_words_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
                for i in range(7):
                    ts = ts + 24 * 3600
                    date = ts2datetime(ts).replace('-', '')
                    if sensitive_words_dict.has_key(date):
                        pass
                    else:
                        sensitive_words_dict[date] = {}
        else:
            sensitive_words_dict = {}
        date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
        date = '20130907'
        today_sensitive_words = sensitive_words_dict.get(date, {})
        results['today_sensitive_words'] = today_sensitive_words
        all_hashtag_dict = {}
        for item in hashtag_dict:
            detail_hashtag_dict = hashtag_dict[item]
            for key in detail_hashtag_dict:
                if all_hashtag_dict.has_key(key[0]):
                    all_hashtag_dict[key[0]] += key[1]
                else:
                    all_hashtag_dict[key[0]] = key[1]

        all_sensitive_words_dict = {}
        for item in sensitive_words_dict:
            detail_words_dict = sensitive_words_dict[item]
            for key in detail_words_dict:
                if all_sensitive_words_dict.has_key(key):
                    all_sensitive_words_dict[key] += detail_words_dict[key]
                else:
                    all_sensitive_words_dict[key] = detail_words_dict[key]

        sorted_hashtag = sorted(all_hashtag_dict.items(),
                                key=lambda x: x[1],
                                reverse=True)
        sorted_words = sorted(all_sensitive_words_dict.items(),
                              key=lambda x: x[1],
                              reverse=True)
        sorted_hashtag_dict = sorted(hashtag_dict.items(),
                                     key=lambda x: x[0],
                                     reverse=False)
        sorted_words_dict = sorted(sensitive_words_dict.items(),
                                   key=lambda x: x[0],
                                   reverse=False)
        new_sorted_dict = sort_sensitive_words(sorted_words)
        results['sensitive_hashtag'] = sorted_hashtag
        results['sensitive_words'] = new_sorted_dict
        results['sensitive_hashtag_dict'] = sorted_hashtag_dict
        results['sensitive_words_dict'] = sorted_words_dict

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results
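A hedged usage sketch for sensitive_attribute; the uid is a placeholder, and note that the date argument is overridden internally to the test date '20130907', matching the #test convention used throughout:

profile = sensitive_attribute('1234567890', '2013-09-07')
if profile['utype']:
    print profile['uname'], profile['sensitive'], profile['today_sensitive_words']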