Example #1
def search_mention(uid, sensitive):
    # Count who `uid` mentioned (@) over the 7 days before the test date,
    # reading either the ordinary or the sensitive per-day hash.
    stat_results = dict()
    results = dict()
    test_ts = time.time()
    test_ts = datetime2ts("2013-09-07")  # test: hard-coded date
    for i in range(0, 7):
        ts = test_ts - i * 24 * 3600
        date = ts2datetime(ts).replace("-", "")
        if not sensitive:
            at_temp = r_cluster.hget("at_" + str(date), str(uid))
        else:
            at_temp = r_cluster.hget("sensitive_at_" + str(date), str(uid))
        if not at_temp:
            continue
        result_dict = json.loads(at_temp)
        for at_uid in result_dict:
            # Accumulate the mention count per mentioned uid.
            if at_uid in stat_results:
                stat_results[at_uid] += result_dict[at_uid]
            else:
                stat_results[at_uid] = result_dict[at_uid]
    if not stat_results:
        return [None, 0]

    # Mark the mentioned uids that are already registered in the system.
    in_status = identify_uid_list_in(list(stat_results.keys()))
    for at_uid in stat_results:
        if at_uid in in_status:
            results[at_uid] = [stat_results[at_uid], "1"]
        else:
            results[at_uid] = [stat_results[at_uid], "0"]

    # Sort by mention count; return the top 20 plus the total number of mentioned uids.
    sorted_results = sorted(results.items(), key=lambda x: x[1][0], reverse=True)
    return [sorted_results[0:20], len(results)]
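
All of these snippets come from the same project and rely on helpers defined elsewhere in it: the Redis client r_cluster, the date utilities ts2datetime / datetime2ts, plus identify_uid_list_in, ip2geo, es_cluster and es_user_profile. As a rough sketch of the assumed environment (connection settings and helper bodies below are illustrative assumptions, not the project's actual code), the shared setup might look like this:

import json
import time
from datetime import datetime

import redis

# Assumed connection settings; the project configures its own cluster client.
r_cluster = redis.StrictRedis(host='127.0.0.1', port=6379, db=0)

def ts2datetime(ts):
    # Unix timestamp -> 'YYYY-MM-DD'
    return datetime.fromtimestamp(int(ts)).strftime('%Y-%m-%d')

def datetime2ts(date_str):
    # 'YYYY-MM-DD' -> Unix timestamp at local midnight
    return int(time.mktime(datetime.strptime(date_str, '%Y-%m-%d').timetuple()))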
Example #2
def get_user_geo(uid):
    # Aggregate the IPs `uid` posted from over the previous 7 days and map them to regions.
    user_ip_result = {}  # ordinary ip -> count
    user_sensitive_ip_result = {}  # sensitive ip -> count
    now_ts = time.time()
    now_date = ts2datetime(now_ts)  # e.g. 2015-09-22
    ts = datetime2ts(now_date)

    # test: hard-coded date
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        results = r_cluster.hget('ip_' + str(date), uid)
        # Assumed per-day key 'sensitive_ip_<YYYYMMDD>', matching 'ip_<YYYYMMDD>' above.
        sensitive_results = r_cluster.hget('sensitive_ip_' + str(date), uid)
        if results:
            ip_results = json.loads(results)
            for ip in ip_results:
                if ip in user_ip_result:
                    user_ip_result[ip] += ip_results[ip]
                else:
                    user_ip_result[ip] = ip_results[ip]

        if sensitive_results:
            sensitive_ip_results = json.loads(sensitive_results)
            for ip in sensitive_ip_results:
                if ip in user_sensitive_ip_result:
                    user_sensitive_ip_result[ip] += sensitive_ip_results[ip]
                else:
                    user_sensitive_ip_result[ip] = sensitive_ip_results[ip]

    # Fold the sensitive counts into the ordinary counts so the first list is the total.
    for key in user_sensitive_ip_result:
        if key in user_ip_result:
            user_ip_result[key] += user_sensitive_ip_result[key]
        else:
            user_ip_result[key] = user_sensitive_ip_result[key]

    user_geo_dict = ip2geo(user_ip_result)
    sorted_user_geo_dict = sorted(user_geo_dict.items(),
                                  key=lambda x: x[1],
                                  reverse=True)
    sensitive_user_geo_dict = ip2geo(user_sensitive_ip_result)
    sorted_sensitive_user_geo_dict = sorted(sensitive_user_geo_dict.items(),
                                            key=lambda x: x[1],
                                            reverse=True)

    return [sorted_user_geo_dict,
            sorted_sensitive_user_geo_dict]  # total and sensitive
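
ip2geo is not shown in these snippets; it turns an {ip: count} dict into a {region: count} dict. A hypothetical stand-in, assuming a simple prefix-to-region lookup table (the real project resolves IPs with its own geo database), could be:

# Hypothetical stand-in for ip2geo, only to show the expected input/output shapes.
IP_PREFIX2REGION = {'202.106': 'Beijing', '61.135': 'Beijing', '219.136': 'Guangzhou'}  # made-up table

def ip2geo(ip_count_dict):
    geo_count = {}
    for ip, count in ip_count_dict.items():
        region = IP_PREFIX2REGION.get('.'.join(ip.split('.')[:2]), 'unknown')
        geo_count[region] = geo_count.get(region, 0) + count
    return geo_count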
Example #3
def get_sensitive_user_detail(uid_list, date, sensitive):
    results = []
    index_name = str(date).replace('-', '')  # index_name: 20130901
    # Batch-fetch influence (bci) and profile documents for all uids at once.
    user_bci_results = es_cluster.mget(index=index_name, doc_type='bci',
                                       body={'ids': uid_list}, _source=True)['docs']
    user_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user',
                                                body={'ids': uid_list}, _source=True)['docs']
    for i in range(0, len(uid_list)):
        # row layout: [uid, nick_name, user_location, fansnum, statusnum, user_index(, sensitive_words)]
        personal_info = [''] * 6
        uid = uid_list[i]
        personal_info[0] = uid
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            personal_info[1] = profile_dict['nick_name']
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            personal_info[5] = user_bci_results[i]['_source'].get('user_index', 0)
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = r_cluster.hget('sensitive_' + index_name, str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(list(sensitive_dict.keys()))
            else:
                personal_info.append([])
        results.append(personal_info)
    return results
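
Each row of the returned results follows the fixed positional layout above, with the sensitive-word list appended only when sensitive is truthy. A sketch of how a caller might consume it (the uids and date below are made up):

rows = get_sensitive_user_detail(['1111111111', '2222222222'], '2013-09-01', 1)
for row in rows:
    # [uid, nick_name, user_location, fansnum, statusnum, user_index, sensitive_words]
    uid, nick_name, location, fans, statuses, influence = row[:6]
    sensitive_words = row[6] if len(row) > 6 else []
    print(uid, nick_name, influence, sensitive_words)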
Example #4
def get_user_sensitive_words(uid):
    # Sum the sensitive words `uid` used over the previous 7 days.
    user_sensitive_words_dict = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)  # e.g. 2015-09-22
    ts = datetime2ts(now_date)

    # test: hard-coded date
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        results = r_cluster.hget('sensitive_' + str(date), uid)
        if results:
            sensitive_words_dict = json.loads(results)
            for word in sensitive_words_dict:
                if word in user_sensitive_words_dict:
                    user_sensitive_words_dict[word] += sensitive_words_dict[word]
                else:
                    user_sensitive_words_dict[word] = sensitive_words_dict[word]
    # (word, count) pairs sorted by count, highest first
    sort_sensitive_words_dict = sorted(user_sensitive_words_dict.items(),
                                       key=lambda x: x[1],
                                       reverse=True)

    return sort_sensitive_words_dict
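
The per-day hashes read here are assumed to store, for each uid, a JSON object mapping each sensitive word to its count for that day. A hypothetical writer-side sketch (uid, words and counts are made up):

word_counts = {'word_a': 2, 'word_b': 1}  # made-up daily counts for one uid
r_cluster.hset('sensitive_20130907', '1234567890', json.dumps(word_counts))
# get_user_sensitive_words('1234567890') would then fold such entries into a 7-day total.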
Example #5
def get_user_hashtag(uid):
    # Count the hashtags `uid` used over the previous 7 days, ordinary and sensitive.
    user_hashtag_dict = {}
    sensitive_user_hashtag_dict = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)  # e.g. 2015-09-22
    ts = datetime2ts(now_date)

    # test: hard-coded date
    ts = datetime2ts('2013-09-08')
    for i in range(1, 8):
        ts = ts - 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        results = r_cluster.hget('hashtag_' + str(date), uid)
        sensitive_results = r_cluster.hget('sensitive_hashtag_' + str(date), uid)
        if results:
            hashtag_dict = json.loads(results)
            for hashtag in hashtag_dict:
                if hashtag in user_hashtag_dict:
                    user_hashtag_dict[hashtag] += hashtag_dict[hashtag]
                else:
                    user_hashtag_dict[hashtag] = hashtag_dict[hashtag]
        if sensitive_results:
            sensitive_hashtag_dict = json.loads(sensitive_results)
            for hashtag in sensitive_hashtag_dict:
                if hashtag in sensitive_user_hashtag_dict:
                    sensitive_user_hashtag_dict[hashtag] += sensitive_hashtag_dict[hashtag]
                else:
                    sensitive_user_hashtag_dict[hashtag] = sensitive_hashtag_dict[hashtag]

    # Fold sensitive counts into the ordinary counts so the first list is the total.
    for key in sensitive_user_hashtag_dict:
        if key in user_hashtag_dict:
            user_hashtag_dict[key] += sensitive_user_hashtag_dict[key]
        else:
            user_hashtag_dict[key] = sensitive_user_hashtag_dict[key]

    sort_hashtag_dict = sorted(user_hashtag_dict.items(),
                               key=lambda x: x[1],
                               reverse=True)
    sort_sensitive_dict = sorted(sensitive_user_hashtag_dict.items(),
                                 key=lambda x: x[1],
                                 reverse=True)
    return [sort_hashtag_dict, sort_sensitive_dict]  # total and sensitive
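
A short usage sketch for the function above (the uid is made up); each returned list holds (hashtag, count) pairs sorted by count:

total_hashtags, sensitive_hashtags = get_user_hashtag('1234567890')
# e.g. total_hashtags -> [('tag_a', 5), ('tag_b', 2)], sensitive_hashtags -> [('tag_b', 2)]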
Example #6
def get_user_trend(uid):
    # Build a 7-day activity trend for `uid` in 4-hour buckets (6 buckets per day),
    # once for all activity and once for sensitive activity.
    now_ts = time.time()
    date = ts2datetime(now_ts)
    ts = datetime2ts(date)

    # test: hard-coded date
    ts = datetime2ts('2013-09-08')
    timestamp = ts
    results = dict()        # absolute timestamp of a 15-minute slot -> weibo count
    sensitive_results = {}  # same, for sensitive weibos
    for i in range(1, 8):
        ts = timestamp - 24 * 3600 * i
        date = ts2datetime(ts).replace('-', '')
        result_string = r_cluster.hget('activity_' + str(date), uid)
        sensitive_string = r_cluster.hget('sensitive_activity_' + str(date), uid)
        if result_string:
            result_dict = json.loads(result_string)
            for key in result_dict:
                # keys are 15-minute slot indexes within the day (900 s per slot)
                results[int(key) * 900 + ts] = result_dict[key]
        if sensitive_string:
            sensitive_result_dict = json.loads(sensitive_string)
            for key in sensitive_result_dict:
                sensitive_results[int(key) * 900 + ts] = sensitive_result_dict[key]

    # Re-bucket the 15-minute slots into six 4-hour windows (16 slots) per day.
    trend_dict = {}
    for i in range(1, 8):
        ts = timestamp - i * 24 * 3600
        for j in range(0, 6):
            base_time = ts + j * 900 * 16
            num = 0
            for k in range(16):
                seg_time = base_time + k * 900
                if seg_time in results:
                    num += results[seg_time]
            trend_dict[base_time] = num

    sensitive_trend_dict = {}
    for i in range(1, 8):
        ts = timestamp - i * 24 * 3600
        for j in range(0, 6):
            base_time = ts + j * 900 * 16
            num = 0
            for k in range(16):
                seg_time = base_time + k * 900
                if seg_time in sensitive_results:
                    num += sensitive_results[seg_time]
            sensitive_trend_dict[base_time] = num

    # Fold sensitive activity into the overall trend.
    for key in sensitive_trend_dict:
        if key in trend_dict:
            trend_dict[key] += sensitive_trend_dict[key]
        else:
            trend_dict[key] = sensitive_trend_dict[key]

    sorted_dict = sorted(trend_dict.items(), key=lambda x: x[0])
    sorted_sensitive_dict = sorted(sensitive_trend_dict.items(), key=lambda x: x[0])
    return [sorted_dict, sorted_sensitive_dict]  # total and sensitive
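
The activity hashes are assumed to store counts per 15-minute slot (900 seconds), keyed by the slot index within the day; get_user_trend then re-buckets them into six 4-hour windows (16 slots x 900 s = 14400 s) per day. A small worked example of that arithmetic with made-up slot counts:

day_start = datetime2ts('2013-09-07')    # midnight of one of the 7 days
slot_counts = {'0': 3, '1': 2, '17': 5}  # assumed storage format: slot index -> count
absolute = {int(k) * 900 + day_start: v for k, v in slot_counts.items()}
# Slots 0 and 1 fall in the first 4-hour window, slot 17 in the second:
window_0 = sum(absolute.get(day_start + 0 * 14400 + k * 900, 0) for k in range(16))  # -> 5
window_1 = sum(absolute.get(day_start + 1 * 14400 + k * 900, 0) for k in range(16))  # -> 5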