def save_at(uid, at_uid, timestamp, sensitive):
    """Increment the daily @-mention counter of `at_uid` for user `uid`.

    Counters live in redis hashes keyed per day: hash name 'at_<ts>'
    (and additionally 'sensitive_at_<ts>' when `sensitive` is truthy),
    field = str(uid), value = JSON dict {mentioned_uid: count}.

    NOTE(review): an identical `save_at` is re-defined later in this
    file and shadows this one — confirm which copy is intended.
    """
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)  # normalize to the start-of-day timestamp
    at_key = str(at_uid)

    # Read-modify-write of the JSON counter (not atomic; matches the
    # original behaviour — presumably acceptable for this pipeline).
    ruid_count_string = redis_cluster.hget('at_' + str(ts), str(uid))
    ruid_count_dict = json.loads(ruid_count_string) if ruid_count_string else {}
    # dict.get replaces the deprecated has_key (removed in Python 3)
    ruid_count_dict[at_key] = ruid_count_dict.get(at_key, 0) + 1
    redis_cluster.hset('at_' + str(ts), str(uid), json.dumps(ruid_count_dict))

    if sensitive:
        sensitive_ruid_count_string = redis_cluster.hget(
            'sensitive_at_' + str(ts), str(uid))
        sensitive_ruid_count_dict = (json.loads(sensitive_ruid_count_string)
                                     if sensitive_ruid_count_string else {})
        sensitive_ruid_count_dict[at_key] = \
            sensitive_ruid_count_dict.get(at_key, 0) + 1
        redis_cluster.hset('sensitive_at_' + str(ts), str(uid),
                           json.dumps(sensitive_ruid_count_dict))
def save_at(uid, at_uid, timestamp, sensitive):
    """Increment the daily @-mention counter of `at_uid` for user `uid`.

    Counters are JSON dicts {mentioned_uid: count} stored in redis
    hashes named 'at_<day_ts>' (always) and 'sensitive_at_<day_ts>'
    (only when `sensitive` is truthy), field = str(uid).
    """
    ts = datetime2ts(ts2datetime(timestamp))  # start-of-day timestamp

    def _bump(hash_name):
        # Read-modify-write one JSON counter hash. Not atomic — same
        # race window as the original code; acceptable here presumably.
        raw = redis_cluster.hget(hash_name, str(uid))
        counts = json.loads(raw) if raw else {}
        # dict.get replaces the deprecated has_key (removed in Python 3)
        counts[str(at_uid)] = counts.get(str(at_uid), 0) + 1
        redis_cluster.hset(hash_name, str(uid), json.dumps(counts))

    _bump('at_' + str(ts))
    if sensitive:
        _bump('sensitive_at_' + str(ts))
def cal_hashtag_work(uid, hashtag_list, timestamp, sensitive):
    """Accumulate per-day hashtag counts for `uid` into redis.

    Each hashtag occurrence in `hashtag_list` adds 1 to the JSON dict
    {hashtag: count} stored in hash 'hashtag_<day_ts>' (field str(uid)),
    and also in 'sensitive_hashtag_<day_ts>' when `sensitive` is truthy.

    NOTE(review): an identical `cal_hashtag_work` is re-defined later in
    this file and shadows this one — confirm which copy is intended.
    """
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)  # normalize to the start-of-day timestamp

    # Occurrence counts within this single call; also serves as the
    # initial stored value when redis has no entry yet.
    hashtag_dict = {}
    for hashtag in hashtag_list:
        # dict.get replaces the original bare `except:` counting idiom
        hashtag_dict[hashtag] = hashtag_dict.get(hashtag, 0) + 1

    hashtag_count_string = redis_cluster.hget('hashtag_' + str(ts), str(uid))
    if hashtag_count_string:
        hashtag_count_dict = json.loads(hashtag_count_string)
        for item in hashtag_list:
            # dict.get replaces the deprecated has_key (removed in Py3)
            hashtag_count_dict[item] = hashtag_count_dict.get(item, 0) + 1
    else:
        hashtag_count_dict = hashtag_dict
    redis_cluster.hset('hashtag_' + str(ts), str(uid),
                       json.dumps(hashtag_count_dict))

    if sensitive:
        sensitive_hashtag_count_string = redis_cluster.hget(
            'sensitive_hashtag_' + str(ts), str(uid))
        if sensitive_hashtag_count_string:
            sensitive_hashtag_count_dict = json.loads(
                sensitive_hashtag_count_string)
            for hashtag in hashtag_list:
                sensitive_hashtag_count_dict[hashtag] = \
                    sensitive_hashtag_count_dict.get(hashtag, 0) + 1
        else:
            sensitive_hashtag_count_dict = hashtag_dict

        redis_cluster.hset('sensitive_hashtag_' + str(ts), str(uid),
                           json.dumps(sensitive_hashtag_count_dict))
def cal_hashtag_work(uid, hashtag_list, timestamp, sensitive):
    """Accumulate per-day hashtag counts for `uid` into redis.

    Each occurrence in `hashtag_list` adds 1 to the JSON dict
    {hashtag: count} held in hash 'hashtag_<day_ts>' (field str(uid)),
    and also in 'sensitive_hashtag_<day_ts>' when `sensitive` is truthy.
    """
    ts = datetime2ts(ts2datetime(timestamp))  # start-of-day timestamp

    # Occurrence counts within this single call; doubles as the initial
    # stored value when redis has no entry yet for this uid/day.
    local_counts = {}
    for tag in hashtag_list:
        # dict.get replaces the original bare `except:` counting idiom
        local_counts[tag] = local_counts.get(tag, 0) + 1

    def _merge(hash_name):
        # Merge this call's occurrences into one JSON counter hash.
        # Read-modify-write, not atomic — same as the original code.
        raw = redis_cluster.hget(hash_name, str(uid))
        if raw:
            counts = json.loads(raw)
            for tag in hashtag_list:
                # dict.get replaces the deprecated has_key (removed in Py3)
                counts[tag] = counts.get(tag, 0) + 1
        else:
            counts = local_counts
        redis_cluster.hset(hash_name, str(uid), json.dumps(counts))

    _merge('hashtag_' + str(ts))
    if sensitive:
        _merge('sensitive_hashtag_' + str(ts))
# Beispiel #5  (example marker — scraping artifact, commented out so the module parses/runs)
# 0
def filter_mention(user_set):
    """Filter out users who @-mention too many distinct accounts.

    Scans the previous 7 daily 'at_<ts>' redis hashes for each user and
    counts distinct mentioned uids. Users below `mention_threshold` are
    returned; the rest are recorded via `writer` with reason 'mention'.
    """
    # run_type: live clock in production, fixed date when testing
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time())
    else:
        now_date = RUN_TEST_TIME
    timestamp = datetime2ts(now_date) - DAY  # yesterday's day timestamp
    date = ts2datetime(timestamp)

    results = []
    for user in user_set:
        mentioned = set()
        for day_offset in range(7):
            day_ts = timestamp - DAY * day_offset
            raw = redis_cluster.hget('at_' + str(day_ts), str(user))
            if raw:
                # JSON value is {mentioned_uid: count}; keep distinct uids
                mentioned.update(json.loads(raw))
        if len(mentioned) < mention_threshold:
            results.append(user)
        else:
            writer.writerow([user, 'mention'])
    return results
            #item['keywords_string'] = keywords_string         # use to search

            sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
            if sensitive_words_dict:
                item['sensitive_words_string'] = "&".join(sensitive_words_dict.keys())
                item['sensitive_words_dict'] = json.dumps(sensitive_words_dict)
            else:
                item['sensitive_words_string'] = ""
                item['sensitive_words_dict'] = json.dumps({})

            timestamp = item['timestamp']
            date = ts2datetime(timestamp)
            ts = datetime2ts(date)
            if sensitive_words_dict:
                print sensitive_words_dict.keys()[0]
                sensitive_count_string = r_cluster.hget('sensitive_'+str(ts), str(uid))
                if sensitive_count_string: #redis取空
                    sensitive_count_dict = json.loads(sensitive_count_string)
                    for word in sensitive_words_dict.keys():
                        if sensitive_count_dict.has_key(word):
                            sensitive_count_dict[word] += sensitive_words_dict[word]
                        else:
                            sensitive_count_dict[word] = sensitive_words_dict[word]
                    r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_count_dict))
                else:
                    r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_words_dict))

            """
            #identify whether to mapping new es
            weibo_timestamp = item['timestamp']
            should_index_name_date = ts2datetime(weibo_timestamp)