def save_at(uid, at_uid, timestamp, sensitive):
    """Record one @-mention of ``at_uid`` by ``uid`` in the daily counters.

    Counts live in redis hashes keyed by day: ``at_<day_ts>`` maps each
    uid to a JSON-encoded dict ``{mentioned_uid: count}``.  When
    ``sensitive`` is truthy the same increment is also applied to the
    parallel ``sensitive_at_<day_ts>`` hash.

    :param uid: id of the user who posted the mention
    :param at_uid: id of the mentioned user
    :param timestamp: unix timestamp of the weibo
    :param sensitive: truthy if the weibo contained sensitive words
    """
    # Normalize the timestamp to the start of its day so all mentions of
    # one day accumulate in a single hash.
    ts = datetime2ts(ts2datetime(timestamp))

    def _incr_mention(hash_name):
        # Read-modify-write the JSON counter dict stored at hash_name[uid].
        # NOTE(review): not atomic — concurrent writers can lose increments.
        counts_string = redis_cluster.hget(hash_name, str(uid))
        counts = json.loads(counts_string) if counts_string else {}
        # dict.get replaces the Py2-only has_key(); works on Py2 and Py3.
        counts[str(at_uid)] = counts.get(str(at_uid), 0) + 1
        redis_cluster.hset(hash_name, str(uid), json.dumps(counts))

    _incr_mention('at_' + str(ts))
    if sensitive:
        _incr_mention('sensitive_at_' + str(ts))
def save_at(uid, at_uid, timestamp, sensitive):
    """Bump the per-day @-mention counter of ``at_uid`` for poster ``uid``.

    NOTE(review): this is a verbatim duplicate of an earlier ``save_at``
    definition in this file; whichever is defined last wins at import time.
    Consider deleting one of them.

    The counter is a JSON dict ``{mentioned_uid: count}`` stored under the
    field ``uid`` of the redis hash ``at_<day_ts>``; a parallel
    ``sensitive_at_<day_ts>`` hash is updated when ``sensitive`` is truthy.
    """
    # Collapse the timestamp to day granularity (start-of-day timestamp).
    day_ts = datetime2ts(ts2datetime(timestamp))
    target_hashes = ['at_' + str(day_ts)]
    if sensitive:
        target_hashes.append('sensitive_at_' + str(day_ts))
    for hash_name in target_hashes:
        # Load existing counters (empty dict when redis has no entry yet),
        # increment, and write back.  Not atomic under concurrency.
        raw = redis_cluster.hget(hash_name, str(uid))
        counter = json.loads(raw) if raw else {}
        counter[str(at_uid)] = counter.get(str(at_uid), 0) + 1
        redis_cluster.hset(hash_name, str(uid), json.dumps(counter))
def cal_hashtag_work(uid, hashtag_list, timestamp, sensitive):
    """Merge the hashtags of one weibo into ``uid``'s daily hashtag counters.

    Counts live in redis hashes keyed by day: ``hashtag_<day_ts>`` maps each
    uid to a JSON-encoded dict ``{hashtag: count}``.  When ``sensitive`` is
    truthy the same merge is applied to ``sensitive_hashtag_<day_ts>``.

    :param uid: id of the posting user
    :param hashtag_list: hashtags found in the weibo (may contain repeats,
        each occurrence counts)
    :param timestamp: unix timestamp of the weibo
    :param sensitive: truthy if the weibo contained sensitive words
    """
    # Normalize to the start-of-day timestamp so a whole day shares one hash.
    ts = datetime2ts(ts2datetime(timestamp))

    def _merge_hashtags(hash_name):
        # Read-modify-write the JSON counter dict stored at hash_name[uid].
        # NOTE(review): not atomic — concurrent writers can lose increments.
        counts_string = redis_cluster.hget(hash_name, str(uid))
        counts = json.loads(counts_string) if counts_string else {}
        for tag in hashtag_list:
            # dict.get replaces the Py2-only has_key() and the bare
            # try/except of the original; each occurrence adds one.
            counts[tag] = counts.get(tag, 0) + 1
        redis_cluster.hset(hash_name, str(uid), json.dumps(counts))

    _merge_hashtags('hashtag_' + str(ts))
    if sensitive:
        _merge_hashtags('sensitive_hashtag_' + str(ts))
def cal_hashtag_work(uid, hashtag_list, timestamp, sensitive):
    """Add one weibo's hashtags to ``uid``'s per-day hashtag counters.

    NOTE(review): this is a verbatim duplicate of an earlier
    ``cal_hashtag_work`` definition in this file; the last definition wins
    at import time.  Consider deleting one of them.

    The counter is a JSON dict ``{hashtag: count}`` stored under field
    ``uid`` of the redis hash ``hashtag_<day_ts>``; a parallel
    ``sensitive_hashtag_<day_ts>`` hash is updated when ``sensitive`` is
    truthy.  Repeated hashtags in ``hashtag_list`` each count once.
    """
    # Collapse the timestamp to day granularity.
    day_ts = datetime2ts(ts2datetime(timestamp))
    target_hashes = ['hashtag_' + str(day_ts)]
    if sensitive:
        target_hashes.append('sensitive_hashtag_' + str(day_ts))
    for hash_name in target_hashes:
        # Fetch existing counters (empty when absent), fold in this weibo's
        # hashtags, and write back.  Not atomic under concurrency.
        raw = redis_cluster.hget(hash_name, str(uid))
        counter = json.loads(raw) if raw else {}
        for tag in hashtag_list:
            counter[tag] = counter.get(tag, 0) + 1
        redis_cluster.hset(hash_name, str(uid), json.dumps(counter))
def filter_mention(user_set):
    """Partition ``user_set`` by @-mention breadth over the last 7 days.

    For each user, collect the distinct uids they mentioned in the seven
    daily ``at_<day_ts>`` redis hashes ending yesterday.  Users below
    ``mention_threshold`` distinct mentions are returned; the rest are
    logged to the module-level csv ``writer`` with the reason 'mention'
    and dropped.

    :param user_set: iterable of user ids to check
    :return: list of user ids that pass the mention filter
    """
    results = []
    # Under production (RUN_TYPE == 1) anchor on the real clock,
    # otherwise on the fixed test date.
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time())
    else:
        now_date = RUN_TEST_TIME
    # Start-of-yesterday timestamp; the unused intermediate `date` local of
    # the original was dropped.
    timestamp = datetime2ts(now_date) - DAY
    for user in user_set:
        mentioned_uids = set()
        for day_offset in range(7):
            day_ts = timestamp - DAY * day_offset
            raw = redis_cluster.hget('at_' + str(day_ts), str(user))
            if raw:
                # Stored value is a JSON dict {mentioned_uid: count};
                # only the distinct keys matter here.
                mentioned_uids.update(json.loads(raw))
        if len(mentioned_uids) < mention_threshold:
            results.append(user)
        else:
            writer.writerow([user, 'mention'])
    return results
#item['keywords_string'] = keywords_string # use to search sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA) if sensitive_words_dict: item['sensitive_words_string'] = "&".join(sensitive_words_dict.keys()) item['sensitive_words_dict'] = json.dumps(sensitive_words_dict) else: item['sensitive_words_string'] = "" item['sensitive_words_dict'] = json.dumps({}) timestamp = item['timestamp'] date = ts2datetime(timestamp) ts = datetime2ts(date) if sensitive_words_dict: print sensitive_words_dict.keys()[0] sensitive_count_string = r_cluster.hget('sensitive_'+str(ts), str(uid)) if sensitive_count_string: #redis取空 sensitive_count_dict = json.loads(sensitive_count_string) for word in sensitive_words_dict.keys(): if sensitive_count_dict.has_key(word): sensitive_count_dict[word] += sensitive_words_dict[word] else: sensitive_count_dict[word] = sensitive_words_dict[word] r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_count_dict)) else: r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_words_dict)) """ #identify whether to mapping new es weibo_timestamp = item['timestamp'] should_index_name_date = ts2datetime(weibo_timestamp)