# -*- coding: utf-8 -*-
# Python 2 source. json is used throughout; ts2datetime/datetime2ts and the
# redis_cluster/r_cluster connections are project-level helpers assumed to be
# imported from the surrounding codebase.
import json


def save_at(uid, at_uid, timestamp, sensitive):
    # Accumulate per-day @-mention counts: hash key 'at_<day_ts>', field <uid>,
    # value is a JSON dict mapping mentioned uid -> count.
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    ruid_count_string = redis_cluster.hget('at_' + str(ts), str(uid))
    if ruid_count_string:
        ruid_count_dict = json.loads(ruid_count_string)
        if str(at_uid) in ruid_count_dict:
            ruid_count_dict[str(at_uid)] += 1
        else:
            ruid_count_dict[str(at_uid)] = 1
    else:
        ruid_count_dict = {str(at_uid): 1}
    redis_cluster.hset('at_' + str(ts), str(uid), json.dumps(ruid_count_dict))
    if sensitive:
        # Sensitive posts are counted again under a separate 'sensitive_at_' hash.
        sensitive_ruid_count_string = redis_cluster.hget(
            'sensitive_at_' + str(ts), str(uid))
        if sensitive_ruid_count_string:
            sensitive_ruid_count_dict = json.loads(sensitive_ruid_count_string)
            if str(at_uid) in sensitive_ruid_count_dict:
                sensitive_ruid_count_dict[str(at_uid)] += 1
            else:
                sensitive_ruid_count_dict[str(at_uid)] = 1
        else:
            sensitive_ruid_count_dict = {str(at_uid): 1}
        redis_cluster.hset('sensitive_at_' + str(ts), str(uid),
                           json.dumps(sensitive_ruid_count_dict))
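# Hypothetical usage sketch (values are ours, not from the original code):
# user 123 @-mentions user 456 in a sensitive post. Afterwards both
# HGET at_<day_ts> 123 and HGET sensitive_at_<day_ts> 123 yield '{"456": 1}'.
#
#     save_at('123', '456', 1480003200, sensitive=1)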
def cal_hashtag_work(uid, hashtag_list, timestamp, sensitive):
    # Accumulate per-day hashtag counts: hash key 'hashtag_<day_ts>',
    # field <uid>, value is a JSON dict mapping hashtag -> count.
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    # Count this batch locally first (dict.get replaces the original
    # try/except-KeyError counting idiom).
    hashtag_dict = {}
    for hashtag in hashtag_list:
        hashtag_dict[hashtag] = hashtag_dict.get(hashtag, 0) + 1
    hashtag_count_string = redis_cluster.hget('hashtag_' + str(ts), str(uid))
    if hashtag_count_string:
        hashtag_count_dict = json.loads(hashtag_count_string)
        for item in hashtag_list:
            if item in hashtag_count_dict:
                hashtag_count_dict[item] += 1
            else:
                hashtag_count_dict[item] = 1
    else:
        hashtag_count_dict = hashtag_dict
    redis_cluster.hset('hashtag_' + str(ts), str(uid),
                       json.dumps(hashtag_count_dict))
    if sensitive:
        sensitive_hashtag_count_string = redis_cluster.hget(
            'sensitive_hashtag_' + str(ts), str(uid))
        if sensitive_hashtag_count_string:
            sensitive_hashtag_count_dict = json.loads(
                sensitive_hashtag_count_string)
            for hashtag in hashtag_list:
                if hashtag in sensitive_hashtag_count_dict:
                    sensitive_hashtag_count_dict[hashtag] += 1
                else:
                    sensitive_hashtag_count_dict[hashtag] = 1
        else:
            sensitive_hashtag_count_dict = hashtag_dict
        redis_cluster.hset('sensitive_hashtag_' + str(ts), str(uid),
                           json.dumps(sensitive_hashtag_count_dict))
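# Both functions above (and the sensitive-words fragment below) repeat the same
# read-modify-write cycle against a per-day Redis hash: HGET, json.loads,
# increment, HSET. A hypothetical helper sketching that shared pattern (not in
# the original code; note it is not atomic, so concurrent writers can lose
# updates -- per-field HINCRBY on a flattened key would avoid that):
def incr_json_hash_counter(conn, key, field, increments):
    # increments: dict mapping counter name -> amount to add
    count_string = conn.hget(key, field)
    count_dict = json.loads(count_string) if count_string else {}
    for name, amount in increments.items():
        count_dict[name] = count_dict.get(name, 0) + amount
    conn.hset(key, field, json.dumps(count_dict))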
# Fragment from the message-processing loop: accumulate per-day sensitive-word
# counts under hash key 'sensitive_<day_ts>', field <uid>.
item['sensitive_words_dict'] = json.dumps({})
timestamp = item['timestamp']
date = ts2datetime(timestamp)
ts = datetime2ts(date)
if sensitive_words_dict:
    print sensitive_words_dict.keys()[0]
    sensitive_count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
    if sensitive_count_string:  # Redis may return nothing for a new day/user
        sensitive_count_dict = json.loads(sensitive_count_string)
        for word in sensitive_words_dict.keys():
            if word in sensitive_count_dict:
                sensitive_count_dict[word] += sensitive_words_dict[word]
            else:
                sensitive_count_dict[word] = sensitive_words_dict[word]
        r_cluster.hset('sensitive_' + str(ts), str(uid),
                       json.dumps(sensitive_count_dict))
    else:
        r_cluster.hset('sensitive_' + str(ts), str(uid),
                       json.dumps(sensitive_words_dict))
"""
# identify whether to map to a new per-day ES index: flush any pending bulk
# actions before switching index names.
weibo_timestamp = item['timestamp']
should_index_name_date = ts2datetime(weibo_timestamp)
if should_index_name_date != now_index_name_date:
    if action != [] and xdata != []:
        index_name = index_name_pre + now_index_name_date
        if bulk_action:
            es.bulk(bulk_action, index=index_name, doc_type=index_type,
                    timeout=60)
        bulk_action = []
        count = 0
    now_index_name_date = should_index_name_date
"""
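# Read side, for illustration only (the helper name is ours, not from the
# original code): fetch a user's accumulated sensitive-word counts for the
# day containing `timestamp`, mirroring the key layout written above.
def get_sensitive_counts(uid, timestamp):
    ts = datetime2ts(ts2datetime(timestamp))
    count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
    return json.loads(count_string) if count_string else {}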