def cal_propage_work(item):
    uid = item['uid']
    timestamp = item['timestamp']
    text = item['text']
    sw_list = searchWord(text.encode('utf-8'))
    sensitive = len(sw_list)
    #ip = item['geo']
    ip = item['send_ip']
    # attribute location
    if ip:
        save_city(uid, ip, timestamp, sensitive)
    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    date = date.replace('-', '')
    time_segment = (timestamp - ts) / Fifteenminutes
    save_activity(uid, date, time_segment, sensitive)
    # attribute mention
    at_uname_list = extract_uname(text)
    try:
        at_uname = at_uname_list[0]
        save_at(uid, at_uname, timestamp, sensitive)
    except:
        pass
def get_one_click_evaluation(task_detail):
    results = []
    text = task_detail['text'].encode('utf-8')
    node = createWordTree()
    #print 'text...', text
    sensitive_words_dict = searchWord(text, node)
    #print 'sensitive_words_dict..', sensitive_words_dict
    if sensitive_words_dict:
        score = 0
        sensitive_words_list = []
        for k, v in sensitive_words_dict.iteritems():
            tmp_stage = r_sensitive.hget("sensitive_words", k)
            #print 'tmp_stage..', tmp_stage
            if tmp_stage:
                score += v * sensitive_score_dict[str(tmp_stage)]
            sensitive_words_list.append(k.decode('utf-8'))
        results.append(score)
        results.append(sensitive_words_list)
    else:
        results = [0, []]
    return results
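# --- Illustrative sketch (not part of the original module) ---
# createWordTree() / searchWord() are imported elsewhere and not shown here.
# Judging from how they are used above, searchWord(text, node) walks a trie/DFA
# of sensitive words and returns {word: hit_count}.  The helpers below are a
# minimal, self-contained sketch of that behaviour under those assumptions;
# the real tree structure and word list may differ.  Words and text must share
# the same string type (both unicode or both byte strings).
def _build_word_tree(words):
    # trie of nested dicts; the '' key marks the end of a stored word
    root = {}
    for w in words:
        node = root
        for ch in w:
            node = node.setdefault(ch, {})
        node[''] = w
    return root

def _search_word(text, root):
    # scan the text once, counting every occurrence of every stored word
    hits = {}
    for i in xrange(len(text)):
        node = root
        j = i
        while j < len(text) and text[j] in node:
            node = node[text[j]]
            if '' in node:
                hits[node['']] = hits.get(node[''], 0) + 1
            j += 1
    return hits

# example: _search_word(u'这段文字含有敏感词', _build_word_tree([u'敏感'])) -> {u'敏感': 1}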
def sensitive_process(text, timestamp):
    ## user-level sensitivity
    # uid_list, S_TYPE, S_DATE, WEEK, DAY, DFA, r_cluster_3 and r_sensitive are
    # assumed to come from the surrounding module scope.
    iter_results = {}  # iter_results = {uid: {'sensitive': {word: count}}}
    results = {}
    now_ts = time.time()
    # run_type
    today_sensitive_results = {}
    if S_TYPE != 'test':
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(S_DATE)
    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY * i
        sensitive_results = r_cluster_3.hmget('sensitive_' + str(ts), uid_list)
        count = 0
        for uid in uid_list:
            if uid not in today_sensitive_results:
                today_sensitive_results[uid] = {}
            if uid not in iter_results:
                iter_results[uid] = {'sensitive': {}}
            # compute sensitive
            sensitive_item = sensitive_results[count]
            count += 1
            if sensitive_item:
                uid_sensitive_dict = json.loads(sensitive_item)
            else:
                uid_sensitive_dict = {}
            for sensitive_word in uid_sensitive_dict:
                try:
                    iter_results[uid]['sensitive'][sensitive_word] += uid_sensitive_dict[sensitive_word]
                except:
                    iter_results[uid]['sensitive'][sensitive_word] = uid_sensitive_dict[sensitive_word]
                if ts == now_date_ts - DAY:
                    try:
                        today_sensitive_results[uid][sensitive_word] += uid_sensitive_dict[sensitive_word]
                    except:
                        today_sensitive_results[uid][sensitive_word] = uid_sensitive_dict[sensitive_word]
    for uid in uid_list:
        results[uid] = {}

    ## text-level sensitivity
    item = {}
    sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
    if sensitive_words_dict:
        item['sensitive_words_string'] = "&".join(sensitive_words_dict.keys())
        item['sensitive_words_dict'] = json.dumps(sensitive_words_dict)
    else:
        item['sensitive_words_string'] = ""
        item['sensitive_words_dict'] = json.dumps({})
    sensitive_words_dict = json.loads(item['sensitive_words_dict'])
    index_body = {}
    if sensitive_words_dict:
        score = 0
        for k, v in sensitive_words_dict.iteritems():
            tmp_stage = r_sensitive.hget("sensitive_words", k)
            if tmp_stage:
                score += v * sensitive_score_dict[str(tmp_stage)]
        index_body['sensitive'] = score
def compute_sensitive(text):
    score = 0
    node = createWordTree()
    sensitive_words_dict = searchWord(text.encode('utf-8'), node)
    if sensitive_words_dict:
        for k, v in sensitive_words_dict.iteritems():
            tmp_stage = r_sensitive.hget("sensitive_words", k)
            if tmp_stage:
                score += v * sensitive_score_dict[str(tmp_stage)]
    return score
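# --- Illustrative sketch (assumption) ---
# sensitive_score_dict maps the "stage" stored in the redis hash
# "sensitive_words" to a weight; its real values are configured elsewhere.
# The example below uses made-up weights and hits only to show the arithmetic:
# score = sum(hit_count * weight_of_stage) over all matched words.
def _score_example():
    weights = {'1': 1, '2': 2, '3': 4}      # stage -> weight (assumed)
    hits = {'wordA': 2, 'wordB': 1}         # word -> occurrence count
    stages = {'wordA': '3', 'wordB': '1'}   # word -> stage (normally from redis)
    return sum(cnt * weights[stages[w]] for w, cnt in hits.items())  # -> 2*4 + 1*1 = 9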
def sensitive_check(text):
    DFA = createWordTree()
    item = {}
    count = 0
    sensitive_words_dict = searchWord(text, DFA)
    if sensitive_words_dict:
        item['sensitive_words_string'] = "&".join(sensitive_words_dict.keys())
        item['sensitive_words_dict'] = json.dumps(sensitive_words_dict)
    else:
        item['sensitive_words_string'] = ""
        item['sensitive_words_dict'] = json.dumps({})
    for i in sensitive_words_dict:
        count += sensitive_words_dict[i]
    return count, item
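# --- Usage sketch (assumption) ---
# sensitive_check() returns the total number of hits plus the two fields the
# flow-text documents store.  A caller might use it as below; the weibo dict
# and the extra 'sensitive' flag are illustrative, not from this module.
def _tag_weibo_example(weibo):
    count, fields = sensitive_check(weibo['text'].encode('utf-8'))
    weibo.update(fields)               # sensitive_words_string / sensitive_words_dict
    weibo['sensitive'] = 1 if count else 0
    return weibo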
def comment_uname2uid(item):
    direct_uid = None
    uid = item['uid']
    root_uid = item['root_uid']
    timestamp = item['timestamp']
    text = item['text']
    sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
    sensitive = len(sensitive_words_dict)
    direct_uid = ''
    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')
    RE = re.compile(u'回复@([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+):', re.UNICODE)
    comment_chains = RE.findall(text)
    if comment_chains != []:
        direct_uname = comment_chains[0]
        direct_uid = uname2uid(direct_uname)
    if direct_uid == '':
        direct_uid = root_uid
    save_comment(uid, direct_uid, timestamp, sensitive)
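# --- Illustrative sketch ---
# The 回复@<nickname>: pattern above extracts the nickname a comment replies
# to; the character class covers CJK nicknames as well as ASCII ones.  The
# sample text below is made up for illustration.
import re

def _reply_target_example():
    RE = re.compile(u'回复@([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+):', re.UNICODE)
    return RE.findall(u'回复@张三:我也这么觉得')   # -> [u'张三']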
def get_sensitive_user(timestamp, uid):
    score = 0
    query_body = {
        'query': {
            'term': {'uid': uid}
        },
        'size': 50
    }
    index_name = flow_text_index_name_pre + ts2datetime(timestamp)
    search_results = es_flow_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)['hits']['hits']
    for result in search_results:
        text = result['_source']['text'].encode('utf-8')
        node = createWordTree()
        sensitive_words_dict = searchWord(text, node)
        if sensitive_words_dict:
            sensitive_words_list = []
            for k, v in sensitive_words_dict.iteritems():
                tmp_stage = r_sensitive.hget("sensitive_words", k)
                if tmp_stage:
                    score += v * sensitive_score_dict[str(tmp_stage)]
    print 'score=============', score
    return score
def cal_propage_work(item):
    uid = item['uid']
    timestamp = item['timestamp']
    text = item['text']
    sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
    sensitive = len(sensitive_words_dict)
    #if sensitive:
    #    r.sadd('sensitive_user', uid)  # set of users who posted sensitive weibos
    #ip = item['geo']
    ip = item['send_ip']
    # attribute location
    if ip:
        save_city(uid, ip, timestamp, sensitive)
    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    time_segment = (timestamp - ts) / Fifteenminutes
    save_activity(uid, timestamp, time_segment, sensitive)
    # attribute mention
    at_uname_list = extract_uname(text)
    try:
        if at_uname_list:
            at_uname = at_uname_list[0]
            if at_uname != '':
                save_at(uid, at_uname, timestamp, sensitive)
    except:
        pass
    # hashtag
    hashtag_list = extract_hashtag(text)
    if hashtag_list:
        cal_hashtag_work(uid, hashtag_list, timestamp, sensitive)
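# --- Illustrative sketch (assumption) ---
# Fifteenminutes is assumed to be 900 (seconds), so time_segment above buckets
# a weibo into one of the 96 fifteen-minute slots of its day, counted from
# midnight (datetime2ts(ts2datetime(timestamp))).
FIFTEEN_MINUTES = 900  # assumed value of Fifteenminutes

def _activity_slot(seconds_since_midnight):
    # 0..95, one slot per 15 minutes
    return seconds_since_midnight // FIFTEEN_MINUTES

# a weibo posted at 08:30 -> _activity_slot(8 * 3600 + 30 * 60) == 34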
def cal_propage_work(item):
    uid = item['uid']
    timestamp = item['timestamp']
    text = item['text']
    sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
    sensitive = len(sensitive_words_dict)
    #if sensitive:
    #    r.sadd('sensitive_user', uid)  # set of users who posted sensitive weibos
    #ip = item['geo']
    ip = item['send_ip']
    # attribute location
    if ip:
        save_city(uid, ip, timestamp, sensitive)
    """
    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    time_segment = (timestamp - ts) / Fifteenminutes
    save_activity(uid, timestamp, time_segment, sensitive)
    # attribute mention
    at_uname_list = extract_uname(text)
    try:
        at_uname = at_uname_list[0]
        save_at(uid, at_uname, timestamp, sensitive)
    except:
        pass
    """
    # hashtag
    hashtag_list = extract_hashtag(text)
    if hashtag_list:
        cal_hashtag_work(uid, hashtag_list, timestamp, sensitive)
def test(ft_type):
    print ft_type
    if ft_type == 'facebook':
        index_name_pre = facebook_flow_text_index_name_pre
        index_type = facebook_flow_text_index_type
        user_index_name = facebook_user_index_name
        user_index_type = facebook_user_index_type
    else:
        index_name_pre = twitter_flow_text_index_name_pre
        index_type = twitter_flow_text_index_type
        user_index_name = twitter_user_index_name
        user_index_type = twitter_user_index_type

    # date_list = load_date_list(True)
    date_list = load_date_list()
    DFA = createWordTree()
    query_body = {
        'post_filter': {
            'missing': {
                'field': 'keywords_string'
            }
        },
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'range': {
                                'flag_ch': {
                                    'gte': -1
                                }
                            }
                        }]
                    }
                }
            }
        }
    }

    for date in date_list:
        count = 0
        bulk_action = []
        index_name = index_name_pre + date
        try:
            es_scan_results = scan(es, query=query_body, size=1000,
                                   index=index_name, doc_type=index_type)
            while True:
                try:
                    scan_data = es_scan_results.next()
                    item = scan_data['_source']
                    text = item['text_ch']
                    uid = item['uid']
                    if ft_type == 'facebook':
                        _id = item['fid']
                    else:
                        _id = item['tid']

                    ts = datetime2ts(date)

                    # add sentiment field to weibo
                    sentiment, keywords_list = triple_classifier(item)

                    # add key words to weibo
                    keywords_dict, keywords_string = get_weibo_keywords(keywords_list)

                    # sensitive_words_dict
                    sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
                    if sensitive_words_dict:
                        sensitive_words_string_data = "&".join(sensitive_words_dict.keys())
                        sensitive_words_dict_data = json.dumps(sensitive_words_dict)
                    else:
                        sensitive_words_string_data = ""
                        sensitive_words_dict_data = json.dumps({})

                    # redis
                    if sensitive_words_dict:
                        sensitive_count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
                        if sensitive_count_string:  # redis may return an empty value
                            sensitive_count_dict = json.loads(sensitive_count_string)
                            for word in sensitive_words_dict.keys():
                                if sensitive_count_dict.has_key(word):
                                    sensitive_count_dict[word] += sensitive_words_dict[word]
                                else:
                                    sensitive_count_dict[word] = sensitive_words_dict[word]
                            r_cluster.hset('sensitive_' + str(ts), str(uid),
                                           json.dumps(sensitive_count_dict))
                        else:
                            r_cluster.hset('sensitive_' + str(ts), str(uid),
                                           json.dumps(sensitive_words_dict))

                    # sensitive
                    sensitive_score = 0
                    if sensitive_words_dict:
                        for k, v in sensitive_words_dict.iteritems():
                            tmp_stage = r_sensitive.hget("sensitive_words", k)
                            if tmp_stage:
                                sensitive_score += v * sensitive_score_dict[str(tmp_stage)]

                    # directed_uid
                    directed_uid_data = 0
                    directed_uid, directed_uname = get_root_retweet(text, uid, ft_type)
                    if directed_uid:
                        directed_uid_data = long(directed_uid)

                    # hashtag
                    hashtag = ''
                    RE = re.compile(u'#([0-9a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+)[ ," =.。: :、]')
                    hashtag_list = re.findall(RE, text)
                    if hashtag_list:
                        hashtag = '&'.join(hashtag_list)

                    # action
                    action = {'update': {'_id': _id}}

                    # action_data
                    action_data = {
                        'sentiment': str(sentiment),
                        'keywords_dict': json.dumps(keywords_dict),
                        'keywords_string': keywords_string,
                        'sensitive_words_string': sensitive_words_string_data,
                        'sensitive_words_dict': sensitive_words_dict_data,
                        'sensitive': sensitive_score,
                        'directed_uid': directed_uid_data,
                        'directed_uname': directed_uname,
                        'hashtag': hashtag,
                    }

                    bulk_action.extend([action, {'doc': action_data}])
                    count += 1
                    if count % 1000 == 0 and count != 0:
                        if bulk_action:
                            # use the doc_type selected for this platform
                            es.bulk(bulk_action, index=index_name,
                                    doc_type=index_type, timeout=600)
                        bulk_action = []
                        count = 0
                except StopIteration:
                    break
            if bulk_action:
                es.bulk(bulk_action, index=index_name,
                        doc_type=index_type, timeout=600)
        except Exception, e:
            # the ES index or document may not exist for this date
            print e
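# --- Illustrative sketch (not part of the original script) ---
# The bulk_action list built in test() interleaves one action line with one
# partial-document line per update, which is the shape the elasticsearch-py
# bulk() helper accepts for partial updates.  The ids and field values below
# are made up for illustration; flushing every 1000 pairs is the script's own
# choice, not an ES requirement.
def _example_bulk_pairs():
    bulk_action = []
    for _id, score in [('fid_1', 3), ('fid_2', 0)]:
        bulk_action.extend([
            {'update': {'_id': _id}},       # which document to update
            {'doc': {'sensitive': score}},  # the fields to merge into it
        ])
    return bulk_action
    # es.bulk(_example_bulk_pairs(), index=index_name, doc_type=index_type)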
        # (loop-body fragment; the code that receives `item` is not shown)
            continue
        if int(item['sp_type']) == 1:
            read_count += 1
            text = item['text']
            uid = item['uid']
            # add sentiment field to weibo
            sentiment, keywords_list = triple_classifier(item)
            item['sentiment'] = str(sentiment)
            # add key words to weibo
            keywords_dict, keywords_string = get_weibo_keywords(keywords_list)
            item['keywords_dict'] = json.dumps(keywords_dict)  # use to compute
            item['keywords_string'] = keywords_string          # use to search
            sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
            if sensitive_words_dict:
                item['sensitive_words_string'] = "&".join(sensitive_words_dict.keys())
                item['sensitive_words_dict'] = json.dumps(sensitive_words_dict)
            else:
                item['sensitive_words_string'] = ""
                item['sensitive_words_dict'] = json.dumps({})
            timestamp = item['timestamp']
            date = ts2datetime(timestamp)
            ts = datetime2ts(date)
            if sensitive_words_dict:
                print sensitive_words_dict.keys()[0]
                sensitive_count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
                if sensitive_count_string:  # redis may return an empty value
                    sensitive_count_dict = json.loads(sensitive_count_string)
def get_sensitive_user(timestamp, uid):
    score = 0
    query_body = {'query': {'term': {'uid': uid}}, 'size': 50}
    index_name = flow_text_index_name_pre + ts2datetime(timestamp)
    try:
        search_results = es_xnr.search(index=index_name, doc_type=flow_text_index_type,
                                       body=query_body)['hits']['hits']
    except Exception, e:
        search_results = []
    for result in search_results:
        text = result['_source']['text'].encode('utf-8')
        node = createWordTree()
        sensitive_words_dict = searchWord(text, node)
        if sensitive_words_dict:
            sensitive_words_list = []
            for k, v in sensitive_words_dict.iteritems():
                tmp_stage = r_sensitive.hget("sensitive_words", k)
                if tmp_stage:
                    score += v * sensitive_score_dict[str(tmp_stage)]
    return score


if __name__ == '__main__':
    # '2017-10-15'
    # print get_sensitive_user(timestamp=1507996800, uid='100003271864059')
    print get_sensitive_info(timestamp=1507996800, mid='123124323', text=u"64和达赖太阳花")
                    # (loop-body fragment; the enclosing try and scan loop are not shown)
                        text = text_ch[0]
                        item['text'] = text_ch[0]
                    except:
                        pass
                    # add sentiment field to weibo
                    sentiment, keywords_list = triple_classifier(item)
                    item['sentiment'] = str(sentiment)
                    # add key words to weibo
                    keywords_dict, keywords_string = get_weibo_keywords(keywords_list)
                    item['keywords_dict'] = json.dumps(keywords_dict)  # use to compute
                    item['keywords_string'] = keywords_string          # use to search
                    sensitive_words_dict = searchWord(text.encode('utf-8', 'ignore'), DFA)
                    if sensitive_words_dict:
                        item['sensitive_words_string'] = "&".join(sensitive_words_dict.keys())
                        item['sensitive_words_dict'] = json.dumps(sensitive_words_dict)
                    else:
                        item['sensitive_words_string'] = ""
                        item['sensitive_words_dict'] = json.dumps({})
                    timestamp = item['timestamp']
                    date = ts2datetime(timestamp)
                    ts = datetime2ts(date)
                    if sensitive_words_dict:
                        #print 'sensitive_words_dict...keys[0]...', sensitive_words_dict.keys()[0]
                        sensitive_count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
def cal_propage_work(item, sensitive_words):
    cluster_redis = R_CLUSTER_FLOW1
    user = str(item['uid'])
    uid = str(item['uid'])
    followers_count = item['user_fansnum']
    friends_count = item.get("user_friendsnum", 0)
    cluster_redis.hset(user, 'user_fansnum', followers_count)
    cluster_redis.hset(user, 'user_friendsnum', friends_count)

    retweeted_uid = str(item['root_uid'])
    retweeted_mid = str(item['root_mid'])
    message_type = int(item['message_type'])
    mid = str(item['mid'])
    timestamp = item['timestamp']
    text = item['text']

    sw_list = searchWord(text.encode('utf-8'))
    sensitive_result = len(sw_list)
    if sensitive_result:
        date = ts2datetime(timestamp)
        ts = datetime2ts(date)
        map = {}
        for w in sw_list:
            word = "".join([chr(x) for x in w])
            word = word.decode('utf-8')
            print word
            if not map.__contains__(word):
                map[word] = 1
            else:
                map[word] += 1
        try:
            sensitive_count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
            sensitive_count_dict = json.loads(sensitive_count_string)
            for word in map:
                count = map[word]
                if sensitive_count_dict.__contains__(word):
                    sensitive_count_dict[word] += count
                else:
                    sensitive_count_dict[word] = count
            r_cluster.hset('sensitive_' + str(ts), str(uid), json.dumps(sensitive_count_dict))
        except:
            r_cluster.hset('sensitive_' + str(ts), str(uid), json.dumps(map))

    if message_type == 1:
        cluster_redis.sadd('user_set', user)
        if sensitive_result:
            cluster_redis.hset('s_' + user, mid + '_origin_weibo_timestamp', timestamp)
        else:
            cluster_redis.hset(user, mid + '_origin_weibo_timestamp', timestamp)

    elif message_type == 2:  # comment weibo
        cluster_redis.sadd('user_set', user)
        if cluster_redis.sismember(user + '_comment_weibo', retweeted_mid) or cluster_redis.sismember('s_' + user + '_comment_weibo', retweeted_mid):
            return
        #RE = re.compile(u'//@([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+):', re.UNICODE)
        #nicknames = RE.findall(text)
        if not sensitive_result:
            cluster_redis.sadd(user + '_comment_weibo', retweeted_mid)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby(user, 'comment_weibo', 1)
            if 1:  #if len(nicknames) == 0:
                cluster_redis.hincrby(retweeted_uid, retweeted_mid + '_origin_weibo_comment', 1)
                cluster_redis.hincrby(retweeted_uid, 'origin_weibo_comment_timestamp_%s' % queue_index, 1)
                cluster_redis.hset(retweeted_uid, retweeted_mid + '_origin_weibo_comment_timestamp', timestamp)
            """
            else:
                nick_id_ = nicknames[0]
                _id = single_redis.hget(NICK_UID_NAMESPACE, nick_id_)
                print _id
                single_redis.hset(ACTIVE_NICK_UID_NAMESPACE, nick_id_, _id)
                if _id:
                    cluster_redis.hincrby(str(_id), retweeted_mid + '_retweeted_weibo_comment', 1)
                    cluster_redis.hincrby(str(_id), 'retweeted_weibo_comment_timestamp_%s' % queue_index, 1)
                    cluster_redis.hset(str(_id), retweeted_mid + '_retweeted_weibo_comment_timestamp', timestamp)
            """
        else:
            cluster_redis.sadd('s_' + user + '_comment_weibo', retweeted_mid)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby('s_' + user, 'comment_weibo', 1)
            if 1:  #if len(nicknames) == 0:
                cluster_redis.hincrby('s_' + retweeted_uid, retweeted_mid + '_origin_weibo_comment', 1)
                cluster_redis.hincrby('s_' + retweeted_uid, 'origin_weibo_comment_timestamp_%s' % queue_index, 1)
                cluster_redis.hset('s_' + retweeted_uid, retweeted_mid + '_origin_weibo_comment_timestamp', timestamp)
            """
            else:
                nick_id_ = nicknames[0]
                _id = single_redis.hget(NICK_UID_NAMESPACE, nick_id_)
                print _id
                single_redis.hset(ACTIVE_NICK_UID_NAMESPACE, nick_id_, _id)
                if _id:
                    cluster_redis.hincrby('s_' + str(_id), retweeted_mid + '_retweeted_weibo_comment', 1)
                    cluster_redis.hincrby('s_' + str(_id), 'retweeted_weibo_comment_timestamp_%s' % queue_index, 1)
                    cluster_redis.hset('s_' + str(_id), retweeted_mid + '_retweeted_weibo_comment_timestamp', timestamp)
            """

    elif message_type == 3:
        cluster_redis.sadd('user_set', user)
        if cluster_redis.sismember(user + '_retweeted_weibo', retweeted_mid) or cluster_redis.sismember('s_' + user + '_retweeted_weibo', retweeted_mid):
            return
        """
        RE = re.compile(u'//@([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+):', re.UNICODE)
        nicknames = RE.findall(text)
        """
        if not sensitive_result:
            cluster_redis.sadd(user + '_retweeted_weibo', retweeted_mid)
            cluster_redis.hset(user, retweeted_mid + '_retweeted_weibo_timestamp', timestamp)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby(retweeted_uid, 'origin_weibo_retweeted_timestamp_%s' % queue_index, 1)
            cluster_redis.hincrby(retweeted_uid, retweeted_mid + '_origin_weibo_retweeted', 1)
            """
            if len(nicknames) != 0:
                for nick_id in nicknames:
                    _id = single_redis.hget(NICK_UID_NAMESPACE, nick_id)
                    print _id
                    single_redis.hset(ACTIVE_NICK_UID_NAMESPACE, nick_id, _id)
                    if _id:
                        cluster_redis.hincrby(str(_id), retweeted_mid + '_retweeted_weibo_retweeted', 1)
                        cluster_redis.hset(str(_id), 'retweeted_weibo_retweeted_timestamp', timestamp)
                        cluster_redis.hincrby(str(_id), 'retweeted_weibo_retweeted_timestamp_%s' % queue_index, 1)
            """
        else:
            cluster_redis.sadd('s_' + user + '_retweeted_weibo', retweeted_mid)
            cluster_redis.hset('s_' + user, retweeted_mid + '_retweeted_weibo_timestamp', timestamp)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby('s_' + retweeted_uid, 'origin_weibo_retweeted_timestamp_%s' % queue_index, 1)
            cluster_redis.hincrby('s_' + retweeted_uid, retweeted_mid + '_origin_weibo_retweeted', 1)
            """
        if (update_user_te - update_user_ts) % 900 == 0:
            print 'update track user list'
            monitor_user_list = get_track_task_user()

        item = receiver.recv_json()
        #test
        if not item:
            continue

        if item['sp_type'] == '1':
            read_count += 1
            #print 'item:', item
            if str(item['uid']) in monitor_user_list:
                text = item['text']
                # add sensitive field and sensitive_word field to weibo
                sw_list = searchWord(text.encode('utf-8'))
                sensitive = len(sw_list)
                if sensitive:
                    item['sensitive'] = 1
                    word_set = set()
                    for w in sw_list:
                        word = ''.join([chr(x) for x in w])
                        word = word.decode('utf-8')
                        word_set.add(word)
                    sensitive_word_string = '&'.join(list(word_set))
                    item['sensitive_word'] = sensitive_word_string
                else:
                    item['sensitive'] = 0
                # add sentiment field to weibo
                sentiment = get_sentiment_attribute(text)
                item['sentiment'] = sentiment
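# --- Illustrative sketch (assumption, Python 2) ---
# In this consumer, searchWord() appears to return each hit as a list of UTF-8
# byte values, which the loop above joins back into a byte string and decodes.
# For example, the two-character word u'敏感' encodes to six bytes:
def _decode_hit_example():
    w = [230, 149, 143, 230, 132, 159]   # UTF-8 bytes of u'\u654f\u611f'
    word = ''.join([chr(x) for x in w])  # -> '\xe6\x95\x8f\xe6\x84\x9f'
    return word.decode('utf-8')          # -> u'\u654f\u611f' (u'敏感')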
def cal_propage_work(item, sensitive_words):
    cluster_redis = R_CLUSTER_FLOW1
    user = str(item['uid'])
    uid = str(item['uid'])
    followers_count = item['user_fansnum']
    friends_count = item.get("user_friendsnum", 0)
    cluster_redis.hset(user, 'user_fansnum', followers_count)
    cluster_redis.hset(user, 'user_friendsnum', friends_count)

    retweeted_uid = str(item['root_uid'])
    retweeted_mid = str(item['root_mid'])
    message_type = int(item['message_type'])
    mid = str(item['mid'])
    timestamp = item['timestamp']
    text = item['text']

    sw_list = searchWord(text.encode('utf-8'))
    sensitive_result = len(sw_list)
    if sensitive_result:
        ts = ts2datetime(timestamp).replace('-', '')
        map = {}
        for w in sw_list:
            word = "".join([chr(x) for x in w])
            word = word.decode('utf-8')
            if not map.__contains__(word):
                map[word] = 1
            else:
                map[word] += 1
        try:
            sensitive_count_string = r_cluster.hget('sensitive_' + str(ts), str(uid))
            sensitive_count_dict = json.loads(sensitive_count_string)
            for word in map:
                count = map[word]
                if sensitive_count_dict.__contains__(word):
                    sensitive_count_dict[word] += count
                else:
                    sensitive_count_dict[word] = count
            r_cluster.hset('sensitive_' + str(ts), str(uid), json.dumps(sensitive_count_dict))
        except:
            r_cluster.hset('sensitive_' + str(ts), str(uid), json.dumps(map))

    if message_type == 1:
        cluster_redis.sadd('user_set', user)
        if sensitive_result:
            cluster_redis.hset('s_' + user, mid + '_origin_weibo_timestamp', timestamp)
        else:
            cluster_redis.hset(user, mid + '_origin_weibo_timestamp', timestamp)

    elif message_type == 2:  # comment weibo
        cluster_redis.sadd('user_set', user)
        if cluster_redis.sismember(user + '_comment_weibo', retweeted_mid) or cluster_redis.sismember('s_' + user + '_comment_weibo', retweeted_mid):
            return
        #RE = re.compile(u'//@([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+):', re.UNICODE)
        #nicknames = RE.findall(text)
        if not sensitive_result:
            cluster_redis.sadd(user + '_comment_weibo', retweeted_mid)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby(user, 'comment_weibo', 1)
            if 1:  #if len(nicknames) == 0:
                cluster_redis.hincrby(retweeted_uid, retweeted_mid + '_origin_weibo_comment', 1)
                cluster_redis.hincrby(retweeted_uid, 'origin_weibo_comment_timestamp_%s' % queue_index, 1)
                cluster_redis.hset(retweeted_uid, retweeted_mid + '_origin_weibo_comment_timestamp', timestamp)
            """
            else:
                nick_id_ = nicknames[0]
                _id = single_redis.hget(NICK_UID_NAMESPACE, nick_id_)
                print _id
                single_redis.hset(ACTIVE_NICK_UID_NAMESPACE, nick_id_, _id)
                if _id:
                    cluster_redis.hincrby(str(_id), retweeted_mid + '_retweeted_weibo_comment', 1)
                    cluster_redis.hincrby(str(_id), 'retweeted_weibo_comment_timestamp_%s' % queue_index, 1)
                    cluster_redis.hset(str(_id), retweeted_mid + '_retweeted_weibo_comment_timestamp', timestamp)
            """
        else:
            cluster_redis.sadd('s_' + user + '_comment_weibo', retweeted_mid)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby('s_' + user, 'comment_weibo', 1)
            if 1:  #if len(nicknames) == 0:
                cluster_redis.hincrby('s_' + retweeted_uid, retweeted_mid + '_origin_weibo_comment', 1)
                cluster_redis.hincrby('s_' + retweeted_uid, 'origin_weibo_comment_timestamp_%s' % queue_index, 1)
                cluster_redis.hset('s_' + retweeted_uid, retweeted_mid + '_origin_weibo_comment_timestamp', timestamp)
            """
            else:
                nick_id_ = nicknames[0]
                _id = single_redis.hget(NICK_UID_NAMESPACE, nick_id_)
                print _id
                single_redis.hset(ACTIVE_NICK_UID_NAMESPACE, nick_id_, _id)
                if _id:
                    cluster_redis.hincrby('s_' + str(_id), retweeted_mid + '_retweeted_weibo_comment', 1)
                    cluster_redis.hincrby('s_' + str(_id), 'retweeted_weibo_comment_timestamp_%s' % queue_index, 1)
                    cluster_redis.hset('s_' + str(_id), retweeted_mid + '_retweeted_weibo_comment_timestamp', timestamp)
            """

    elif message_type == 3:
        cluster_redis.sadd('user_set', user)
        if cluster_redis.sismember(user + '_retweeted_weibo', retweeted_mid) or cluster_redis.sismember('s_' + user + '_retweeted_weibo', retweeted_mid):
            return
        """
        RE = re.compile(u'//@([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+):', re.UNICODE)
        nicknames = RE.findall(text)
        """
        if not sensitive_result:
            cluster_redis.sadd(user + '_retweeted_weibo', retweeted_mid)
            cluster_redis.hset(user, retweeted_mid + '_retweeted_weibo_timestamp', timestamp)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby(retweeted_uid, 'origin_weibo_retweeted_timestamp_%s' % queue_index, 1)
            cluster_redis.hincrby(retweeted_uid, retweeted_mid + '_origin_weibo_retweeted', 1)
            """
            if len(nicknames) != 0:
                for nick_id in nicknames:
                    _id = single_redis.hget(NICK_UID_NAMESPACE, nick_id)
                    print _id
                    single_redis.hset(ACTIVE_NICK_UID_NAMESPACE, nick_id, _id)
                    if _id:
                        cluster_redis.hincrby(str(_id), retweeted_mid + '_retweeted_weibo_retweeted', 1)
                        cluster_redis.hset(str(_id), 'retweeted_weibo_retweeted_timestamp', timestamp)
                        cluster_redis.hincrby(str(_id), 'retweeted_weibo_retweeted_timestamp_%s' % queue_index, 1)
            """
        else:
            cluster_redis.sadd('s_' + user + '_retweeted_weibo', retweeted_mid)
            cluster_redis.hset('s_' + user, retweeted_mid + '_retweeted_weibo_timestamp', timestamp)
            queue_index = get_queue_index(timestamp)
            cluster_redis.hincrby('s_' + retweeted_uid, 'origin_weibo_retweeted_timestamp_%s' % queue_index, 1)
            cluster_redis.hincrby('s_' + retweeted_uid, retweeted_mid + '_origin_weibo_retweeted', 1)
            """