r_cluster.delete('activity_'+str(delete_ts)) #delete hashtag r_cluster.delete('hashtag_'+str(delete_ts)) #delete sensitive words r_cluster.delete('sensitive_'+str(delete_ts)) #delete recommendation r.delete('recomment_'+str(delete_date)) if __name__ == "__main__": now_ts = time.time() current_path = os.getcwd() file_path_redis = os.path.join(current_path, 'delete_redis.py') print_log = "&".join([file_path_redis, "start", ts2datetime(now_ts)]) print print_log now_datetime = datetime2ts(ts2datetime(now_ts)) new_ip_number = r_cluster.hlen('new_ip_'+str(now_datetime)) new_hashtag_number = r_cluster.hlen('hashtag_'+str(now_datetime)) #if new_ip_number and new_hashtag_number: # flow2/flow4写入新数据,可以清楚8天前数据 # main() now_ts = time.time() print_log = "&".join([file_path_redis, "end", ts2datetime(now_ts)]) print print_log
def main(): if RUN_TYPE: now_ts = time.time() - DAY # 前一天 ts = str(datetime2ts(ts2datetime(now_ts))) else: ts = str(datetime2ts('2016-03-16')) now_ts = int(ts) print now_ts sensitive_string = "sensitive_" + ts date_string = ts update_sensitive_key = "sensitive_score_" + ts # 更新的键 sensitive_dict_key = "sensitive_dict_" + ts sensitive_string_key = "sensitive_string_" + ts sensitive_day_change_key = "sensitive_" + ts + "_day_change" del_month = datetime2ts(ts2datetime(now_ts - MONTH)) del_sensitive_key = "sensitive_score_" + str(del_month) # 要删除的键 former_ts = int(ts) - DAY former_date = str(datetime2ts(ts2datetime(former_ts))) former_sensitive_key = "sensitive_score_" + former_date iter_count = 0 bulk_action = [] mappings(ES_SENSITIVE_INDEX) total_number = r.hlen(sensitive_string) scan_cursor = 0 print total_number while 1: re_scan = r.hscan(sensitive_string, scan_cursor, count=1000) scan_cursor = re_scan[0] if len(re_scan[1]) != 0: sensitive_info = re_scan[1] # 字典形式,uid:sensitive_words_dict uid_list = sensitive_info.keys() sensitive_results = es.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={"ids": uid_list})['docs'] if sensitive_results: for item in sensitive_results: uid = item['_id'] sensitive_words_dict = json.loads( sensitive_info[uid]) # json.loads current_sensitive_score = 0 for k, v in sensitive_words_dict.iteritems(): tmp_stage = r_sensitive.hget("sensitive_words", k) if tmp_stage: current_sensitive_score += v * sensitive_score_dict[ str(tmp_stage)] if item['found']: # 之前存在相关信息 revise_item = item["_source"] if del_sensitive_key in revise_item: item.remove(del_sensitive_key) revise_item['uid'] = uid # 新更新的敏感度 revise_item[ update_sensitive_key] = current_sensitive_score # 新更新的敏感词 revise_item[sensitive_dict_key] = sensitive_info[uid] # 新更新的string revise_item[sensitive_string_key] = "&".join( sensitive_words_dict.keys()) # 当天和之前一天、一周和一月均值的差异 revise_item[ 'sensitive_day_change'] = current_sensitive_score - revise_item.get( former_sensitive_key, 0) revise_item[ 'sensitive_week_change'] = current_sensitive_score - revise_item.get( 'sensitive_week_ave', 0) revise_item[ 'sensitive_month_change'] = current_sensitive_score - revise_item.get( 'sensitive_month_ave', 0) # 更新后week、month的均值和方差 revise_item['sensitive_week_ave'], revise_item[ 'sensitive_week_var'], revise_item[ 'sensitive_week_sum'] = compute_week( revise_item, now_ts) revise_item['senstiive_month_ave'], revise_item[ 'sensitive_month_var'], revise_item[ 'sensitive_month_sum'] = compute_month( revise_item, now_ts) else: revise_item = dict() revise_item['uid'] = uid revise_item[ update_sensitive_key] = current_sensitive_score revise_item[sensitive_dict_key] = sensitive_info[uid] revise_item[sensitive_string_key] = "&".join( sensitive_words_dict.keys()) revise_item[ 'sensitive_day_change'] = current_sensitive_score revise_item[ 'sensitive_week_change'] = current_sensitive_score revise_item[ 'sensitive_month_change'] = current_sensitive_score revise_item['sensitive_week_ave'], revise_item[ 'sensitive_week_var'], revise_item[ 'sensitive_week_sum'] = compute_week( revise_item, now_ts) revise_item['senstiive_month_ave'], revise_item[ 'sensitive_month_var'], revise_item[ 'sensitive_month_sum'] = compute_month( revise_item, now_ts) action = {'index': {'_id': uid}} bulk_action.extend([action, revise_item]) iter_count += 1 if iter_count % 1000 == 0: es.bulk(bulk_action, index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX) bulk_action = [] print iter_count if int(scan_cursor) == 0: break if bulk_action: es.bulk(bulk_action, index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX) print iter_count
def main(): if RUN_TYPE: now_ts = time.time()-DAY # 前一天 ts = str(datetime2ts(ts2datetime(now_ts))) else: ts = str(datetime2ts('2013-09-07')) now_ts = int(ts) sensitive_string = "sensitive_" + ts date_string = ts update_sensitive_key = "sensitive_score_" + ts # 更新的键 sensitive_dict_key = "sensitive_dict_" + ts sensitive_string_key = "sensitive_string_" + ts sensitive_day_change_key = "sensitive_" + ts +"_day_change" del_month = datetime2ts(ts2datetime(now_ts - MONTH)) del_sensitive_key = "sensitive_score_"+str(del_month) # 要删除的键 former_ts = int(ts) - DAY former_date = str(datetime2ts(ts2datetime(former_ts))) former_sensitive_key = "sensitive_score_" + former_date iter_count = 0 bulk_action = [] mappings(ES_SENSITIVE_INDEX) total_number = r.hlen(sensitive_string) scan_cursor = 0 print total_number while 1: re_scan = r.hscan(sensitive_string, scan_cursor, count=1000) scan_cursor = re_scan[0] if len(re_scan[1]) != 0: sensitive_info = re_scan[1] # 字典形式,uid:sensitive_words_dict uid_list = sensitive_info.keys() sensitive_results = es.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={"ids":uid_list})['docs'] if sensitive_results: for item in sensitive_results: uid = item['_id'] sensitive_words_dict = json.loads(sensitive_info[uid]) # json.loads current_sensitive_score = 0 for k,v in sensitive_words_dict.iteritems(): tmp_stage = r_sensitive.hget("sensitive_words", k) if tmp_stage: current_sensitive_score += v*sensitive_score_dict[str(tmp_stage)] if item['found']: # 之前存在相关信息 revise_item = item["_source"] if del_sensitive_key in revise_item: item.pop(del_sensitive_key) revise_item['uid'] = uid # 新更新的敏感度 revise_item[update_sensitive_key] = current_sensitive_score # 新更新的敏感词 revise_item[sensitive_dict_key] = sensitive_info[uid] # 新更新的string revise_item[sensitive_string_key] = "&".join(sensitive_words_dict.keys()) # 当天和之前一天、一周和一月均值的差异 revise_item['sensitive_day_change'] = current_sensitive_score - revise_item.get(former_sensitive_key, 0) revise_item['sensitive_week_change'] = current_sensitive_score - revise_item.get('sensitive_week_ave', 0) revise_item['sensitive_month_change'] = current_sensitive_score - revise_item.get('sensitive_month_ave', 0) # 更新后week、month的均值和方差 revise_item['sensitive_week_ave'], revise_item['sensitive_week_var'], revise_item['sensitive_week_sum'] = compute_week(revise_item, now_ts) revise_item['senstiive_month_ave'], revise_item['sensitive_month_var'], revise_item['sensitive_month_sum'] = compute_month(revise_item, now_ts) else: revise_item = dict() revise_item['uid'] = uid revise_item[update_sensitive_key] = current_sensitive_score revise_item[sensitive_dict_key] = sensitive_info[uid] revise_item[sensitive_string_key] = "&".join(sensitive_words_dict.keys()) revise_item['sensitive_day_change'] = current_sensitive_score revise_item['sensitive_week_change'] = current_sensitive_score revise_item['sensitive_month_change'] = current_sensitive_score revise_item['sensitive_week_ave'], revise_item['sensitive_week_var'], revise_item['sensitive_week_sum'] = compute_week(revise_item, now_ts) revise_item['senstiive_month_ave'], revise_item['sensitive_month_var'], revise_item['sensitive_month_sum'] = compute_month(revise_item, now_ts) action = {'index':{'_id': uid}} bulk_action.extend([action, revise_item]) iter_count += 1 if iter_count % 1000 == 0: es.bulk(bulk_action, index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX) bulk_action = [] print iter_count if int(scan_cursor) == 0: break if bulk_action: es.bulk(bulk_action, index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX)