def __init__(self, data_name, hidden_dim=256, seed=0, learning_rate=3e-4,
             batch_size=128, training_ratio=0.8, validation_ratio=0.1,
             max_epochs=100):
    """Set up the RobustRealNVP solver: seed all RNGs, load the dataset,
    and build the train/test data loaders.

    Args:
        data_name: dataset name; data is read from ./data/<data_name>.npy.
        hidden_dim: hidden-layer width of the model.
        seed: random seed applied to numpy and torch (CPU and CUDA).
        learning_rate: optimizer learning rate.
        batch_size: training mini-batch size.
        training_ratio: fraction of samples used for training.
        validation_ratio: accepted for interface parity; not used here.
        max_epochs: number of training epochs.
    """
    # Seed every RNG source before any data loading/splitting so the
    # random_split below is reproducible.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_path = "./data/" + data_name + ".npy"
    self.result_path = "./results/{}/0.0/RobustRealNVP/{}/".format(
        data_name, seed)
    self.model_save_path = "./trained_model/{}/RobustRealNVP/{}/".format(
        data_name, seed)
    os.makedirs(self.model_save_path, exist_ok=True)

    self.dataset = RealDataset(data_path, missing_ratio=0)
    self.data_path = data_path
    self.seed = seed
    self.learning_rate = learning_rate
    self.hidden_dim = hidden_dim
    self.max_epochs = max_epochs
    self.batch_size = batch_size

    # Dataset statistics used by the model and for reporting.
    self.data_anomaly_ratio = self.dataset.__anomalyratio__()
    self.data_normaly_ratio = 1 - self.data_anomaly_ratio
    self.input_dim = self.dataset.__dim__()

    n_sample = self.dataset.__len__()
    self.n_train = int(n_sample * training_ratio)
    self.n_test = n_sample - self.n_train
    print('|data dimension: {}|data noise ratio:{}'.format(
        self.dataset.__dim__(), self.data_anomaly_ratio))

    train_split, test_split = data.random_split(
        dataset=self.dataset, lengths=[self.n_train, self.n_test])
    # drop_last keeps training batches uniformly sized; the test loader
    # serves the whole test set in a single batch.
    self.training_loader = data.DataLoader(
        train_split, batch_size=batch_size, shuffle=True, drop_last=True)
    self.testing_loader = data.DataLoader(
        test_split, batch_size=self.n_test, shuffle=False)

    self.ae = None
    self.discriminator = None
    self.build_model()
    self.print_network()
def __init__(
    self,
    data_name,
    seed=0,
    learning_rate=1e-3,
    training_ratio=0.8,
    validation_ratio=0.1,
    missing_ratio=0.5,
):
    """Set up the LOF solver: load the dataset, optionally KNN-impute
    missing entries, and produce a train/test split.

    Args:
        data_name: dataset name; data is read from ./data/<data_name>.npy.
        seed: random seed for numpy and the sklearn split.
        learning_rate: stored on the instance (LOF itself does not use it).
        training_ratio: fraction of samples used for training.
        validation_ratio: fraction of samples reserved for validation.
        missing_ratio: fraction of entries masked as missing in RealDataset;
            0.0 means the raw data is used as-is.
    """
    # Data loader
    # read data here
    np.random.seed(seed)
    data_path = "./data/" + data_name + ".npy"
    self.result_path = "./results/{}/{}/LOF/{}/".format(
        data_name, missing_ratio, seed
    )
    self.learning_rate = learning_rate
    self.dataset = RealDataset(data_path, missing_ratio=missing_ratio)
    self.seed = seed
    self.data_path = data_path

    self.data_anomaly_ratio = self.dataset.__anomalyratio__()
    self.input_dim = self.dataset.__dim__()
    self.data_normaly_ratio = 1 - self.data_anomaly_ratio
    n_sample = self.dataset.__len__()
    self.n_train = int(n_sample * training_ratio)
    self.n_validation = int(n_sample * validation_ratio)
    self.n_test = n_sample - self.n_train - self.n_validation
    self.best_model = None

    # BUGFIX: the original read the split sizes from the global `config`
    # object, silently ignoring the training_ratio/validation_ratio
    # parameters this constructor accepts. Use the parameters instead.
    test_size = 1 - training_ratio - validation_ratio
    if missing_ratio == 0.0:
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.dataset.x,
            self.dataset.y,
            test_size=test_size,
            random_state=seed,
        )
    if missing_ratio > 0.0:
        # Impute entries flagged missing (mask m == 0) with 2-NN imputation
        # before splitting.
        # BUGFIX: copy before writing NaNs — the original aliased
        # self.dataset.x and mutated the dataset's backing array in place.
        x_with_missing = self.dataset.x.copy()
        x_with_missing[self.dataset.m == 0] = np.nan
        imputer = KNNImputer(n_neighbors=2)
        x = imputer.fit_transform(x_with_missing)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            x,
            self.dataset.y,
            test_size=test_size,
            random_state=seed,
        )
    print(
        "|data dimension: {}|data noise ratio:{}".format(
            self.dataset.__dim__(), self.data_anomaly_ratio
        )
    )
def __init__(
    self,
    data_name,
    seed=0,
    learning_rate=1e-3,
    training_ratio=0.8,
    validation_ratio=0.1,
    missing_ratio=0.5,
    max_epochs=100,
    z_dim=10,
    batch_size=64,
):
    """Set up the SO-GAAL solver: load the dataset and produce a
    train/test split of the raw arrays.

    Args:
        data_name: dataset name; data is read from ./data/<data_name>.npy.
        seed: random seed for numpy and the sklearn split.
        learning_rate: optimizer learning rate.
        training_ratio: fraction of samples used for training.
        validation_ratio: fraction of samples reserved for validation.
        missing_ratio: fraction of entries masked as missing in RealDataset.
        max_epochs: number of training epochs.
        z_dim: latent (noise) dimension for the generator.
        batch_size: training mini-batch size.
    """
    # Data loader
    # read data here
    self.max_epochs = max_epochs
    self.z_dim = z_dim
    self.batch_size = batch_size
    np.random.seed(seed)
    data_path = "./data/" + data_name + ".npy"
    self.result_path = "./results/{}/{}/SO_GAAL/{}/".format(
        data_name, missing_ratio, seed
    )
    self.learning_rate = learning_rate
    self.dataset = RealDataset(data_path, missing_ratio=missing_ratio)
    self.seed = seed
    self.data_path = data_path

    self.data_anomaly_ratio = self.dataset.__anomalyratio__()
    self.input_dim = self.dataset.__dim__()
    self.data_normaly_ratio = 1 - self.data_anomaly_ratio
    n_sample = self.dataset.__len__()
    self.n_train = int(n_sample * training_ratio)
    self.n_validation = int(n_sample * validation_ratio)
    self.n_test = n_sample - self.n_train - self.n_validation
    self.best_model = None

    # BUGFIX: use this constructor's own ratio parameters; the original
    # read them from the global `config` object, silently ignoring the
    # training_ratio/validation_ratio arguments passed in.
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        self.dataset.x,
        self.dataset.y,
        test_size=1 - training_ratio - validation_ratio,
        random_state=seed,
    )
    print(
        "|data dimension: {}|data noise ratio:{}".format(
            self.dataset.__dim__(), self.data_anomaly_ratio
        )
    )
def __init__(self, data_name, start_ratio=0.0, decay_ratio=0.01, hidden_dim=128,
             z_dim=10, seed=0, learning_rate=1e-3, batch_size=128,
             training_ratio=0.8, validation_ratio=0.1, max_epochs=100,
             coteaching=0.0, knn_impute=False, missing_ratio=0.0):
    """Set up the SVDD solver: seed all RNGs, load the dataset, and build
    the train/test data loaders.

    Args:
        data_name: dataset name; data is read from ./data/<data_name>.npy.
        start_ratio: initial sample-selection ratio for the schedule below.
        decay_ratio: nominal decay step; NOTE this parameter's value is
            overwritten later in this constructor (see below).
        hidden_dim: hidden-layer width of the model.
        z_dim: bottleneck dimension.
        seed: random seed applied to numpy and torch (CPU and CUDA).
        learning_rate: optimizer learning rate.
        batch_size: training mini-batch size.
        training_ratio: fraction of samples used for training.
        validation_ratio: folded into the training split (no separate
            validation loader is built here).
        max_epochs: number of training epochs.
        coteaching: co-teaching coefficient.
        knn_impute: whether RealDataset should KNN-impute missing entries.
        missing_ratio: fraction of entries masked as missing.
    """
    # Data loader
    # read data here
    # Seed every RNG source before loading/splitting for reproducibility.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    use_cuda = torch.cuda.is_available()
    self.data_name = data_name
    self.device = torch.device("cuda" if use_cuda else "cpu")
    data_path = "./data/" + data_name + ".npy"
    self.model_save_path = "./trained_model/{}/{}/SVDD/{}/".format(
        data_name, missing_ratio, seed)
    self.result_path = "./results/{}/{}/SVDD/{}/".format(
        data_name, missing_ratio, seed)
    os.makedirs(self.model_save_path, exist_ok=True)
    self.learning_rate = learning_rate
    self.missing_ratio = missing_ratio
    self.dataset = RealDataset(data_path, missing_ratio=self.missing_ratio,
                               knn_impute=knn_impute)
    self.seed = seed
    self.start_ratio = start_ratio
    self.decay_ratio = decay_ratio
    self.hidden_dim = hidden_dim
    self.z_dim = z_dim
    self.max_epochs = max_epochs
    self.coteaching = coteaching
    self.data_path = data_path
    # Dataset statistics used for the selection schedule and reporting.
    self.data_anomaly_ratio = self.dataset.__anomalyratio__()
    self.input_dim = self.dataset.__dim__()
    self.data_normaly_ratio = 1 - self.data_anomaly_ratio
    n_sample = self.dataset.__len__()
    # Validation samples are folded into the training split here.
    self.n_train = int(n_sample * (training_ratio + validation_ratio))
    # self.n_validation = int(n_sample * validation_ratio)
    self.n_test = n_sample - self.n_train
    print("|data dimension: {}|data noise ratio:{}".format(
        self.dataset.__dim__(), self.data_anomaly_ratio))
    # NOTE(review): this overwrites the decay_ratio parameter assigned
    # above — the schedule reaches (1 - anomaly_ratio) from start_ratio in
    # half of max_epochs, and the passed-in decay_ratio is effectively dead.
    self.decay_ratio = abs(self.start_ratio - (1 - self.data_anomaly_ratio)) / (
        self.max_epochs / 2)
    training_data, testing_data = data.random_split(
        dataset=self.dataset, lengths=[self.n_train, self.n_test])
    self.training_loader = data.DataLoader(training_data,
                                           batch_size=batch_size,
                                           shuffle=True)
    # Single-batch test loader: serves the whole test set at once.
    self.testing_loader = data.DataLoader(testing_data,
                                          batch_size=self.n_test,
                                          shuffle=False)
    self.ae = None
    self.discriminator = None
    self.build_model()
    self.print_network()
def __init__(
    self,
    data_name,
    hidden_dim=128,  # number of hidden neurons in RCA
    z_dim=10,  # bottleneck dimension
    seed=0,  # random seed
    learning_rate=1e-3,  # learning rate
    batch_size=128,  # batchsize
    training_ratio=0.8,  # training data percentage
    max_epochs=100,  # training epochs
    coteaching=1.0,  # whether selects sample based on loss value
    oe=0.0,  # how much we overestimate the ground-truth anomaly ratio
    missing_ratio=0.0,  # missing ratio in the data
):
    """Set up the RCA solver: seed all RNGs, load the dataset, initialise
    the sample-selection schedule, and build the train/test data loaders.
    """
    # Data loader
    # read data here
    # Seed every RNG source before loading/splitting for reproducibility.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    use_cuda = torch.cuda.is_available()
    self.data_name = data_name
    self.device = torch.device("cuda" if use_cuda else "cpu")
    data_path = "./data/" + data_name + ".npy"
    self.missing_ratio = missing_ratio
    self.model_save_path = "./trained_model/{}/{}/RCA/{}/".format(
        data_name, missing_ratio, seed
    )
    # When oe > 0 the result directory name records the overestimation so
    # runs with different oe values do not overwrite each other.
    if oe == 0.0:
        self.result_path = "./results/{}/{}/RCA/{}/".format(
            data_name, missing_ratio, seed
        )
    else:
        self.result_path = "./results/{}/{}/RCA_{}/{}/".format(
            data_name, missing_ratio, oe, seed
        )
    os.makedirs(self.model_save_path, exist_ok=True)
    self.learning_rate = learning_rate
    self.dataset = RealDataset(
        data_path, missing_ratio=self.missing_ratio
    )
    self.seed = seed
    self.hidden_dim = hidden_dim
    self.z_dim = z_dim
    self.max_epochs = max_epochs
    self.coteaching = coteaching
    self.beta = 0.0  # initially, select all data
    self.alpha = 0.5
    self.data_path = data_path
    # Anomaly ratio is deliberately inflated by oe (overestimation).
    self.data_anomaly_ratio = self.dataset.__anomalyratio__() + oe
    self.input_dim = self.dataset.__dim__()
    self.data_normaly_ratio = 1 - self.data_anomaly_ratio
    n_sample = self.dataset.__len__()
    self.n_train = int(n_sample * (training_ratio))
    self.n_test = n_sample - self.n_train
    print(
        "|data dimension: {}|data noise ratio:{}".format(
            self.dataset.__dim__(), self.data_anomaly_ratio
        )
    )
    # Selection ratio moves from beta toward (1 - anomaly_ratio) over the
    # first half of training.
    self.decay_ratio = abs(self.beta - (1 - self.data_anomaly_ratio)) / (
        self.max_epochs / 2
    )
    training_data, testing_data = data.random_split(
        dataset=self.dataset, lengths=[self.n_train, self.n_test]
    )
    self.training_loader = data.DataLoader(
        training_data, batch_size=batch_size, shuffle=True
    )
    # Single-batch test loader: serves the whole test set at once.
    self.testing_loader = data.DataLoader(
        testing_data, batch_size=self.n_test, shuffle=False
    )
    self.ae = None
    self.discriminator = None
    self.build_model()
    self.print_network()
def __init__(self, data_name, lambda_energy=0.1, lambda_cov_diag=0.005,
             hidden_dim=128, z_dim=10, seed=0, learning_rate=1e-3, gmm_k=2,
             batch_size=128, training_ratio=0.8, validation_ratio=0.1,
             max_epochs=100, missing_ratio=0.0):
    """Set up the DAGMM solver: seed all RNGs, load the dataset, and build
    the train/test data loaders.

    Args:
        data_name: dataset name; data is read from ./data/<data_name>.npy.
        lambda_energy: weight of the sample-energy loss term.
        lambda_cov_diag: weight of the covariance-diagonal penalty.
        hidden_dim: hidden-layer width of the autoencoder.
        z_dim: bottleneck dimension.
        seed: random seed applied to numpy and torch (CPU and CUDA).
        learning_rate: optimizer learning rate.
        gmm_k: number of GMM mixture components.
        batch_size: training mini-batch size.
        training_ratio: fraction of samples used for training.
        validation_ratio: folded into the training split (no separate
            validation loader is built here).
        max_epochs: number of training epochs.
        missing_ratio: fraction of entries masked as missing.
    """
    # Data loader
    self.gmm_k = gmm_k
    self.lambda_energy = lambda_energy
    self.lambda_cov_diag = lambda_cov_diag

    # Seed every RNG source before any data loading/splitting so the
    # random_split below is reproducible.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")

    data_path = "./data/" + data_name + ".npy"
    self.model_save_path = "./trained_model/{}/{}/DAGMM/{}/".format(
        data_name, missing_ratio, seed)
    self.result_path = "./results/{}/{}/DAGMM/{}/".format(
        data_name, missing_ratio, seed)
    os.makedirs(self.model_save_path, exist_ok=True)

    self.learning_rate = learning_rate
    self.missing_ratio = missing_ratio
    self.dataset = RealDataset(data_path, missing_ratio=self.missing_ratio)
    self.seed = seed
    self.hidden_dim = hidden_dim
    self.z_dim = z_dim
    self.max_epochs = max_epochs
    self.data_path = data_path

    # Dataset statistics used by the model and for reporting.
    self.data_anomaly_ratio = self.dataset.__anomalyratio__()
    self.data_normaly_ratio = 1 - self.data_anomaly_ratio
    self.input_dim = self.dataset.__dim__()

    n_sample = self.dataset.__len__()
    # Validation samples are folded into the training split here.
    self.n_train = int(n_sample * (training_ratio + validation_ratio))
    self.n_test = n_sample - self.n_train
    print('|data dimension: {}|data noise ratio:{}'.format(
        self.dataset.__dim__(), self.data_anomaly_ratio))

    train_split, test_split = data.random_split(
        dataset=self.dataset, lengths=[self.n_train, self.n_test])
    self.training_loader = data.DataLoader(
        train_split, batch_size=batch_size, shuffle=True)
    # Single-batch test loader: serves the whole test set at once.
    self.testing_loader = data.DataLoader(
        test_split, batch_size=self.n_test, shuffle=False)

    self.build_model()
    self.print_network()