def run_data_autoencoder(self):
    """
    Run the autoencoder over every data set in self.data_list.
    :return: None
    """
    show_struct = True
    for data in self.data_list:
        print("----------------------------------All Data: %s----------------------------------" % data.name)
        print("___________________________________\nData Set: ", data.name)
        # mean-center each column and scale it by its range
        data.df = (data.df - data.df.mean()) / (data.df.max() - data.df.min())
        data.split_data(data_frame=data.df)  # sets test and train data
        # hyperparameters: 3 layers with sizes [d-1, d-3, d-1], where d is the
        # number of columns; the same settings are used for every data set
        # (the original branched on data.name == "Segmentation" but both
        # branches were identical)
        auto = AutoEncoder(3, False,
                           [data.df.shape[1] - 1, data.df.shape[1] - 3, data.df.shape[1] - 1],
                           data.train_df.shape[1], 0.01, 0.45)
        auto.fit_auto_encoder(data.train_df)  # train the network
        if show_struct:
            auto.print_layer_neuron_data()  # print the network once
            show_struct = False
        auto.test(data.test_df)  # test network
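# The normalization above mean-centers each column and scales it by its range,
# so every feature ends up with zero mean and a spread of at most one. A
# self-contained illustration with a hypothetical two-column frame:
import pandas as pd

toy = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 30.0, 20.0]})
print((toy - toy.mean()) / (toy.max() - toy.min()))  # zero mean, unit range per column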
def test_stack_encoder_structure(self):
    data = Data('abalone', pd.read_csv(r'data/abalone.data', header=None), 8, False)  # load data
    # data.df = data.df.sample(n=500)  # minimal data frame
    data.df = (data.df - data.df.mean()) / (data.df.max() - data.df.min())
    data.split_data(data_frame=data.df)  # sets test and train data
    auto = AutoEncoder(3, False, [7, 5, 7], data.train_df.shape[1], 0.03, 0.45)
    auto.fit_stacked_auto_encoder(data.train_df)
    auto.print_layer_neuron_data()
    auto.test(data.test_df)
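# For orientation, a generic greedy layer-wise scheme; this is only a sketch
# and not necessarily what fit_stacked_auto_encoder does internally. Each
# hidden layer of a [7, 5, 7] stack is trained as a shallow autoencoder on
# the codes produced by the layer before it.
def greedy_pretrain(X, hidden_sizes, train_shallow_ae):
    """hidden_sizes: e.g. [5]; train_shallow_ae is a hypothetical helper that
    trains a one-hidden-layer autoencoder on X and returns its encode function."""
    encoders, codes = [], X
    for hidden in hidden_sizes:
        encode = train_shallow_ae(codes, hidden)
        encoders.append(encode)
        codes = encode(codes)  # the next layer trains on these codes
    return encoders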
import numpy as np
from sklearn.metrics import roc_curve, auc


def Compute_AUC_RE(data, train_X, test_X, actual, h_size, epoch, k_log, l_rate, bw, exp):
    # ******************* Call autoencoder **********************
    i_size = train_X.shape[1]
    AE = AutoEncoder(input_size=i_size, hidden_size=h_size, n_epochs=epoch,
                     learning_rate=l_rate, K=k_log)
    AE.fit(train_X)
    # get reconstructions of train_X and test_X, and their hidden representations
    output_train = AE.get_output(train_X)
    output_test = AE.get_output(test_X)
    train_hidden = AE.get_hidden(train_X)
    test_hidden = AE.get_hidden(test_X)

    # *************** RE-based one-class classifier *************
    RE_MSE = ((test_X - output_test[0]) ** 2).mean(1)
    # The reconstruction error (MSE) between output and input is the anomaly
    # score. RE_MSE is negated so that larger values mean "more normal" when
    # computing FPR and TPR with roc_curve.
    predictions_auto = -RE_MSE
    FPR_ae, TPR_ae, thresholds_auto = roc_curve(actual, predictions_auto)
    auc_ae = auc(FPR_ae, TPR_ae)

    # ***************** Centroid on hidden data *****************
    CEN = CentroidBasedOneClassClassifier()
    CEN.fit(train_hidden[0])
    predictions_cen = -CEN.get_density(test_hidden[0])
    FPR_cen, TPR_cen, thresholds_cen = roc_curve(actual, predictions_cen)
    auc_cen = auc(FPR_cen, TPR_cen)

    # ****************** KDE on hidden layer *****************
    KDE = DensityBasedOneClassClassifier(bandwidth=bw, kernel="gaussian", metric="euclidean")
    KDE.fit(train_hidden[0])
    predictions_kde = KDE.get_density(test_hidden[0])
    FPR_kde, TPR_kde, thresholds_kde = roc_curve(actual, predictions_kde)
    auc_kde = auc(FPR_kde, TPR_kde)

    # ************ RE (MEAN-MSE) on training set *************
    RE = ((train_X - output_train[0]) ** 2).mean(1).mean()

    if exp == "ME":  # main experiment
        Plotting_AUC(FPR_ae, TPR_ae, auc_ae, FPR_cen, TPR_cen, auc_cen,
                     FPR_kde, TPR_kde, auc_kde, data)
    elif exp == "HD":  # investigate hidden size
        # save the hidden data to csv files
        np.savetxt("Results/Hidden_data/" + data + "_train_" + str(k_log) + ".csv",
                   train_hidden[0], delimiter=",", fmt='%f')
        np.savetxt("Results/Hidden_data/" + data + "_test_" + str(k_log) + ".csv",
                   test_hidden[0], delimiter=",", fmt='%f')
        return train_hidden[0], test_hidden[0]
    return auc_ae, auc_cen, auc_kde, RE
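# Hedged usage sketch for Compute_AUC_RE on synthetic data. The hyperparameter
# values and the label convention (1 = normal, 0 = anomaly, matching the
# negated-score trick above) are assumptions, not values from the original.
rng = np.random.RandomState(0)
train_X = rng.normal(size=(200, 10))                       # normal data only
test_X = np.vstack([rng.normal(size=(50, 10)),             # 50 normal points
                    rng.normal(5.0, 1.0, size=(10, 10))])  # 10 anomalies
actual = np.array([1] * 50 + [0] * 10)
auc_ae, auc_cen, auc_kde, RE = Compute_AUC_RE(
    "toy", train_X, test_X, actual,
    h_size=5, epoch=100, k_log=5, l_rate=0.01, bw=1.0, exp="none")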
def test_auto_encoder_structure(self):
    """
    Test the layers in the autoencoder.
    Test the format of the structures visually as well.
    Test traversing forwards and backwards (visual check as well).
    :return: None
    """
    data = Data('abalone', pd.read_csv(r'data/abalone.data', header=None), 8, False)  # load data
    df = data.df.sample(n=100)  # minimal data frame
    data.split_data(data_frame=df)  # sets test and train data
    auto = AutoEncoder(1, False, [3, 2, 3], df.shape[1], 0.2, 0.45)
    auto.fit_auto_encoder(data_obj=data)
    auto.print_layer_neuron_data()
    auto.test(data.test_df)
    # structure is good to go
    current = auto.output_layer
    while True:
        print(current.no_of_nodes)
        if current is auto.input_layer:
            break
        current = current.get_previous_layer()
    # traversal is good: forwards from the printout above, backwards from this loop
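# The loop above assumes the layers form a backward-linked list exposing
# no_of_nodes and get_previous_layer(); a minimal sketch of that shape
# (hypothetical, not the project's actual layer class):
class LinkedLayer:
    def __init__(self, no_of_nodes, previous_layer=None):
        self.no_of_nodes = no_of_nodes
        self._previous = previous_layer

    def get_previous_layer(self):
        return self._previous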
from Autoencoder import AutoEncoder
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from thermometerEncoder import ThermometerEncoder

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,
                   transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=100, shuffle=True)

dat = train_loader.dataset[0][0].cpu().numpy()
Tfit = ThermometerEncoder(dat, 15)
Z = Tfit.quantization()
oneEnc = Tfit.OneHotEncode()
print(Z)
print(oneEnc)

NormalAutoencoder = AutoEncoder()
AdversarialAutoencoder = AutoEncoder()
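# For reference, a minimal numpy sketch of thermometer encoding, under the
# assumption that ThermometerEncoder quantizes values in [0, 1] into K levels
# and emits cumulative ("thermometer") codes; the real class may differ.
import numpy as np

def thermometer_encode(x, k=15):
    levels = np.floor(np.clip(x, 0.0, 1.0) * (k - 1)).astype(int)  # quantize to k levels
    # bit j is on iff the quantized level is >= j, e.g. 0.5 with k=4 -> [1, 1, 0, 0]
    return (np.arange(k) <= levels[..., None]).astype(np.float32)

print(thermometer_encode(np.array([0.0, 0.5, 1.0]), k=4))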
import torch
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt

from Autoencoder import AutoEncoder

# Assumed here: BATCH_SIZE and device were defined earlier in the original
# script; these are plausible placeholders.
BATCH_SIZE = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])
testset = torchvision.datasets.MNIST(root='../data', train=False,
                                     download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

# Loading the model
autoencoder = AutoEncoder()
autoencoder.load_state_dict(torch.load("../model-states/AE-[25-Epochs]"))
autoencoder.to(device)

# Extract images and labels from testloader
images, labels = next(iter(testloader))

# Generate images without grad attribute
with torch.no_grad():
    images = images.to(device)
    encoded_data, decoded = autoencoder(images)


# Helper function to show images and their corresponding generated ones
def imshow(inputs, labels, outputs):
    # create subplots
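    # Minimal sketch of the rest of the helper; the original body was cut off,
    # so everything below is an assumption. It undoes the (0.5, 0.5)
    # normalization, assumes 28x28 MNIST images, and shows the first eight
    # inputs above their reconstructions.
    n = min(8, len(labels))
    fig, axes = plt.subplots(2, n, figsize=(2 * n, 4))
    for i in range(n):
        axes[0, i].imshow(inputs[i].cpu().reshape(28, 28).numpy() * 0.5 + 0.5, cmap="gray")
        axes[0, i].set_title(str(labels[i].item()))
        axes[0, i].axis("off")
        axes[1, i].imshow(outputs[i].cpu().reshape(28, 28).numpy() * 0.5 + 0.5, cmap="gray")
        axes[1, i].axis("off")
    plt.show()


# example call (assumed): compare test images with their reconstructions
imshow(images, labels, decoded)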