def load_dataset_path(self):
    """Load pre-computed autoencoder representations from disk.

    Reads feature matrices written by a previous hybrid run under
    ../log/hybrid/<dataset>/seed_<seed>/ and populates the train,
    validation and test splits plus the derived counters and
    Cfg.n_batches.
    """
    path = '../log/hybrid/' + Cfg.dataset + '/seed_' + str(Cfg.seed)
    print("Loading data from path: ...", path)

    # Train/val representations; labels are all zero (normal class only).
    self._X_train = np.loadtxt(path + "/repsTrain_ae.txt", delimiter=',')
    self._y_train = np.zeros(len(self._X_train), dtype=np.uint8)
    self._X_val = np.loadtxt(path + "/repsVal_ae.txt", delimiter=',')
    self._y_val = np.zeros(len(self._X_val), dtype=np.uint8)
    self.n_train = len(self._y_train)
    self.n_val = len(self._y_val)

    # Adjust number of batches
    Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

    # Test set: split representations into normal / outlier classes.
    # NOTE(review): eval() on config strings — ensure Cfg values are trusted.
    normal = eval(Cfg.mnist_normal)
    outliers = eval(Cfg.mnist_outlier)
    X_test = np.loadtxt(path + "/repsTest_ae.txt", delimiter=',')
    y_test = np.loadtxt(path + "/ltest_ae.txt", delimiter=',')
    X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
        X_test, y_test, normal=normal, outlier=outliers)

    # Keep the original (multi-class) test labels alongside the binary ones.
    self._yo_test = np.append(y_test[idx_norm], y_test[idx_out])
    self._X_test = np.concatenate((X_norm, X_out))
    self._y_test = np.append(y_norm, y_out)

    # Shuffle the test set so normals and outliers are interleaved.
    shuffle_idx = np.random.permutation(len(self._y_test))
    self._X_test = self._X_test[shuffle_idx]
    self._y_test = self._y_test[shuffle_idx]
    self._yo_test = self._yo_test[shuffle_idx]
    self.n_test = len(self._y_test)
def load_data(self, original_scale=False):
    """Load the CIFAR-10 pickle batches and build train/val/test splits.

    When Cfg.ad_experiment is set, data is re-labelled into a normal and
    an outlier class (outliers reduced to Cfg.out_frac of the training
    data); otherwise a plain train/val split of size self.n_val is made.
    Unless original_scale is True, pixel data is rescaled to [0, 1] and
    optionally GCN / ZCA / PCA preprocessed.
    """
    print("Loading data...")

    # load all training batches (data_batch_1, data_batch_2, ...)
    X, y = [], []
    count = 1
    filename = '%s/data_batch_%i' % (self.data_path, count)
    while os.path.exists(filename):
        with open(filename, 'rb') as f:
            # NOTE(review): Python 3 may need pickle.load(f, encoding='latin1')
            # for the original CIFAR-10 pickles — confirm target interpreter.
            batch = pickle.load(f)
        X.append(batch['data'])
        y.append(batch['labels'])
        count += 1
        filename = '%s/data_batch_%i' % (self.data_path, count)

    # reshape data and cast them properly
    X = np.concatenate(X).reshape(-1, 3, 32, 32).astype(np.float32)
    y = np.concatenate(y).astype(np.int32)

    # load test set
    path = '%s/test_batch' % self.data_path
    with open(path, 'rb') as f:
        batch = pickle.load(f)

    # reshaping and casting for test data
    X_test = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)
    y_test = np.array(batch['labels'], dtype=np.int32)

    if Cfg.ad_experiment:
        # NOTE(review): eval() on config strings — ensure config is trusted
        normal = eval(Cfg.cifar10_normal)
        outliers = eval(Cfg.cifar10_outlier)

        # extract normal and anomalous class
        X_norm, X_out, y_norm, y_out, _, _ = extract_norm_and_out(
            X, y, normal=normal, outlier=outliers)

        # reduce outliers to fraction defined
        n_norm = len(y_norm)
        n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

        # shuffle to obtain random validation splits
        np.random.seed(self.seed)
        perm_norm = np.random.permutation(len(y_norm))
        perm_out = np.random.permutation(len(y_out))

        # split into training and validation set
        n_norm_split = int(Cfg.cifar10_val_frac * n_norm)
        n_out_split = int(Cfg.cifar10_val_frac * n_out)
        self._X_train = np.concatenate(
            (X_norm[perm_norm[n_norm_split:]],
             X_out[perm_out[:n_out][n_out_split:]]))
        self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                  y_out[perm_out[:n_out][n_out_split:]])
        self._X_val = np.concatenate(
            (X_norm[perm_norm[:n_norm_split]],
             X_out[perm_out[:n_out][:n_out_split]]))
        self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                y_out[perm_out[:n_out][:n_out_split]])

        # shuffle data (since batches are extracted block-wise)
        self.n_train = len(self._y_train)
        self.n_val = len(self._y_val)
        perm_train = np.random.permutation(self.n_train)
        perm_val = np.random.permutation(self.n_val)
        self._X_train = self._X_train[perm_train]
        self._y_train = self._y_train[perm_train]
        self._X_val = self._X_val[perm_val]
        self._y_val = self._y_val[perm_val]

        # Subset train set such that we only get batches of the same size.
        # BUGFIX: use floor division — true division ("/") yields a float
        # on Python 3, which np.random.choice rejects as a size.
        self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
        subset = np.random.choice(len(self._X_train), self.n_train,
                                  replace=False)
        self._X_train = self._X_train[subset]
        self._y_train = self._y_train[subset]

        # Adjust number of batches
        Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # test set
        X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
            X_test, y_test, normal=normal, outlier=outliers)

        # store original test labels for visualisation
        yo_norm = y_test[idx_norm]
        yo_out = y_test[idx_out]
        self._yo_test = np.append(yo_norm, yo_out)

        self._X_test = np.concatenate((X_norm, X_out))
        self._y_test = np.append(y_norm, y_out)
        perm_test = np.random.permutation(len(self._y_test))
        self._X_test = self._X_test[perm_test]
        self._y_test = self._y_test[perm_test]
        self._yo_test = self._yo_test[perm_test]
        self.n_test = len(self._y_test)
    else:
        # split into training and validation sets with stored seed
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))
        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]
        self._X_test = X_test
        self._y_test = y_test

    # normalize data (if original scale should not be preserved)
    if not original_scale:
        # simple rescaling to [0,1]
        normalize_data(self._X_train, self._X_val, self._X_test,
                       scale=np.float32(255))

        # global contrast normalization
        if Cfg.gcn:
            global_contrast_normalization(self._X_train, self._X_val,
                                          self._X_test,
                                          scale=Cfg.unit_norm_used)

        # ZCA whitening
        if Cfg.zca_whitening:
            self._X_train, self._X_val, self._X_test = zca_whitening(
                self._X_train, self._X_val, self._X_test)

        # rescale to [0,1] (w.r.t. min and max in train data)
        rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

        # PCA
        if Cfg.pca:
            self._X_train, self._X_val, self._X_test = pca(
                self._X_train, self._X_val, self._X_test, 0.95)

    flush_last_line()
    print("Data loaded.")
def load_data(self, original_scale=False):
    """Load the LHC image dataset and build train/val/test splits.

    When Cfg.ad_experiment is set, class 1 is treated as normal and
    class 0 as outlier (outliers reduced to Cfg.out_frac of the
    training data); otherwise a plain train/val split of size
    self.n_val is made.  Unless original_scale is True, the data is
    rescaled to [0, 1] and optionally GCN / ZCA / PCA preprocessed.
    """
    print("[INFO ]: ", "Please wait while ", self.dataset_name,
          " data is being loaded...")

    [X, y] = load_lhc_train_images(self.data_path)
    [X_test, y_test] = load_lhc_test_images(self.data_path)

    if Cfg.ad_experiment:
        # set normal and anomalous class
        normal = [1]
        outliers = [0]

        # extract normal and anomalous class
        X_norm, X_out, y_norm, y_out = extract_norm_and_out(
            X, y, normal=normal, outlier=outliers)

        # reduce outliers to fraction defined
        n_norm = len(y_norm)
        n_out = int(
            np.ceil(
                float(Cfg.out_frac) * n_norm / (1 - float(Cfg.out_frac))))

        # shuffle to obtain random validation splits
        np.random.seed(self.seed)
        perm_norm = np.random.permutation(len(y_norm))
        perm_out = np.random.permutation(len(y_out))

        # split into training and validation set
        n_norm_split = int(Cfg.lhc_val_frac * n_norm)
        n_out_split = int(Cfg.lhc_val_frac * n_out)
        self._X_train = np.concatenate(
            (X_norm[perm_norm[n_norm_split:]],
             X_out[perm_out[:n_out][n_out_split:]]))
        self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                  y_out[perm_out[:n_out][n_out_split:]])
        self._X_val = np.concatenate(
            (X_norm[perm_norm[:n_norm_split]],
             X_out[perm_out[:n_out][:n_out_split]]))
        self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                y_out[perm_out[:n_out][:n_out_split]])

        # shuffle data (since batches are extracted block-wise)
        self.n_train = len(self._y_train)
        self.n_val = len(self._y_val)
        perm_train = np.random.permutation(self.n_train)
        perm_val = np.random.permutation(self.n_val)
        self._X_train = self._X_train[perm_train]
        self._y_train = self._y_train[perm_train]
        # BUGFIX: was self._X_train[perm_val] / self._y_train[perm_val],
        # which silently replaced the validation set with training samples
        # (the other dataset loaders index the val arrays correctly).
        self._X_val = self._X_val[perm_val]
        self._y_val = self._y_val[perm_val]

        # Subset train set such that we only get batches of the same size.
        # BUGFIX: floor division keeps n_train an int on Python 3.
        self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
        subset = np.random.choice(len(self._X_train), self.n_train,
                                  replace=False)
        self._X_train = self._X_train[subset]
        self._y_train = self._y_train[subset]

        # Adjust number of batches
        Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # test set
        X_norm, X_out, y_norm, y_out = extract_norm_and_out(
            X_test, y_test, normal=normal, outlier=outliers)
        self._X_test = np.concatenate((X_norm, X_out))
        self._y_test = np.append(y_norm, y_out)
        perm_test = np.random.permutation(len(self._y_test))
        self._X_test = self._X_test[perm_test]
        self._y_test = self._y_test[perm_test]
        self.n_test = len(self._y_test)
    else:
        # split into training, validation, and test sets
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))
        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]
        self._X_test = X_test
        self._y_test = y_test

    # normalize data (if original scale should not be preserved)
    if not original_scale:
        # simple rescaling to [0,1]
        normalize_data(self._X_train, self._X_val, self._X_test,
                       scale=np.float32(255))

        # global contrast normalization
        if Cfg.gcn:
            global_contrast_normalization(self._X_train, self._X_val,
                                          self._X_test,
                                          scale=Cfg.unit_norm_used)

        # ZCA whitening
        if Cfg.zca_whitening:
            self._X_train, self._X_val, self._X_test = zca_whitening(
                self._X_train, self._X_val, self._X_test)

        # rescale to [0,1] (w.r.t. min and max in train data)
        rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

        # PCA
        if Cfg.pca:
            self._X_train, self._X_val, self._X_test = pca(
                self._X_train, self._X_val, self._X_test, 0.95)

    flush_last_line()
    print("[INFO] : Data loaded.")
def load_data(self, original_scale=False):
    """Load the MNIST dataset and build train/val/test splits.

    When Cfg.ad_experiment is set, digits are split into a normal and
    an outlier class from Cfg.mnist_normal / Cfg.mnist_outlier (-1
    means "all other digits"), with outliers reduced to Cfg.out_frac
    of the training data; otherwise a plain train/val split of size
    self.n_val is made.  Unless original_scale is True, the data is
    rescaled to [0, 1] and optionally GCN / ZCA / PCA preprocessed.
    """
    print("Loading data...")

    X = load_mnist_images('%strain-images-idx3-ubyte.gz' % self.data_path)
    y = load_mnist_labels('%strain-labels-idx1-ubyte.gz' % self.data_path)
    X_test = load_mnist_images('%st10k-images-idx3-ubyte.gz' % self.data_path)
    y_test = load_mnist_labels('%st10k-labels-idx1-ubyte.gz' % self.data_path)

    if Cfg.ad_experiment:
        # set normal and anomalous class (-1 means "all other digits")
        normal = []
        outliers = []
        if Cfg.mnist_normal == -1:
            normal = list(range(0, 10))
            normal.remove(Cfg.mnist_outlier)
        else:
            normal.append(Cfg.mnist_normal)
        if Cfg.mnist_outlier == -1:
            outliers = list(range(0, 10))
            outliers.remove(Cfg.mnist_normal)
        else:
            outliers.append(Cfg.mnist_outlier)

        # extract normal and anomalous class
        X_norm, X_out, y_norm, y_out = extract_norm_and_out(
            X, y, normal=normal, outlier=outliers)

        # reduce outliers to fraction defined
        n_norm = len(y_norm)
        n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

        # shuffle to obtain random validation splits
        np.random.seed(self.seed)
        perm_norm = np.random.permutation(len(y_norm))
        perm_out = np.random.permutation(len(y_out))

        # split into training and validation set
        n_norm_split = int(Cfg.mnist_val_frac * n_norm)
        n_out_split = int(Cfg.mnist_val_frac * n_out)
        self._X_train = np.concatenate(
            (X_norm[perm_norm[n_norm_split:]],
             X_out[perm_out[:n_out][n_out_split:]]))
        self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                  y_out[perm_out[:n_out][n_out_split:]])
        self._X_val = np.concatenate(
            (X_norm[perm_norm[:n_norm_split]],
             X_out[perm_out[:n_out][:n_out_split]]))
        self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                y_out[perm_out[:n_out][:n_out_split]])

        # shuffle data (since batches are extracted block-wise)
        self.n_train = len(self._y_train)
        self.n_val = len(self._y_val)
        perm_train = np.random.permutation(self.n_train)
        perm_val = np.random.permutation(self.n_val)
        self._X_train = self._X_train[perm_train]
        self._y_train = self._y_train[perm_train]
        # BUGFIX: was self._X_train[perm_val] / self._y_train[perm_val],
        # which silently replaced the validation set with training samples
        # (the other dataset loaders index the val arrays correctly).
        self._X_val = self._X_val[perm_val]
        self._y_val = self._y_val[perm_val]

        # Subset train set such that we only get batches of the same size.
        # BUGFIX: floor division keeps n_train an int on Python 3.
        self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
        subset = np.random.choice(len(self._X_train), self.n_train,
                                  replace=False)
        self._X_train = self._X_train[subset]
        self._y_train = self._y_train[subset]

        # Adjust number of batches
        Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # test set
        X_norm, X_out, y_norm, y_out = extract_norm_and_out(
            X_test, y_test, normal=normal, outlier=outliers)
        self._X_test = np.concatenate((X_norm, X_out))
        self._y_test = np.append(y_norm, y_out)
        perm_test = np.random.permutation(len(self._y_test))
        self._X_test = self._X_test[perm_test]
        self._y_test = self._y_test[perm_test]
        self.n_test = len(self._y_test)
    else:
        # split into training, validation, and test sets
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))
        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]
        self._X_test = X_test
        self._y_test = y_test

    # normalize data (if original scale should not be preserved)
    if not original_scale:
        # simple rescaling to [0,1]
        normalize_data(self._X_train, self._X_val, self._X_test,
                       scale=np.float32(255))

        # global contrast normalization
        if Cfg.gcn:
            global_contrast_normalization(self._X_train, self._X_val,
                                          self._X_test,
                                          scale=Cfg.unit_norm_used)

        # ZCA whitening
        if Cfg.zca_whitening:
            self._X_train, self._X_val, self._X_test = zca_whitening(
                self._X_train, self._X_val, self._X_test)

        # rescale to [0,1] (w.r.t. min and max in train data)
        rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

        # PCA
        if Cfg.pca:
            self._X_train, self._X_val, self._X_test = pca(
                self._X_train, self._X_val, self._X_test, 0.95)

    flush_last_line()
    print("Data loaded.")
def load_data(self, original_scale=False):
    """Load the MobiFall dataset and build train/val/test splits.

    When Cfg.ad_experiment is set, activities are split into a normal
    and an outlier class from Cfg.mobiFall_normal / Cfg.mobiFall_outlier
    (outliers reduced to Cfg.out_frac of the training data); otherwise
    a plain train/val split of size self.n_val is made.  Unless
    original_scale is True, the data is rescaled to [0, 1] and
    optionally PCA preprocessed.
    """
    print("Loading data...")

    X, y = load_mobiFall_data('%smobiFall/train_mobiFall' % self.data_path)
    X_test, y_test = load_mobiFall_data('%smobiFall/test_mobiFall'
                                        % self.data_path)

    if Cfg.ad_experiment:
        # NOTE(review): eval() on config strings — ensure config is trusted
        normal = eval(Cfg.mobiFall_normal)
        outliers = eval(Cfg.mobiFall_outlier)

        # extract normal and anomalous class
        X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
            X, y, normal=normal, outlier=outliers)

        # reduce outliers to fraction defined
        n_norm = len(y_norm)
        n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

        # shuffle to obtain random validation splits
        np.random.seed(self.seed)
        perm_norm = np.random.permutation(len(y_norm))
        perm_out = np.random.permutation(len(y_out))

        # split into training and validation set
        n_norm_split = int(Cfg.mobiFall_val_frac * n_norm)
        n_out_split = int(Cfg.mobiFall_val_frac * n_out)
        self._X_train = np.concatenate(
            (X_norm[perm_norm[n_norm_split:]],
             X_out[perm_out[:n_out][n_out_split:]]))
        self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                  y_out[perm_out[:n_out][n_out_split:]])
        self._X_val = np.concatenate(
            (X_norm[perm_norm[:n_norm_split]],
             X_out[perm_out[:n_out][:n_out_split]]))
        self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                y_out[perm_out[:n_out][:n_out_split]])

        # shuffle data (since batches are extracted block-wise)
        self.n_train = len(self._y_train)
        self.n_val = len(self._y_val)
        perm_train = np.random.permutation(self.n_train)
        perm_val = np.random.permutation(self.n_val)
        self._X_train = self._X_train[perm_train]
        self._y_train = self._y_train[perm_train]
        self._X_val = self._X_val[perm_val]
        self._y_val = self._y_val[perm_val]

        # Subset train set such that we only get batches of the same size.
        # BUGFIX: use floor division — true division ("/") yields a float
        # on Python 3, which np.random.choice rejects as a size.
        self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
        subset = np.random.choice(len(self._X_train), self.n_train,
                                  replace=False)
        self._X_train = self._X_train[subset]
        self._y_train = self._y_train[subset]

        # Adjust number of batches
        Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # test set
        X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
            X_test, y_test, normal=normal, outlier=outliers)

        # store original test labels for visualisation
        yo_norm = y_test[idx_norm]
        yo_out = y_test[idx_out]
        self._yo_test = np.append(yo_norm, yo_out)

        self._X_test = np.concatenate((X_norm, X_out))
        self._y_test = np.append(y_norm, y_out)
        perm_test = np.random.permutation(len(self._y_test))
        self._X_test = self._X_test[perm_test]
        self._y_test = self._y_test[perm_test]
        self._yo_test = self._yo_test[perm_test]
        self.n_test = len(self._y_test)
    else:
        # split into training, validation, and test sets
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))
        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]
        self._X_test = X_test
        self._y_test = y_test

    # normalize data (if original scale should not be preserved)
    if not original_scale:
        # rescale to [0,1] (w.r.t. min and max in train data)
        rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

        # PCA
        if Cfg.pca:
            self._X_train, self._X_val, self._X_test = pca(
                self._X_train, self._X_val, self._X_test, 0.95)

    flush_last_line()
    print("Data loaded.")