# UNSW-NB15 loaders. Three variants of process_unsw appear below; they
# differ only in the dataset root and the encoding flags passed to
# unsw.generate_dataset. The imports (module paths as used by the other
# scripts in this set) are shared by all three variants.
import numpy as np
from preprocess import unsw
from netlearner.utils import min_max_scale, permutate_dataset


def process_unsw(root='/home/naruto/NetLearner'):
    unsw.generate_dataset(False)
    raw_X_train = np.load('%s/UNSW/train_dataset.npy' % root)
    y_train = np.load('%s/UNSW/train_labels.npy' % root)
    raw_X_test = np.load('%s/UNSW/test_dataset.npy' % root)
    y_test = np.load('%s/UNSW/test_labels.npy' % root)
    # Fit the scaler on the training split only, then shuffle in place.
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
def process_unsw(root='SharedAutoEncoder/'):
    unsw.generate_dataset(one_hot_encode=True, root_dir=root)
    raw_X_train = np.load(root + 'UNSW/train_dataset.npy')
    y_train = np.load(root + 'UNSW/train_labels.npy')
    raw_X_test = np.load(root + 'UNSW/test_dataset.npy')
    y_test = np.load(root + 'UNSW/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
def process_unsw():
    unsw.generate_dataset(True)
    raw_X_train = np.load('UNSW/train_dataset.npy')
    y_train = np.load('UNSW/train_labels.npy')
    raw_X_test = np.load('UNSW/test_dataset.npy')
    y_test = np.load('UNSW/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
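# Minimal usage sketch for the loaders above. process_unsw and its result
# keys ('X', 'y', 'X_test', 'y_test') are taken from the code as written;
# the assertion itself is illustrative.
if __name__ == '__main__':
    data = process_unsw()
    X_train, y_train = data['X'], data['y']
    X_test, y_test = data['X_test'], data['y_test']
    # After min_max_scale (fit on train), training features lie in [0, 1].
    assert X_train.min() >= 0.0 and X_train.max() <= 1.0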
def load_unsw_dataset():
    # model_dir and data_dir are module-level settings defined elsewhere
    # in this script.
    unsw.generate_dataset(False, True, model_dir)
    raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
    train_labels = np.load(data_dir + 'train_labels.npy')
    raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
    test_labels = np.load(data_dir + 'test_labels.npy')
    # Alternative normalization kept from an earlier revision:
    # train_dataset, valid_dataset, test_dataset = min_max_normalize(
    #     raw_train_dataset, raw_valid_dataset, raw_test_dataset)
    # print('Min-Max normalizing dataset')
    train_dataset, _, test_dataset = standard_scale(raw_train_dataset,
                                                    None, raw_test_dataset)
    print('Mean normalizing dataset')
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)
    return (train_dataset, train_labels, test_dataset, test_labels)
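# For reference, a self-contained numpy sketch of what the two scalers used
# above presumably compute: statistics are fit on the training split and
# applied unchanged to the test split. The real min_max_scale and
# standard_scale live in netlearner.utils; these versions are illustrative.
import numpy as np


def min_max_scale_ref(train, test, eps=1e-12):
    lo, hi = train.min(axis=0), train.max(axis=0)
    span = np.maximum(hi - lo, eps)      # guard against constant columns
    return (train - lo) / span, (test - lo) / span


def standard_scale_ref(train, test, eps=1e-12):
    mu, sd = train.mean(axis=0), train.std(axis=0)
    sd = np.maximum(sd, eps)             # guard against zero variance
    return (train - mu) / sd, (test - mu) / sd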
from __future__ import print_function
import numpy as np
import tensorflow as tf
from preprocess.unsw import generate_dataset
from netlearner.utils import hyperparameter_summary
from netlearner.utils import augment_quantiled, permutate_dataset
from netlearner.multilayer_perceptron import MultilayerPerceptron

generate_dataset(True)
raw_train_dataset = np.load('UNSW/train_dataset.npy')
train_labels = np.load('UNSW/train_labels.npy')
raw_valid_dataset = np.load('UNSW/valid_dataset.npy')
valid_labels = np.load('UNSW/valid_labels.npy')
raw_test_dataset = np.load('UNSW/test_dataset.npy')
test_labels = np.load('UNSW/test_labels.npy')
# Indices of the continuous columns to augment with their quantiles.
# list(...) keeps the concatenation valid on both Python 2 and 3
# (range objects cannot be joined with + in Python 3).
columns = np.array(list(range(1, 6)) + list(range(8, 16)) +
                   list(range(17, 19)) + list(range(23, 25)) + [26])
[train_dataset, valid_dataset, test_dataset] = augment_quantiled(
    raw_train_dataset, raw_valid_dataset, raw_test_dataset, columns)
permutate_dataset(train_dataset, train_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

num_samples, feature_size = train_dataset.shape
num_labels = train_labels.shape[1]
batch_size = 80
keep_prob = 0.80
beta = 0.00008
weights = [1.0, 1.0]
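# The script goes on to train netlearner's MultilayerPerceptron, whose
# exact API is not shown in this excerpt. As a rough, purely illustrative
# Keras equivalent wired to the same hyperparameters (the 800-unit layer
# width is an assumption borrowed from the hidden_size used elsewhere in
# this set):
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2

sketch = Sequential([
    Dense(800, activation='relu', input_dim=feature_size,
          kernel_regularizer=l2(beta)),
    Dropout(1.0 - keep_prob),  # Keras expects the fraction to drop
    Dense(num_labels, activation='softmax'),
])
sketch.compile(optimizer='adam', loss='categorical_crossentropy',
               metrics=['accuracy'])
# sketch.fit(train_dataset, train_labels, batch_size=batch_size,
#            validation_data=(valid_dataset, valid_labels))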
# (fragment: the opening lines of this script are not shown; the dict
# below was truncated mid-assignment and its target is reconstructed as
# `weights`, consistent with the override that follows)
from sklearn.model_selection import StratifiedKFold

# Per-class weights for the ten UNSW-NB15 classes (normal plus nine attack
# categories); immediately overridden to None, so this run is unweighted.
weights = {0: 1.0, 1: 8.0, 2: 3.0, 3: 3.0, 4: 8.0,
           5: 3.0, 6: 8.0, 7: 16.0, 8: 16.0, 9: 3.0}
weights = None
hidden_size = [800, 480]
fold = 5

unsw.generate_dataset(True, True, model_dir)
raw_X = np.load(data_dir + 'train_dataset.npy')
raw_X_test = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
y_flatten = np.argmax(y, axis=1)  # integer labels for stratified splitting
X, _, X_test = min_max_scale(raw_X, None, raw_X_test)
print('Train dataset', X.shape, y.shape, y_flatten.shape)
print('Test dataset', X_test.shape, y_test.shape)
feature_size = X.shape[1]
num_samples, num_classes = y.shape
skf = StratifiedKFold(n_splits=fold)
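# Sketch of how the splitter above is typically consumed: StratifiedKFold
# needs the integer labels (y_flatten), not the one-hot matrix. The fit
# call is illustrative; the model object is built elsewhere in the script.
for fold_idx, (train_idx, valid_idx) in enumerate(skf.split(X, y_flatten)):
    X_fold, y_fold = X[train_idx], y[train_idx]
    X_valid, y_valid = X[valid_idx], y[valid_idx]
    print('Fold %d' % fold_idx, X_fold.shape, X_valid.shape)
    # model.fit(X_fold, y_fold, validation_data=(X_valid, y_valid),
    #           class_weight=weights)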
import os
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from preprocess.unsw import generate_dataset
from netlearner.utils import min_max_scale, permutate_dataset


def build_sae_mlp(init_weights):
    # Wrapper inferred from context: the excerpt begins mid-file, and
    # init_weights ([kernel, bias] from sparse-autoencoder pretraining)
    # is produced elsewhere in the original script.
    il = Input(shape=(feature_size, ), name='input')
    h1 = Dense(encoder_size, activation='relu', name='h1')(il)
    h1 = Dropout(0.8)(h1)  # Keras Dropout drops this fraction (80% here)
    h2 = Dense(480, activation='sigmoid', name='h2')(h1)
    sm = Dense(num_classes, activation='softmax', name='output')(h2)
    mlp = Model(inputs=il, outputs=sm, name='sae_mlp')
    mlp.compile(optimizer='adam', loss='categorical_crossentropy',
                metrics=['accuracy'])
    mlp.summary()
    # Seed the first hidden layer with the pretrained encoder weights.
    mlp.get_layer('h1').set_weights(init_weights)
    mlp.save(pretrained_mlp_path)
    return mlp


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'SparseAE/'
generate_dataset(True, True, model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'sae_mlp.h5'
raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
num_samples, num_classes = y.shape
feature_size = X.shape[1]
encoder_size = 800
num_epoch = 160
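# The saved model is presumably reloaded and fine-tuned on the labeled
# data once pretraining has produced and stored it; a minimal sketch with
# keras.models.load_model (fit settings beyond num_epoch are illustrative):
from keras.models import load_model

mlp = load_model(pretrained_mlp_path)
mlp.fit(X, y, epochs=num_epoch, validation_data=(X_test, y_test))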
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, Flatten
from preprocess.unsw import generate_dataset


def embed_feature(X_train, X_valid, X_test, vocabulary_dim, embedding_dim):
    # Wrapper inferred from context: the excerpt begins mid-function.
    # A randomly initialized (never fitted) embedding maps each integer
    # category code to a dense vector; compile is only needed so that
    # predict() can run.
    model = Sequential()
    model.add(Embedding(vocabulary_dim, embedding_dim, input_length=1))
    model.add(Flatten())
    model.compile('rmsprop', 'mse')
    e_train = model.predict(X_train)
    e_valid = model.predict(X_valid)
    e_test = model.predict(X_test)
    print(e_train.shape)
    # print(np.amax(e_train, axis=0), np.amin(e_train, axis=0))
    print(e_test.shape)
    # print(np.amax(e_test, axis=0), np.amin(e_test, axis=0))
    return e_train, e_valid, e_test


generate_dataset(one_hot_encode=False)
raw_train = np.load('UNSW/train_dataset.npy')
y_train = np.load('UNSW/train_labels.npy')
raw_valid = np.load('UNSW/valid_dataset.npy')
y_valid = np.load('UNSW/valid_labels.npy')
raw_test = np.load('UNSW/test_dataset.npy')
y_test = np.load('UNSW/test_labels.npy')
# The last three columns hold the integer-encoded symbolic features;
# everything before them is continuous.
train_cont = raw_train[:, :-3]
valid_cont = raw_valid[:, :-3]
test_cont = raw_test[:, :-3]
train_disc = raw_train[:, -3:]
valid_disc = raw_valid[:, -3:]
test_disc = raw_test[:, -3:]
print("Continuous dataset", train_cont.shape)
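# vocabulary_dim and embedding_dim are not set anywhere in this excerpt; a
# plausible per-column derivation, assuming the symbolic columns contain
# 0-based integer category codes (embed_feature is the inferred wrapper
# name from above):
for col in range(train_disc.shape[1]):
    merged = np.concatenate((train_disc[:, col], valid_disc[:, col],
                             test_disc[:, col]))
    vocab = int(merged.max()) + 1
    embed = max(2, int(np.ceil(np.log2(vocab))))
    e_tr, e_va, e_te = embed_feature(train_disc[:, col:col + 1],
                                     valid_disc[:, col:col + 1],
                                     test_disc[:, col:col + 1],
                                     vocab, embed)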
import os
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from preprocess.unsw import generate_dataset
from netlearner.utils import min_max_scale, permutate_dataset


def build_rbm_mlp(rbm_w, rbm_b):
    # Wrapper and the Input layer are reconstructed from context: the
    # excerpt begins mid-file, and the RBM pretraining that produces
    # rbm_w, rbm_b, and num_hidden_rbm is not shown.
    input_layer = Input(shape=(feature_size, ), name='input')
    h1 = Dense(num_hidden_rbm, activation='sigmoid',
               name='h1')(input_layer)
    h1 = Dropout(0.8)(h1)  # Keras Dropout drops this fraction (80% here)
    h2 = Dense(480, activation='sigmoid', name='h2')(h1)
    sm = Dense(num_classes, activation='softmax', name='output')(h2)
    mlp = Model(inputs=input_layer, outputs=sm, name='rbm_mlp')
    mlp.compile(optimizer='adam', loss='categorical_crossentropy',
                metrics=['accuracy'])
    mlp.summary()
    # Seed the first hidden layer with the pretrained RBM weights.
    mlp.get_layer('h1').set_weights([rbm_w, rbm_b])
    mlp.save(pretrained_mlp_path)
    return mlp


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'RBM/'
generate_dataset(binary_label=True, one_hot_encoding=True,
                 root_dir=model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'rbm_mlp.h5'
raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_valid_dataset = np.load(data_dir + 'valid_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y_test = np.load(data_dir + 'test_labels.npy')
[X, X_valid, X_test] = min_max_scale(raw_train_dataset, raw_valid_dataset,
                                     raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
(num_samples, num_classes) = y.shape
feature_size = X.shape[1]
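# Sanity-check sketch for the weight hand-off above (build_rbm_mlp is the
# inferred wrapper from this file; rbm_w/rbm_b come from pretraining not
# shown here). Dense.set_weights expects [kernel, bias] with kernel shaped
# (input_dim, units) and bias shaped (units,).
assert rbm_w.shape == (feature_size, num_hidden_rbm)
assert rbm_b.shape == (num_hidden_rbm,)
mlp = build_rbm_mlp(rbm_w, rbm_b)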