# Shared imports for the loader variants below; these helpers live in the
# NetLearner repo (preprocess.nslkdd, preprocess.unsw, netlearner.utils).
import numpy as np
from preprocess import nslkdd, unsw
from netlearner.utils import min_max_scale, permutate_dataset


def process_nsl(root='/home/naruto/NetLearner'):
    nslkdd.generate_datasets(binary_label=True, one_hot_encoding=False)
    raw_X_train = np.load('%s/NSLKDD/train_dataset.npy' % root)
    y_train = np.load('%s/NSLKDD/train_labels.npy' % root)
    raw_X_test = np.load('%s/NSLKDD/test_dataset.npy' % root)
    y_test = np.load('%s/NSLKDD/test_labels.npy' % root)
    # Scale with training statistics only, then shuffle each split.
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
def process_nsl(root='SharedAutoEncoder/'):
    nslkdd.generate_datasets(True, one_hot_encoding=True, root=root)
    raw_X_train = np.load(root + 'NSLKDD/train_dataset.npy')
    y_train = np.load(root + 'NSLKDD/train_labels.npy')
    raw_X_test = np.load(root + 'NSLKDD/test_dataset.npy')
    y_test = np.load(root + 'NSLKDD/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
def process_unsw(root='/home/naruto/NetLearner'):
    unsw.generate_dataset(False)
    raw_X_train = np.load('%s/UNSW/train_dataset.npy' % root)
    y_train = np.load('%s/UNSW/train_labels.npy' % root)
    raw_X_test = np.load('%s/UNSW/test_dataset.npy' % root)
    y_test = np.load('%s/UNSW/test_labels.npy' % root)
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
def process_nsl():
    nslkdd.generate_datasets(binary_label=True)
    raw_X_train = np.load('NSLKDD/train_dataset.npy')
    y_train = np.load('NSLKDD/train_labels.npy')
    raw_X_test = np.load('NSLKDD/test_dataset.npy')
    y_test = np.load('NSLKDD/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
def process_unsw():
    unsw.generate_dataset(True)
    raw_X_train = np.load('UNSW/train_dataset.npy')
    y_train = np.load('UNSW/train_labels.npy')
    raw_X_test = np.load('UNSW/test_dataset.npy')
    y_test = np.load('UNSW/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)
    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train,
            'X_test': X_test, 'y_test': y_test}
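All five loader variants return the same dict shape, so downstream experiment scripts can stay agnostic about which dataset they run on. A minimal usage sketch, assuming only the keys returned above:

data = process_unsw()                      # or process_nsl()
X, y = data['X'], data['y']
X_test, y_test = data['X_test'], data['y_test']
# Rows of features and labels must line up after the in-place shuffle.
assert X.shape[0] == y.shape[0]
assert X_test.shape[0] == y_test.shape[0]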
from __future__ import print_function, division
import numpy as np
from netlearner.utils import min_max_scale, maybe_npsave
from netlearner.rbm import RestrictedBoltzmannMachine
import tensorflow as tf
from math import ceil

tf.set_random_seed(9876)
encoder_name = 'RBM'
raw_train_dataset = np.load('NSLKDD/train_dataset.npy')
train_labels = np.load('NSLKDD/train_ref.npy')
raw_valid_dataset = np.load('NSLKDD/valid_dataset.npy')
valid_labels = np.load('NSLKDD/valid_ref.npy')
raw_test_dataset = np.load('NSLKDD/test_dataset.npy')
[train_dataset, valid_dataset, test_dataset] = min_max_scale(
    raw_train_dataset, raw_valid_dataset, raw_test_dataset)
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape)
num_samples = train_dataset.shape[0]
feature_size = train_dataset.shape[1]
num_hidden_rbm = 100
rbm_lr = 0.01
batch_size = 10
num_epochs = 40
num_steps = ceil(train_dataset.shape[0] / batch_size * num_epochs)
rbm = RestrictedBoltzmannMachine(feature_size, num_hidden_rbm, batch_size,
                                 trans_func=tf.nn.sigmoid,
                                 name=encoder_name)
print('Restricted Boltzmann Machine built')
rbm.train_with_labels(train_dataset, train_labels, int(num_steps),
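min_max_scale is a NetLearner helper; judging by its call sites, it fits per-feature minima and maxima on the training split and applies the same affine map to the validation and test splits. A plain-NumPy sketch of that assumed behavior (the name min_max_scale_sketch is hypothetical):

import numpy as np

def min_max_scale_sketch(train, valid, test):
    # Fit column-wise min/max on the training split only, so no
    # validation or test statistics leak into the scaling.
    lo, hi = train.min(axis=0), train.max(axis=0)
    span = np.where(hi > lo, hi - lo, 1.0)  # guard all-constant columns
    scale = lambda d: None if d is None else (d - lo) / span
    return [scale(train), scale(valid), scale(test)]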
    9: 3.0
}
weights = None
hidden_size = [800, 480]
fold = 5
unsw.generate_dataset(True, True, model_dir)
# raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
# raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
# X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
raw_X = np.load(data_dir + 'train_dataset.npy')
raw_X_test = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
y_flatten = np.argmax(y, axis=1)
X, _, X_test = min_max_scale(raw_X, None, raw_X_test)
print('Train dataset', X.shape, y.shape, y_flatten.shape)
print('Test dataset', X_test.shape, y_test.shape)
feature_size = X.shape[1]
num_samples, num_classes = y.shape
skf = StratifiedKFold(n_splits=fold)
hist = {'train_loss': [], 'valid_loss': []}
train_loss, valid_loss = [], []
for train_index, valid_index in skf.split(X, y_flatten):
    train_dataset, valid_dataset = X[train_index], X[valid_index]
    train_labels, valid_labels = y[train_index], y[valid_index]
    mlp = build_model()
    history = mlp.fit(train_dataset, train_labels,
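StratifiedKFold.split expects integer class labels rather than one-hot rows, which is why y_flatten = np.argmax(y, axis=1) is computed before the loop. A self-contained toy run showing the mechanics:

import numpy as np
from sklearn.model_selection import StratifiedKFold

y_onehot = np.eye(2)[[0, 0, 0, 1, 1, 1]]   # six one-hot labels, two classes
y_int = np.argmax(y_onehot, axis=1)        # what StratifiedKFold wants
X_toy = np.arange(12).reshape(6, 2)
for tr, va in StratifiedKFold(n_splits=3).split(X_toy, y_int):
    # each validation fold preserves the 50/50 class ratio of the full set
    print(tr, va, y_int[va])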
    mlp.summary()
    mlp.get_layer('h1').set_weights(init_weights)
    mlp.save(pretrained_mlp_path)


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'SparseAE/'
generate_dataset(True, True, model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'sae_mlp.h5'
raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
num_samples, num_classes = y.shape
feature_size = X.shape[1]
encoder_size = 800
num_epoch = 160
batch_size = 80
class_weights = None
sae_weights = pretrain_model()
build_model(sae_weights)
fold = 5
skf = StratifiedKFold(n_splits=fold)
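The pretrain-then-transfer step above hinges on Keras layers storing weights as a [kernel, bias] list. A minimal sketch of that hand-off with hypothetical layer sizes; the repo's actual pretrain_model/build_model are defined elsewhere, and the autoencoder fit is elided:

from keras.layers import Input, Dense
from keras.models import Model

feature_size, encoder_size = 196, 800            # hypothetical sizes
x = Input(shape=(feature_size,))
encoded = Dense(encoder_size, activation='sigmoid', name='encoder')(x)
decoded = Dense(feature_size, activation='sigmoid')(encoded)
autoencoder = Model(x, decoded)
# ... compile and fit the autoencoder on (X, X) here ...
sae_weights = autoencoder.get_layer('encoder').get_weights()  # [kernel, bias]

inp = Input(shape=(feature_size,))
h1 = Dense(encoder_size, activation='sigmoid', name='h1')(inp)
mlp = Model(inp, Dense(2, activation='softmax')(h1))
mlp.get_layer('h1').set_weights(sae_weights)  # shapes match, so this is legal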
for i in range(3):
    [ftr, fv, fte] = embedding_symbolic_feature(train_disc[:, i],
                                                valid_disc[:, i],
                                                test_disc[:, i])
    X_train = np.concatenate((X_train, ftr), axis=1)
    print(X_train.shape)
    print(X_valid.shape, fv.shape)
    X_valid = np.concatenate((X_valid, fv), axis=1)
    X_test = np.concatenate((X_test, fte), axis=1)

X_train = np.concatenate((X_train, train_disc), axis=1)
X_valid = np.concatenate((X_valid, valid_disc), axis=1)
X_test = np.concatenate((X_test, test_disc), axis=1)
print("Augmenting discrete & embedding dataset", X_train.shape)
[X_train, X_valid, X_test] = min_max_scale(X_train, X_valid, X_test)
print("Min-max scaled dataset", X_train.shape, X_test.shape)
X_train, y_train = permutate_dataset(X_train, y_train)
X_valid, y_valid = permutate_dataset(X_valid, y_valid, 'Valid')
X_test, y_test = permutate_dataset(X_test, y_test, 'Test')
num_samples, num_features = X_train.shape
num_classes = y_train.shape[1]
batch_size = 40
keep_prob = 0.8
beta = 0.000
weights = [1.0, 1.0]
num_epochs = [160]
init_lrs = [0.001]
hidden_layer_sizes = [
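embedding_symbolic_feature is project code; its call pattern suggests it maps each symbolic column into a dense representation using a vocabulary shared across the three splits. A crude stand-in under that assumption, using one-hot expansion in place of learned embeddings (the name embed_symbolic_sketch is hypothetical):

import numpy as np

def embed_symbolic_sketch(train_col, valid_col, test_col):
    # One vocabulary over all splits so indices agree everywhere.
    values = np.unique(np.concatenate([train_col, valid_col, test_col]))
    vocab = {v: i for i, v in enumerate(values)}
    def onehot(col):
        out = np.zeros((len(col), len(vocab)))
        out[np.arange(len(col)), [vocab[v] for v in col]] = 1.0
        return out
    return [onehot(train_col), onehot(valid_col), onehot(test_col)]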
    mlp.get_layer('h1').set_weights([rbm_w, rbm_b])
    mlp.save(pretrained_mlp_path)


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'RBM/'
generate_dataset(binary_label=True, one_hot_encoding=True,
                 root_dir=model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'rbm_mlp.h5'
raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_valid_dataset = np.load(data_dir + 'valid_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y_test = np.load(data_dir + 'test_labels.npy')
[X, X_valid, X_test] = min_max_scale(raw_train_dataset,
                                     raw_valid_dataset,
                                     raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
(num_samples, num_classes) = y.shape
feature_size = X.shape[1]
num_epoch = 240
batch_size = 96
weights = None
pretrain_model()
fold = 5
skf = StratifiedKFold(n_splits=fold)
hist = {'train_loss': [], 'valid_loss': []}
train_loss, valid_loss = [], []
y_flatten = np.argmax(y, axis=1)
for train_index, valid_index in skf.split(X, y_flatten):
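set_weights([rbm_w, rbm_b]) only succeeds if the RBM export already matches Dense's weight layout. A shape-check sketch with rbm_w/rbm_b as stand-ins for whatever the RBM training actually saves (sizes hypothetical):

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model

feature_size, num_hidden_rbm = 196, 800           # hypothetical sizes
rbm_w = 0.01 * np.random.randn(feature_size, num_hidden_rbm)  # visible-to-hidden
rbm_b = np.zeros(num_hidden_rbm)                              # hidden-unit biases

inp = Input(shape=(feature_size,))
probe = Model(inp, Dense(num_hidden_rbm, name='h1')(inp))
# Dense stores weights as [kernel (input_dim, units), bias (units,)];
# an export in hidden-by-visible orientation would need a transpose first.
probe.get_layer('h1').set_weights([rbm_w, rbm_b])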
import numpy as np
from visualize.feature_metrics import plot_feature_importance
from visualize.feature_metrics import plot_pca_components
from preprocess.unsw import generate_dataset
from netlearner.utils import min_max_scale, augment_quantiled
from netlearner.utils import quantile_transform

generate_dataset(one_hot_encode=False)
raw = np.load('UNSW/train_dataset.npy')
train_labels = np.load('UNSW/train_labels.npy')
y = np.argmax(train_labels, 1)
plot_feature_importance(raw, y, 'UNSW', 'raw')
# list(...) keeps this valid on Python 3, where range() is not a list.
columns = np.array(list(range(1, 6)) + list(range(8, 16)) +
                   list(range(17, 19)) + list(range(23, 25)) + [26])
minmax, _, _ = min_max_scale(raw, None, None)
augment = augment_quantiled(raw, None, None, columns)
replace = quantile_transform(minmax, None, None, columns)
plot_pca_components(minmax, y, 'UNSW', 'raw')
plot_pca_components(augment, y, 'UNSW', 'augment_quantile')
plot_pca_components(replace, y, 'UNSW', 'quantile_transform')
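augment_quantiled and quantile_transform are NetLearner helpers; their names and arguments suggest one appends quantile-mapped copies of the listed heavy-tailed columns while the other replaces those columns in place. A rough analogue using scikit-learn's QuantileTransformer, as an assumption rather than the project's exact code:

import numpy as np
from sklearn.preprocessing import QuantileTransformer

raw = np.random.lognormal(size=(1000, 30))      # stand-in for the UNSW matrix
columns = np.array(list(range(1, 6)) + [26])    # a few heavy-tailed columns
qt = QuantileTransformer(output_distribution='uniform')
mapped = qt.fit_transform(raw[:, columns])
augmented = np.concatenate([raw, mapped], axis=1)  # "augment": append copies
replaced = raw.copy()
replaced[:, columns] = mapped                      # "replace": overwrite in place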
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
import numpy as np
import os
import pickle
# These two helpers come from the NetLearner repo, as in the scripts above.
from preprocess.unsw import generate_dataset
from netlearner.utils import min_max_scale

os.environ['CUDA_VISIBLE_DEVICES'] = '2'
model_dir = 'KerasMLP/'
generate_dataset(False, True, model_dir)
data_dir = model_dir + 'UNSW/'
mlp_path = data_dir + 'mlp.h5'
train_dataset = np.load(data_dir + 'train_dataset.npy')
test_dataset = np.load(data_dir + 'test_dataset.npy')
train_labels = np.load(data_dir + 'train_labels.npy')
test_labels = np.load(data_dir + 'test_labels.npy')
train_dataset, _, test_dataset = min_max_scale(train_dataset, None,
                                               test_dataset)
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
batch_size = 40
keep_prob = 0.8
num_epoch = 240
tail = 200
incremental = False
if incremental is False:
    num_samples, num_classes = train_labels.shape
    feature_size = train_dataset.shape[1]
    hidden_size = [400, 256]
    input_layer = Input(shape=(feature_size, ), name='input')
    h1 = Dense(hidden_size[0], activation='tanh', name='h1')(input_layer)
    # Keras Dropout takes the fraction to DROP, not a keep probability,
    # so passing keep_prob=0.8 directly would drop 80% of activations.
    h1 = Dropout(1 - keep_prob)(h1)
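The script breaks off mid-model; a generic completion of such a functional-API classifier head, with hypothetical layer sizes rather than the repo's exact topology:

from keras.models import Model
from keras.layers import Input, Dense, Dropout

feature_size, num_classes = 196, 2              # hypothetical sizes
inputs = Input(shape=(feature_size,), name='input')
h1 = Dense(400, activation='tanh', name='h1')(inputs)
h1 = Dropout(0.2)(h1)                           # drop 20%, i.e. keep_prob = 0.8
h2 = Dense(256, activation='tanh', name='h2')(h1)
h2 = Dropout(0.2)(h2)
outputs = Dense(num_classes, activation='softmax', name='output')(h2)
mlp = Model(inputs=inputs, outputs=outputs)
mlp.compile(optimizer='adam', loss='categorical_crossentropy',
            metrics=['accuracy'])

With one-hot labels as loaded above, categorical_crossentropy plus a softmax output is the standard pairing; mlp.fit(train_dataset, train_labels, batch_size=batch_size, epochs=num_epoch) would then train it.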