Example #1
def process_nsl(root='/home/naruto/NetLearner'):
    nslkdd.generate_datasets(binary_label=True, one_hot_encoding=False)
    raw_X_train = np.load('%s/NSLKDD/train_dataset.npy' % root)
    y_train = np.load('%s/NSLKDD/train_labels.npy' % root)
    raw_X_test = np.load('%s/NSLKDD/test_dataset.npy' % root)
    y_test = np.load('%s/NSLKDD/test_labels.npy' % root)
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    # capture the shuffled arrays; the helper returns them (see later examples)
    X_train, y_train = permutate_dataset(X_train, y_train)
    X_test, y_test = permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
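
Both helpers above come from netlearner.utils but are never shown on this page. As a rough guide only, here is a minimal sketch of what they plausibly do; the real implementations may differ (e.g. in seeding and logging):

import numpy as np

def min_max_scale(X_train, X_valid, X_test):
    # Fit min/max on the training split only, then rescale every
    # split into [0, 1] with those same statistics.
    lo, hi = X_train.min(axis=0), X_train.max(axis=0)
    span = np.where(hi > lo, hi - lo, 1.0)  # guard constant columns

    def scale(X):
        return None if X is None else (X - lo) / span

    return [scale(X_train), scale(X_valid), scale(X_test)]

def permutate_dataset(X, y, name='Train'):
    # Shuffle samples and labels with one shared permutation;
    # the name argument is presumably just for logging.
    perm = np.random.permutation(X.shape[0])
    print('%s set permuted' % name)
    return X[perm], y[perm]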
Example #2
def process_nsl(root='SharedAutoEncoder/'):
    nslkdd.generate_datasets(True, one_hot_encoding=True, root=root)
    raw_X_train = np.load(root + 'NSLKDD/train_dataset.npy')
    y_train = np.load(root + 'NSLKDD/train_labels.npy')
    raw_X_test = np.load(root + 'NSLKDD/test_dataset.npy')
    y_test = np.load(root + 'NSLKDD/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    X_train, y_train = permutate_dataset(X_train, y_train)
    X_test, y_test = permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
Example #3
def process_unsw(root='/home/naruto/NetLearner'):
    unsw.generate_dataset(False)
    raw_X_train = np.load('%s/UNSW/train_dataset.npy' % root)
    y_train = np.load('%s/UNSW/train_labels.npy' % root)
    raw_X_test = np.load('%s/UNSW/test_dataset.npy' % root)
    y_test = np.load('%s/UNSW/test_labels.npy' % root)
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    X_train, y_train = permutate_dataset(X_train, y_train)
    X_test, y_test = permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
Example #4
def process_nsl():
    nslkdd.generate_datasets(binary_label=True)
    raw_X_train = np.load('NSLKDD/train_dataset.npy')
    y_train = np.load('NSLKDD/train_labels.npy')
    raw_X_test = np.load('NSLKDD/test_dataset.npy')
    y_test = np.load('NSLKDD/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    X_train, y_train = permutate_dataset(X_train, y_train)
    X_test, y_test = permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
Example #5
def process_unsw():
    unsw.generate_dataset(True)
    raw_X_train = np.load('UNSW/train_dataset.npy')
    y_train = np.load('UNSW/train_labels.npy')
    raw_X_test = np.load('UNSW/test_dataset.npy')
    y_test = np.load('UNSW/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    X_train, y_train = permutate_dataset(X_train, y_train)
    X_test, y_test = permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
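
For reference, a caller would consume the dictionary returned by these process_* functions along these lines (names below are illustrative):

data = process_unsw()
X, y = data['X'], data['y']
X_test, y_test = data['X_test'], data['y_test']
# fit any classifier on (X, y), then evaluate it on (X_test, y_test)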
Example #6
from __future__ import print_function, division
import numpy as np
from netlearner.utils import min_max_scale, maybe_npsave
from netlearner.rbm import RestrictedBoltzmannMachine
import tensorflow as tf
from math import ceil

tf.set_random_seed(9876)
encoder_name = 'RBM'
raw_train_dataset = np.load('NSLKDD/train_dataset.npy')
train_labels = np.load('NSLKDD/train_ref.npy')
raw_valid_dataset = np.load('NSLKDD/valid_dataset.npy')
valid_labels = np.load('NSLKDD/valid_ref.npy')
raw_test_dataset = np.load('NSLKDD/test_dataset.npy')
[train_dataset, valid_dataset, test_dataset] = min_max_scale(
    raw_train_dataset, raw_valid_dataset, raw_test_dataset)
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape)

num_samples = train_dataset.shape[0]
feature_size = train_dataset.shape[1]
num_hidden_rbm = 100
rbm_lr = 0.01
batch_size = 10
num_epochs = 40
num_steps = ceil(train_dataset.shape[0] / batch_size * num_epochs)
rbm = RestrictedBoltzmannMachine(feature_size, num_hidden_rbm,
                                 batch_size, trans_func=tf.nn.sigmoid,
                                 name=encoder_name)
print('Restricted Boltzmann Machine built')
rbm.train_with_labels(train_dataset, train_labels, int(num_steps),
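
The train_with_labels call above is truncated in the source. For intuition about what the RestrictedBoltzmannMachine fits, here is a self-contained one-step contrastive divergence (CD-1) sketch in plain NumPy; it is an illustration under assumed hyperparameters, not the netlearner implementation, and train_rbm_cd1 is a hypothetical name:

import numpy as np

def train_rbm_cd1(X, num_hidden, lr=0.01, epochs=40, batch=10, seed=9876):
    # Minimal CD-1: one Gibbs step per mini-batch update.
    rng = np.random.default_rng(seed)
    n, d = X.shape
    W = rng.normal(0.0, 0.01, size=(d, num_hidden))
    b_v, b_h = np.zeros(d), np.zeros(num_hidden)
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    for _ in range(epochs):
        for i in range(0, n, batch):
            v0 = X[i:i + batch]
            h0 = sigmoid(v0 @ W + b_h)                  # positive phase
            h0_sample = (h0 > rng.random(h0.shape))
            v1 = sigmoid(h0_sample @ W.T + b_v)         # reconstruction
            h1 = sigmoid(v1 @ W + b_h)                  # negative phase
            W += lr * (v0.T @ h0 - v1.T @ h1) / len(v0)
            b_v += lr * (v0 - v1).mean(axis=0)
            b_h += lr * (h0 - h1).mean(axis=0)
    return W, b_h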
Example #7
    9: 3.0
}
weights = None
hidden_size = [800, 480]
fold = 5

unsw.generate_dataset(True, True, model_dir)
# raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
# raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
# X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
raw_X = np.load(data_dir + 'train_dataset.npy')
raw_X_test = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
y_flatten = np.argmax(y, axis=1)
X, _, X_test = min_max_scale(raw_X, None, raw_X_test)
print('Train dataset', X.shape, y.shape, y_flatten.shape)
print('Test dataset', X_test.shape, y_test.shape)

feature_size = X.shape[1]
num_samples, num_classes = y.shape
skf = StratifiedKFold(n_splits=fold)
hist = {'train_loss': [], 'valid_loss': []}
train_loss, valid_loss = [], []

for train_index, valid_index in skf.split(X, y_flatten):
    train_dataset, valid_dataset = X[train_index], X[valid_index]
    train_labels, valid_labels = y[train_index], y[valid_index]
    mlp = build_model()
    history = mlp.fit(train_dataset,
                      train_labels,
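
The fit call above is truncated in the source, and build_model() is defined above this excerpt and not shown. Given hidden_size = [800, 480] and the Keras style used elsewhere on this page, a plausible sketch (hypothetical, not the original) is:

from keras.models import Model
from keras.layers import Input, Dense, Dropout

def build_model():
    # Two dense hidden layers sized from hidden_size, softmax output.
    inputs = Input(shape=(feature_size, ), name='input')
    h = Dense(hidden_size[0], activation='relu', name='h1')(inputs)
    h = Dropout(0.2)(h)
    h = Dense(hidden_size[1], activation='relu', name='h2')(h)
    outputs = Dense(num_classes, activation='softmax', name='output')(h)
    mlp = Model(inputs, outputs)
    mlp.compile(optimizer='adam', loss='categorical_crossentropy',
                metrics=['accuracy'])
    return mlp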
Example #8
    mlp.summary()
    mlp.get_layer('h1').set_weights(init_weights)
    mlp.save(pretrained_mlp_path)


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'SparseAE/'
generate_dataset(True, True, model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'sae_mlp.h5'

raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
num_samples, num_classes = y.shape
feature_size = X.shape[1]
encoder_size = 800
num_epoch = 160
batch_size = 80
class_weights = None

sae_weights = pretrain_model()
build_model(sae_weights)

fold = 5
skf = StratifiedKFold(n_splits=fold)
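
pretrain_model() and build_model(sae_weights) are also defined above this excerpt. As a hedged sketch of the sparse-autoencoder pretraining step (hypothetical; the original may use a different sparsity penalty or optimizer):

from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers

def pretrain_model():
    # Train a one-hidden-layer sparse autoencoder on X and return the
    # encoder weights, later copied into the MLP's 'h1' layer.
    inputs = Input(shape=(feature_size, ))
    encoded = Dense(encoder_size, activation='sigmoid',
                    activity_regularizer=regularizers.l1(1e-5))(inputs)
    decoded = Dense(feature_size, activation='sigmoid')(encoded)
    autoencoder = Model(inputs, decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    autoencoder.fit(X, X, epochs=num_epoch, batch_size=batch_size)
    return autoencoder.layers[1].get_weights()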
Example #9
for i in range(3):
    [ftr, fv, fte] = embedding_symbolic_feature(
        train_disc[:, i], valid_disc[:, i], test_disc[:, i])
    X_train = np.concatenate((X_train, ftr), axis=1)
    print(X_train.shape)
    print(X_valid.shape, fv.shape)
    X_valid = np.concatenate((X_valid, fv), axis=1)
    X_test = np.concatenate((X_test, fte), axis=1)

X_train = np.concatenate((X_train, train_disc), axis=1)
X_valid = np.concatenate((X_valid, valid_disc), axis=1)
X_test = np.concatenate((X_test, test_disc), axis=1)

print("Augmenting discrete & embedding dataset", X_train.shape)
[X_train, X_valid, X_test] = min_max_scale(X_train, X_valid, X_test)
print("Min-max scaled dataset", X_train.shape, X_test.shape)

X_train, y_train = permutate_dataset(X_train, y_train)
X_valid, y_valid = permutate_dataset(X_valid, y_valid, 'Valid')
X_test, y_test = permutate_dataset(X_test, y_test, 'Test')

num_samples, num_features = X_train.shape
num_classes = y_train.shape[1]
batch_size = 40
keep_prob = 0.8
beta = 0.000
weights = [1.0, 1.0]
num_epochs = [160]
init_lrs = [0.001]
hidden_layer_sizes = [
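
The hidden_layer_sizes list is cut off in the source. The embedding_symbolic_feature helper used at the top of this example is also unshown; one plausible reading is a one-hot encoding of a symbolic column fitted consistently across the three splits. A minimal sketch (hypothetical):

import numpy as np

def embedding_symbolic_feature(train_col, valid_col, test_col):
    # One-hot encode a symbolic column with a vocabulary built
    # from all three splits, so the encodings are aligned.
    symbols = sorted(set(train_col) | set(valid_col) | set(test_col))
    index = {s: i for i, s in enumerate(symbols)}

    def one_hot(col):
        out = np.zeros((len(col), len(symbols)))
        out[np.arange(len(col)), [index[s] for s in col]] = 1.0
        return out

    return [one_hot(train_col), one_hot(valid_col), one_hot(test_col)]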
Example #10
    mlp.get_layer('h1').set_weights([rbm_w, rbm_b])
    mlp.save(pretrained_mlp_path)


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'RBM/'
generate_dataset(binary_label=True, one_hot_encoding=True, root_dir=model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'rbm_mlp.h5'

raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_valid_dataset = np.load(data_dir + 'valid_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y_test = np.load(data_dir + 'test_labels.npy')
[X, X_valid, X_test] = min_max_scale(raw_train_dataset, raw_valid_dataset,
                                     raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
(num_samples, num_classes) = y.shape
feature_size = X.shape[1]
num_epoch = 240
batch_size = 96
weights = None
pretrain_model()
fold = 5
skf = StratifiedKFold(n_splits=fold)
hist = {'train_loss': [], 'valid_loss': []}
train_loss, valid_loss = [], []
y_flatten = np.argmax(y, axis=1)
for train_index, valid_index in skf.split(X, y_flatten):
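
The cross-validation loop body is truncated here. For the weight transplant at the top of this example to work, the RBM's parameters must match the shapes Keras expects for a Dense layer:

# Shape contract for mlp.get_layer('h1').set_weights([rbm_w, rbm_b]),
# writing num_hidden for the RBM's hidden-layer width:
#   rbm_w.shape == (feature_size, num_hidden)  # Dense kernel
#   rbm_b.shape == (num_hidden,)               # Dense bias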
Example #11
import numpy as np
from visualize.feature_metrics import plot_feature_importance
from visualize.feature_metrics import plot_pca_components

from preprocess.unsw import generate_dataset
from netlearner.utils import min_max_scale, augment_quantiled
from netlearner.utils import quantile_transform

generate_dataset(one_hot_encode=False)
raw = np.load('UNSW/train_dataset.npy')
train_labels = np.load('UNSW/train_labels.npy')
y = np.argmax(train_labels, 1)

plot_feature_importance(raw, y, 'UNSW', 'raw')
columns = np.array(
    list(range(1, 6)) + list(range(8, 16)) + list(range(17, 19)) +
    list(range(23, 25)) + [26])  # list(...) so the concatenation also runs on Python 3
minmax, _, _ = min_max_scale(raw, None, None)
augment = augment_quantiled(raw, None, None, columns)
replace = quantile_transform(minmax, None, None, columns)
plot_pca_components(minmax, y, 'UNSW', 'raw')
plot_pca_components(augment, y, 'UNSW', 'augment_quantile')
plot_pca_components(replace, y, 'UNSW', 'quantile_transform')
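
augment_quantiled and quantile_transform come from netlearner.utils and are not shown. As a rough stand-in built on scikit-learn (hypothetical; the library's own column handling may differ):

import numpy as np
from sklearn.preprocessing import QuantileTransformer

def quantile_transform_sketch(X, columns):
    # Replace only the selected columns with their quantile-transformed
    # values, leaving the remaining columns untouched.
    X = np.array(X, copy=True)
    qt = QuantileTransformer(output_distribution='uniform')
    X[:, columns] = qt.fit_transform(X[:, columns])
    return X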
Example #12
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
import os
import pickle

os.environ['CUDA_VISIBLE_DEVICES'] = '2'
model_dir = 'KerasMLP/'
generate_dataset(False, True, model_dir)
data_dir = model_dir + 'UNSW/'
mlp_path = data_dir + 'mlp.h5'

train_dataset = np.load(data_dir + 'train_dataset.npy')
test_dataset = np.load(data_dir + 'test_dataset.npy')
train_labels = np.load(data_dir + 'train_labels.npy')
test_labels = np.load(data_dir + 'test_labels.npy')
train_dataset, _, test_dataset = min_max_scale(train_dataset, None,
                                               test_dataset)
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

batch_size = 40
keep_prob = 0.8
num_epoch = 240
tail = 200
incremental = False
if not incremental:
    num_samples, num_classes = train_labels.shape
    feature_size = train_dataset.shape[1]
    hidden_size = [400, 256]
    input_layer = Input(shape=(feature_size, ), name='input')
    h1 = Dense(hidden_size[0], activation='tanh', name='h1')(input_layer)
    h1 = Dropout(1 - keep_prob)(h1)  # Keras Dropout takes the drop rate, not the keep probability
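    # --- The source excerpt cuts off here. The rest of this branch is a
    # --- hypothetical continuation sketch, not the original code: finish
    # --- the model, compile, train and save it.
    h2 = Dense(hidden_size[1], activation='tanh', name='h2')(h1)
    h2 = Dropout(1 - keep_prob)(h2)
    sm = Dense(num_classes, activation='softmax', name='output')(h2)
    mlp = Model(inputs=input_layer, outputs=sm)
    mlp.compile(optimizer='adam', loss='categorical_crossentropy',
                metrics=['accuracy'])
    mlp.fit(train_dataset, train_labels, batch_size=batch_size,
            epochs=num_epoch)
    mlp.save(mlp_path)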