# Load the UNSW validation/test splits produced by preprocess.unsw.
# NOTE(review): raw_train is not defined in this chunk -- it is presumably
# loaded earlier (e.g. np.load('UNSW/train_dataset.npy')); confirm upstream.
raw_valid = np.load('UNSW/valid_dataset.npy')
y_valid = np.load('UNSW/valid_labels.npy')
raw_test = np.load('UNSW/test_dataset.npy')
y_test = np.load('UNSW/test_labels.npy')

# The last 3 columns hold the symbolic (discrete) features; everything
# before them is treated as continuous.
train_cont = raw_train[:, :-3]
valid_cont = raw_valid[:, :-3]
test_cont = raw_test[:, :-3]
train_disc = raw_train[:, -3:]
valid_disc = raw_valid[:, -3:]
test_disc = raw_test[:, -3:]

print("Continuous dataset", train_cont.shape)
# Indices of the continuous columns that get quantile-based augmentation.
# range() results are wrapped in list() so the concatenation works on both
# Python 2 and Python 3 (bare range objects cannot be added in Python 3).
columns = np.array(
    list(range(1, 6)) + list(range(8, 16)) + list(range(17, 19)) +
    list(range(23, 25)) + [26])
[X_train, X_valid, X_test] = augment_quantiled(train_cont, valid_cont,
                                               test_cont, columns)
print("Augmenting quantiled dataset", X_train.shape)

# Embed each of the 3 symbolic features and append the embedding columns
# to every split.
for i in range(3):
    [ftr, fv, fte] = embedding_symbolic_feature(train_disc[:, i],
                                                valid_disc[:, i],
                                                test_disc[:, i])
    X_train = np.concatenate((X_train, ftr), axis=1)
    print(X_train.shape)
    print(X_valid.shape, fv.shape)
    X_valid = np.concatenate((X_valid, fv), axis=1)
    X_test = np.concatenate((X_test, fte), axis=1)

# Also keep the raw discrete columns alongside their embeddings.
X_train = np.concatenate((X_train, train_disc), axis=1)
X_valid = np.concatenate((X_valid, valid_disc), axis=1)
X_test = np.concatenate((X_test, test_disc), axis=1)
# --- Esempio n. 2 ---
# NOTE(review): the scrape left a bare "Esempio n. 2" / "0" marker here
# (a snippet separator and vote count), which is a syntax error in Python;
# converted to a comment so the file can be parsed.
from preprocess.unsw import generate_dataset
from netlearner.utils import hyperparameter_summary
from netlearner.utils import augment_quantiled, permutate_dataset
from netlearner.multilayer_perceptron import MultilayerPerceptron

# Regenerate the UNSW dataset with one-hot encoded labels, then load all
# three splits from disk.
generate_dataset(True)
raw_train_dataset = np.load('UNSW/train_dataset.npy')
train_labels = np.load('UNSW/train_labels.npy')
raw_valid_dataset = np.load('UNSW/valid_dataset.npy')
valid_labels = np.load('UNSW/valid_labels.npy')
raw_test_dataset = np.load('UNSW/test_dataset.npy')
test_labels = np.load('UNSW/test_labels.npy')

# Continuous-column indices to augment with quantile features. Wrapping
# range() in list() keeps the concatenation valid on Python 3 as well as
# Python 2 (range objects cannot be added with + in Python 3).
columns = np.array(list(range(1, 6)) + list(range(8, 16)) +
                   list(range(17, 19)) + list(range(23, 25)) + [26])
[train_dataset, valid_dataset, test_dataset] = augment_quantiled(
    raw_train_dataset, raw_valid_dataset, raw_test_dataset, columns)
# Shuffle training examples and labels together (in place, per its use here).
permutate_dataset(train_dataset, train_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

# Model/training hyperparameters.
num_samples, feature_size = train_dataset.shape
num_labels = train_labels.shape[1]
batch_size = 80
keep_prob = 0.80      # dropout keep probability
beta = 0.00008        # L2 regularization strength
weights = [1.0, 1.0]  # per-class loss weights
num_epochs = [160]
init_lrs = [0.001]    # initial learning rate(s)
hidden_layer_sizes = [
                      [400, 400, 400, 400],
import numpy as np
from visualize.feature_metrics import plot_feature_importance
from visualize.feature_metrics import plot_pca_components

from preprocess.unsw import generate_dataset
from netlearner.utils import min_max_scale, augment_quantiled
from netlearner.utils import quantile_transform

# Regenerate the UNSW dataset with integer (non-one-hot) labels kept as a
# one-hot matrix on disk, then recover class indices with argmax.
generate_dataset(one_hot_encode=False)
raw = np.load('UNSW/train_dataset.npy')
train_labels = np.load('UNSW/train_labels.npy')
y = np.argmax(train_labels, 1)

plot_feature_importance(raw, y, 'UNSW', 'raw')
# Continuous-column indices to transform. list() wrappers keep the
# concatenation valid on Python 3 (range + range is Python-2-only).
columns = np.array(
    list(range(1, 6)) + list(range(8, 16)) + list(range(17, 19)) +
    list(range(23, 25)) + [26])
minmax, _, _ = min_max_scale(raw, None, None)
# NOTE(review): elsewhere in this file augment_quantiled is destructured
# into [train, valid, test]; here its result is used as a single dataset.
# Verify its return shape when the valid/test arguments are None.
augment = augment_quantiled(raw, None, None, columns)
replace = quantile_transform(minmax, None, None, columns)
plot_pca_components(minmax, y, 'UNSW', 'raw')
plot_pca_components(augment, y, 'UNSW', 'augment_quantile')
plot_pca_components(replace, y, 'UNSW', 'quantile_transform')