def generate_dataset(binary_label, one_hot_encoding, root_dir=''):
    """Load the UNSW-NB15 train/test CSVs, encode them, and save numpy datasets.

    Args:
        binary_label: truthy -> use the 2-class (normal/attack) labels;
            falsy -> use the 10-class attack-category labels.
        one_hot_encoding: truthy -> one-hot encode the symbolic features.
        root_dir: prefix prepended to the 'UNSW/...' output paths.

    Side effects: prints dataset shapes and writes train/test feature and
    label arrays via ``maybe_npsave``.
    """
    prefix = 'UNSW/UNSW_NB15_'
    train_name = prefix + 'training-set.csv'
    test_name = prefix + 'testing-set.csv'
    # Category maps must be discovered over BOTH files so train and test
    # share one consistent symbolic-value vocabulary.
    category_maps = discovery_category_map([train_name, test_name])
    num_train, sym_train, train_bin_labels, train_ten_labels = \
        load_csv(train_name, category_maps)
    num_test, sym_test, test_bin_labels, test_ten_labels = \
        load_csv(test_name, category_maps)
    # Idiom fix: test truthiness directly instead of `is True`.
    if one_hot_encoding:
        sym_train, sym_test = encode_symbolic_feature(sym_train, sym_test)
    if binary_label:
        train_labels = encode_labels(train_bin_labels)
        test_labels = encode_labels(test_bin_labels)
    else:
        train_labels = encode_labels(train_ten_labels, 10)
        test_labels = encode_labels(test_ten_labels, 10)
    # Numeric standardization deliberately disabled:
    # num_train, num_test = std_numeric_feature(num_train, num_test)
    # Final feature matrix = [numeric features | symbolic features].
    train_traffic = np.concatenate((num_train, sym_train), axis=1)
    test_traffic = np.concatenate((num_test, sym_test), axis=1)
    print('Trainset shape:', train_traffic.shape, train_labels.shape)
    maybe_npsave('%sUNSW/train_dataset' % root_dir, train_traffic)
    maybe_npsave('%sUNSW/train_labels' % root_dir, train_labels,
                 binary_label=binary_label)
    # Validation-set split deliberately disabled (was a no-op string
    # literal in the original; preserved here as comments):
    # valid_traffic, valid_labels = split_valid(test_traffic, test_labels)
    # print('Validset shape:', valid_traffic.shape, valid_labels.shape)
    # maybe_npsave('%sUNSW/valid_dataset' % root_dir, valid_traffic)
    # maybe_npsave('%sUNSW/valid_labels' % root_dir, valid_labels,
    #              binary_label=binary_label)
    print('Testset shape:', test_traffic.shape, test_labels.shape)
    maybe_npsave('%sUNSW/test_dataset' % root_dir, test_traffic)
    maybe_npsave('%sUNSW/test_labels' % root_dir, test_labels,
                 binary_label=binary_label)
# Train an RBM encoder and save encoded train/valid/test sets.
# NOTE(review): fragment of a larger function — `train_dataset`,
# `valid_dataset`, `test_dataset`, `feature_size`, `num_hidden_rbm`,
# `encoder_name`, and `rbm_lr` are defined before this view begins.
batch_size = 10
num_epochs = 40
# Total SGD steps = epochs * (samples / batch); ceil keeps a partial batch.
num_steps = ceil(train_dataset.shape[0] / batch_size * num_epochs)
rbm = RestrictedBoltzmannMachine(feature_size, num_hidden_rbm, batch_size,
                                 trans_func=tf.nn.sigmoid,
                                 name=encoder_name)
print('Restricted Boltzmann Machine built')
rbm.train_with_labels(train_dataset, train_labels, int(num_steps),
                      valid_dataset, rbm_lr)
# Reconstruction error on the held-out test set as a fit sanity check.
test_loss = rbm.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction error: %f" % test_loss)
# `hrand` seeds the hidden units for sampling-based encoding — one random
# matrix per dataset, shaped (num_samples, num_hidden_rbm).
hrand = np.random.random((train_dataset.shape[0], num_hidden_rbm))
rbm_train_dataset = rbm.encode_dataset(train_dataset, hrand)
hrand = np.random.random((valid_dataset.shape[0], num_hidden_rbm))
rbm_valid_dataset = rbm.encode_dataset(valid_dataset, hrand)
hrand = np.random.random((test_dataset.shape[0], num_hidden_rbm))
rbm_test_dataset = rbm.encode_dataset(test_dataset, hrand)
print('Encoded training set', rbm_train_dataset.shape)
print('Encoded valid set', rbm_valid_dataset.shape)
print('Encoded test set', rbm_test_dataset.shape)
# Persist the encoded datasets; `maybe_npsave` returns the saved filename.
tr_fn = maybe_npsave('trainset.' + encoder_name, rbm_train_dataset,
                     0, rbm_train_dataset.shape[0], True)
va_fn = maybe_npsave('validset.' + encoder_name, rbm_valid_dataset,
                     0, rbm_valid_dataset.shape[0], True)
te_fn = maybe_npsave('testset.' + encoder_name, rbm_test_dataset,
                     0, rbm_test_dataset.shape[0], True)
print('Encoded train set %s saved to %s' % (rbm_train_dataset.shape, tr_fn))
print('Encoded valid set %s saved to %s' % (rbm_valid_dataset.shape, va_fn))
print('Encoded test set %s saved to %s' % (rbm_test_dataset.shape, te_fn))
# NOTE(review): this fragment opens mid-way through an RBM constructor call
# whose opening line is outside this view; these are its trailing kwargs.
                          trans_func=tf.nn.sigmoid, num_labels=2,
                          name=encoder_name)
rbm.train_with_labels(train_dataset, train_labels, int(num_steps),
                      valid_dataset, rbm_lr)
# Reconstruction error on the test set as a fit sanity check.
test_loss = rbm.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction error: %f" % test_loss)
# Record the run's hyperparameters alongside the model directory.
hyperparameter = {
    '#hidden units': num_hidden_rbm,
    'init_lr': rbm_lr,
    'num_epochs': num_epoch,
    'num_steps': num_steps,
    'act_func': 'sigmoid',
    'batch_size': batch_size,
}
hyperparameter_summary(rbm.dirname, hyperparameter)
# `hrand` seeds the hidden units for sampling-based encoding — one random
# matrix per dataset, shaped (num_samples, num_hidden_rbm).
hrand = np.random.random((train_dataset.shape[0], num_hidden_rbm))
rbm_train_dataset = rbm.encode_dataset(train_dataset, hrand)
print('Encoded training set', rbm_train_dataset.shape)
hrand = np.random.random((valid_dataset.shape[0], num_hidden_rbm))
rbm_valid_dataset = rbm.encode_dataset(valid_dataset, hrand)
print('Encoded valid set', rbm_valid_dataset.shape)
hrand = np.random.random((test_dataset.shape[0], num_hidden_rbm))
rbm_test_dataset = rbm.encode_dataset(test_dataset, hrand)
print('Encoded test set', rbm_test_dataset.shape)
# Persist the encoded datasets under fixed '.rbm' names.
maybe_npsave('trainset.rbm', rbm_train_dataset, True)
maybe_npsave('validset.rbm', rbm_valid_dataset, True)
maybe_npsave('testset.rbm', rbm_test_dataset, True)
# Train a plain autoencoder on the NSL-KDD dataset and save encoded splits.
# NOTE(review): fragment of a larger function — `train_dataset`,
# `valid_dataset`, and `test_dataset` are loaded before this view begins.
train_labels = np.load('NSLKDD/train_ref.npy')
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape)
feature_size = train_dataset.shape[1]
encoder_size = 100  # width of the bottleneck / encoded representation
init_lr = 0.01
autoencoder = Autoencoder(feature_size, encoder_size,
                          transfer_func=tf.nn.sigmoid, name='AE')
batch_size = 20
num_steps = 1001
autoencoder.train_with_labels(train_dataset, train_labels, batch_size,
                              num_steps, init_lr, valid_dataset)
# Reconstruction loss on the test set as a fit sanity check.
test_loss = autoencoder.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction loss: %f" % test_loss)
# Encode all three splits with the trained model.
ae_train_dataset = autoencoder.encode_dataset(train_dataset)
ae_valid_dataset = autoencoder.encode_dataset(valid_dataset)
ae_test_dataset = autoencoder.encode_dataset(test_dataset)
# Persist the encoded datasets; `maybe_npsave` returns the saved filename.
tr_fn = maybe_npsave('trainset.ae', ae_train_dataset,
                     0, ae_train_dataset.shape[0], True)
va_fn = maybe_npsave('validset.ae', ae_valid_dataset,
                     0, ae_valid_dataset.shape[0], True)
te_fn = maybe_npsave('testset.ae', ae_test_dataset,
                     0, ae_test_dataset.shape[0], True)
print('Encoded train set %s saved to %s' % (ae_train_dataset.shape, tr_fn))
print('Encoded valid set %s saved to %s' % (ae_valid_dataset.shape, va_fn))
print('Encoded test set %s saved to %s' % (ae_test_dataset.shape, te_fn))
# Train a sparse autoencoder and save encoded train/valid/test splits.
# NOTE(review): fragment of a larger function — `train_dataset`,
# `valid_dataset`, `test_dataset`, `feature_size`, `encoder_size`, and
# `model_dir` are defined before this view begins.
init_lr = 0.01
batch_size = 50
num_epochs = 2
# Total SGD steps = epochs * (samples / batch); ceil keeps a partial batch.
num_steps = ceil(train_dataset.shape[0] / batch_size * num_epochs)
sae = SparseAutoencoder(feature_size, encoder_size, model_dir,
                        optimizer=tf.train.AdamOptimizer,
                        sparsity=0.05, sparsity_weight=0.1,
                        init_lr=init_lr, decay_steps=int(num_steps))
sae.train_with_labels(train_dataset, train_labels, batch_size,
                      int(num_steps), valid_dataset)
# Reconstruction loss on the test set as a fit sanity check.
test_loss = sae.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction loss: %f" % test_loss)
# Encode all three splits with the trained model.
sae_train_dataset = sae.encode_dataset(train_dataset)
sae_valid_dataset = sae.encode_dataset(valid_dataset)
sae_test_dataset = sae.encode_dataset(test_dataset)
# Persist encoded datasets under the model directory;
# `maybe_npsave` returns the saved filename.
tr_fn = maybe_npsave(model_dir + 'sae_train', sae_train_dataset,
                     0, sae_train_dataset.shape[0], True)
va_fn = maybe_npsave(model_dir + 'sae_valid', sae_valid_dataset,
                     0, sae_valid_dataset.shape[0], True)
te_fn = maybe_npsave(model_dir + 'sae_test', sae_test_dataset,
                     0, sae_test_dataset.shape[0], True)
print('Encoded train set %s saved to %s' % (sae_train_dataset.shape, tr_fn))
print('Encoded valid set %s saved to %s' % (sae_valid_dataset.shape, va_fn))
print('Encoded test set %s saved to %s' % (sae_test_dataset.shape, te_fn))