Code example #1
0
def generate_dataset(binary_label, one_hot_encoding, root_dir=''):
    """Build and persist the UNSW-NB15 train/test datasets as .npy files.

    Loads the pre-split UNSW-NB15 CSVs, optionally one-hot encodes the
    symbolic (categorical) features, encodes labels as either binary or
    10-class, concatenates numeric + symbolic features, and saves the
    resulting arrays via maybe_npsave().

    Parameters
    ----------
    binary_label : bool
        True -> binary labels; False -> 10-class labels.
    one_hot_encoding : bool
        True -> one-hot encode the symbolic features.
    root_dir : str
        Prefix prepended to output paths; presumably ends with '/' when
        non-empty — TODO confirm with callers.
    """
    prefix = 'UNSW/UNSW_NB15_'
    train_name = prefix + 'training-set.csv'
    test_name = prefix + 'testing-set.csv'
    # Build the category map from BOTH splits so every symbolic value
    # seen in either file gets a consistent index.
    category_maps = discovery_category_map([train_name, test_name])

    num_train, sym_train, train_bin_labels, train_ten_labels = \
        load_csv(train_name, category_maps)
    num_test, sym_test, test_bin_labels, test_ten_labels = \
        load_csv(test_name, category_maps)

    # PEP 8 idiom fix: test flag truthiness instead of `is True`.
    if one_hot_encoding:
        sym_train, sym_test = encode_symbolic_feature(sym_train, sym_test)

    if binary_label:
        train_labels = encode_labels(train_bin_labels)
        test_labels = encode_labels(test_bin_labels)
    else:
        train_labels = encode_labels(train_ten_labels, 10)
        test_labels = encode_labels(test_ten_labels, 10)

    # Numeric standardization deliberately disabled upstream; kept for
    # reference.
    # num_train, num_test = std_numeric_feature(num_train, num_test)
    train_traffic = np.concatenate((num_train, sym_train), axis=1)
    test_traffic = np.concatenate((num_test, sym_test), axis=1)

    print('Trainset shape:', train_traffic.shape, train_labels.shape)
    maybe_npsave('%sUNSW/train_dataset' % root_dir, train_traffic)
    maybe_npsave('%sUNSW/train_labels' % root_dir,
                 train_labels,
                 binary_label=binary_label)
    # NOTE(review): validation-split saving is disabled; kept verbatim.
    """
    valid_traffic, valid_labels = split_valid(test_traffic, test_labels)
    print('Validset shape:', valid_traffic.shape, valid_labels.shape)
    maybe_npsave('%sUNSW/valid_dataset' % root_dir, valid_traffic)
    maybe_npsave('%sUNSW/valid_labels' % root_dir, valid_labels,
                 binary_label=binary_label)
    """
    print('Testset shape:', test_traffic.shape, test_labels.shape)
    maybe_npsave('%sUNSW/test_dataset' % root_dir, test_traffic)
    maybe_npsave('%sUNSW/test_labels' % root_dir,
                 test_labels,
                 binary_label=binary_label)
Code example #2
0
# Train an RBM on the traffic features, project each split into the
# hidden-unit space, and persist the encoded datasets.
batch_size = 10
num_epochs = 40
num_steps = ceil(train_dataset.shape[0] / batch_size * num_epochs)

rbm = RestrictedBoltzmannMachine(feature_size, num_hidden_rbm,
                                 batch_size, trans_func=tf.nn.sigmoid,
                                 name=encoder_name)
print('Restricted Boltzmann Machine built')
rbm.train_with_labels(train_dataset, train_labels, int(num_steps),
                      valid_dataset, rbm_lr)
test_loss = rbm.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction error: %f" % test_loss)


def _rbm_encode(dataset):
    # Fresh random hidden-state sample for the stochastic encoder.
    seed = np.random.random((dataset.shape[0], num_hidden_rbm))
    return rbm.encode_dataset(dataset, seed)


rbm_train_dataset = _rbm_encode(train_dataset)
rbm_valid_dataset = _rbm_encode(valid_dataset)
rbm_test_dataset = _rbm_encode(test_dataset)
print('Encoded training set', rbm_train_dataset.shape)
print('Encoded valid set', rbm_valid_dataset.shape)
print('Encoded test set', rbm_test_dataset.shape)

tr_fn = maybe_npsave('trainset.' + encoder_name, rbm_train_dataset,
                     0, rbm_train_dataset.shape[0], True)
va_fn = maybe_npsave('validset.' + encoder_name, rbm_valid_dataset,
                     0, rbm_valid_dataset.shape[0], True)
te_fn = maybe_npsave('testset.' + encoder_name, rbm_test_dataset,
                     0, rbm_test_dataset.shape[0], True)
print('Encoded train set %s saved to %s' % (rbm_train_dataset.shape, tr_fn))
print('Encoded valid set %s saved to %s' % (rbm_valid_dataset.shape, va_fn))
print('Encoded test set %s saved to %s' % (rbm_test_dataset.shape, te_fn))
Code example #3
0
                                 trans_func=tf.nn.sigmoid,
                                 num_labels=2,
                                 name=encoder_name)
# Train the RBM on labeled traffic, using the validation split for
# monitoring, then measure reconstruction error on the held-out test set.
rbm.train_with_labels(train_dataset, train_labels, int(num_steps),
                      valid_dataset, rbm_lr)
test_loss = rbm.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction error: %f" % test_loss)
# Record the run's hyperparameters next to the model checkpoints.
# NOTE(review): 'num_epoch' (singular) differs from the 'num_epochs'
# spelling used in the other snippets — confirm it is defined upstream.
hyperparameter = {
    '#hidden units': num_hidden_rbm,
    'init_lr': rbm_lr,
    'num_epochs': num_epoch,
    'num_steps': num_steps,
    'act_func': 'sigmoid',
    'batch_size': batch_size,
}
hyperparameter_summary(rbm.dirname, hyperparameter)

# Encode each split through the RBM's hidden layer; hrand supplies the
# random hidden-state sample required by the stochastic encoder.
hrand = np.random.random((train_dataset.shape[0], num_hidden_rbm))
rbm_train_dataset = rbm.encode_dataset(train_dataset, hrand)
print('Encoded training set', rbm_train_dataset.shape)
hrand = np.random.random((valid_dataset.shape[0], num_hidden_rbm))
rbm_valid_dataset = rbm.encode_dataset(valid_dataset, hrand)
print('Encoded valid set', rbm_valid_dataset.shape)
hrand = np.random.random((test_dataset.shape[0], num_hidden_rbm))
rbm_test_dataset = rbm.encode_dataset(test_dataset, hrand)
print('Encoded test set', rbm_test_dataset.shape)

# Persist the encoded splits (third argument presumably toggles
# overwrite/permission behavior in maybe_npsave — TODO confirm).
maybe_npsave('trainset.rbm', rbm_train_dataset, True)
maybe_npsave('validset.rbm', rbm_valid_dataset, True)
maybe_npsave('testset.rbm', rbm_test_dataset, True)
Code example #4
0
File: autoencoder.py  Project: yue123161/NetLearner
# Fit a plain autoencoder on the NSL-KDD traffic features and persist
# the encoded train/valid/test splits.
train_labels = np.load('NSLKDD/train_ref.npy')
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape)

# Training configuration.
feature_size = train_dataset.shape[1]
encoder_size = 100
init_lr = 0.01
batch_size = 20
num_steps = 1001

autoencoder = Autoencoder(feature_size, encoder_size,
                          transfer_func=tf.nn.sigmoid, name='AE')
autoencoder.train_with_labels(train_dataset, train_labels, batch_size,
                              num_steps, init_lr, valid_dataset)
test_loss = autoencoder.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction loss: %f" % test_loss)

# Project every split through the trained encoder and save each one.
encoded_train = autoencoder.encode_dataset(train_dataset)
encoded_valid = autoencoder.encode_dataset(valid_dataset)
encoded_test = autoencoder.encode_dataset(test_dataset)
tr_fn = maybe_npsave('trainset.ae', encoded_train,
                     0, encoded_train.shape[0], True)
va_fn = maybe_npsave('validset.ae', encoded_valid,
                     0, encoded_valid.shape[0], True)
te_fn = maybe_npsave('testset.ae', encoded_test,
                     0, encoded_test.shape[0], True)
print('Encoded train set %s saved to %s' % (encoded_train.shape, tr_fn))
print('Encoded valid set %s saved to %s' % (encoded_valid.shape, va_fn))
print('Encoded test set %s saved to %s' % (encoded_test.shape, te_fn))
Code example #5
0
# Train a sparse autoencoder, report test-set reconstruction loss, and
# save the encoded representation of every split under model_dir.
init_lr = 0.01
batch_size = 50
num_epochs = 2
num_steps = ceil(train_dataset.shape[0] / batch_size * num_epochs)

sae = SparseAutoencoder(feature_size, encoder_size, model_dir,
                        optimizer=tf.train.AdamOptimizer,
                        sparsity=0.05,
                        sparsity_weight=0.1,
                        init_lr=init_lr,
                        decay_steps=int(num_steps))
sae.train_with_labels(train_dataset, train_labels, batch_size,
                      int(num_steps), valid_dataset)
test_loss = sae.calc_reconstruct_loss(test_dataset)
print("Testset reconstruction loss: %f" % test_loss)

# Encode each split through the trained sparse encoder and persist it.
encoded_train = sae.encode_dataset(train_dataset)
encoded_valid = sae.encode_dataset(valid_dataset)
encoded_test = sae.encode_dataset(test_dataset)
tr_fn = maybe_npsave(model_dir + 'sae_train', encoded_train,
                     0, encoded_train.shape[0], True)
va_fn = maybe_npsave(model_dir + 'sae_valid', encoded_valid,
                     0, encoded_valid.shape[0], True)
te_fn = maybe_npsave(model_dir + 'sae_test', encoded_test,
                     0, encoded_test.shape[0], True)
print('Encoded train set %s saved to %s' % (encoded_train.shape, tr_fn))
print('Encoded valid set %s saved to %s' % (encoded_valid.shape, va_fn))
print('Encoded test set %s saved to %s' % (encoded_test.shape, te_fn))