Esempio n. 1
0
def get_unsw_data():
    """Load the UNSW-NB15 train/test splits and build the model feed dicts.

    Reads the training/testing CSVs, discovers the symbolic, integer and
    continuous feature metadata, and lets ``build_embeddings`` /
    ``build_continuous`` populate the per-feature input dictionaries as a
    side effect.

    Returns:
        tuple: ``(train_dict, y, test_dict, test_y)`` where the dicts map
        input names to column arrays and ``y`` / ``test_y`` are the labels
        from ``get_dataset``.
    """
    dataset_names = [
        'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
    ]
    feature_file = 'UNSW/feature_names_train_test.csv'

    headers, _, _, _ = unsw.get_feature_names(feature_file)
    symbolic_features = unsw.discovery_feature_volcabulary(dataset_names)
    integer_features = unsw.discovery_integer_map(feature_file, dataset_names)
    continuous_features = unsw.discovery_continuous_map(
        feature_file, dataset_names)
    X, y = get_dataset(dataset_names[0], headers, 'unsw')
    test_X, test_y = get_dataset(dataset_names[1], headers, 'unsw')

    train_dict = dict()
    test_dict = dict()
    merged_inputs = []
    embeddings = []
    large_discrete = []
    # Called only for their side effects: they fill train_dict/test_dict
    # (and the layer lists) in place.  Their return values -- the merged
    # input dimension and the continuous-feature component -- were
    # previously bound to locals that this function never used.
    build_embeddings(symbolic_features, integer_features,
                     embeddings, large_discrete, merged_inputs,
                     X, test_X, train_dict, test_dict, 'unsw')
    build_continuous(continuous_features, merged_inputs, X,
                     test_X, train_dict, test_dict, 'unsw')

    return train_dict, y, test_dict, test_y
def modality_net_unsw(hidden):
    """Build, train and evaluate the UNSW-NB15 modality sub-network.

    Args:
        hidden: sequence of at least three layer widths, consumed as
            ``hidden[0]`` (h1), ``hidden[1]`` (h2) and ``hidden[2]`` (h3).

    Returns:
        tuple: ``(merge, merged_inputs, train_dict, test_dict, y, test_y)``
        -- the concatenated feature tensor, the Keras input layers, the
        train/test feed dicts and the corresponding labels.

    Side effects: appends loss/accuracy to the module-level ``modnet``
    dict (defined elsewhere in the file -- not visible here), logs through
    ``logger``, and saves the trained weights to
    'ModalityNets/modnet_unsw.h5'.
    """
    dataset_names = [
        'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
    ]
    feature_file = 'UNSW/feature_names_train_test.csv'

    # Discover feature metadata (symbolic vocabularies, integer ranges,
    # continuous columns) and load both splits.
    headers, _, _, _ = unsw.get_feature_names(feature_file)
    symbolic_features = unsw.discovery_feature_volcabulary(dataset_names)
    integer_features = unsw.discovery_integer_map(feature_file, dataset_names)
    continuous_features = unsw.discovery_continuous_map(
        feature_file, dataset_names)
    X, y = get_dataset(dataset_names[0], headers, 'unsw')
    test_X, test_y = get_dataset(dataset_names[1], headers, 'unsw')

    train_dict = dict()
    test_dict = dict()
    merged_inputs = []
    embeddings = []
    large_discrete = []
    merged_dim = 0
    # build_embeddings/build_continuous populate the feed dicts and layer
    # lists in place; build_embeddings also returns the embedded dimension.
    merged_dim += build_embeddings(symbolic_features, integer_features,
                                   embeddings, large_discrete, merged_inputs,
                                   X, test_X, train_dict, test_dict, 'unsw')
    merged_dim += len(continuous_features)
    cont_component = build_continuous(continuous_features, merged_inputs, X,
                                      test_X, train_dict, test_dict, 'unsw')
    logger.debug('merge input_dim for UNSW-NB dataset = %s' % merged_dim)

    # Dense head over the concatenated embedding + discrete + continuous parts.
    merge = concatenate(embeddings + large_discrete + [cont_component],
                        name='concate_features_unsw')
    h1 = Dense(hidden[0], activation='relu', name='h1_unsw')(merge)
    dropout = Dropout(drop_prob)(h1)  # drop_prob is a module-level constant
    h2 = Dense(hidden[1], activation='relu', name='h2_unsw')(dropout)

    bn = BatchNormalization(name='bn_unsw')(h2)
    h3 = Dense(hidden[2], activation='sigmoid', name='sigmoid_unsw')(bn)
    # Two-way softmax output (one-hot binary labels).
    sm = Dense(2, activation='softmax', name='output_unsw')(h3)

    model = Model(inputs=merged_inputs, outputs=sm)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    # Positional args map to fit(x, y, batch_size, epochs); batch_size and
    # num_epochs come from module scope.
    history = model.fit(train_dict, {'output_unsw': y}, batch_size, num_epochs)
    modnet['unsw_loss'].append(history.history['loss'])
    # Evaluate in a single batch (batch_size = number of samples).
    score = model.evaluate(train_dict, y, y.shape[0])
    logger.debug('modnet[unsw] train loss\t%.6f' % score[0])
    logger.info('modenet[unsw] train accu\t%.6f' % score[1])
    modnet['unsw']['train'].append(score[1])

    score = model.evaluate(test_dict, test_y, test_y.shape[0])
    logger.debug('modnet[unsw] test loss\t%.6f' % score[0])
    logger.info('modenet[unsw] test accu\t%.6f' % score[1])
    modnet['unsw']['test'].append(score[1])

    model.save_weights('ModalityNets/modnet_unsw.h5')
    # np.savez('ModalityNets/unsw_EX.npy', train=EX, test=EX_test)
    return merge, merged_inputs, train_dict, test_dict, y, test_y
Esempio n. 3
0
def get_unsw_data():
    """Load UNSW-NB15 and return the merged (embedded + continuous) features.

    An identity Keras model over the concatenated inputs is used purely to
    materialise the merged feature matrices for both splits.

    Returns:
        tuple: ``(MX, MX_test, y, y_test)`` -- merged train/test feature
        matrices and their label arrays.
    """
    csv_files = ['UNSW/UNSW_NB15_%s-set.csv' % split
                 for split in ('training', 'testing')]
    feature_csv = 'UNSW/feature_names_train_test.csv'

    # Feature metadata discovered from the feature-name file and the CSVs.
    headers, _, _, _ = unsw.get_feature_names(feature_csv)
    vocab = unsw.discovery_feature_volcabulary(csv_files)
    int_ranges = unsw.discovery_integer_map(feature_csv, csv_files)
    cont_ranges = unsw.discovery_continuous_map(feature_csv, csv_files)

    train_X, train_y = get_dataset(csv_files[0], headers, 'unsw')
    held_X, held_y = get_dataset(csv_files[1], headers, 'unsw')

    feed_train, feed_test = dict(), dict()
    net_inputs, embed_parts, wide_parts = [], [], []

    # The builders populate the feed dicts and layer lists in place and
    # report the dimensionality they contribute.
    total_dim = build_embeddings(vocab, int_ranges, embed_parts, wide_parts,
                                 net_inputs, train_X, held_X, feed_train,
                                 feed_test, 'unsw')
    total_dim += len(cont_ranges)
    cont_part = build_continuous(cont_ranges, net_inputs, train_X, held_X,
                                 feed_train, feed_test, 'unsw')
    pprint('merge input_dim for UNSW-NB dataset = %s' % total_dim)

    fused = concatenate(embed_parts + wide_parts + [cont_part],
                        name='concate_features_unsw')

    # Identity feature extractor: compiled only so predict() can run.
    extractor = Model(inputs=net_inputs, outputs=fused)
    extractor.compile('adam', 'mse')
    extractor.summary()
    merged_train = extractor.predict(feed_train)
    merged_test = extractor.predict(feed_test)

    return merged_train, merged_test, train_y, held_y
Esempio n. 4
0
        result = m.evaluate(test_ib)
        history['test'].append(result)
        logger.info('******   Test performance   ******')
        for key in result:
            logger.info("%s: %s" % (key, result[key]))

    return history


# Script-level setup: pin the visible GPU, locate the UNSW-NB15 CSVs and
# derive feature-name groupings used by the rest of the script.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # restrict to GPU 0
train_filename = 'UNSW/UNSW_NB15_training-set.csv'
test_filename = 'UNSW/UNSW_NB15_testing-set.csv'
feature_filename = 'UNSW/feature_names_train_test.csv'
CSV_COLUMNS, symbolic_names, continuous_names, discrete_names = \
    get_feature_names(feature_filename)
# Per-discrete-feature value bounds scanned from both CSVs; small_ranges
# lists the features whose range is small enough to treat directly.
upper, lower, small_ranges = discovery_discrete_range(
    [train_filename, test_filename], discrete_names, CSV_COLUMNS)

quantile_names = []
# Disabled: quantile-transformed companions for numeric features.
"""
for name in continuous_names + discrete_names:
    quantile_names.append(name + '_quantile')
"""

print(symbolic_names, len(symbolic_names))
print(continuous_names, len(continuous_names))
print(discrete_names, len(discrete_names))
# print(quantile_names, len(quantile_names))

header = generate_header(CSV_COLUMNS)
Esempio n. 5
0
    num_classes = 2
    y = np.zeros(shape=(labels.shape[0], num_classes))
    for (i, l) in enumerate(labels):
        y[i, l] = 1

    return X, y


# Script-level setup: discover UNSW-NB15 feature metadata, load both
# splits, and prepare the containers for the embedding input layers.
dataset_names = [
    'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
]
feature_file = 'UNSW/feature_names_train_test.csv'

# Symbolic vocabularies and integer feature ranges from the CSVs.
symbolic_features = discovery_feature_volcabulary(dataset_names)
integer_features = discovery_integer_map(feature_file, dataset_names)
headers, _, _, _ = get_feature_names(feature_file)

X, y = get_dataset(dataset_names[0], headers)
test_X, test_y = get_dataset(dataset_names[1], headers)

train_dict = dict()
test_dict = dict()
merged_dim = 0
merged_inputs = []

# Define embedding layers/inputs
embeddings = []
for (name, values) in symbolic_features.items():
    column = Input(shape=(1, ), name=name)
    merged_inputs.append(column)