def get_unsw_data():
    dataset_names = [
        'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
    ]
    feature_file = 'UNSW/feature_names_train_test.csv'
    headers, _, _, _ = unsw.get_feature_names(feature_file)
    symbolic_features = unsw.discovery_feature_volcabulary(dataset_names)
    integer_features = unsw.discovery_integer_map(feature_file, dataset_names)
    continuous_features = unsw.discovery_continuous_map(
        feature_file, dataset_names)
    X, y = get_dataset(dataset_names[0], headers, 'unsw')
    test_X, test_y = get_dataset(dataset_names[1], headers, 'unsw')
    train_dict = dict()
    test_dict = dict()
    merged_inputs = []
    embeddings = []
    large_discrete = []
    merged_dim = 0
    # build_embeddings/build_continuous populate train_dict and test_dict
    # in place; only the filled dicts and labels are returned here.
    merged_dim += build_embeddings(symbolic_features, integer_features,
                                   embeddings, large_discrete, merged_inputs,
                                   X, test_X, train_dict, test_dict, 'unsw')
    merged_dim += len(continuous_features)
    cont_component = build_continuous(continuous_features, merged_inputs,
                                      X, test_X, train_dict, test_dict,
                                      'unsw')
    return train_dict, y, test_dict, test_y
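
# build_embeddings() and build_continuous() are project helpers defined
# elsewhere.  The sketch below is a hypothetical illustration (the name and
# the embedding width are assumptions, not this project's code) of the
# standard Keras pattern such a helper follows for one symbolic feature:
# an Input per column, an Embedding sized to the vocabulary, and a Flatten
# so the result can be concatenated with the other components.
def embed_symbolic_column_sketch(name, vocab_size, dim=4):
    from keras.layers import Input, Embedding, Flatten
    column = Input(shape=(1, ), name=name)        # one integer id per row
    embedded = Embedding(input_dim=vocab_size,    # vocabulary size
                         output_dim=dim,          # learned embedding width
                         input_length=1)(column)
    return column, Flatten()(embedded)            # (batch, dim) tensor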
def modality_net_unsw(hidden):
    dataset_names = [
        'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
    ]
    feature_file = 'UNSW/feature_names_train_test.csv'
    headers, _, _, _ = unsw.get_feature_names(feature_file)
    symbolic_features = unsw.discovery_feature_volcabulary(dataset_names)
    integer_features = unsw.discovery_integer_map(feature_file, dataset_names)
    continuous_features = unsw.discovery_continuous_map(
        feature_file, dataset_names)
    X, y = get_dataset(dataset_names[0], headers, 'unsw')
    test_X, test_y = get_dataset(dataset_names[1], headers, 'unsw')
    train_dict = dict()
    test_dict = dict()
    merged_inputs = []
    embeddings = []
    large_discrete = []
    merged_dim = 0
    merged_dim += build_embeddings(symbolic_features, integer_features,
                                   embeddings, large_discrete, merged_inputs,
                                   X, test_X, train_dict, test_dict, 'unsw')
    merged_dim += len(continuous_features)
    cont_component = build_continuous(continuous_features, merged_inputs,
                                      X, test_X, train_dict, test_dict,
                                      'unsw')
    logger.debug('merge input_dim for UNSW-NB dataset = %s' % merged_dim)
    merge = concatenate(embeddings + large_discrete + [cont_component],
                        name='concate_features_unsw')
    h1 = Dense(hidden[0], activation='relu', name='h1_unsw')(merge)
    dropout = Dropout(drop_prob)(h1)
    h2 = Dense(hidden[1], activation='relu', name='h2_unsw')(dropout)
    bn = BatchNormalization(name='bn_unsw')(h2)
    h3 = Dense(hidden[2], activation='sigmoid', name='sigmoid_unsw')(bn)
    sm = Dense(2, activation='softmax', name='output_unsw')(h3)
    model = Model(inputs=merged_inputs, outputs=sm)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    history = model.fit(train_dict, {'output_unsw': y},
                        batch_size=batch_size, epochs=num_epochs)
    modnet['unsw_loss'].append(history.history['loss'])
    score = model.evaluate(train_dict, y, batch_size=y.shape[0])
    logger.debug('modnet[unsw] train loss\t%.6f' % score[0])
    logger.info('modnet[unsw] train accu\t%.6f' % score[1])
    modnet['unsw']['train'].append(score[1])
    score = model.evaluate(test_dict, test_y, batch_size=test_y.shape[0])
    logger.debug('modnet[unsw] test loss\t%.6f' % score[0])
    logger.info('modnet[unsw] test accu\t%.6f' % score[1])
    modnet['unsw']['test'].append(score[1])
    model.save_weights('ModalityNets/modnet_unsw.h5')
    # np.savez('ModalityNets/unsw_EX.npy', train=EX, test=EX_test)
    return merge, merged_inputs, train_dict, test_dict, y, test_y
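
# Hypothetical usage sketch: the hidden-layer sizes and the shared head
# below are assumptions for illustration, not values taken from this
# project.  The returned merged tensor and input list let a shared network
# be stacked on top of the trained UNSW modality net.
def _demo_modality_net_unsw():
    merge, inputs, tr_dict, te_dict, y_tr, y_te = modality_net_unsw(
        [400, 256, 128])
    shared = Dense(64, activation='relu', name='shared_h1_demo')(merge)
    out = Dense(2, activation='softmax', name='shared_output_demo')(shared)
    shared_model = Model(inputs=inputs, outputs=out)
    shared_model.compile(optimizer='adam', loss='categorical_crossentropy',
                         metrics=['accuracy'])
    return shared_model, tr_dict, te_dict, y_tr, y_te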
def get_unsw_data():
    dataset_names = [
        'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
    ]
    feature_file = 'UNSW/feature_names_train_test.csv'
    headers, _, _, _ = unsw.get_feature_names(feature_file)
    symbolic_features = unsw.discovery_feature_volcabulary(dataset_names)
    integer_features = unsw.discovery_integer_map(feature_file, dataset_names)
    continuous_features = unsw.discovery_continuous_map(
        feature_file, dataset_names)
    X, y = get_dataset(dataset_names[0], headers, 'unsw')
    X_test, y_test = get_dataset(dataset_names[1], headers, 'unsw')
    train_dict = dict()
    test_dict = dict()
    input_layer = []
    embeddings = []
    large_discrete = []
    merged_dim = 0
    merged_dim += build_embeddings(symbolic_features, integer_features,
                                   embeddings, large_discrete, input_layer,
                                   X, X_test, train_dict, test_dict, 'unsw')
    merged_dim += len(continuous_features)
    cont_component = build_continuous(continuous_features, input_layer,
                                      X, X_test, train_dict, test_dict,
                                      'unsw')
    pprint('merge input_dim for UNSW-NB dataset = %s' % merged_dim)
    merged_layer = concatenate(embeddings + large_discrete + [cont_component],
                               name='concate_features_unsw')
    model = Model(inputs=input_layer, outputs=merged_layer)
    model.compile('adam', 'mse')
    model.summary()
    MX = model.predict(train_dict)
    MX_test = model.predict(test_dict)
    return MX, MX_test, y, y_test
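
# Unlike modality_net_unsw(), the variant above never trains: it compiles an
# identity-style model (inputs -> concatenated features; the 'mse' loss is
# never fitted against) and calls predict() only to materialise the merged
# feature matrices.  A hypothetical downstream use, assuming an
# sklearn-style classifier and the one-hot labels produced by get_dataset():
def _demo_unsw_merged_features():
    from sklearn.linear_model import LogisticRegression
    MX, MX_test, y, y_test = get_unsw_data()
    clf = LogisticRegression(max_iter=1000)
    clf.fit(MX, y.argmax(axis=1))                 # collapse one-hot labels
    return clf.score(MX_test, y_test.argmax(axis=1))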
    result = m.evaluate(test_ib)
    history['test'].append(result)
    logger.info('****** Test performance ******')
    for key in result:
        logger.info('%s: %s' % (key, result[key]))
    return history


os.environ['CUDA_VISIBLE_DEVICES'] = '0'
train_filename = 'UNSW/UNSW_NB15_training-set.csv'
test_filename = 'UNSW/UNSW_NB15_testing-set.csv'
feature_filename = 'UNSW/feature_names_train_test.csv'
CSV_COLUMNS, symbolic_names, continuous_names, discrete_names = \
    get_feature_names(feature_filename)
upper, lower, small_ranges = discovery_discrete_range(
    [train_filename, test_filename], discrete_names, CSV_COLUMNS)
quantile_names = []
"""
for name in continuous_names + discrete_names:
    quantile_names.append(name + '_quantile')
"""
print(symbolic_names, len(symbolic_names))
print(continuous_names, len(continuous_names))
print(discrete_names, len(discrete_names))
# print(quantile_names, len(quantile_names))
header = generate_header(CSV_COLUMNS)
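
# discovery_discrete_range() is defined elsewhere in this project; the
# sketch below is a hypothetical reading of what it plausibly computes
# (per-column bounds across both CSVs, plus the columns whose value range
# is small enough to encode directly).  The name, the pandas usage, and the
# 1000-value threshold are all assumptions for illustration.
def discovery_discrete_range_sketch(filenames, discrete_names, columns):
    import pandas as pd
    upper = dict()
    lower = dict()
    for fname in filenames:
        df = pd.read_csv(fname, names=columns, header=0)
        for name in discrete_names:
            lo, hi = int(df[name].min()), int(df[name].max())
            lower[name] = min(lo, lower.get(name, lo))
            upper[name] = max(hi, upper.get(name, hi))
    small_ranges = [n for n in discrete_names
                    if upper[n] - lower[n] + 1 <= 1000]
    return upper, lower, small_ranges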
    num_classes = 2
    y = np.zeros(shape=(labels.shape[0], num_classes))
    for (i, l) in enumerate(labels):
        y[i, l] = 1
    return X, y


dataset_names = [
    'UNSW/UNSW_NB15_%s-set.csv' % x for x in ['training', 'testing']
]
feature_file = 'UNSW/feature_names_train_test.csv'
symbolic_features = discovery_feature_volcabulary(dataset_names)
integer_features = discovery_integer_map(feature_file, dataset_names)
headers, _, _, _ = get_feature_names(feature_file)
X, y = get_dataset(dataset_names[0], headers)
test_X, test_y = get_dataset(dataset_names[1], headers)
train_dict = dict()
test_dict = dict()
merged_dim = 0
merged_inputs = []
# Define embedding layers/inputs
embeddings = []
for (name, values) in symbolic_features.items():
    column = Input(shape=(1, ), name=name)
    merged_inputs.append(column)
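
# The manual loop above one-hot encodes the binary label; an equivalent
# one-liner (a sketch using Keras' built-in helper, not code from this
# project) is:
def one_hot_sketch(labels, num_classes=2):
    from keras.utils import to_categorical
    return to_categorical(labels, num_classes=num_classes)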