from collections import Counter

import numpy as np
import torch
import torch.utils.data
from sklearn.model_selection import StratifiedKFold
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

# Project-local modules, defined elsewhere in this repo:
# TrafficDataset, WGAN, ConvNet, LSTMTagger, split_train_test,
# get_loader_iterators_contents, show_results

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def run_main_lstm(input_file, num_features):
    """Train and evaluate an LSTMTagger on the first-n-packets features.

    Renamed from run_main to avoid clashing with the WGAN variant below;
    num_features is taken as a parameter since it is used for EMBEDDING_DIM.
    """
    dataset = TrafficDataset(input_file, transform=None, normalization_flg=True)
    train_sampler, test_sampler = split_train_test(dataset, split_percent=0.9, shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    # train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True, num_workers=4)  # use all of the dataset
    train_loader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=4, sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:', sorted(cntr.items()))
    global test_loader
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=4, sampler=test_sampler)
    X, y = get_loader_iterators_contents(test_loader)
    cntr = Counter(y)
    print('test_loader: ', len(test_loader.sampler), ' y:', sorted(cntr.items()))

    EMBEDDING_DIM = num_features  # input_size
    HIDDEN_DIM = 30
    for i in range(4, 11):
        print('first_%d_pkts' % i)
        global first_n_pkts
        first_n_pkts = i
        # train()/test() here are assumed to be custom helpers on LSTMTagger,
        # not nn.Module.train(mode).
        model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, '', num_classes)
        model.train(train_loader)
        print('***train accuracy: ')
        model.test(train_loader)
        print('***test accuracy: ')
        model.test(test_loader)
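# `split_train_test` and `get_loader_iterators_contents` are project-local helpers that
# are not defined in this section. A minimal sketch of what they might look like,
# assuming a SubsetRandomSampler-based split (the actual implementations may differ):
#
# def split_train_test(dataset, split_percent=0.7, shuffle=True):
#     idxs = np.arange(len(dataset))
#     if shuffle:
#         np.random.shuffle(idxs)
#     split = int(len(dataset) * split_percent)
#     return SubsetRandomSampler(idxs[:split]), SubsetRandomSampler(idxs[split:])
#
# def get_loader_iterators_contents(loader):
#     """Collect all features and labels yielded by a DataLoader."""
#     X, y = [], []
#     for b_x, b_y in loader:
#         X.extend(b_x.tolist())
#         y.extend(b_y.tolist())
#     return X, y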
def run_main_wgan(input_file, num_features):
    """Train a WGAN on the traffic data and return it together with the test loader.

    Renamed from run_main to avoid clashing with the LSTM variant above.
    """
    # input_file = '../input_data/data_split_train_v2_711/train_%dpkt_images_merged.csv' % i
    # input_file = '../input_data/attack_normal_data/benign_data.csv'
    print(input_file)
    dataset = TrafficDataset(input_file, transform=None, normalization_flg=True)
    train_sampler, test_sampler = split_train_test(dataset, split_percent=0.7, shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    train_loader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=4, sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:', sorted(cntr.items()))
    global test_loader
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=4, sampler=test_sampler)
    X, y = get_loader_iterators_contents(test_loader)
    cntr = Counter(y)
    print('test_loader: ', len(test_loader.sampler), ' y:', sorted(cntr.items()))

    model = WGAN(num_classes, num_features, batch_size=batch_size).to(device)
    model.run_train(train_loader)
    # show_results(model.results, i)
    # model.run_test(test_loader)
    # Save the model checkpoint:
    # torch.save(model.state_dict(), 'wgan_model_%d.ckpt' % i)
    return model, test_loader
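# Example call, using the CSV path mentioned in the comments above. The value of
# num_features is a placeholder and must match the feature-column count of the file
# (41 matches the feature range assumed in generate_attack_data below):
# wgan_model, test_loader = run_main_wgan('../input_data/attack_normal_data/benign_data.csv',
#                                         num_features=41)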
def run_main_cross_validation(i):
    input_file = '../input_data/data_split_train_v2_711/train_%dpkt_images_merged.csv' % i
    print(input_file)
    dataset = TrafficDataset(input_file, transform=None, normalization_flg=True)
    acc_sum = 0.0
    # k_fold = KFold(n_splits=10)
    k_fold = StratifiedKFold(n_splits=10)
    # StratifiedKFold.split() requires the labels to stratify on; its first argument is
    # only used for the number of samples.
    for k, (train_idxs_k, test_idxs_k) in enumerate(k_fold.split(np.zeros(len(dataset)), dataset.y)):
        print('--------------------- k = %d -------------------' % (k + 1))
        cntr = Counter(dataset.y)
        print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
        train_sampler = SubsetRandomSampler(train_idxs_k)
        test_sampler = SubsetRandomSampler(test_idxs_k)
        train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=False, num_workers=4,
                                                   sampler=train_sampler)
        X, y = get_loader_iterators_contents(train_loader)
        cntr = Counter(y)
        print('train_loader: ', len(train_idxs_k), ' y:', sorted(cntr.items()))
        global test_loader
        test_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=False, num_workers=4,
                                                  sampler=test_sampler)
        X, y = get_loader_iterators_contents(test_loader)
        cntr = Counter(y)
        print('test_loader: ', len(test_idxs_k), ' y:', sorted(cntr.items()))
        model = ConvNet(num_classes, num_features=i * 60 + i - 1).to(device)
        print(model)
        model.run_train(train_loader)
        show_results(model.results, i)
        # model.run_test(test_loader)
        acc_sum_tmp = np.sum(model.results['test_acc'])
        if acc_sum < acc_sum_tmp:
            print('***acc_sum:', acc_sum, ' < acc_sum_tmp:', acc_sum_tmp)
            acc_sum = acc_sum_tmp
            # Save the checkpoint of the best-performing fold so far
            torch.save(model.state_dict(), '../results/model_%d.ckpt' % i)
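# A tiny illustration (hypothetical labels) of why StratifiedKFold is used instead of
# plain KFold: every fold keeps the class ratio of the full label set.
#
# y_demo = np.array([0] * 6 + [1] * 4)
# for tr, te in StratifiedKFold(n_splits=2).split(np.zeros_like(y_demo), y_demo):
#     print(np.bincount(y_demo[te]))  # -> [3 2] for both folds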
def modify_random_n_features_by_WGAN(mixed_input_file, num_features, random_choose_n_features=2, random_flg=False):
    """
    Train a WGAN whose generator replaces only a subset of the input features.

    :param mixed_input_file: CSV that includes both benign and attack samples
    :param num_features: number of features used to train the GAN
    :param random_choose_n_features: number of features to modify; chosen at random if random_flg is set
    :return: the trained WGAN model
    """
    # mixed_input_file = '../input_data/data_split_train_v2_711/train_%dpkt_images_merged.csv' % i
    # mixed_input_file = '../input_data/attack_normal_data/benign_data.csv'
    print(mixed_input_file)
    dataset = TrafficDataset(mixed_input_file, transform=None, normalization_flg=True)
    train_sampler, test_sampler = split_train_test(dataset, split_percent=0.7, shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    train_loader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=4, sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:', sorted(cntr.items()))
    # No test_loader is needed here: the WGAN is trained on the training split only.

    model = WGAN(num_classes, num_features, batch_size=batch_size).to(device)
    model.run_train(train_loader, random_choose_n_features, random_flg=random_flg)
    # show_results(model.results, i)
    return model
def generate_attack_data(model, mixed_input_file, random_choose_n_features=2, n=1000, random_flg=False):
    """Generate n attack samples whose chosen features are replaced by WGAN output."""
    print(mixed_input_file)
    dataset = TrafficDataset(mixed_input_file, transform=None, normalization_flg=True)
    train_sampler, test_sampler = split_train_test(dataset, split_percent=0.7, shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    train_loader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=4, sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:', sorted(cntr.items()))

    cnt = 0
    generated_data = []
    while cnt < n:
        for step, (b_x, b_y) in enumerate(train_loader):
            b_x = b_x.to(device)
            b_y = b_y.to(device)
            b_x = b_x.view([b_x.shape[0], -1])
            b_x = Variable(b_x).float()
            b_y = Variable(b_y).type(torch.LongTensor)
            tmp_list = b_y.data.tolist()
            benign_idxs = [idx for idx, lbl in enumerate(tmp_list) if lbl == 0]  # benign samples
            attack_idxs = [idx for idx, lbl in enumerate(tmp_list) if lbl == 1]  # attack samples
            pair_len = min(len(benign_idxs), len(attack_idxs))
            cnt += pair_len
            b_x_benign = b_x[benign_idxs[:pair_len]]
            b_x_attack = b_x[attack_idxs[:pair_len]]
            if random_flg:
                # pick random feature indices (duplicates possible)
                idxs_random = np.random.randint(0, 41, random_choose_n_features)
            else:
                idxs_random = list(range(random_choose_n_features))
            g_in_size = 4
            z_ = torch.randn((pair_len, g_in_size)).to(device)  # noise from a standard normal
            G_ = model.G(z_)
            # overwrite the chosen features of the attack samples with generated values
            for r in range(len(G_)):
                for c, j in enumerate(idxs_random):
                    b_x_attack[r][j] = G_[r][c]
            G_ = b_x_attack
            D_fake = model.D(G_.detach())  # detach to avoid backpropagating into G
            D_real = model.D(b_x_benign)
            print('D_real', D_real.data.tolist())
            print('D_fake', D_fake.data.tolist())
            generated_data.extend(b_x_attack)
            if cnt >= n:  # stop as soon as enough samples have been collected
                break
    return generated_data[:n]
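# End-to-end usage sketch for the two functions above. The mixed CSV path is
# hypothetical, and num_features=41 is an assumption that must match the dataset
# (it mirrors the 0..41 feature range used in generate_attack_data):
# wgan = modify_random_n_features_by_WGAN('../input_data/attack_normal_data/mixed_data.csv',
#                                         num_features=41, random_choose_n_features=2, random_flg=True)
# adversarial_samples = generate_attack_data(wgan, '../input_data/attack_normal_data/mixed_data.csv',
#                                            random_choose_n_features=2, n=1000, random_flg=True)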
num_epochs = 500
num_classes = 4
batch_size = 100
learning_rate = 0.001

# MNIST alternative (requires torchvision), kept for reference:
# train_dataset = torchvision.datasets.MNIST(root='../../input_data/', train=True,
#                                            transform=transforms.ToTensor(), download=True)
# test_dataset = torchvision.datasets.MNIST(root='../../input_data/', train=False,
#                                           transform=transforms.ToTensor())
# train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

input_file = '../input_data/data_split_train_v2_711/train_1pkt_images_merged.csv'
train_data = TrafficDataset(input_file, transform=None)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=30, shuffle=True, num_workers=4)
test_loader = train_loader  # placeholder: evaluates on the training data
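# A minimal driver, assuming the 1-packet merged CSV above. num_features follows the
# formula used in run_main_cross_validation (i * 60 + i - 1, i.e. 60 for i = 1);
# adjust it if the actual column count differs.
if __name__ == '__main__':
    wgan_model, test_loader = run_main_wgan(input_file, num_features=1 * 60 + 1 - 1)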