def run_main(input_file):
    dataset = TrafficDataset(input_file, transform=None, normalization_flg=True)

    train_sampler, test_sampler = split_train_test(dataset, split_percent=0.9, shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    # train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True, num_workers=4)  # use the whole dataset
    train_loader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=4, sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:', sorted(cntr.items()))
    global test_loader
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=4,
                                              sampler=test_sampler)
    X, y = get_loader_iterators_contents(test_loader)
    cntr = Counter(y)
    print('test_loader: ', len(test_loader.sampler), ' y:', sorted(cntr.items()))

    EMBEDDING_DIM = num_features  # input_size
    HIDDEN_DIM = 30

    for i in range(4, 11):
        print('first_%d_pkts' % i)
        global first_n_pkts
        first_n_pkts = i  # consumed elsewhere (e.g. inside the model) via the global
        model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, '', num_classes)
        model.train(train_loader)

        print('***train accuracy: ')
        model.test(train_loader)

        print('***test accuracy: ')
        model.test(test_loader)
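
# `split_train_test` is called above but not defined in these snippets. A
# minimal sketch of a compatible implementation, assuming it shuffles the
# dataset indices and returns a pair of SubsetRandomSampler objects (the
# signature is inferred from the call sites):
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler

def split_train_test(dataset, split_percent=0.7, shuffle=True):
    indices = list(range(len(dataset)))
    if shuffle:
        np.random.shuffle(indices)
    split_point = int(split_percent * len(dataset))
    train_sampler = SubsetRandomSampler(indices[:split_point])
    test_sampler = SubsetRandomSampler(indices[split_point:])
    return train_sampler, test_sampler
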
# Example 2
def run_main(input_file, num_features):
    # input_file = '../input_data/data_split_train_v2_711/train_%dpkt_images_merged.csv' % i
    # input_file = '../input_data/attack_normal_data/benign_data.csv'
    print(input_file)
    dataset = TrafficDataset(input_file, transform=None, normalization_flg=True)

    train_sampler, test_sampler = split_train_test(dataset, split_percent=0.7, shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    # train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True, num_workers=4)  # use the whole dataset
    train_loader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=4, sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:', sorted(cntr.items()))
    global test_loader
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=4,
                                              sampler=test_sampler)
    X, y = get_loader_iterators_contents(test_loader)
    cntr = Counter(y)
    print('test_loader: ', len(test_loader.sampler), ' y:', sorted(cntr.items()))

    model = WGAN(num_classes, num_features, batch_size=batch_size).to(device)
    model.run_train(train_loader)

    return model, test_loader
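
# `get_loader_iterators_contents` is likewise external. A minimal sketch,
# assuming it drains a DataLoader and returns the inputs and labels as flat
# Python lists (only the labels feed the Counter calls above):
def get_loader_iterators_contents(loader):
    X, y = [], []
    for b_x, b_y in loader:
        X.extend(b_x.tolist())
        y.extend(b_y.tolist())
    return X, y
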
# Example 3
def run_main_cross_validation(i):
    input_file = '../input_data/data_split_train_v2_711/train_%dpkt_images_merged.csv' % i
    print(input_file)
    dataset = TrafficDataset(input_file,
                             transform=None,
                             normalization_flg=True)

    acc_sum = 0.0

    # k_fold = KFold(n_splits=10)
    k_fold = StratifiedKFold(n_splits=10)
    # StratifiedKFold.split needs the labels to stratify on; the first
    # argument is only used for its length.
    for k, (train_idxs_k, test_idxs_k) in enumerate(
            k_fold.split(np.zeros(len(dataset)), dataset.y)):
        print('--------------------- k = %d -------------------' % (k + 1))
        cntr = Counter(dataset.y)
        print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
        train_sampler = SubsetRandomSampler(train_idxs_k)
        test_sampler = SubsetRandomSampler(test_idxs_k)
        # train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True, num_workers=4)  # use the whole dataset
        train_loader = torch.utils.data.DataLoader(dataset,
                                                   batch_size,
                                                   shuffle=False,
                                                   num_workers=4,
                                                   sampler=train_sampler)
        X, y = get_loader_iterators_contents(train_loader)
        cntr = Counter(y)
        print('train_loader: ', len(train_idxs_k), ' y:', sorted(cntr.items()))
        global test_loader
        test_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size,
                                                  shuffle=False,
                                                  num_workers=4,
                                                  sampler=test_sampler)
        X, y = get_loader_iterators_contents(test_loader)
        cntr = Counter(y)
        print('test_loader: ', len(test_idxs_k), ' y:', sorted(cntr.items()))

        model = ConvNet(num_classes, num_features=i * 60 + i - 1).to(device)
        print(model)
        model.run_train(train_loader)
        show_results(model.results, i)

        # model.run_test(test_loader)
        acc_sum_tmp = np.sum(model.results['test_acc'])
        if acc_sum < acc_sum_tmp:
            print('***acc_sum:', acc_sum, ' < acc_sum_tmp:', acc_sum_tmp)
            acc_sum = acc_sum_tmp
            # save the checkpoint of the best fold so far
            torch.save(model.state_dict(), '../results/model_%d.ckpt' % i)
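
# Illustrative driver for the cross-validation above; the 1-10 packet range
# is an assumption matching the 'train_%dpkt_images_merged.csv' naming:
# for pkt_cnt in range(1, 11):
#     run_main_cross_validation(pkt_cnt)
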
def modify_random_n_features_by_WGAN(mixed_input_file,
                                     num_features,
                                     random_choose_n_features=2,
                                     random_flg=False):
    """

    :param mixed_input_file: include benign and attack
    :param num_features: all the features used to train GAN
    :param random_choose_n_features: random choose n features to modify
    :return:
    """

    # input_file = '../input_data/data_split_train_v2_711/train_%dpkt_images_merged.csv' % i
    # input_file = '../input_data/attack_normal_data/benign_data.csv'
    print(mixed_input_file)
    dataset = TrafficDataset(mixed_input_file,
                             transform=None,
                             normalization_flg=True)

    train_sampler, test_sampler = split_train_test(dataset,
                                                   split_percent=0.7,
                                                   shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    # train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True, num_workers=4)  # use the whole dataset
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size,
                                               num_workers=4,
                                               sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:',
          sorted(cntr.items()))

    model = WGAN(num_classes, num_features, batch_size=batch_size).to(device)
    model.run_train(train_loader,
                    random_choose_n_features,
                    random_flg=random_flg)

    return model
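
# Illustrative call (the CSV path and the 41-feature count are assumptions;
# 41 matches the np.random.randint bound used in generate_attack_data below):
# wgan_model = modify_random_n_features_by_WGAN(
#     '../input_data/attack_normal_data/mixed_data.csv',
#     num_features=41, random_choose_n_features=2, random_flg=True)
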
def generate_attack_data(model,
                         mixed_input_file,
                         random_choose_n_features=2,
                         n=1000,
                         random_flg=False):
    print(mixed_input_file)
    dataset = TrafficDataset(mixed_input_file,
                             transform=None,
                             normalization_flg=True)

    train_sampler, test_sampler = split_train_test(dataset,
                                                   split_percent=0.7,
                                                   shuffle=True)
    cntr = Counter(dataset.y)
    print('dataset: ', len(dataset), ' y:', sorted(cntr.items()))
    # train_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True, num_workers=4)  # use the whole dataset
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size,
                                               num_workers=4,
                                               sampler=train_sampler)
    X, y = get_loader_iterators_contents(train_loader)
    cntr = Counter(y)
    print('train_loader: ', len(train_loader.sampler), ' y:',
          sorted(cntr.items()))

    cnt = 0
    generated_data = []
    while cnt < n:
        for step, (b_x, b_y) in enumerate(train_loader):

            b_x = b_x.to(device)
            b_y = b_y.to(device)

            b_x = b_x.view([b_x.shape[0], -1])
            b_x = Variable(b_x).float()
            b_y = Variable(b_y).type(torch.LongTensor)
            # split the batch into benign (label 0) and attack (label 1) rows
            tmp_list = b_y.data.tolist()
            index_tmp = [step for step, i in enumerate(tmp_list)
                         if i == 0]  # benign rows
            index_attack_tmp = [
                step for step, i in enumerate(tmp_list) if i == 1
            ]  # attack rows

            # pair up equal numbers of benign and attack rows
            index_tmp_len = min(len(index_tmp), len(index_attack_tmp))

            cnt += index_tmp_len

            b_x_benign = b_x[index_tmp[:index_tmp_len]]
            b_x_attack = b_x[index_attack_tmp[:index_tmp_len]]

            if random_flg:
                # pick which feature columns to overwrite at random
                indexs_random = np.random.randint(
                    0, 41, random_choose_n_features)
            else:
                indexs_random = list(range(random_choose_n_features))
            g_in_size = 4  # dimension of the generator's noise input
            # standard-normal noise as the generator input
            z_ = torch.randn((index_tmp_len, g_in_size)).to(device)

            G_ = model.G(z_)  # generated values for the chosen features

            # overwrite the chosen feature columns of the attack rows with
            # the generated values
            for i in range(len(G_)):
                for col, j in enumerate(indexs_random):
                    b_x_attack[i][j] = G_[i][col]

            G_ = b_x_attack  # the modified attack rows act as the fakes
            D_fake = model.D(G_.detach())  # detached from the generator graph
            D_real = model.D(b_x_benign)
            print('D_real', D_real.data.tolist())
            print('D_fake', D_fake.data.tolist())
            generated_data.extend(b_x_attack)
            if cnt >= n:  # stop once enough rows have been collected
                break

    return generated_data[:n]
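
# Illustrative call, continuing the sketch above (all argument values are
# assumptions):
# attack_rows = generate_attack_data(wgan_model,
#                                    '../input_data/attack_normal_data/mixed_data.csv',
#                                    random_choose_n_features=2, n=1000,
#                                    random_flg=True)
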
# Example 6
num_epochs = 500
num_classes = 4
batch_size = 100
learning_rate = 0.001
input_file = '../input_data/data_split_train_v2_711/train_1pkt_images_merged.csv'
train_data = TrafficDataset(input_file, transform=None)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=30, shuffle=True, num_workers=4)
test_loader = train_loader  # NOTE: evaluation here reuses the training data


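
# `TrafficDataset` itself is not shown in these snippets. A minimal sketch of
# a compatible implementation; the CSV layout (header row, label in the last
# column) and the min-max normalization are assumptions:
import numpy as np
import torch
from torch.utils.data import Dataset

class TrafficDataset(Dataset):
    def __init__(self, input_file, transform=None, normalization_flg=False):
        data = np.loadtxt(input_file, delimiter=',', skiprows=1)
        self.X = data[:, :-1].astype('float32')
        self.y = data[:, -1].astype('int64')
        if normalization_flg:
            # min-max normalize every feature column to [0, 1]
            mins, maxs = self.X.min(axis=0), self.X.max(axis=0)
            self.X = (self.X - mins) / (maxs - mins + 1e-8)
        self.transform = transform

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        x = torch.from_numpy(self.X[idx])
        if self.transform is not None:
            x = self.transform(x)
        return x, int(self.y[idx])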