Example #1
def test(opt, net, loader):
    correct = 0
    if opt.adversarial:
        correct_fgm, correct_pgd = 0, 0
    total = 0
    net.eval()
    logger.info('Starting testing...')

    with torch.set_grad_enabled(opt.adversarial):
        # adversarial attacks need grad computations
        n_batches = len(loader)
        for i, (input, target) in enumerate(loader):
            logger.info('batch: {}/{}'.format(i + 1, n_batches))
            input = input.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            output = net(input, update_centers=False)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            if opt.adversarial:

                # wrap the model so the attack functions receive a standard forward(x) callable
                def net_fn(input):
                    return net(input, update_centers=False)

                input_fgm = fast_gradient_method(net_fn, input,
                                                 opt.adversary_eps,
                                                 opt.adversary_norm)
                input_pgd = projected_gradient_descent(net_fn, input,
                                                       opt.adversary_eps,
                                                       opt.pgd_step_eps,
                                                       opt.pgd_n_steps,
                                                       opt.adversary_norm)
                output_fgm = net(input_fgm, update_centers=False)
                output_pgd = net(input_pgd, update_centers=False)
                _, predicted_fgm = torch.max(output_fgm.data, 1)
                _, predicted_pgd = torch.max(output_pgd.data, 1)
                correct_fgm += (predicted_fgm == target).sum().item()
                correct_pgd += (predicted_pgd == target).sum().item()

    acc = 100 * correct / total
    results = {'accuracy (%)': acc}
    logger.info('Accuracy (%): {:.3f}'.format(acc))

    if opt.adversarial:
        acc_fgm = 100 * correct_fgm / total
        logger.info('Accuracy under FGM (%): {:.3f}'.format(acc_fgm))
        acc_pgd = 100 * correct_pgd / total
        logger.info('Accuracy under PGD (%): {:.3f}'.format(acc_pgd))
        results['accuracy under FGM (%)'] = acc_fgm
        results['accuracy under PGD (%)'] = acc_pgd

    with open(os.path.join(opt.save_dir, 'test.json'), 'w') as out:
        json.dump(results, out, indent=2)
    logger.info('Testing finished!')
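
This function relies on module-level globals (logger, device, json, os, torch, and the two attack functions). A minimal sketch of the assumed setup, using the CleverHans v4 PyTorch attack modules; the logger and device names here are assumptions matching the usage above:

import json
import logging
import os

import torch
from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
from cleverhans.torch.attacks.projected_gradient_descent import projected_gradient_descent

logger = logging.getLogger(__name__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')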
Example #2
def main(_):
    # Load training and test data
    data = ld_cifar10()

    # Instantiate model, loss, and optimizer for training
    net = CNN(in_channels=3)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        net = net.cuda()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    # Train vanilla model
    net.train()
    for epoch in range(1, FLAGS.nb_epochs + 1):
        train_loss = 0.
        for x, y in data.train:
            x, y = x.to(device), y.to(device)
            if FLAGS.adv_train:
                # Replace clean example with adversarial example for adversarial training
                x = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40,
                                               np.inf)
                # Stop backward from entering the graph that created the adv example
                x = x.clone().detach()
            optimizer.zero_grad()
            loss = loss_fn(net(x), y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print('epoch: {}/{}, train loss: {:.3f}'.format(
            epoch, FLAGS.nb_epochs, train_loss))

    # Evaluate on clean and adversarial data
    net.eval()
    report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
    for x, y in data.test:
        x, y = x.to(device), y.to(device)
        x_fgm = fast_gradient_method(net, x, FLAGS.eps, np.inf)
        x_pgd = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40, np.inf)
        _, y_pred = net(x).max(1)  # model prediction on clean examples
        _, y_pred_fgm = net(x_fgm).max(1)  # model prediction on FGM adversarial examples
        _, y_pred_pgd = net(x_pgd).max(1)  # model prediction on PGD adversarial examples
        report.nb_test += y.size(0)
        report.correct += y_pred.eq(y).sum().item()
        report.correct_fgm += y_pred_fgm.eq(y).sum().item()
        report.correct_pgd += y_pred_pgd.eq(y).sum().item()
    print('test acc on clean examples (%): {:.3f}'.format(
        report.correct / report.nb_test * 100.))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(
        report.correct_fgm / report.nb_test * 100.))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(
        report.correct_pgd / report.nb_test * 100.))
Example #3
def test(opt, net, loader):
  correct = 0
  if opt.adversarial:
    correct_fgm, correct_pgd = 0, 0
  total = 0
  net.eval()
  logger.info('Starting testing...')

  with torch.set_grad_enabled(opt.adversarial):
    # adversarial attacks need grad computations
    for i, (input, target) in enumerate(loader):
      logger.info('batch: {}/{}'.format(i + 1, len(loader)))
      input = input.to(device, non_blocking=True)
      target = target.to(device, non_blocking=True)

      output = net(input, update_centers=False)

      # save tensors for visualization
      raw_data = input if i == 0 else torch.cat((raw_data, input))
      labels = target if i == 0 else torch.cat((labels, target))
      activations = net_head(input)  # net_head is assumed to be defined at module level (feature extractor used for visualization)
      all_activations = activations if i == 0 else torch.cat((all_activations, activations))

      _, predicted = torch.max(output.data, 1)
      total += target.size(0)
      correct += (predicted == target).sum().item()
      
      if opt.adversarial:
        net_fn = lambda input: net(input, update_centers=False)
        input_fgm = fast_gradient_method(net_fn, input, opt.adversary_eps, opt.adversary_norm)
        input_pgd = projected_gradient_descent(net_fn, input, opt.adversary_eps,
            opt.pgd_step_eps, opt.pgd_n_steps, opt.adversary_norm)
        output_fgm = net(input_fgm, update_centers=False)
        output_pgd = net(input_pgd, update_centers=False)
        _, predicted_fgm = torch.max(output_fgm.data, 1)
        _, predicted_pgd = torch.max(output_pgd.data, 1)
        correct_fgm += (predicted_fgm == target).sum().item()
        correct_pgd += (predicted_pgd == target).sum().item()
         
  logger.info('Accuracy (%): {:.3f}'.format(100 * correct / total))

  if opt.adversarial:
    logger.info('Accuracy under FGM (%): {:.3f}'.format(100 * correct_fgm / total))
    logger.info('Accuracy under PGD (%): {:.3f}'.format(100 * correct_pgd / total))

  logger.info('Testing finished!')

  return raw_data.cpu().numpy(), labels.cpu().numpy(), all_activations.cpu().numpy()
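
This variant additionally collects the raw inputs, labels, and net_head activations for later visualization. As one illustrative use of the returned arrays (not part of the original code), the activations could be projected to 2-D with scikit-learn's t-SNE:

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def plot_activations(labels, activations):
    # project the saved activations to 2-D and color points by class label
    emb = TSNE(n_components=2).fit_transform(activations)
    plt.scatter(emb[:, 0], emb[:, 1], c=labels, cmap='tab10', s=4)
    plt.colorbar(label='class')
    plt.show()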
Example #4
def run_attacks_cleverhans(res_path, ncl=False):
    if ncl:
        MODEL_DIR = '/mnt/md0/orville/Miriam/modular-loss-experiments-morph/results_ncl/CIFAR-10/densenet-82-8-8'
        rel_dirs = ['alpha_0.0_gamma_0.02_n_models_2_1583114412120',
                    'alpha_0.0_gamma_0.05_n_models_2_1583114439810']
        alpha = ['0.02', '0.05']
        res_path = res_path + '_ncl'
    else:
        MODEL_DIR = '/mnt/md0/orville/Miriam/modular-loss-experiments-morph/results/CIFAR-10/densenet-82-8-8'
        rel_dirs = ['alpha_0.0_gamma_0.0_n_models_3_1585505819121',
                    'alpha_0.1_gamma_0.0_n_models_3_1589795142450',
                    'alpha_0.2_gamma_0.0_n_models_3_1589794987034',
                    'alpha_0.3_gamma_0.0_n_models_3_1589795486214',
                    'alpha_0.4_gamma_0.0_n_models_3_1589796192038',
                    'alpha_0.5_gamma_0.0_n_models_3_1589796200262',
                    'alpha_0.6_gamma_0.0_n_models_3_1589796218204',
                    'alpha_0.7_gamma_0.0_n_models_3_1589796234665']
        alpha = list(map(lambda x: format(x, '2.1f'), np.arange(0.0, 0.8, 0.1)))

    batch_size = 256
    n_workers = 20
    dataset = 'CIFAR-10'
    network = 'densenet-82-8-8'
    loaders, _ = get_dataloaders_(batch_size, 0, dataset, False, early_stop=False, n_workers=n_workers)
    n_models = 2 if ncl else 3

    params = {}
    params['densenet-82-8-8'] = {'num_modules': n_models, 'bottleneck': True, 'reduction': 0.5, 'depth': 82, 'growth_rate': 8,
                                 'input_shape': (3, 32, 32), 'output_dim': 10}
    model = DenseNet(input_shape=params[network]['input_shape'],
                     output_dim=params[network]['output_dim'],
                     growth_rate=params[network]['growth_rate'],
                     depth=params[network]['depth'],
                     reduction=params[network]['reduction'],
                     bottleneck=params[network]['bottleneck'],
                     num_modules=n_models)

    device = torch.device("cuda")
    reports = dict.fromkeys(alpha)
    for model_path, curr_alpha in tqdm(zip(rel_dirs, alpha), total=len(alpha)):
        if ncl:
            weight_path = path.join(MODEL_DIR, model_path, 'trial_0/' + curr_alpha + '/weights/final_weights.pt')
        else:
            weight_path = path.join(MODEL_DIR, model_path, 'trial_0/0.0/weights/final_weights.pt')
        model.reset_parameters()
        model.load_state_dict(torch.load(weight_path))
        model.eval()
        net = ModelMeanEP(model).to(device)

        report = dict()
        for x, y in tqdm(loaders['test'], total=len(loaders['test'])):
            x, y = x.to(device), y.to(device)
            report['nb_test'] = report.get('nb_test', 0) + y.size(0)

            _, y_pred = net(x).max(1)  # model prediction on clean examples
            report['acc'] = report.get('acc', 0) + y_pred.eq(y).sum().item()

            # model prediction on FGM adversarial examples
            x_adv = fast_gradient_method(net, x, 0.02, np.inf)
            _, y_pred = net(x_adv).max(1)
            report['FGM_0.02'] = report.get('FGM_0.02', 0) + y_pred.eq(y).sum().item()

            x_adv = fast_gradient_method(net, x, 0.04, np.inf)
            _, y_pred = net(x_adv).max(1)
            report['FGM_0.04'] = report.get('FGM_0.04', 0) + y_pred.eq(y).sum().item()

            # model prediction on BIM adversarial examples (PGD with rand_init=0, i.e. no random start)
            x_adv = projected_gradient_descent(net, x, eps=0.01, eps_iter=0.01 / 10, nb_iter=10, norm=np.inf, rand_init=0)
            _, y_pred = net(x_adv).max(1)
            report['BIM_0.01'] = report.get('BIM_0.01', 0) + y_pred.eq(y).sum().item()

            x_adv = projected_gradient_descent(net, x, eps=0.02, eps_iter=0.02 / 10, nb_iter=10, norm=np.inf, rand_init=0)
            _, y_pred = net(x_adv).max(1)
            report['BIM_0.02'] = report.get('BIM_0.02', 0) + y_pred.eq(y).sum().item()

            # model prediction on PGD adversarial examples
            x_adv = projected_gradient_descent(net, x, eps=0.01, eps_iter=0.01 / 10, nb_iter=10, norm=np.inf)
            _, y_pred = net(x_adv).max(1)
            report['PGD_0.01'] = report.get('PGD_0.01', 0) + y_pred.eq(y).sum().item()

            x_adv = projected_gradient_descent(net, x, eps=0.02, eps_iter=0.02 / 10, nb_iter=10, norm=np.inf)
            _, y_pred = net(x_adv).max(1)
            report['PGD_0.02'] = report.get('PGD_0.02', 0) + y_pred.eq(y).sum().item()

        for key in ['acc', 'FGM_0.02', 'FGM_0.04', 'BIM_0.01', 'BIM_0.02', 'PGD_0.01', 'PGD_0.02']:
            report[key] = (report[key] / report['nb_test']) * 100.

        reports[curr_alpha] = report
        pickle.dump(reports, open(res_path, 'wb'))  # checkpoint results after each model
    pickle.dump(reports, open(res_path, 'wb'))
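
ModelMeanEP is not shown in this example. Given how it is used (wrapping the multi-module DenseNet so the attacks see a single prediction), a plausible sketch is a module that averages the members' probabilities; this is a hypothetical reconstruction, not the original class:

import torch

class ModelMeanEP(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        # assumed: the wrapped model returns one logits tensor per ensemble module
        outputs = self.model(x)
        return torch.stack([o.softmax(dim=1) for o in outputs]).mean(dim=0)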
Example #5
def evaluate_attack_mnist(model,
                          device,
                          attack,
                          eps=0,
                          norm=2,
                          num_SD=1.8,
                          num_summed_SD=0.75,
                          num_false=1,
                          num_imgs=10000,
                          print_every=100,
                          stop_idx=[],
                          CheckAll=False,
                          use_printouts=False,
                          get_psi=False):
    '''
    :param model (nn.Module): The model used for testing. It is expected to have the following attributes:
        .name (str): name of the model
        .encoder (nn.Module): network with a callable, implemented forward function; must accept inputs of size 784
        .classifier (nn.Module): network with a callable, implemented forward function; must accept inputs of size 784
        .z_prior: length-2 tuple that holds the z prior mean(s) in [0] and the z prior variances in [1]
    :param device (str): either 'cuda' or 'cpu'
    :param attack (str): choice of 'noise', 'fgm', or 'pixel'
    :param eps (int or float): strength of the attack; note that the pixel attack only takes integer values
    :param norm (int or float): either 2 or np.inf; used only with fgm attacks
    :param num_SD (float): per-class sigma_detect threshold
    :param num_summed_SD (float): threshold on the summed, normalized deltas (the psi statistic)
    :param num_false (int): number of delta values for a given image that must exceed num_SD to flag a detection
    :param num_imgs (int): number of images from the test dataset to iterate over
    :param print_every (int): how often to print a progress report
    :param stop_idx (list of ints): specific indexes within the test dataset to pause at with detailed printouts
    :param CheckAll (bool): if True, pause at every image within the test dataset
    :param use_printouts (bool): if True, pause at every anomalous / successfully attacked image in the dataset
    :param get_psi (bool): if True (and eps == 0), evaluate and save psi values across the dataset
        as 'psis_mnist.npy'
    '''

    # Load MNIST test dataset
    testload = torch.utils.data.DataLoader(
        datasets.MNIST("data/mnist",
                       train=False,
                       download=True,
                       transform=transforms.Compose([
                           transforms.Resize(28),
                           transforms.ToTensor(),
                           transforms.Normalize([0.5], [0.5])
                       ])))
    x_test = testload.dataset.data.to(device).reshape(
        -1, 784).float()[:num_imgs] / 255
    y_test = testload.dataset.targets.to(device)[:num_imgs]
    print("y_test shape: {}".format(y_test.shape))
    total_num = len(x_test)
    print("Total length of x_test dataset: {}".format(total_num))

    # Load model in eval mode
    model.eval()

    # Load KL Data
    KL_Classes_Stats = np.zeros((10, 10, 2))
    if os.path.exists('deltas_mnist.npy'):
        KL_Classes_Stats = np.load('deltas_mnist.npy')
    else:
        print(
            "Warning: No deltas_mnist file to load. Make sure you run determine_deltas_mnist first!"
        )
    KL_Summed_SD_Stats = np.zeros((10, 2))
    if os.path.exists('psis_mnist.npy'):
        KL_Summed_SD_Stats = np.load('psis_mnist.npy')
    else:
        print(
            "Warning: No psis_mnist file to load. Make sure you run with get_psi=True first!"
        )

    # Create vectors to hold values
    Max_Delta_KL_z_adv = []
    Summed_KL_z_adv = []
    PredictClean = []
    ProbClean = []
    ProbAdv = []
    PredictAdv = []
    IsCorrect = []
    AdvImages = []
    SuccessfulAdvAtkDetected = []
    UnsuccessfulAdvAtkDetected = []
    FalsePositive = []  # only used for d = 0 pixels changed
    AnomalyDetected = []
    KL_Summed_SD = []
    for i in range(10):
        KL_Summed_SD.append([])

    # If running Single Pixel attack, load class
    attacker = None
    if attack == 'pixel':
        attacker = OnePixelAttack(model, device)

    for x, y, j in zip(x_test, y_test, range(len(x_test))):
        # Load single img
        orig = x.view(28, 28).cpu().numpy()
        img = orig.copy()
        shape = img.shape

        inp = x.to(device)
        prob_orig = ut.softmax(model.classifier(inp).data.cpu().numpy()[0])
        pred_orig = np.argmax(prob_orig)

        # Append to vectors
        PredictClean.append(pred_orig)
        ProbClean.append(prob_orig)

        # Run specified attack
        adv_img = None
        if eps > 0:
            if attack == 'fgm':
                adv_img = fast_gradient_method(model.classifier,
                                               x,
                                               eps=eps,
                                               norm=norm,
                                               clip_min=0,
                                               clip_max=1).view(1, -1)
            elif attack == 'noise':
                adv_img = noise(x, eps=eps, clip_min=0, clip_max=1).view(1, -1)
            elif attack == 'pixel':
                _, _, _, adv_img = attacker.pixel_attack(
                    eps, shape, pred_orig, img)
            else:
                raise AssertionError(
                    "Attack must either be 'fgm', 'pixel', or 'noise'")
        else:
            adv_img = x.view(1, -1)
        adv_out = model.classifier(adv_img)
        prob = ut.softmax(adv_out.data.cpu().numpy())
        adv_y = F.softmax(adv_out, dim=-1).float()
        pred_adv = torch.topk(adv_y, 1, dim=-1)[1].item()
        prob_adv = prob[0][pred_adv]

        # Append to vectors
        PredictAdv.append(pred_adv)
        ProbAdv.append(prob_adv)
        AdvImages.append(adv_img.view(1, 28, 28).data)

        # Append to accuracy vector
        IsCorrect.append(int(pred_adv == y))

        #### Test KL z div for all images ####

        # Display the adversarial/clean image pair and details only when printout conditions are met
        if (((pred_orig != pred_adv) or
             (pred_orig != y) or CheckAll) and use_printouts) or j in stop_idx:
            fig1 = plt.imshow(adv_img.view(28, 28).cpu().data)
            fig1.axes.get_xaxis().set_visible(False)
            fig1.axes.get_yaxis().set_visible(False)
            plt.title('{} Attack, eps = {}, Adv Prediction: {}'.format(
                attack, eps, pred_adv))
            plt.show()
            fig2 = plt.imshow(x.view(28, 28).cpu().data)
            fig2.axes.get_xaxis().set_visible(False)
            fig2.axes.get_yaxis().set_visible(False)
            plt.title('Clean Image Prediction: {}'.format(pred_orig))
            plt.show()
            print(
                "Test Image i = {}: Original prediction: {}, Adversarially-induced prediction: {}, True Label = {}"
                .format(j, pred_orig, pred_adv, y))
        KL_local = []

        # Calculate KL div for "expected" (clean or adversarially-induced) label
        y_prob = torch.zeros(1, 10).to(device)
        y_prob[0][pred_adv] = 1
        qm, qv = model.encoder(adv_img.view(1, -1), y_prob)
        kl_z_all = y_prob * ut.kl_normal(
            qm, qv, model.z_prior[0],
            model.z_prior[1])  # kl_z_all shape = [batch_size * y_dim]
        expected_kl_z = torch.sum(kl_z_all)
        TotalFalse = 0
        Num_SD_Away_Total = 0
        Max_Adv_KL = 0
        for i in range(10):
            y_prob = torch.zeros(1, 10).to(device)
            y_prob[0][i] = 1
            qm, qv = model.encoder(adv_img.view(1, -1), y_prob)
            kl_z_all = y_prob * ut.kl_normal(
                qm, qv, model.z_prior[0],
                model.z_prior[1])  # kl_z_all shape = [batch_size * y_dim]
            kl_z = torch.sum(kl_z_all)
            KL_local.append(kl_z.item())
            if KL_Classes_Stats[pred_adv][i][1] > 0:
                Num_SD_Away = (abs(kl_z - expected_kl_z -
                                   KL_Classes_Stats[pred_adv][i][0]) /
                               KL_Classes_Stats[pred_adv][i][1]).item()
            else:
                Num_SD_Away = 0
            if Num_SD_Away > Max_Adv_KL:
                Max_Adv_KL = Num_SD_Away
            Num_SD_Away_Total = Num_SD_Away_Total + Num_SD_Away
            reasonable = Num_SD_Away <= num_SD
            if not reasonable:
                TotalFalse += 1
            if (((pred_orig != pred_adv) or (pred_orig != y) or CheckAll)
                    and use_printouts) or j in stop_idx:
                print(
                    "delta KL_div for y = {}: {:.2f}, Expected delta KL_div: {:.2f}, SD: {:.2f}, Num SD Away: {:.2f}, Reasonable (within {} SD): {}"
                    .format(i, kl_z - expected_kl_z,
                            KL_Classes_Stats[pred_adv][i][0],
                            KL_Classes_Stats[pred_adv][i][1], Num_SD_Away,
                            num_SD, reasonable))

        PositiveDetected = 1 if (
            Num_SD_Away_Total - KL_Summed_SD_Stats[pred_adv][0]
        ) / KL_Summed_SD_Stats[pred_adv][1] > num_summed_SD else 0

        if (pred_orig != pred_adv) or (eps == 0 and pred_orig != y):
            Max_Delta_KL_z_adv.append(Max_Adv_KL)
            Summed_KL_z_adv.append(Num_SD_Away_Total)

        if eps == 0 and get_psi:
            KL_Summed_SD[y].append(float(Num_SD_Away_Total))

        if (((pred_orig != pred_adv) or
             (pred_orig != y) or CheckAll) and use_printouts) or j in stop_idx:
            print(
                "Summed SDs across classes: {:.2f}".format(Num_SD_Away_Total))
            print("Mean, SD for Summed SDs: {}".format(
                KL_Summed_SD_Stats[pred_adv]))
            print(
                "Detected: {}, PositiveDetected: {}, Detected as anomaly: {}".
                format(TotalFalse >= num_false, PositiveDetected,
                       bool(TotalFalse >= num_false or PositiveDetected)))

        # Append the Detected Value to the appropriate vector
        if eps == 0 and pred_orig == y:  # Then this is a false positive
            FalsePositive.append(
                int(TotalFalse >= num_false or PositiveDetected))
        if pred_orig == pred_adv and TotalFalse >= num_false:  # Then this is a detection of an unsuccessful adv atk
            UnsuccessfulAdvAtkDetected.append(PositiveDetected)
        if pred_orig != pred_adv and pred_orig == y:  # Then this is a detection of a successful adv atk
            SuccessfulAdvAtkDetected.append(
                int(TotalFalse >= num_false or PositiveDetected))
        if eps == 0 and pred_orig != y:  # Then this is a detection of anomaly
            AnomalyDetected.append(
                int(TotalFalse >= num_false or PositiveDetected))

        # Wait for user to press a keystroke before continuing
        if (((pred_orig != pred_adv) or
             (pred_orig != y) or CheckAll) and use_printouts) or j in stop_idx:
            input("Press Enter to continue...")

        # progress print
        if j and j % print_every == 0:
            # Get ongoing stats printed out
            Accuracy = statistics.mean(IsCorrect) * 100
            Avg_Max_Delta_KL_z_adv = statistics.mean(Max_Delta_KL_z_adv)
            SD_Max_Delta_KL_z_adv = statistics.stdev(Max_Delta_KL_z_adv)
            Avg_Summed_KL_z_adv = statistics.mean(Summed_KL_z_adv)
            SD_Summed_KL_z_adv = statistics.stdev(Summed_KL_z_adv)
            print("Completed {} of {} Total Examples in MNIST Test Dataset. "
                  "Accuracy = {:.2f}, "
                  "Avg Max Delta Adversarial KL_z = {:.2f}, SD = {:.2f}, "
                  "Avg Summed Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".
                  format(j, total_num, Accuracy, Avg_Max_Delta_KL_z_adv,
                         SD_Max_Delta_KL_z_adv, Avg_Summed_KL_z_adv,
                         SD_Summed_KL_z_adv))

    # After, determine stats
    Accuracy = statistics.mean(IsCorrect) * 100
    Avg_Max_Delta_KL_z_adv = statistics.mean(Max_Delta_KL_z_adv)
    SD_Max_Delta_KL_z_adv = statistics.stdev(Max_Delta_KL_z_adv)
    Avg_Summed_KL_z_adv = statistics.mean(Summed_KL_z_adv)
    SD_Summed_KL_z_adv = statistics.stdev(Summed_KL_z_adv)

    if eps == 0 and get_psi:
        KL_Summed_SD_Stats = np.zeros([10, 2])
        for i in range(10):
            KL_Summed_SD_Stats[i][0] = statistics.mean(KL_Summed_SD[i])
            KL_Summed_SD_Stats[i][1] = statistics.stdev(KL_Summed_SD[i])
        # Save file
        np.save('psis_mnist.npy', KL_Summed_SD_Stats)

    FalsePositivePercentage = None
    SuccessfulAdvAtkDetectedPercentage = None
    AnomalyDetectedPercentage = None
    if eps == 0 and len(FalsePositive) > 0:
        FalsePositivePercentage = sum(FalsePositive) / len(x_test) * 100
    if len(SuccessfulAdvAtkDetected) > 0:
        SuccessfulAdvAtkDetectedPercentage = statistics.mean(
            SuccessfulAdvAtkDetected) * 100
    if len(AnomalyDetected) > 0:
        AnomalyDetectedPercentage = statistics.mean(AnomalyDetected) * 100

    # Print out results to user
    print("Accuracy with eps = {} {} Disturbance: {:.2f}%".format(
        eps, attack, Accuracy))
    print("Percentage of Successful Adversarial Attacks: {:.2f}%".format(
        100 * len(SuccessfulAdvAtkDetected) / len(x_test)))
    print("Average Max Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".format(
        Avg_Max_Delta_KL_z_adv, SD_Max_Delta_KL_z_adv))
    print("Average Summed Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".format(
        Avg_Summed_KL_z_adv, SD_Summed_KL_z_adv))
    if eps == 0:
        print(
            "False Positive Percentage for Clean (eps = {}) data with KL threshold of {}: {}%"
            .format(eps, num_SD, FalsePositivePercentage))
        print(
            "Anomaly (incorrectly classified from clean img) Detected Percentage: {:.2f}%"
            .format(AnomalyDetectedPercentage))
    else:
        print("Successful Adversarial Attack Detected Percentage: {:.2f}%".
              format(SuccessfulAdvAtkDetectedPercentage))

    # Plot a histogram of the max delta KL divergences
    plt.figure(0)
    plt.hist(x=Max_Delta_KL_z_adv, bins='auto', color='#0504aa')
    plt.grid(axis='y')
    plt.xlabel('Max KL z Divergence')
    plt.ylabel('Frequency')
    plt.xlim(0, 5)
    if eps == 0:
        plt.title("Max Clean Delta using {} Model on MNIST".format(model.name))
    else:
        plt.title(
            'Max Adv. Delta using {} Model on MNIST, {} Attack, eps = {}'.
            format(model.name, attack, eps))

    plt.show()

    # Plot a histogram of the summed delta KL divergences (psi)
    plt.figure(1)
    plt.hist(x=Summed_KL_z_adv, bins='auto', color='#607c8e')
    plt.grid(axis='y')
    plt.xlabel('Summed KL z Divergence')
    plt.ylabel('Frequency')
    plt.xlim(0, 35)
    if eps == 0:
        plt.title("Clean Psi using {} on MNIST".format(model.name))
    else:
        plt.title('Adv. Psi using {} on MNIST, {} Attack, eps = {}'.format(
            model.name, attack, eps))
    plt.show()

    # Save some of the examples of Adv images generated
    save_image(AdvImages[:25],
               "images/{}_attack-eps={}.png".format(attack, eps),
               nrow=5,
               normalize=True)
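
A usage sketch based on the docstring above; it assumes the model and its delta statistics (deltas_mnist.npy) already exist, and the argument values are illustrative:

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# first pass on clean data to collect and save the psi statistics
evaluate_attack_mnist(model, device, attack='fgm', eps=0, get_psi=True)

# then evaluate detection under an L-inf FGM attack
evaluate_attack_mnist(model, device, attack='fgm', eps=0.2, norm=np.inf,
                      num_SD=1.8, num_summed_SD=0.75, num_false=1)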
Example #6
def main(_):
    # Load training and test data
    data = ld_cifar10()

    # Instantiate model, loss, and optimizer for training
    net = resnet.ResNet18()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logging.info('Using GPU' if device == 'cuda' else 'Using CPU')
    if device == 'cuda':
        net = net.cuda()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    # load checkpoint if exists
    if os.path.exists(FLAGS.checkpoint):
        ckpt = torch.load(FLAGS.checkpoint)
        net.load_state_dict(ckpt['net'])
        logging.info('Loaded model %s with accuracy %.3f', FLAGS.checkpoint,
                     ckpt['acc'])
    else:  # Train vanilla model
        if input('No checkpoint found, continue? y/[n]') != 'y':
            return -1
        net.train()
        with trange(1, FLAGS.nb_epochs + 1, desc='Training',
                    unit='Epoch') as t:
            for epoch in t:
                train_loss = 0.
                for x, y in data.train:
                    x, y = x.to(device), y.to(device)
                    if FLAGS.adv_train:
                        # Replace clean example with adversarial example for adversarial training
                        x = projected_gradient_descent(net, x, FLAGS.eps, 0.01,
                                                       40, np.inf)
                        # Stop backward from entering the graph that created the adv example (as in Example #2)
                        x = x.clone().detach()
                    optimizer.zero_grad()
                    loss = loss_fn(net(x), y)
                    loss.backward()
                    optimizer.step()
                    train_loss += loss.item()
                t.set_description('Train Loss=%.3f' % train_loss)

    # Evaluate on clean and adversarial data
    net.eval()
    report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
    for x, y in tqdm(data.test, unit='Samples', desc='Testing'):
        x, y = x.to(device), y.to(device)
        x_fgm = fast_gradient_method(net, x, FLAGS.eps, np.inf)
        x_pgd = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40, np.inf)
        _, y_pred = net(x).max(1)  # model prediction on clean examples
        _, y_pred_fgm = net(x_fgm).max(1)  # model prediction on FGM adversarial examples
        _, y_pred_pgd = net(x_pgd).max(1)  # model prediction on PGD adversarial examples
        report.nb_test += y.size(0)
        report.correct += y_pred.eq(y).sum().item()
        report.correct_fgm += y_pred_fgm.eq(y).sum().item()
        report.correct_pgd += y_pred_pgd.eq(y).sum().item()
    print('test acc on clean examples (%): {:.3f}'.format(
        report.correct / report.nb_test * 100.))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(
        report.correct_fgm / report.nb_test * 100.))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(
        report.correct_pgd / report.nb_test * 100.))

    return 0
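
The checkpoint loaded above is expected to be a dict with a 'net' entry (the model state_dict) and an 'acc' entry. A minimal sketch of how such a file might be written after evaluation; test_acc is a hypothetical accuracy value computed elsewhere:

torch.save({'net': net.state_dict(), 'acc': test_acc}, FLAGS.checkpoint)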