Beispiel #1
0
 def reset(self):
     """Return the model to its starting state.

     The listener distribution is set back to the literal listener, the
     speaker distribution is re-derived from it, and both the recursion
     depth counter and the accumulated running time are zeroed.
     """
     literal = self.listener_probs_lit
     self.listener_probs = literal
     # softmax runs along axis 0 of the listener table; the swap yields a
     # table indexed [s][u].
     normalised = softmax(literal, axis=0, theta=self.theta)
     self.speaker_probs = normalised.swapaxes(0, 1)
     self.cur_depth = 0
     self.running_time = 0
Beispiel #2
0
 def __init__(self,
              listener_probs_lit=None,
              listener_prior=None,
              theta=1.0,
              items=None,
              vocab=None,
              default_depth=1):
     """Set up the literal listener, the initial speaker and the prior.

     Args:
         listener_probs_lit: Literal listener table indexed [u][s]
             (axis 0 is counted as utterances, axis 1 as states).
             Ignored when ``items`` is given.
         listener_prior: Prior over states. Defaults to a uniform
             distribution over ``num_states``.
         theta: Passed through to ``softmax`` when deriving the speaker.
         items: If given, the literal listener table and the vocabulary
             are both derived from it via ``binary_listener_probs`` and
             override the corresponding arguments.
         vocab: Vocabulary stored on the instance (overridden when
             ``items`` is given).
         default_depth: Depth used by callers that do not specify one.

     Raises:
         ValueError: If neither ``items`` nor ``listener_probs_lit`` is
             supplied.
     """
     if items is not None:
         listener_probs_lit, vocab = binary_listener_probs(items)
     if listener_probs_lit is None:
         # Fail early with a clear message instead of an opaque error
         # from softmax / ``.shape`` further down.
         raise ValueError(
             "either 'items' or 'listener_probs_lit' must be provided")

     self.vocab = vocab
     self.items = items
     self.listener_probs_lit = listener_probs_lit
     self.listener_probs = self.listener_probs_lit
     self.theta = theta
     self.speaker_probs = softmax(self.listener_probs,
                                  axis=0,
                                  theta=self.theta).swapaxes(0, 1)  #[s][u]
     self.num_states = listener_probs_lit.shape[1]
     self.num_utterances = listener_probs_lit.shape[0]
     self.default_depth = default_depth
     self.cur_depth = 0
     self.running_time = 0
     if listener_prior is None:
         # Uniform prior: every state gets probability 1 / num_states.
         self.listener_prior = np.ones(self.num_states) / self.num_states
     else:
         self.listener_prior = listener_prior
    def optimize(self, x):
        """Score a candidate perturbation.

        Applies the perturbation encoded by ``x``, runs the classifier on
        the preprocessed result and returns the probability it assigns to
        the originally predicted class (``self.pred_orig``).
        """
        perturbed = self.perturb(x)

        batch = torch.from_numpy(self.preprocess(perturbed)).float().unsqueeze(0)
        logits = self.model.classifier(Variable(batch).to(self.device))
        class_probs = ut.softmax(logits.data.cpu().numpy()[0])

        return class_probs[self.pred_orig]
Beispiel #4
0
 def forward(self, data_row):
     """Run one input row through the hidden layer and the softmax output."""
     # Hidden layer: affine map followed by the symmetric sigmoid.
     hidden_input = np.matmul(data_row, self.mid_weights) + self.mid_bias
     hidden = sym_sigmoid(hidden_input).flatten()
     # Output layer: affine map whose result is normalised by softmax.
     logits = np.dot(self.out_weights.T, hidden) + self.out_bias.flatten()
     return softmax(logits)
    def callback(self, x, convergence):
        """Early-stopping hook evaluated on the current candidate ``x``.

        Publishes the predicted class and its probability through the
        module-level globals ``pred_adv`` and ``prob_adv``. Returns True
        when the prediction differs from ``self.pred_orig`` with a
        probability of at least 0.9; otherwise returns None.
        ``convergence`` is accepted but not used.
        """
        global pred_adv, prob_adv
        perturbed = self.perturb(x)

        batch = torch.from_numpy(self.preprocess(perturbed)).float().unsqueeze(0)
        logits = self.model.classifier(Variable(batch).to(self.device))
        class_probs = ut.softmax(logits.data.cpu().numpy()[0])

        pred_adv = np.argmax(class_probs)
        prob_adv = class_probs[pred_adv]
        if pred_adv != self.pred_orig and prob_adv >= 0.9:
            return True
        return None
Beispiel #6
0
    def forward(self, x, t):
        """Compute the softmax cross-entropy of ``x`` against ``t``.

        Stores the targets (``self.t``), the softmax output (``self.y``)
        and the loss (``self.E``) on the instance. On first use, ``l1`` and
        ``l2`` are created as single-element float32 arrays holding -5.0
        using the array module matching ``x``. When ``self.variational`` is
        True both are also registered in ``self.params``.
        """
        needs_init = self.l1 is None and self.l2 is None
        if needs_init:
            xp = cuda.get_array_module(x)
            self.l1 = xp.array([-5.0]).astype(xp.float32)
            self.l2 = xp.array([-5.0]).astype(xp.float32)

        self.t = t
        self.y = ut.softmax(x)
        self.E = ut.softmax_cross_entropy(x, t)
        # The loss is returned unscaled; l1 / l2 do not enter it here.
        loss = self.E

        if self.variational is True:
            for prefix, value in (('l1_', self.l1), ('l2_', self.l2)):
                self.params[prefix + self.name] = value
        return loss
Beispiel #7
0
 def run(self, depth=None):
     """Alternate listener and speaker updates ``depth`` times.

     Falls back to ``self.default_depth`` when ``depth`` is None. Each
     round increments ``self.cur_depth``; the elapsed wall-clock time is
     added to ``self.running_time``. Returns the speaker table.
     """
     started = time.time()
     rounds = self.default_depth if depth is None else depth
     for _ in range(rounds):
         # Listener from speaker: P_l(s | w, a) prop P_s(w | s, a)P(s)
         self.listener_probs = bayes_rule(self.listener_prior,
                                          self.speaker_probs)  #[u][s]
         # Speaker from the freshly updated listener.
         renormalised = softmax(self.listener_probs,
                                axis=0,
                                theta=self.theta)
         self.speaker_probs = renormalised.swapaxes(0, 1)  #[s][u]
         self.cur_depth += 1
     self.running_time += time.time() - started
     return self.speaker_probs
Beispiel #8
0
    def train_network(self, epochs):
        """Train the two-layer network for ``epochs`` passes over the data.

        Each pass walks ``self.training_data`` / ``self.training_true`` one
        row at a time and applies a gradient step after every row. After
        each pass the training cross-entropy is printed.
        """
        for n in range(epochs):
            for data_row, true_value in zip(self.training_data,
                                            self.training_true):
                # Mini-batch selection is disabled; rows are processed one
                # at a time in their stored order. Former batching code:
                # batch_idx = np.random.choice(len(self.training_data), self.batch_size, replace=False)
                # data_batch = self.training_data[i:i + self.batch_size]
                # true_batch = self.training_true[i:i + self.batch_size]

                # Calculate the output of the middle layer of the network
                update = sym_sigmoid(
                    np.matmul(data_row, self.mid_weights) + self.mid_bias)
                update = update.flatten()

                # Calculate the output of the network
                output = softmax(
                    np.dot(self.out_weights.T, update.T) + self.out_bias)

                # Calculate the error for use in back propagation
                error = output - true_value

                # Update the final layer weights using gradient descent
                out_weights_grad = np.outer(update, error)
                self.out_weights -= (self.alpha * out_weights_grad)

                # Update final layer bias
                # NOTE(review): unlike the weight updates, this step is not
                # scaled by self.alpha — confirm this is intended.
                self.out_bias -= error

                # Update the hidden layer weights & bias (per-row step).
                # NOTE(review): self.out_weights has already been updated
                # above, so the hidden-layer gradient is computed with the
                # new output weights — confirm this is intended.
                sig_dev = dev_sym_sigmoid(update)

                weights_grad = np.outer(
                    data_row.T, sig_dev * np.dot(error, self.out_weights.T))
                bias_grad = np.dot(error, self.out_weights.T) * sig_dev

                # Update weights by the gradient
                self.mid_weights = self.mid_weights - self.alpha * weights_grad.astype(
                    'float64')
                self.mid_bias = self.mid_bias - self.alpha * bias_grad.astype(
                    'float64')

            if n % 1 == 0:
                # n % 1 is always 0, so the training cross-entropy is
                # printed after every epoch.
                print(self.calc_cross_entropy(calc_on='training'))
def solve_bandit(bandit, timesteps=1000, step_size=0.01):
    '''
    Use the gradient-bandit method to try to extract the maximum reward from a
    k-arm bandit over a certain number of timesteps.

    Each step samples an arm from the softmax of the per-arm preferences,
    observes a reward, and moves the preferences using the running average
    reward as the baseline: the chosen arm's preference moves in the
    direction of (reward - baseline), all other arms move in the opposite
    direction.

    :param bandit: object exposing ``k``, ``crank_arm(i)`` and
        ``max_possible_expected_reward()``
    :param timesteps: number of arm pulls to perform
    :param step_size: learning rate applied to every preference update
    :return: tuple ``(average_reward, reward_ratio)`` where ``reward_ratio``
        is the average reward as a percentage of the maximum possible
        expected reward, floored at 0
    '''
    n_steps = 0
    average_reward = 0
    utility = np.zeros(bandit.k)
    for _ in range(timesteps):
        policy = softmax(utility)
        arm_i = choose_action(policy)
        reward = bandit.crank_arm(arm_i)
        # update utility
        scaled_advantage = step_size * (reward - average_reward)
        for i in range(bandit.k):
            if i == arm_i:
                utility[i] += scaled_advantage * (1 - policy[i])
            else:
                # Non-selected arms move AGAINST the advantage; adding here
                # would push every preference the same way.
                utility[i] -= scaled_advantage * policy[i]
        # update average reward (incremental mean)
        n_steps += 1
        average_reward += 1/n_steps * (reward - average_reward)
    reward_ratio = max(average_reward / bandit.max_possible_expected_reward() * 100, 0)
    return average_reward, reward_ratio
    def pixel_attack(self, d, shape, pred_orig, img):
        """Search for a ``d``-pixel perturbation and evaluate the result.

        When ``d > 0`` differential evolution minimises ``self.optimize``
        over ``d`` (row, column, value) triples, with ``self.callback`` as
        its callback; otherwise ``img`` is used unchanged. The resulting
        image is classified and the summed KL term of the encoder posterior
        against ``self.model.z_prior`` is computed.

        Returns:
            ``(kl_z, fake_y, prob_fake_y, inp)``: the summed KL value, the
            top predicted class, its probability, and the model input.
        """
        self.pred_orig = pred_orig
        # One (row, column, value) triple of bounds per perturbed pixel.
        pixel_bounds = [(0, shape[0] - 1), (0, shape[1] - 1), (0, 255)]
        bounds = pixel_bounds * d
        if d > 0:
            search = differential_evolution(self.optimize, bounds, maxiter=600, popsize=10,
                                            tol=1e-5, callback=self.callback)
            candidate = self.perturb(search.x)
        else:
            candidate = img
        batch = torch.from_numpy(self.preprocess(candidate)).float().unsqueeze(0)
        inp = Variable(batch).to(self.device)
        out = self.model.classifier(inp)
        prob = ut.softmax(out.data.cpu().numpy())

        # KL divergence between q(z | x, y) and the prior, weighted by the
        # predicted class probabilities and summed to a scalar.
        adv_y = F.softmax(out, dim=-1).float()
        qm, qv = self.model.encoder(inp, adv_y)
        prior_mean, prior_var = self.model.z_prior[0], self.model.z_prior[1]
        kl_z = torch.sum(adv_y * ut.kl_normal(qm, qv, prior_mean, prior_var))
        fake_y = torch.topk(adv_y, 1, dim=-1)[1].item()

        return kl_z, fake_y, prob[0][fake_y], inp
Beispiel #11
0
 def forward_pass_num(self, X, W1, W2):
     """Forward pass using the supplied weights ``W1`` / ``W2``.

     The biases are taken from the instance (``self.b1``, ``self.b2``).
     Returns the ReLU hidden activations and the softmax output.
     """
     hidden_scores = W1.dot(X) + self.b1
     hidden = np.maximum(0, hidden_scores)  # ReLU
     output_scores = W2.dot(hidden) + self.b2
     return hidden, softmax(output_scores)
Beispiel #12
0
 def compute_accuracy_sum(self, y, t):
     """Count the rows where the argmax of softmax(y) equals the argmax of t."""
     xp = cuda.get_array_module(y)
     predicted = ut.softmax(y).argmax(axis=1)
     expected = t.argmax(axis=1)
     return xp.sum(predicted == expected, axis=0)
Beispiel #13
0
            t += 1

    # Parse the i-th prediction string: drop the brackets and turn spaces
    # into commas so the values can be split on ','.
    str_pred = predictions[i]
    str_pred = str_pred.replace('[', '')
    str_pred = str_pred.replace(']', '')
    str_pred = str_pred.replace(' ', ',')
    # Spaces are already commas at this point, so this only strips other
    # leading whitespace (e.g. tabs or newlines).
    str_pred = str_pred.lstrip()

    tmp = str_pred.split(',')

    # Copy the first three non-empty tokens into row i; consecutive
    # separators produce empty tokens, which are skipped.
    t = 0
    for j in range(len(tmp)):
        if (t < 3) and (tmp[j] != ''):
            predicted_probabilities_array[i][t] = tmp[j]
            t += 1
    # Normalise the three parsed values with softmax.
    predicted_probabilities_array[i] = softmax(
        predicted_probabilities_array[i])

    # For each distance threshold (20, 30, 40): below the threshold take
    # the attack probabilities, otherwise the predicted probabilities.
    if (distances_str[i] < 20):
        probabilities_array_071[i] = probabilities_attack_array[i]
    else:
        probabilities_array_071[i] = predicted_probabilities_array[i]

    if (distances_str[i] < 30):
        probabilities_array_072[i] = probabilities_attack_array[i]
    else:
        probabilities_array_072[i] = predicted_probabilities_array[i]

    if (distances_str[i] < 40):
        probabilities_array_075[i] = probabilities_attack_array[i]
    else:
        probabilities_array_075[i] = predicted_probabilities_array[i]
Beispiel #14
0
                                        model,
                                        batch_size=2,
                                        max_iterations=1000,
                                        confidence=0)
        # Draw one targeted sample starting at index k.
        inputs, targets = generate_data(data,
                                        samples=1,
                                        targeted=True,
                                        start=k,
                                        inception=False)

        adv = attack.attack(inputs, targets)

        for i in range(len(adv)):

            # Softmaxed model output for the clean input, written as three
            # comma-separated columns.
            inp = model.model.predict(inputs[i:i + 1])
            inp = softmax(inp)
            inp_str = str(inp[:, 0]) + ',' + str(inp[:, 1]) + ',' + str(
                inp[:, 2]) + '\n'
            results_labels_probas.write(inp_str)

            # Same for the adversarial example.
            adver = model.model.predict(adv[i:i + 1])
            adver = softmax(adver)
            adver_str = str(adver[:, 0]) + ',' + str(adver[:, 1]) + ',' + str(
                adver[:, 2]) + '\n'
            results_probas.write(adver_str)

            # The attack counts as successful when the predicted class changes.
            if (np.argmax(inp) != np.argmax(adver)):
                success += 1

            # Euclidean distance between the two softmaxed output vectors
            # (computed on the model outputs, not on the input images).
            distortions.append(np.sum((adver - inp)**2)**.5)
Beispiel #15
0
 def forward(self, x, t):
     """Return the softmax cross-entropy of ``x`` against ``t``.

     The targets and the softmax output are kept on the instance as
     ``self.t`` and ``self.y``.
     """
     self.t = t
     probabilities = ut.softmax(x)
     self.y = probabilities
     loss = ut.softmax_cross_entropy(x, t)
     return loss
Beispiel #16
0
def _mean_or_nan(values):
    """Return the mean of *values*, or NaN when the sequence is empty."""
    return statistics.mean(values) if len(values) > 0 else float('nan')


def _stdev_or_nan(values):
    """Return the sample SD of *values*, or NaN with fewer than two points."""
    return statistics.stdev(values) if len(values) > 1 else float('nan')


def _percentage_or_na(value):
    """Format *value* with two decimals, or 'N/A' when it is None."""
    return "N/A" if value is None else "{:.2f}".format(value)


def _label_conditioned_kl(model, flat_img, label, device):
    """Return the summed KL term of the encoder posterior for one-hot *label*."""
    y_prob = torch.tensor([0] * 10).float().view(1, -1).to(device)
    y_prob[0][label] = 1
    qm, qv = model.encoder(flat_img, y_prob)
    kl_z_all = y_prob * ut.kl_normal(
        qm, qv, model.z_prior[0],
        model.z_prior[1])  # kl_z_all shape = [batch_size * y_dim]
    return torch.sum(kl_z_all)


def evaluate_attack_mnist(model,
                          device,
                          attack,
                          eps=0,
                          norm=2,
                          num_SD=1.8,
                          num_summed_SD=0.75,
                          num_false=1,
                          num_imgs=10000,
                          print_every=100,
                          stop_idx=(),
                          CheckAll=False,
                          use_printouts=False,
                          get_psi=False):
    '''
    Run an attack over the MNIST test set and report accuracy and detection statistics.

    For every image the (possibly attacked) input is classified, the KL term
    of the encoder posterior is evaluated for each of the 10 labels, and the
    deviations from the stored per-class statistics ('deltas_mnist.npy',
    'psis_mnist.npy') decide whether the image is flagged. Summary statistics
    are printed, two histograms are shown, and up to 25 images are saved.

    :param model (nn.Module): The model to be used for testing. It is expected to have the following attributes:
        .name (str): Name of the model
        .encoder (nn.Module): Neural network with callable, implemented forward function -- must expect inputs of size 784
        .classifier (nn.Module): Neural network with callable, implemented forward function -- must expect inputs of size 784
        .z_prior (torch.nn.Parameter): length-2 tuple that holds the z_prior mean(s) in [0] and z_prior variances in [1]
    :param device (str): either 'cuda' or 'cpu'
    :param attack (str): Choice of 'noise', 'fgm', or 'pixel'
    :param eps (int or float): Strength of attack. Note that pixel attacks only takes integer values
    :param norm (int or float): Either 2 or np.inf. Used only with fgm attacks
    :param num_SD (float): sigma_detect threshold value
    :param num_summed_SD (float): Sigma_detect threshold value
    :param num_false (int): Number of delta values for a given image that must exceed sigma_detect to be detected
    :param num_imgs (int): Number of images to iterate over through the test dataset
    :param print_every (int): How often to print a progress report
    :param stop_idx (sequence of ints): Specific indexes within the test dataset to pause at with detailed printouts
    :param CheckAll (bool): If true, will pause at every image within the test dataset
    :param use_printouts (bool): If true, will pause at every anomalous / successful adversarial image in the dataset
    :param get_psi (bool): If true (and eps = 0), will evaluate and save psi values across the dataset
            as 'psis_mnist.npy'
    :raises AssertionError: if eps > 0 and attack is not one of 'fgm', 'pixel', 'noise'
    '''

    # Load MNIST test dataset
    testload = torch.utils.data.DataLoader(
        datasets.MNIST("data/mnist",
                       train=False,
                       download=True,
                       transform=transforms.Compose([
                           transforms.Resize(28),
                           transforms.ToTensor(),
                           transforms.Normalize([0.5], [0.5])
                       ])))
    # NOTE(review): pixels are read from the dataset attribute and rescaled
    # by 1/255 here; the transform pipeline above does not appear to be
    # involved on this path — confirm.
    x_test = testload.dataset.test_data.to(device).reshape(
        -1, 784).float()[:num_imgs] / 255
    y_test = testload.dataset.test_labels.to(device)[:num_imgs]
    print("y_test shape: {}".format(y_test.shape))
    total_num = len(x_test)
    print("Total length of x_test dataset: {}".format(total_num))

    # Load model in eval mode
    model.eval()

    # Load KL Data; both tables stay all-zero when their file is missing.
    KL_Classes_Stats = np.zeros((10, 10, 2))
    if os.path.exists('deltas_mnist.npy'):
        KL_Classes_Stats = np.load('deltas_mnist.npy')
    else:
        print(
            "Warning: No deltas_mnist file to load. Make sure you run determine_deltas_mnist first!"
        )
    KL_Summed_SD_Stats = np.zeros((10, 2))
    if os.path.exists('psis_mnist.npy'):
        KL_Summed_SD_Stats = np.load('psis_mnist.npy')
    else:
        print(
            "Warning: No psis_mnist file to load. Make sure you run with get_psi=True first!"
        )

    # Create vectors to hold values
    Max_Delta_KL_z_adv = []
    Summed_KL_z_adv = []
    PredictClean = []
    ProbClean = []
    ProbAdv = []
    PredictAdv = []
    IsCorrect = []
    AdvImages = []
    SuccessfulAdvAtkDetected = []
    UnsuccessfulAdvAtkDetected = []
    FalsePositive = []  # only used for d = 0 pixels changed
    AnomalyDetected = []
    KL_Summed_SD = [[] for _ in range(10)]  # one list of psi samples per true class

    # If running Single Pixel attack, load class
    attacker = None
    if attack == 'pixel':
        attacker = OnePixelAttack(model, device)

    for j, (x, y) in enumerate(zip(x_test, y_test)):
        # Load single img
        img = x.view(28, 28).cpu().numpy().copy()
        shape = img.shape

        inp = Variable(x.type(Tensor)).to(device)
        prob_orig = ut.softmax(model.classifier(inp).data.cpu().numpy()[0])
        pred_orig = np.argmax(prob_orig)

        # Append to vectors
        PredictClean.append(pred_orig)
        ProbClean.append(prob_orig)

        # Run specified attack
        if eps > 0:
            if attack == 'fgm':
                adv_img = fast_gradient_method(model.classifier,
                                               x,
                                               eps=eps,
                                               norm=norm,
                                               clip_min=0,
                                               clip_max=1).view(1, -1)
            elif attack == 'noise':
                adv_img = noise(x, eps=eps, clip_min=0, clip_max=1).view(1, -1)
            elif attack == 'pixel':
                _, _, _, adv_img = attacker.pixel_attack(
                    eps, shape, pred_orig, img)
            else:
                raise AssertionError(
                    "Attack must either be 'fgm', 'pixel', or 'noise'")
        else:
            adv_img = x.view(1, -1)
        adv_out = model.classifier(adv_img)
        prob = ut.softmax(adv_out.data.cpu().numpy())
        adv_y = F.softmax(adv_out, dim=-1).float()
        pred_adv = torch.topk(adv_y, 1, dim=-1)[1].item()
        prob_adv = prob[0][pred_adv]

        # Append to vectors
        PredictAdv.append(pred_adv)
        ProbAdv.append(prob_adv)
        AdvImages.append(adv_img.view(1, 28, 28).data)

        # Append to accuracy vector
        IsCorrect.append(int(pred_adv == y))

        # Detailed output is shown for changed / misclassified images (or
        # all images with CheckAll) when printouts are enabled, and always
        # for indexes listed in stop_idx. Evaluated once per image.
        show_details = bool(
            (((pred_orig != pred_adv) or
              (pred_orig != y) or CheckAll) and use_printouts)
            or j in stop_idx)

        #### Test KL z div for all images ####

        # Display adv image only if certain conditions are met
        if show_details:
            fig1 = plt.imshow(adv_img.view(28, 28).cpu().data)
            fig1.axes.get_xaxis().set_visible(False)
            fig1.axes.get_yaxis().set_visible(False)
            plt.title('{} Attack, eps = {}, Adv Prediction: {}'.format(
                attack, eps, pred_adv))
            plt.show()
            fig2 = plt.imshow(x.view(28, 28).cpu().data)
            fig2.axes.get_xaxis().set_visible(False)
            fig2.axes.get_yaxis().set_visible(False)
            plt.title('Clean Image Prediction: {}'.format(pred_orig))
            plt.show()
            print(
                "Test Image i = {}: Original prediction: {}, Adversarially-induced prediction: {}, True Label = {}"
                .format(j, pred_orig, pred_adv, y))

        # Calculate KL div for "expected" (clean or adversarially-induced) label
        flat_adv = adv_img.view(1, -1)
        expected_kl_z = _label_conditioned_kl(model, flat_adv, pred_adv,
                                              device)
        TotalFalse = 0
        Num_SD_Away_Total = 0
        Max_Adv_KL = 0
        for i in range(10):
            kl_z = _label_conditioned_kl(model, flat_adv, i, device)
            delta_mean = KL_Classes_Stats[pred_adv][i][0]
            delta_sd = KL_Classes_Stats[pred_adv][i][1]
            if delta_sd > 0:
                Num_SD_Away = (abs(kl_z - expected_kl_z - delta_mean) /
                               delta_sd).item()
            else:
                # No spread recorded for this class pair: skip the test.
                Num_SD_Away = 0
            if Num_SD_Away > Max_Adv_KL:
                Max_Adv_KL = Num_SD_Away
            Num_SD_Away_Total = Num_SD_Away_Total + Num_SD_Away
            reasonable = Num_SD_Away <= num_SD
            if not reasonable:
                TotalFalse = TotalFalse + 1
            if show_details:
                print(
                    "delta KL_div for y = {}: {:.2f}, Expected delta KL_div: {:.2f}, SD: {:.2f}, Num SD Away: {:.2f}, Reasonable (within {} SD): {}"
                    .format(i, kl_z - expected_kl_z, delta_mean, delta_sd,
                            Num_SD_Away, num_SD, reasonable))

        # Psi test on the summed deviations. As with the per-class test
        # above, a zero SD (e.g. no psi statistics loaded yet) disables the
        # test instead of dividing by zero.
        psi_mean = KL_Summed_SD_Stats[pred_adv][0]
        psi_sd = KL_Summed_SD_Stats[pred_adv][1]
        if psi_sd > 0:
            PositiveDetected = 1 if (Num_SD_Away_Total -
                                     psi_mean) / psi_sd > num_summed_SD else 0
        else:
            PositiveDetected = 0

        # An image is flagged when either the delta test or the psi test fires.
        flagged = int(TotalFalse >= num_false or PositiveDetected)

        if (pred_orig != pred_adv) or (eps == 0 and pred_orig != y):
            Max_Delta_KL_z_adv.append(Max_Adv_KL)
            Summed_KL_z_adv.append(Num_SD_Away_Total)

        if eps == 0 and get_psi:
            # Num_SD_Away_Total is a plain Python number here (built from
            # .item() values), so it has no .item() method of its own.
            KL_Summed_SD[int(y)].append(float(Num_SD_Away_Total))

        if show_details:
            print(
                "Summed SDs across classes: {:.2f}".format(Num_SD_Away_Total))
            print("Mean, SD for Summed SDs: {}".format(
                KL_Summed_SD_Stats[pred_adv]))
            print(
                "Detected: {}, PositiveDetected: {}, Detected as anomaly: {}".
                format(TotalFalse >= num_false, PositiveDetected,
                       bool(flagged)))

        # Append the Detected Value to the appropriate vector
        if eps == 0 and pred_orig == y:  # Then this is a false positive
            FalsePositive.append(flagged)
        if pred_orig == pred_adv and TotalFalse >= num_false:  # Then this is a detection of an unsuccessful adv atk
            UnsuccessfulAdvAtkDetected.append(PositiveDetected)
        if pred_orig != pred_adv and pred_orig == y:  # Then this is a detection of a successful adv atk
            SuccessfulAdvAtkDetected.append(flagged)
        if eps == 0 and pred_orig != y:  # Then this is a detection of anomaly
            AnomalyDetected.append(flagged)

        # Wait for user to press a keystroke before continuing
        if show_details:
            input("Press Enter to continue...")

        # progress print
        if j and j % print_every == 0:
            # The adversarial lists can still be empty (or hold one value)
            # at this point, so the NaN-tolerant helpers are used.
            print("Completed {} of {} Total Examples in MNIST Test Dataset. "
                  "Accuracy = {:.2f}, "
                  "Avg Max Delta Adversarial KL_z = {:.2f}, SD = {:.2f}, "
                  "Avg Summed Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".
                  format(j, total_num,
                         _mean_or_nan(IsCorrect) * 100,
                         _mean_or_nan(Max_Delta_KL_z_adv),
                         _stdev_or_nan(Max_Delta_KL_z_adv),
                         _mean_or_nan(Summed_KL_z_adv),
                         _stdev_or_nan(Summed_KL_z_adv)))

    # After, determine stats
    Accuracy = _mean_or_nan(IsCorrect) * 100
    Avg_Max_Delta_KL_z_adv = _mean_or_nan(Max_Delta_KL_z_adv)
    SD_Max_Delta_KL_z_adv = _stdev_or_nan(Max_Delta_KL_z_adv)
    Avg_Summed_KL_z_adv = _mean_or_nan(Summed_KL_z_adv)
    SD_Summed_KL_z_adv = _stdev_or_nan(Summed_KL_z_adv)

    if eps == 0 and get_psi:
        # Deliberately strict: a class with fewer than two samples raises
        # here rather than writing NaNs into the calibration file.
        KL_Summed_SD_Stats = np.zeros([10, 2])
        for i in range(10):
            KL_Summed_SD_Stats[i][0] = statistics.mean(KL_Summed_SD[i])
            KL_Summed_SD_Stats[i][1] = statistics.stdev(KL_Summed_SD[i])
        # Save file
        np.save('psis_mnist.npy', KL_Summed_SD_Stats)

    FalsePositivePercentage = None
    SuccessfulAdvAtkDetectedPercentage = None
    AnomalyDetectedPercentage = None
    if eps == 0 and len(FalsePositive) > 0:
        FalsePositivePercentage = sum(FalsePositive) / len(x_test) * 100
    if len(SuccessfulAdvAtkDetected) > 0:
        SuccessfulAdvAtkDetectedPercentage = statistics.mean(
            SuccessfulAdvAtkDetected) * 100
    if len(AnomalyDetected) > 0:
        AnomalyDetectedPercentage = statistics.mean(AnomalyDetected) * 100
    if total_num > 0:
        SuccessfulAdvAtkPercentage = 100 * len(
            SuccessfulAdvAtkDetected) / total_num
    else:
        SuccessfulAdvAtkPercentage = float('nan')

    # Print out results to user
    print("Accuracy with eps = {} {} Disturbance: {:.2f}%".format(
        eps, attack, Accuracy))
    print("Percentage of Successful Adversarial Attacks: {:.2f}%".format(
        SuccessfulAdvAtkPercentage))
    print("Average Max Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".format(
        Avg_Max_Delta_KL_z_adv, SD_Max_Delta_KL_z_adv))
    print("Average Summed Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".format(
        Avg_Summed_KL_z_adv, SD_Summed_KL_z_adv))
    if eps == 0:
        print(
            "False Positive Percentage for Clean (eps = {}) data with KL threshold of {}: {}%"
            .format(eps, num_SD, FalsePositivePercentage))
        # The percentage is None when no image fell into the category;
        # it is then reported as N/A instead of failing on the format spec.
        print(
            "Anomaly (incorrectly classified from clean img) Detected Percentage: {}%"
            .format(_percentage_or_na(AnomalyDetectedPercentage)))
    else:
        print("Successful Adversarial Attack Detected Percentage: {}%".format(
            _percentage_or_na(SuccessfulAdvAtkDetectedPercentage)))

    # Histogram of the per-image maximum number of SDs away
    plt.figure(0)
    plt.hist(x=Max_Delta_KL_z_adv, bins='auto', color='#0504aa')
    plt.grid(axis='y')
    plt.xlabel('Max KL z Divergence')
    plt.ylabel('Frequency')
    plt.xlim(0, 5)
    if eps == 0:
        plt.title("Max Clean Delta using {} Model on MNIST".format(model.name))
    else:
        plt.title(
            'Max Adv. Delta using {} Model on MNIST, {} Attack, eps = {}'.
            format(model.name, attack, eps))

    plt.show()

    # Histogram of the per-image summed number of SDs away
    plt.figure(1)
    plt.hist(x=Summed_KL_z_adv, bins='auto', color='#607c8e')
    plt.grid(axis='y')
    plt.xlabel('Summed KL z Divergence')
    plt.ylabel('Frequency')
    plt.xlim(0, 35)
    if eps == 0:
        plt.title("Clean Psi using {} on MNIST".format(model.name))
    else:
        plt.title('Adv. Psi using {} on MNIST, {} Attack, eps = {}'.format(
            model.name, attack, eps))
    plt.show()

    # Save some of the examples of Adv images generated
    save_image(AdvImages[:25],
               "images/{}_attack-eps={}.png".format(attack, eps),
               nrow=5,
               normalize=True)
Beispiel #17
0
 def forward_pass(self, X):
     """Forward pass with the instance's own weights and biases.

     Returns the ReLU hidden activations and the softmax output.
     """
     hidden_scores = self.W1.dot(X) + self.b1
     hidden = np.maximum(0, hidden_scores)  # ReLU
     output_scores = self.W2.dot(hidden) + self.b2
     return hidden, softmax(output_scores)