Exemple #1
0
    def plot_manifolds(self):
        """
        Plot manifolds.
        """

        #
        # Plot all classes and adversarial examples in image space for individual classes as well as all classes.
        #

        fit = self.test_codes.shape[0]//25
        test_images = self.test_images.reshape((self.test_images.shape[0], -1))
        manifold_visualization = plot.ManifoldVisualization('tsne', pre_pca=40)
        manifold_visualization.fit(test_images[:fit])
        log('[Testing] computed t-SNE on test images')

        for n in range(self.N_class):
            labels = ['Class %d' % (nn + 1) for nn in range(self.N_class)] + ['Adversarial Examples Class %d' % (n + 1)]
            data = numpy.concatenate((
                test_images[:fit],
                self.perturbation_images[self.perturbation_codes == n]
            ))
            classes = numpy.concatenate((
                self.test_codes[:fit],
                numpy.ones((self.perturbation_images[self.perturbation_codes == n].shape[0])) * 10,
            ))
            plot_file = os.path.join(self.args.plot_directory, 'perturbations_%d' % (n + 1))
            manifold_visualization.visualize(plot_file, data, classes, labels, title='Adversarial Examples Class %d\n(The adversarial examples are projected into the embedding using learned SVRs)' % n)
            log('[Testing] wrote %s' % plot_file)

        labels = ['Class %d' % (n + 1) for n in range(self.N_class)] + ['Adversarial Examples Class %d' % (n + 1) for n in range(self.N_class)]
        data = numpy.concatenate((
            test_images[:fit],
            self.perturbation_images
        ))
        classes = numpy.concatenate((
            self.test_codes[:fit],
            self.perturbation_codes + 10,
        ))
        plot_file = os.path.join(self.args.plot_directory, 'perturbations')
        manifold_visualization.visualize(plot_file, data, classes, labels, title='Adversarial Examples\n(The adversarial examples are projected into the embedding using learned SVRs)')
        log('[Testing] wrote %s' % plot_file)
Exemple #2
0
    def compute_statistics(self):
        """
        Compute statistics based on distances.
        """

        num_attempts = self.perturbations.shape[0]

        perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        perturbations = perturbations.reshape(
            (perturbations.shape[0] * perturbations.shape[1],
             perturbations.shape[2]))
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        probabilities = numpy.swapaxes(self.probabilities, 0, 1)
        probabilities = probabilities.reshape(
            (probabilities.shape[0] * probabilities.shape[1], -1))
        confidences = numpy.max(probabilities, 1)

        perturbation_probabilities = self.test_probabilities[:self.success.
                                                             shape[1]]
        perturbation_probabilities = numpy.repeat(perturbation_probabilities,
                                                  num_attempts,
                                                  axis=0)
        perturbation_confidences = numpy.max(perturbation_probabilities, 1)

        probability_ratios = confidences / perturbation_confidences

        raw_overall_success = success >= 0
        log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

        # For off-manifold attacks this should not happen, but save is save.
        if not numpy.any(raw_overall_success):
            for type in [
                    'raw_success', 'raw_iteration', 'raw_roc',
                    'raw_confidence_weighted_success', 'raw_confidence',
                    'raw_ratios'
            ]:
                self.results[type] = 0
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attack without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory, 'probabilities')
            plot.histogram(plot_file, confidences[raw_overall_success], 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'probability_ratios')
            plot.histogram(plot_file, probability_ratios, 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'test_probabilities')
            plot.histogram(
                plot_file, self.test_probabilities[
                    numpy.arange(self.test_probabilities.shape[0]),
                    self.test_codes], 50)
            log('[Testing] wrote %s' % plot_file)

        y_true = numpy.concatenate(
            (numpy.zeros(confidences.shape[0]),
             numpy.ones(perturbation_confidences.shape[0])))
        y_score = numpy.concatenate((confidences, perturbation_confidences))
        roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

        self.results['raw_roc'] = roc_auc_score
        self.results['raw_confidence_weighted_success'] = numpy.sum(
            confidences[raw_overall_success]) / numpy.sum(
                perturbation_confidences)
        self.results['raw_confidence'] = numpy.mean(
            probabilities[raw_overall_success])
        self.results['raw_ratios'] = numpy.mean(
            probability_ratios[raw_overall_success])
        self.results['raw_success'] = numpy.sum(
            raw_overall_success) / success.shape[0]
        self.results['raw_iteration'] = numpy.average(
            success[raw_overall_success])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
    def attack(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[1] == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

            if self.original_perturbations.shape[1] <= self.args.max_samples and self.original_perturbations.shape[0] <= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
                return
            elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
                if self.original_perturbations.shape[0] == self.args.max_attempts:
                    self.test_theta = self.test_theta[self.original_perturbations.shape[1]:]
                    self.test_fonts = self.test_fonts[self.original_perturbations.shape[1]:]
                    self.test_classes = self.test_classes[self.original_perturbations.shape[1]:]
                    self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                    self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
                elif self.original_perturbations.shape[1] == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                    self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):
            if i*batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_fonts = self.test_fonts[i_start: i_end]
            batch_classes = self.test_classes[i_start: i_end]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

            batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
            batch_inputs = common.torch.as_variable(self.test_theta[i_start: i_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            t = 0
            # This basically allows to only optimize over theta, keeping the font/class code fixed.
            self.model.decoder.set_code(batch_code)

            while True and t < self.args.max_attempts:
                attack = self.setup_attack(batch_inputs, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(objective)
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                perturbations = perturbations.reshape(batch_inputs.size())  # hack for when only one dimensional latent space is used!
                self.perturbations[t][i_start: i_end] = perturbations + batch_inputs.cpu().numpy()
                self.success[t][i_start: i_end] = success
                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
                self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
Exemple #4
0
    def attack(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            if self.test_images.shape[3] > 1:
                assert len(self.original_perturbations.shape) == 5
            else:
                assert len(self.original_perturbations.shape) == 4
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[1] == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_images.shape[1]
            assert self.original_perturbations.shape[3] == self.test_images.shape[2]#

            if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
                return
            elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
                if self.original_perturbations.shape[0] == self.args.max_attempts:
                    self.test_images = self.test_images[self.original_perturbations.shape[1]:]
                    self.test_codes = self.test_codes[self.original_perturbations.shape[1]:]
                    self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
                elif self.original_perturbations.shape[1] == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

        # can't squeeze here!
        if self.test_images.shape[3] > 1:
            self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2], self.test_images.shape[3]))
        else:
            self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2]))
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):  # self.test_images.shape[0]
            if i*batch_size == self.args.max_samples:
                break
                
            i_start = i*batch_size
            i_end = min((i+1)*batch_size, self.args.max_samples)

            batch_images = common.torch.as_variable(self.test_images[i_start: i_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(numpy.array(self.test_codes[i_start: i_end]), self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(objective)
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                self.perturbations[t][i_start: i_end] = numpy.squeeze(numpy.transpose(perturbations + batch_images.cpu().numpy(), (0, 2, 3, 1)))
                self.success[t][i_start: i_end] = success

                # IMPORTANT: The adversarial examples are not considering whether the classifier is
                # actually correct to start with.

                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
                self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
    def attack(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.training is False

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples / batch_size))

        # can't squeeze here!
        if self.test_images.shape[3] > 1:
            self.perturbations = numpy.zeros(
                (self.args.max_attempts, self.args.max_samples,
                 self.test_images.shape[1], self.test_images.shape[2],
                 self.test_images.shape[3]))
        else:
            self.perturbations = numpy.zeros(
                (self.args.max_attempts, self.args.max_samples,
                 self.test_images.shape[1], self.test_images.shape[2]))
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1
        self.probabilities = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.N_class))

        for i in range(num_batches):  # self.test_images.shape[0]
            if i * batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_images = numpy.random.randint(0,
                                                255,
                                                size=[batch_size] +
                                                self.test_images.shape[1:])
            batch_images = common.torch.as_variable(batch_images,
                                                    self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            batch_classes = common.torch.as_variable(
                numpy.random.randint(0,
                                     self.N_class - 1,
                                     size=(batch_images.size(0))),
                self.args.use_gpu)

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                self.perturbations[t][i_start:i_end] = numpy.squeeze(
                    numpy.transpose(perturbations + batch_images.cpu().numpy(),
                                    (0, 2, 3, 1)))
                self.success[t][i_start:i_end] = success
                self.probabilities[t][i_start:i_end] = probabilities
                # IMPORTANT: The adversarial examples are not considering whether the classifier is
                # actually correct to start with.

                t += 1

            log('[Attack] %d: completed' % i)

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
        utils.write_hdf5(self.args.probabilities_file, self.probabilities)
        log('[Attack] wrote %s' % self.args.probabilities_file)
Exemple #6
0
    def attack(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(
                self.args.success_file):
            self.original_perturbations = utils.read_hdf5(
                self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[
                0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[
                1] == self.original_success.shape[1]

            if self.original_perturbations.shape[
                    1] <= self.args.max_samples and self.original_perturbations.shape[
                        0] <= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more'
                    % (self.original_perturbations.shape[0],
                       self.original_perturbations.shape[1]))
                return
            elif self.original_perturbations.shape[
                    0] == self.args.max_attempts or self.original_perturbations.shape[
                        1] == self.args.max_samples:
                if self.original_perturbations.shape[
                        0] == self.args.max_attempts:
                    self.test_images = self.test_images[
                        self.original_perturbations.shape[1]:]
                    self.test_codes = self.test_codes[
                        self.original_perturbations.shape[1]:]
                    self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[
                        1]
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations'
                        % (self.original_perturbations.shape[0],
                           self.original_perturbations.shape[1],
                           self.args.max_samples))
                elif self.original_perturbations.shape[
                        1] == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[
                        0]
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts'
                        % (self.original_perturbations.shape[0],
                           self.original_perturbations.shape[1],
                           self.args.max_attempts))

        self.perturbations = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.args.N_theta))
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples / batch_size))

        for i in range(num_batches):
            if i * batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_classes = common.torch.as_variable(
                self.test_codes[i_start:i_end], self.args.use_gpu)
            batch_theta = common.torch.as_variable(
                numpy.zeros((i_end - i_start, self.args.N_theta),
                            dtype=numpy.float32), self.args.use_gpu)
            if self.args.N_theta > 4:
                batch_theta[:, 4] = 1
            batch_images = common.torch.as_variable(
                self.test_images[i_start:i_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            self.model.decoder.set_image(batch_images)
            #output_images = self.model.decoder.forward(batch_theta)
            #error = torch.sum(torch.abs(output_images - batch_images))
            #error = error.item()
            #print(error)
            #from matplotlib import pyplot
            #output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
            #pyplot.imshow(output_images[0])
            #pyplot.show()

            t = 0
            while True and t < self.args.max_attempts:
                attack = self.setup_attack(batch_theta, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                perturbations = perturbations.reshape(batch_theta.size(
                ))  # hack for when only one dimensional latent space is used!
                self.perturbations[t][
                    i_start:i_end] = perturbations + batch_theta.cpu().detach(
                    ).numpy()
                self.success[t][i_start:i_end] = success
                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate(
                    (self.original_perturbations, self.perturbations),
                    axis=concatenate_axis)
                self.success = numpy.concatenate(
                    (self.original_success, self.success),
                    axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)