Example #1
    def test(self):
        """
        Test the model.
        """

        assert self.encoder is not None and self.decoder is not None

        self.encoder.eval()
        log('[Testing] set encoder to eval')
        self.decoder.eval()
        log('[Testing] set decoder to eval')

        if self.args.train_theta_file or self.args.train_reconstruction_file:
            self.test_train()
        self.test_test()
        if self.args.random_file:
            self.test_random()
        if self.args.interpolation_file:
            self.test_interpolation()

        self.results = {
            'reconstruction_error': self.reconstruction_error,
            'code_mean': self.code_mean,
            'code_var': self.code_var,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)

    def loop(self):
        """
        Main loop for training and testing, saving ...
        """

        while self.epoch < self.args.epochs:
            log('[Training] %s' % self.scheduler.report())

            # Note that we test first, to also get the error of the untrained model.
            testing = elapsed(functools.partial(self.test))
            training = elapsed(functools.partial(self.train))
            log('[Training] %gs training, %gs testing' % (training, testing))

            if self.args.early_stopping:
                validation = elapsed(functools.partial(self.validate))
                log('[Training] %gs validation' % validation)

            # Save model checkpoint after each epoch.
            utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
            State.checkpoint(self.model, self.scheduler.optimizer, self.epoch,
                             self.args.state_file + '.%d' % self.epoch)
            log('[Training] %d: checkpoint' % self.epoch)
            torch.cuda.empty_cache()  # necessary?

            # Save statistics and plots.
            if self.args.training_file:
                utils.write_hdf5(self.args.training_file,
                                 self.train_statistics)
                log('[Training] %d: wrote %s' %
                    (self.epoch, self.args.training_file))
            if self.args.testing_file:
                utils.write_hdf5(self.args.testing_file, self.test_statistics)
                log('[Training] %d: wrote %s' %
                    (self.epoch, self.args.testing_file))

            if utils.display():
                self.plot()
            self.epoch += 1  # !

        # Final testing.
        testing = elapsed(functools.partial(self.test))
        log('[Training] %gs testing' % (testing))

        # Save the final model checkpoint.
        utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
        State.checkpoint(self.model, self.scheduler.optimizer, self.epoch,
                         self.args.state_file)
        log('[Training] %d: checkpoint' % self.epoch)

        self.results = {
            'training_statistics': self.train_statistics,
            'testing_statistics': self.test_statistics,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Training] wrote %s' % self.args.results_file)
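
The `loop` above times each phase with `elapsed(functools.partial(...))`; `elapsed` itself is not part of this snippet. Below is a minimal sketch of that pattern, assuming `elapsed` is a simple wall-clock wrapper, with a dummy `train` function standing in for one epoch.

import functools
import time


def elapsed(fn):
    # Assumed behavior of the elapsed() helper used above: run the callable
    # and return the wall-clock time it took, in seconds.
    start = time.time()
    fn()
    return time.time() - start


def train(epoch):
    # Stand-in for one training epoch.
    time.sleep(0.01)


# Bind the arguments with functools.partial, then time the call, mirroring
# `training = elapsed(functools.partial(self.train))` above.
training = elapsed(functools.partial(train, 0))
print('%gs training' % training)
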
Example #3
    def compute_statistics(self):
        """
        Compute statistics based on distances.
        """

        num_attempts = self.perturbations.shape[0]

        perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        perturbations = perturbations.reshape(
            (perturbations.shape[0] * perturbations.shape[1],
             perturbations.shape[2]))
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        probabilities = numpy.swapaxes(self.probabilities, 0, 1)
        probabilities = probabilities.reshape(
            (probabilities.shape[0] * probabilities.shape[1], -1))
        confidences = numpy.max(probabilities, 1)

        perturbation_probabilities = self.test_probabilities[
            :self.success.shape[1]]
        perturbation_probabilities = numpy.repeat(perturbation_probabilities,
                                                  num_attempts,
                                                  axis=0)
        perturbation_confidences = numpy.max(perturbation_probabilities, 1)

        probability_ratios = confidences / perturbation_confidences

        raw_overall_success = success >= 0
        log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

        # For off-manifold attacks this should not happen, but better safe than sorry.
        if not numpy.any(raw_overall_success):
            for type in [
                    'raw_success', 'raw_iteration', 'raw_roc',
                    'raw_confidence_weighted_success', 'raw_confidence',
                    'raw_ratios'
            ]:
                self.results[type] = 0
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attacks without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory, 'probabilities')
            plot.histogram(plot_file, confidences[raw_overall_success], 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'probability_ratios')
            plot.histogram(plot_file, probability_ratios, 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'test_probabilities')
            plot.histogram(
                plot_file, self.test_probabilities[
                    numpy.arange(self.test_probabilities.shape[0]),
                    self.test_codes], 50)
            log('[Testing] wrote %s' % plot_file)

        y_true = numpy.concatenate(
            (numpy.zeros(confidences.shape[0]),
             numpy.ones(perturbation_confidences.shape[0])))
        y_score = numpy.concatenate((confidences, perturbation_confidences))
        roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

        self.results['raw_roc'] = roc_auc_score
        self.results['raw_confidence_weighted_success'] = numpy.sum(
            confidences[raw_overall_success]) / numpy.sum(
                perturbation_confidences)
        self.results['raw_confidence'] = numpy.mean(
            confidences[raw_overall_success])
        self.results['raw_ratios'] = numpy.mean(
            probability_ratios[raw_overall_success])
        self.results['raw_success'] = numpy.sum(
            raw_overall_success) / success.shape[0]
        self.results['raw_iteration'] = numpy.average(
            success[raw_overall_success])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
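
The raw statistics written above reduce to a few numpy/sklearn one-liners. The following is a self-contained sketch on toy arrays (all names are illustrative, not taken from the class): success rate over all attempts, mean iteration count, mean confidence ratio, and the ROC-AUC separating confidences on perturbed inputs from confidences on the corresponding clean inputs.

import numpy
import sklearn.metrics

# Toy data: 5 attack attempts; -1 marks a failed attack.
success = numpy.array([-1, 3, 7, -1, 2])
# Classifier confidence on the perturbed inputs ...
confidences = numpy.array([0.2, 0.9, 0.8, 0.1, 0.7])
# ... and on the corresponding clean inputs.
clean_confidences = numpy.array([0.95, 0.9, 0.85, 0.99, 0.8])

raw_overall_success = success >= 0
raw_success = numpy.sum(raw_overall_success) / success.shape[0]
raw_iteration = numpy.average(success[raw_overall_success])
raw_ratios = numpy.mean((confidences / clean_confidences)[raw_overall_success])

# ROC-AUC: adversarial confidences labeled 0, clean confidences labeled 1,
# exactly as in the y_true/y_score construction above.
y_true = numpy.concatenate((numpy.zeros(confidences.shape[0]),
                            numpy.ones(clean_confidences.shape[0])))
y_score = numpy.concatenate((confidences, clean_confidences))
raw_roc = sklearn.metrics.roc_auc_score(y_true, y_score)

print(raw_success, raw_iteration, raw_ratios, raw_roc)
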
Example #4
    def compute_statistics(self):
        """
        Compute statistics based on distances.
        """

        # That's the basis for all computations, as we only want to consider successful attacks
        # on test samples that were correctly classified.
        raw_overall_success = numpy.logical_and(self.success >= 0, self.accuracy)

        # Important check: for on-manifold attacks this will happen if the manifold is small and the model very accurate!
        if not numpy.any(raw_overall_success):
            for n in range(len(self.norms)):
                for type in ['raw_success', 'raw_iteration', 'raw_average', 'raw_image']:
                    self.results[n][type] = 0
                for type in ['raw_class_success', 'raw_class_average', 'raw_class_image']:
                    self.results[n][type] = numpy.zeros((self.N_class))
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            return

        #
        # Compute nearest neighbor statistics in image space.
        #

        if self.args.plot_directory and self.args.plot_manifolds and utils.display():
            log('[Testing] computing nearest neighbor ...')
            nearest_neighbors_indices = self.compute_nearest_neighbors(self.perturbation_images[raw_overall_success])
            pure_perturbations = self.test_images[raw_overall_success] - self.perturbation_images[raw_overall_success]
            pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
            for k in range(10):
                direction = self.perturbation_images[raw_overall_success] - self.train_images[nearest_neighbors_indices[:, k]]
                direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
                dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
                dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)
                dot_products, dot_product_norms = dot_products[dot_product_norms > 10**-8], dot_product_norms[dot_product_norms > 10**-8]
                dot_products /= dot_product_norms
                dot_products = numpy.degrees(numpy.arccos(dot_products))

                # matplotlib's histogram plots give a weird error if there are NaN values, so a simple check:
                if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                    plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                    plot.histogram(plot_file, dot_products, 100, xmin=numpy.min(dot_products), xmax=numpy.max(dot_products),
                                  title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k,
                                  xlabel='Dot Product', ylabel='Count')
                    log('[Testing] wrote %s' % plot_file)

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attacks without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = self.success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file, x, y,
                    title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        reference_perturbations = numpy.zeros(self.perturbations.shape)
        if self.args.N_theta > 4:
            reference_perturbations[:, 4] = 1

        for n in range(len(self.norms)):
            norm = self.norms[n]
            delta = numpy.linalg.norm(self.perturbations - reference_perturbations, norm, axis=1)
            image_delta = numpy.linalg.norm(self.test_images - self.perturbation_images, norm, axis=1)

            if self.args.plot_directory and utils.display():
                plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
                plot.histogram(plot_file, delta[raw_overall_success], 50, title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                              xlabel='Distance', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

            debug_accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
            debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.success >= 0)
            debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.accuracy)
            log('[Testing] attacked model accuracy: %g' % debug_accuracy)
            log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
            log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

            N_accuracy = numpy.sum(self.accuracy)
            self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy

            self.results[n]['raw_iteration'] = numpy.average(self.success[raw_overall_success])

            self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

            self.results[n]['raw_image'] = numpy.average(image_delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

            raw_class_success = numpy.zeros((self.N_class, self.perturbation_codes.shape[0]), bool)

            self.results[n]['raw_class_success'] = numpy.zeros((self.N_class))

            self.results[n]['raw_class_average'] = numpy.zeros((self.N_class))

            self.results[n]['raw_class_image'] = numpy.zeros((self.N_class))

            for c in range(self.N_class):
                N_samples = numpy.sum(self.accuracy[self.perturbation_codes == c].astype(int))
                if N_samples <= 0:
                    continue

                raw_class_success[c] = numpy.logical_and(raw_overall_success, self.perturbation_codes == c)

                self.results[n]['raw_class_success'][c] = numpy.sum(raw_class_success[c]) / N_samples

                if numpy.any(raw_class_success[c]):
                    self.results[n]['raw_class_average'][c] = numpy.average(delta[raw_class_success[c].astype(bool)])
                    self.results[n]['raw_class_image'][c] = numpy.average(image_delta[raw_class_success[c].astype(bool)])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
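
The per-norm and per-class bookkeeping above follows one pattern: mask the successful attacks on correctly classified samples, compute an L_p distance, and average it overall and per class. A standalone sketch with made-up arrays (names are illustrative only):

import numpy

norms = [1, 2, numpy.inf]
N_class = 3
success = numpy.array([2, -1, 5, 1, -1, 3])              # -1 marks a failed attack
accuracy = numpy.array([True, True, False, True, True, True])
codes = numpy.array([0, 1, 1, 2, 0, 2])                  # true class of each sample
originals = numpy.random.randn(6, 4)                     # clean samples (flattened)
perturbed = originals + 0.1 * numpy.random.randn(6, 4)   # attacked samples

# Only attacks that succeeded on correctly classified samples count.
raw_overall_success = numpy.logical_and(success >= 0, accuracy)

results = {}
for n, norm in enumerate(norms):
    delta = numpy.linalg.norm(perturbed - originals, norm, axis=1)
    results[n] = {
        'raw_success': numpy.sum(raw_overall_success) / numpy.sum(accuracy),
        'raw_average': numpy.average(delta[raw_overall_success]),
        'raw_class_success': numpy.zeros(N_class),
        'raw_class_average': numpy.zeros(N_class),
    }
    for c in range(N_class):
        N_samples = numpy.sum(numpy.logical_and(accuracy, codes == c))
        class_success = numpy.logical_and(raw_overall_success, codes == c)
        if N_samples > 0:
            results[n]['raw_class_success'][c] = numpy.sum(class_success) / N_samples
        if numpy.any(class_success):
            results[n]['raw_class_average'][c] = numpy.average(delta[class_success])

print(results[0]['raw_success'], results[0]['raw_class_success'])
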
    def loop(self):
        """
        Main loop for training and testing, saving ...
        """

        auto_encoder_params = {
            'lr': self.args.base_lr,
            'lr_decay': self.args.base_lr_decay,
            'lr_min': 1e-9,
            'weight_decay': self.args.weight_decay
        }

        classifier_params = {
            'lr': self.args.base_lr,
            'lr_decay': self.args.base_lr_decay,
            'lr_min': 1e-9,
            'weight_decay': self.args.weight_decay
        }

        e = 0
        if os.path.exists(self.args.encoder_file) and os.path.exists(
                self.args.decoder_file) and os.path.exists(
                    self.args.classifier_file):
            state = State.load(self.args.encoder_file)
            log('[Training] loaded %s' % self.args.encoder_file)
            self.encoder.load_state_dict(state.model)
            log('[Training] loaded encoder')

            if self.args.use_gpu and not cuda.is_cuda(self.encoder):
                self.encoder = self.encoder.cuda()

            optimizer = torch.optim.Adam(list(self.encoder.parameters()),
                                         auto_encoder_params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.encoder_scheduler = ADAMScheduler(optimizer,
                                                   **auto_encoder_params)

            state = State.load(self.args.decoder_file)
            log('[Training] loaded %s' % self.args.decoder_file)
            self.decoder.load_state_dict(state.model)
            log('[Training] loaded decoder')

            if self.args.use_gpu and not cuda.is_cuda(self.decoder):
                self.decoder = self.decoder.cuda()

            optimizer = torch.optim.Adam(list(self.decoder.parameters()),
                                         auto_encoder_params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.decoder_scheduler = ADAMScheduler(optimizer,
                                                   **auto_encoder_params)

            state = State.load(self.args.classifier_file)
            log('[Training] loaded %s' % self.args.classifier_file)
            self.classifier.load_state_dict(state.model)
            log('[Training] loaded classifier')

            if self.args.use_gpu and not cuda.is_cuda(self.classifier):
                self.classifier = self.classifier.cuda()

            optimizer = torch.optim.Adam(list(self.classifier.parameters()),
                                         classifier_params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.classifier_scheduler = ADAMScheduler(optimizer,
                                                      **classifier_params)

            e = state.epoch + 1
            self.encoder_scheduler.update(e)
            self.decoder_scheduler.update(e)
            self.classifier_scheduler.update(e)
        else:
            if self.args.use_gpu and not cuda.is_cuda(self.encoder):
                self.encoder = self.encoder.cuda()
            if self.args.use_gpu and not cuda.is_cuda(self.decoder):
                self.decoder = self.decoder.cuda()
            if self.args.use_gpu and not cuda.is_cuda(self.classifier):
                self.classifier = self.classifier.cuda()

            self.encoder_scheduler = ADAMScheduler(
                list(self.encoder.parameters()), **auto_encoder_params)
            self.encoder_scheduler.initialize()  # !

            self.decoder_scheduler = ADAMScheduler(
                list(self.decoder.parameters()), **auto_encoder_params)
            self.decoder_scheduler.initialize()  # !

            self.classifier_scheduler = ADAMScheduler(
                list(self.classifier.parameters()), **classifier_params)
            self.classifier_scheduler.initialize()  # !

        log('[Training] model needs %gMiB' %
            (cuda.estimate_size(self.encoder) / (1024 * 1024)))

        while e < self.args.epochs:
            log('[Training] %s' % self.encoder_scheduler.report())
            log('[Training] %s' % self.decoder_scheduler.report())
            log('[Training] %s' % self.classifier_scheduler.report())

            testing = elapsed(functools.partial(self.test, e))
            training = elapsed(functools.partial(self.train, e))
            log('[Training] %gs training, %gs testing' % (training, testing))

            #utils.remove(self.args.encoder_file + '.%d' % (e - 1))
            #utils.remove(self.args.decoder_file + '.%d' % (e - 1))
            #utils.remove(self.args.classifier_file + '.%d' % (e - 1))
            State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e,
                             self.args.encoder_file + '.%d' % e)
            State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e,
                             self.args.decoder_file + '.%d' % e)
            State.checkpoint(self.classifier,
                             self.classifier_scheduler.optimizer, e,
                             self.args.classifier_file + '.%d' % e)

            log('[Training] %d: checkpoint' % e)
            torch.cuda.empty_cache()  # necessary?

            # Save statistics and plots.
            if self.args.training_file:
                utils.write_hdf5(self.args.training_file,
                                 self.train_statistics)
                log('[Training] %d: wrote %s' % (e, self.args.training_file))
            if self.args.testing_file:
                utils.write_hdf5(self.args.testing_file, self.test_statistics)
                log('[Training] %d: wrote %s' % (e, self.args.testing_file))

            #if utils.display():
            #    self.plot()

            e += 1  # !

        testing = elapsed(functools.partial(self.test, e))
        log('[Training] %gs testing' % (testing))

        #utils.remove(self.args.encoder_file + '.%d' % (e - 1))
        #utils.remove(self.args.decoder_file + '.%d' % (e - 1))
        #utils.remove(self.args.classifier_file + '.%d' % (e - 1))
        State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e,
                         self.args.encoder_file)
        State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e,
                         self.args.decoder_file)
        State.checkpoint(self.classifier, self.classifier_scheduler.optimizer,
                         e, self.args.classifier_file)

        self.results = {
            'training_statistics': self.train_statistics,
            'testing_statistics': self.test_statistics,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Training] wrote %s' % self.args.results_file)
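
The resume logic at the top of `loop` boils down to: if checkpoint files exist, restore model and optimizer state and continue from the stored epoch, otherwise start fresh; after every epoch, write a new checkpoint. Here is a minimal sketch of that flow using plain `torch.save`/`torch.load` instead of the `State` helper, which is not shown in this snippet; the file name is made up.

import os
import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
state_file = 'classifier.pth.tar'  # illustrative path, not from the snippet

start_epoch = 0
if os.path.exists(state_file):
    # Resume: restore model and optimizer state and continue with the next epoch.
    state = torch.load(state_file)
    model.load_state_dict(state['model'])
    optimizer.load_state_dict(state['optimizer'])
    start_epoch = state['epoch'] + 1

for epoch in range(start_epoch, 5):
    # ... one epoch of testing and training would go here ...
    # Checkpoint after every epoch so training can be resumed.
    torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }, state_file)
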
Example #6
    def compute_latent_statistics(self):
        """
        Compute latent statistics.
        """

        N_class = numpy.max(self.test_codes) + 1
        num_attempts = self.perturbations.shape[0]

        perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        perturbations = perturbations.reshape(
            (perturbations.shape[0] * perturbations.shape[1],
             perturbations.shape[2]))
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        accuracy = numpy.repeat(self.accuracy, num_attempts, axis=0)
        # Raw success is the basis for all statistics, as we only consider those
        # attacks that are successful and for which the classifier was originally correct.
        raw_overall_success = numpy.logical_and(success >= 0, accuracy)

        # For off-manifold attacks this should not happen, but better safe than sorry.
        if not numpy.any(raw_overall_success):
            for n in range(len(self.norms)):
                for type in [
                        'raw_success', 'raw_iteration', 'raw_average',
                        'raw_latent'
                ]:
                    self.results[n][type] = 0
                for type in [
                        'raw_class_success', 'raw_class_average',
                        'raw_class_latent'
                ]:
                    self.results[n][type] = numpy.zeros((N_class))
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        perturbation_images = numpy.repeat(self.perturbation_images,
                                           num_attempts,
                                           axis=0)
        perturbation_codes = numpy.repeat(self.perturbation_codes,
                                          num_attempts,
                                          axis=0)

        #
        # Compute nearest neighbors for perturbations and test images,
        # to backproject them into the latent space.
        # Also compute the dot product between perturbations and a local
        # plane approximation based on the three nearest neighbors.
        #

        log('[Testing] computing nearest neighbor ...')
        nearest_neighbors_indices = self.compute_nearest_neighbors(
            perturbation_images)
        nearest_neighbors = self.train_theta[nearest_neighbors_indices[:, 0]]
        perturbation_nearest_neighbor_indices = self.compute_nearest_neighbors(
            perturbations)
        perturbation_nearest_neighbor = self.train_theta[
            perturbation_nearest_neighbor_indices[:, 0]]

        # Compute statistics over the perturbation with respect to the plane
        # defined by the three nearest neighbors of the corresponding test sample.
        if self.args.plot_directory and self.args.plot_manifolds and utils.display():
            pure_perturbations = perturbations[
                raw_overall_success] - perturbation_images[raw_overall_success]
            pure_perturbations_norm = numpy.linalg.norm(pure_perturbations,
                                                        ord=2,
                                                        axis=1)
            for k in range(10):
                direction = perturbation_images[
                    raw_overall_success] - self.train_images[
                        nearest_neighbors_indices[:, k][raw_overall_success]]
                direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
                dot_products = numpy.einsum('ij,ij->i', direction,
                                            pure_perturbations)
                dot_product_norms = numpy.multiply(pure_perturbations_norm,
                                                   direction_norm)
                dot_product_norms[dot_product_norms == 0] = 1
                dot_products /= dot_product_norms
                dot_products = numpy.degrees(numpy.arccos(dot_products))

                # matplotlib's histogram plots give a weird error if there are NaN values, so a simple check:
                if dot_products.shape[0] > 0 and not numpy.any(
                        dot_products != dot_products):
                    plot_file = os.path.join(self.args.plot_directory,
                                             'dot_products_nn%d' % k)
                    plot.histogram(
                        plot_file, dot_products, 100,
                        title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k,
                        xlabel='Dot Product (Between Normalized Vectors)',
                        ylabel='Count')
                    log('[Testing] wrote %s' % plot_file)

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attacks without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        for n in range(len(self.norms)):
            norm = self.norms[n]
            delta = numpy.linalg.norm(perturbation_images - perturbations,
                                      norm,
                                      axis=1)
            latent_delta = numpy.linalg.norm(nearest_neighbors -
                                             perturbation_nearest_neighbor,
                                             norm,
                                             axis=1)

            if self.args.plot_directory and utils.display():
                plot_file = os.path.join(self.args.plot_directory,
                                         'distances_l%g' % norm)
                plot.histogram(
                    plot_file, delta[raw_overall_success], 50,
                    title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                    xlabel='Distance', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

            #debug_accuracy = numpy.sum(accuracy) / accuracy.shape[0]
            #debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(success >= 0)
            #debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(accuracy)
            #log('[Testing] attacked model accuracy: %g' % debug_accuracy)
            #log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
            #log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

            N_accuracy = numpy.sum(accuracy)
            self.results[n]['raw_success'] = numpy.sum(
                raw_overall_success) / N_accuracy
            self.results[n]['raw_iteration'] = numpy.average(
                success[raw_overall_success])
            self.results[n]['raw_average'] = numpy.average(
                delta[raw_overall_success]) if numpy.any(
                    raw_overall_success) else 0
            self.results[n]['raw_latent'] = numpy.average(
                latent_delta[raw_overall_success]) if numpy.any(
                    raw_overall_success) else 0

            raw_class_success = numpy.zeros(
                (N_class, perturbation_images.shape[0]), bool)
            self.results[n]['raw_class_success'] = numpy.zeros((N_class))
            self.results[n]['raw_class_average'] = numpy.zeros((N_class))
            self.results[n]['raw_class_latent'] = numpy.zeros((N_class))

            for c in range(N_class):
                N_samples = numpy.sum(
                    numpy.logical_and(accuracy, perturbation_codes == c))
                if N_samples <= 0:
                    continue

                raw_class_success[c] = numpy.logical_and(
                    raw_overall_success, perturbation_codes == c)
                self.results[n]['raw_class_success'][c] = numpy.sum(
                    raw_class_success[c]) / N_samples
                if numpy.any(raw_class_success[c]):
                    self.results[n]['raw_class_average'][c] = numpy.average(
                        delta[raw_class_success[c].astype(bool)])
                    self.results[n]['raw_class_latent'][c] = numpy.average(
                        latent_delta[raw_class_success[c].astype(bool)])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
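
The histogrammed dot products above measure the angle between each adversarial perturbation and the direction from the perturbed sample to a training nearest neighbor. Below is a toy sketch of that computation on random data, using sklearn's `NearestNeighbors` as a stand-in for `compute_nearest_neighbors`, whose implementation is not part of this snippet.

import numpy
from sklearn.neighbors import NearestNeighbors

train_images = numpy.random.randn(100, 32)                    # flattened training images
test_images = numpy.random.randn(10, 32)                      # clean test samples
perturbation_images = test_images + 0.1 * numpy.random.randn(10, 32)

# Stand-in for compute_nearest_neighbors: indices of the 10 nearest training images.
nn = NearestNeighbors(n_neighbors=10).fit(train_images)
_, nearest_neighbors_indices = nn.kneighbors(perturbation_images)

pure_perturbations = test_images - perturbation_images
pure_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)

k = 0  # angle with respect to the direction to the k-th nearest neighbor
direction = perturbation_images - train_images[nearest_neighbors_indices[:, k]]
direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)

dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
norms = pure_norm * direction_norm
norms[norms == 0] = 1                                         # avoid division by zero
# Clip to [-1, 1] to guard arccos against rounding; the code above instead
# skips the plot when NaNs appear.
angles = numpy.degrees(numpy.arccos(numpy.clip(dot_products / norms, -1, 1)))
print(angles)
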
Example #7
    def compute_statistics(self):
        """
        Compute statistics based on distances.
        """

        N_class = numpy.max(self.test_codes) + 1
        num_attempts = self.perturbations.shape[0]

        perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        perturbations = perturbations.reshape(
            (perturbations.shape[0] * perturbations.shape[1],
             perturbations.shape[2]))
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        accuracy = numpy.repeat(self.accuracy, num_attempts, axis=0)
        # Raw success is the basis for all statistics, as we only consider those
        # attacks that are successful and for which the classifier was originally correct.
        raw_overall_success = numpy.logical_and(success >= 0, accuracy)
        log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

        # For off-manifold attacks this should not happen, but better safe than sorry.
        if not numpy.any(raw_overall_success):
            for n in range(len(self.norms)):
                for type in [
                        'raw_success', 'raw_iteration', 'raw_average',
                        'raw_latent'
                ]:
                    self.results[n][type] = 0
                for type in [
                        'raw_class_success', 'raw_class_average',
                        'raw_class_latent'
                ]:
                    self.results[n][type] = numpy.zeros((N_class))
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        perturbation_images = numpy.repeat(self.perturbation_images,
                                           num_attempts,
                                           axis=0)
        perturbation_codes = numpy.repeat(self.perturbation_codes,
                                          num_attempts,
                                          axis=0)

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attacks without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        for n in range(len(self.norms)):
            norm = self.norms[n]
            delta = numpy.linalg.norm(perturbation_images - perturbations,
                                      norm,
                                      axis=1)

            if self.args.plot_directory and utils.display():
                plot_file = os.path.join(self.args.plot_directory,
                                         'distances_l%g' % norm)
                plot.histogram(
                    plot_file, delta[raw_overall_success], 50,
                    title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                    xlabel='Distance', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

            #debug_accuracy = numpy.sum(accuracy) / accuracy.shape[0]
            #debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(success >= 0)
            #debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(accuracy)
            #log('[Testing] attacked model accuracy: %g' % debug_accuracy)
            #log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
            #log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

            N_accuracy = numpy.sum(accuracy)
            self.results[n]['raw_success'] = numpy.sum(
                raw_overall_success) / N_accuracy
            self.results[n]['raw_iteration'] = numpy.average(
                success[raw_overall_success])
            self.results[n]['raw_average'] = numpy.average(
                delta[raw_overall_success]) if numpy.any(
                    raw_overall_success) else 0
            self.results[n]['raw_latent'] = 0

            raw_class_success = numpy.zeros(
                (N_class, perturbation_images.shape[0]), bool)
            self.results[n]['raw_class_success'] = numpy.zeros((N_class))
            self.results[n]['raw_class_average'] = numpy.zeros((N_class))
            self.results[n]['raw_class_latent'] = numpy.zeros((N_class))

            for c in range(N_class):
                N_samples = numpy.sum(
                    numpy.logical_and(accuracy, perturbation_codes == c))
                if N_samples <= 0:
                    continue

                raw_class_success[c] = numpy.logical_and(
                    raw_overall_success, perturbation_codes == c)
                self.results[n]['raw_class_success'][c] = numpy.sum(
                    raw_class_success[c]) / N_samples
                if numpy.any(raw_class_success[c]):
                    self.results[n]['raw_class_average'][c] = numpy.average(
                        delta[raw_class_success[c].astype(bool)])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
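
The reshaping at the top of `compute_statistics` is the same in examples #3, #6 and #7: attack results are stored as (attempts, samples, ...), flattened sample-major, and per-sample arrays (accuracy, labels, images) are repeated so they line up with the flattened attempts. A toy sketch with made-up shapes:

import numpy

num_attempts, num_samples, dim = 3, 4, 2
perturbations = numpy.random.randn(num_attempts, num_samples, dim)
success = numpy.random.randint(-1, 5, (num_attempts, num_samples))
accuracy = numpy.array([True, False, True, True])   # one flag per test sample

# (attempts, samples, dim) -> (samples, attempts, dim) -> (samples * attempts, dim)
perturbations = numpy.swapaxes(perturbations, 0, 1)
perturbations = perturbations.reshape((-1, dim))
success = numpy.swapaxes(success, 0, 1).reshape(-1)

# Repeat per-sample quantities so every attempt is paired with its sample's flag.
accuracy = numpy.repeat(accuracy, num_attempts, axis=0)
raw_overall_success = numpy.logical_and(success >= 0, accuracy)

print(perturbations.shape, success.shape, raw_overall_success.shape)
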
Example #8
    def test(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[0], \
            'number of samples has to match'

        self.loss = 0.
        self.error = 0.
        num_batches = int(
            math.ceil(self.test_images.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.test_images.shape[0])
            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(
                self.test_codes[b_start:b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_classes = self.model(batch_images)
            e = torch.nn.functional.cross_entropy(
                output_classes, batch_classes, reduction='mean')
            self.loss += e.item()

            values, indices = torch.max(
                torch.nn.functional.softmax(output_classes, dim=1), dim=1)
            errors = torch.abs(indices - batch_classes)
            e = torch.sum(errors > 0).float() / batch_classes.size()[0]
            self.error += e.item()

            self.accuracy = common.numpy.concatenate(self.accuracy,
                                                     errors.data.cpu().numpy())

        self.loss /= num_batches
        self.error /= num_batches
        log('[Testing] test loss %g; test error %g' % (self.loss, self.error))

        self.accuracy = self.accuracy == 0
        if self.args.accuracy_file:
            utils.write_hdf5(self.args.accuracy_file, self.accuracy)
            log('[Testing] wrote %s' % self.args.accuracy_file)

        accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
        if numpy.abs(1 - accuracy - self.error) < 1e-4:
            log('[Testing] accuracy file is consistent with the test error (accuracy %g)' % accuracy)

        self.results = {
            'loss': self.loss,
            'error': self.error,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
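
The per-batch error above is simply the fraction of argmax mispredictions. A small self-contained sketch with hand-written logits showing the loss and error computation from `test`:

import torch

output_classes = torch.tensor([[2.0, 0.5, 0.1],
                               [0.1, 0.2, 3.0],
                               [1.5, 1.4, 0.0]])     # logits for a batch of 3
batch_classes = torch.tensor([0, 1, 1])              # ground-truth labels

loss = torch.nn.functional.cross_entropy(
    output_classes, batch_classes, reduction='mean')
values, indices = torch.max(
    torch.nn.functional.softmax(output_classes, dim=1), dim=1)
errors = torch.abs(indices - batch_classes)
error = torch.sum(errors > 0).float() / batch_classes.size()[0]

print(loss.item(), error.item())                     # error is 2/3 here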