Example #1
    def load_data(self):
        """
        Load data.
        """

        test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        self.test_fonts = test_codes[:, 1]
        self.test_classes = test_codes[:, 2]
        log('[Testing] read %s' % self.args.test_codes_file)

        self.perturbations = utils.read_hdf5(
            self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]
        self.N_samples = self.perturbations.shape[1]
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape(
            (self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        log('[Testing] read %s' % self.args.perturbations_file)

        self.test_fonts = numpy.repeat(self.test_fonts[:self.N_samples],
                                       self.N_attempts,
                                       axis=0)
        self.test_classes = numpy.repeat(self.test_classes[:self.N_samples],
                                         self.N_attempts,
                                         axis=0)
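
This and the following load_data variants share one layout invariant: perturbations are stored as (attempts, samples, ...), transposed so that all attempts against one sample become contiguous rows, and per-sample metadata is aligned by repeating each entry N_attempts times. A minimal, self-contained sketch of that invariant (the shapes are made up for illustration):

import numpy

# Hypothetical sizes: 3 attack attempts on 4 test samples, 5-dimensional perturbations.
N_attempts, N_samples, dim = 3, 4, 5
perturbations = numpy.zeros((N_attempts, N_samples, dim), dtype=numpy.float32)
labels = numpy.array([0, 1, 2, 3])

# (attempts, samples, dim) -> (samples, attempts, dim) -> (samples * attempts, dim),
# so rows 0..N_attempts-1 hold all attempts against sample 0, and so on.
flat = numpy.swapaxes(perturbations, 0, 1).reshape(N_samples * N_attempts, dim)

# Repeating each label N_attempts times keeps flat[i] aligned with labels_flat[i].
labels_flat = numpy.repeat(labels[:N_samples], N_attempts, axis=0)
assert flat.shape[0] == labels_flat.shape[0]
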
Example #2
    def load_data(self):
        """
        Load data.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file)
        log('[Testing] read %s' % self.args.test_images_file)

        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
        self.resolution = self.test_images.shape[1]
        self.image_channels = self.test_images.shape[3]

        self.test_codes = utils.read_hdf5(self.args.test_codes_file)
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Testing] read %s' % self.args.test_codes_file)

        self.perturbations = utils.read_hdf5(
            self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]
        self.N_samples = self.perturbations.shape[1]
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape(
            (self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        log('[Testing] read %s' % self.args.perturbations_file)

        self.test_codes = numpy.repeat(self.test_codes[:self.N_samples],
                                       self.N_attempts,
                                       axis=0)
        self.test_images = numpy.repeat(self.test_images[:self.N_samples],
                                        self.N_attempts,
                                        axis=0)
Example #3
    def compute_normalized_ppca(self):
        """
        Compute PPCA.
        """

        nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
        nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]

        perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
        test_images = self.test_images.reshape(self.test_images.shape[0], -1)
        pure_perturbations = perturbations - test_images

        nearest_neighbor_images_norms = numpy.linalg.norm(nearest_neighbor_images, ord=2, axis=1)
        perturbations_norms = numpy.linalg.norm(perturbations, ord=2, axis=1)
        test_images_norms = numpy.linalg.norm(test_images, ord=2, axis=1)
        pure_perturbations_norms = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)

        success = numpy.logical_and(numpy.logical_and(self.success >= 0, self.accuracy), pure_perturbations_norms > 1e-4)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        perturbations_norms = perturbations_norms[success]
        test_images_norms = test_images_norms[success]
        pure_perturbations_norms = pure_perturbations_norms[success]

        perturbations = perturbations[success]
        test_images = test_images[success]
        pure_perturbations = pure_perturbations[success]

        nearest_neighbor_images /= numpy.repeat(nearest_neighbor_images_norms.reshape(-1, 1), nearest_neighbor_images.shape[1], axis=1)
        perturbations /= numpy.repeat(perturbations_norms.reshape(-1, 1), perturbations.shape[1], axis=1)
        test_images /= numpy.repeat(test_images_norms.reshape(-1, 1), test_images.shape[1], axis=1)
        pure_perturbations /= numpy.repeat(pure_perturbations_norms.reshape(-1, 1), pure_perturbations.shape[1], axis=1)

        # NaN checks: x != x is only true for NaN entries.
        assert not numpy.any(nearest_neighbor_images != nearest_neighbor_images)
        assert not numpy.any(perturbations != perturbations)
        assert not numpy.any(test_images != test_images)
        assert not numpy.any(pure_perturbations != pure_perturbations)

        ppca = PPCA(n_components=self.args.n_pca)
        ppca.fit(nearest_neighbor_images)
        log('[Experiment] computed PPCA on nearest neighbor images')

        reconstructed_test_images = ppca.inverse_transform(ppca.transform(test_images))
        reconstructed_perturbations = ppca.inverse_transform(ppca.transform(perturbations))
        reconstructed_pure_perturbations = ppca.inverse_transform(ppca.transform(pure_perturbations))
        
        #self.probabilities['test'] = ppca.marginal(test_images)
        #self.probabilities['perturbation'] = ppca.marginal(perturbations)
        #self.probabilities['true'] = ppca.marginal(pure_perturbations)

        self.distances['test'] = numpy.average(numpy.multiply(reconstructed_test_images - test_images, reconstructed_test_images - test_images), axis=1)
        self.distances['perturbation'] = numpy.average(numpy.multiply(reconstructed_perturbations - perturbations, reconstructed_perturbations - perturbations), axis=1)
        self.distances['true'] = numpy.average(numpy.multiply(reconstructed_pure_perturbations - pure_perturbations, reconstructed_pure_perturbations - pure_perturbations), axis=1)

        self.angles['test'] = numpy.rad2deg(common.numpy.angles(test_images.T, reconstructed_test_images.T))
        self.angles['perturbation'] = numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations.T))
        self.angles['true'] = numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations.T))
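
PPCA here is a project-specific class rather than a library import; its use above boils down to fitting a low-dimensional model on unit-normalized training images and scoring other inputs by reconstruction error. A rough sketch of the same idea using sklearn's PCA as a stand-in (the data is random and purely illustrative):

import numpy
from sklearn.decomposition import PCA

rng = numpy.random.RandomState(0)
fit_data = rng.rand(200, 32).astype(numpy.float32)  # stand-in for nearest neighbor images
queries = rng.rand(10, 32).astype(numpy.float32)    # stand-in for perturbations

# Normalize rows to unit L2 norm, mirroring compute_normalized_ppca.
fit_data /= numpy.linalg.norm(fit_data, ord=2, axis=1, keepdims=True)
queries /= numpy.linalg.norm(queries, ord=2, axis=1, keepdims=True)

pca = PCA(n_components=8)
pca.fit(fit_data)

# Reconstruction distance: mean squared error between a sample and its projection.
reconstructed = pca.inverse_transform(pca.transform(queries))
distances = numpy.mean((reconstructed - queries) ** 2, axis=1)
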
Example #4
    def load_data(self):
        """
        Load data and model.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Testing] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Testing] resolution %d' % self.resolution)

        self.train_images = utils.read_hdf5(self.args.train_images_file).astype(numpy.float32)
        # Flatten train images to vectors.
        self.train_images = self.train_images.reshape((self.train_images.shape[0], -1))
        log('[Testing] read %s' % self.args.train_images_file)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        self.N_class = numpy.max(self.test_codes) + 1
        log('[Testing] read %s' % self.args.test_codes_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        log('[Testing] read %s' % self.args.accuracy_file)

        self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]

        # First, repeat relevant data.
        self.test_images = numpy.repeat(self.test_images[:self.perturbations.shape[1]], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.repeat(self.test_codes[:self.perturbations.shape[1]], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.squeeze(self.perturbation_codes)
        self.accuracy = numpy.repeat(self.accuracy[:self.perturbations.shape[1]], self.N_attempts, axis=0)

        # Then, reshape the perturbations!
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape((self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        assert self.perturbations.shape[1] == self.args.N_theta
        log('[Testing] read %s' % self.args.perturbations_file)
        assert not numpy.any(self.perturbations != self.perturbations), 'NaN in perturbations'

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape((self.success.shape[0] * self.success.shape[1]))
        log('[Testing] read %s' % self.args.success_file)

        log('[Testing] using %d input channels' % self.test_images.shape[3])
        assert self.args.N_theta > 0 and self.args.N_theta <= 9
        decoder = models.STNDecoder(self.args.N_theta)
        # decoder.eval()
        log('[Testing] set up STN decoder')

        self.model = decoder
Example #5
    def load_data(self):
        """
        Load data.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Testing] read %s' % self.args.test_images_file)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Testing] read %s' % self.args.test_codes_file)

        self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]
        self.N_samples = self.perturbations.shape[1]
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        if len(self.perturbations.shape) <= 4:
            self.perturbations = self.perturbations.reshape((self.perturbations.shape[0] * self.perturbations.shape[1], self.perturbations.shape[2], self.perturbations.shape[3], 1))
        else:
            self.perturbations = self.perturbations.reshape((self.perturbations.shape[0] * self.perturbations.shape[1], self.perturbations.shape[2], self.perturbations.shape[3], self.perturbations.shape[4]))

        log('[Testing] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.original_success_file)
        self.original_success = numpy.swapaxes(self.original_success, 0, 1)
        self.original_success = self.original_success.reshape((self.original_success.shape[0] * self.original_success.shape[1]))
        log('[Testing] read %s' % self.args.original_success_file)

        self.original_accuracy = utils.read_hdf5(self.args.original_accuracy_file)
        log('[Testing] read %s' % self.args.original_accuracy_file)

        self.perturbation_codes = numpy.repeat(self.test_codes[:self.N_samples], self.N_attempts, axis=0)
        self.transfer_success = numpy.copy(self.original_success)
Example #6
    def compute_statistics(self):
        """
        Compute statistics based on distances.
        """

        num_attempts = self.perturbations.shape[0]

        perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        perturbations = perturbations.reshape(
            (perturbations.shape[0] * perturbations.shape[1],
             perturbations.shape[2]))
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        probabilities = numpy.swapaxes(self.probabilities, 0, 1)
        probabilities = probabilities.reshape(
            (probabilities.shape[0] * probabilities.shape[1], -1))
        confidences = numpy.max(probabilities, 1)

        perturbation_probabilities = self.test_probabilities[:self.success.shape[1]]
        perturbation_probabilities = numpy.repeat(perturbation_probabilities,
                                                  num_attempts,
                                                  axis=0)
        perturbation_confidences = numpy.max(perturbation_probabilities, 1)

        probability_ratios = confidences / perturbation_confidences

        raw_overall_success = success >= 0
        log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

        # For off-manifold attacks this should not happen, but better safe than sorry.
        if not numpy.any(raw_overall_success):
            for result_type in [
                    'raw_success', 'raw_iteration', 'raw_roc',
                    'raw_confidence_weighted_success', 'raw_confidence',
                    'raw_ratios'
            ]:
                self.results[result_type] = 0
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attack without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory, 'probabilities')
            plot.histogram(plot_file, confidences[raw_overall_success], 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'probability_ratios')
            plot.histogram(plot_file, probability_ratios, 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'test_probabilities')
            plot.histogram(
                plot_file, self.test_probabilities[
                    numpy.arange(self.test_probabilities.shape[0]),
                    self.test_codes], 50)
            log('[Testing] wrote %s' % plot_file)

        y_true = numpy.concatenate(
            (numpy.zeros(confidences.shape[0]),
             numpy.ones(perturbation_confidences.shape[0])))
        y_score = numpy.concatenate((confidences, perturbation_confidences))
        roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

        self.results['raw_roc'] = roc_auc_score
        self.results['raw_confidence_weighted_success'] = numpy.sum(
            confidences[raw_overall_success]) / numpy.sum(
                perturbation_confidences)
        self.results['raw_confidence'] = numpy.mean(
            probabilities[raw_overall_success])
        self.results['raw_ratios'] = numpy.mean(
            probability_ratios[raw_overall_success])
        self.results['raw_success'] = numpy.sum(
            raw_overall_success) / success.shape[0]
        self.results['raw_iteration'] = numpy.average(
            success[raw_overall_success])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
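
The raw_roc statistic treats detection as a binary ranking problem: perturbed inputs get label 0, clean inputs label 1, and the classifier's confidence serves as the score. A self-contained sketch of that construction with made-up confidences:

import numpy
import sklearn.metrics

confidences = numpy.array([0.4, 0.6, 0.55])               # confidence on perturbed inputs
perturbation_confidences = numpy.array([0.9, 0.8, 0.95])  # confidence on the originals

y_true = numpy.concatenate((numpy.zeros(confidences.shape[0]),
                            numpy.ones(perturbation_confidences.shape[0])))
y_score = numpy.concatenate((confidences, perturbation_confidences))
print(sklearn.metrics.roc_auc_score(y_true, y_score))  # 1.0 for these values
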
Example #7
    def load_data(self):
        """
        Load data and model.
        """

        with logw('[Detection] read %s' % self.args.train_images_file):
            self.nearest_neighbor_images = utils.read_hdf5(
                self.args.train_images_file)
            assert len(self.nearest_neighbor_images.shape) == 3

        with logw('[Detection] read %s' % self.args.test_images_file):
            self.test_images = utils.read_hdf5(self.args.test_images_file)
            if len(self.test_images.shape) < 4:
                self.test_images = numpy.expand_dims(self.test_images, axis=3)

        with logw('[Detection] read %s' % self.args.train_codes_file):
            self.train_codes = utils.read_hdf5(self.args.train_codes_file)

        with logw('[Detection] read %s' % self.args.test_codes_file):
            self.test_codes = utils.read_hdf5(self.args.test_codes_file)

        with logw('[Detection] read %s' % self.args.test_theta_file):
            self.test_theta = utils.read_hdf5(self.args.test_theta_file)

        with logw('[Detection] read %s' % self.args.perturbations_file):
            self.perturbations = utils.read_hdf5(self.args.perturbations_file)
            assert len(self.perturbations.shape) == 3

        with logw('[Detection] read %s' % self.args.success_file):
            self.success = utils.read_hdf5(self.args.success_file)

        with logw('[Detection] read %s' % self.args.accuracy_file):
            self.accuracy = utils.read_hdf5(self.args.accuracy_file)

        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        num_attempts = self.perturbations.shape[1]
        self.test_images = self.test_images[:self.perturbations.shape[0]]
        self.train_images = self.nearest_neighbor_images[:self.perturbations.shape[0]]
        self.test_codes = self.test_codes[:self.perturbations.shape[0]]
        self.accuracy = self.accuracy[:self.perturbations.shape[0]]
        self.test_theta = self.test_theta[:self.perturbations.shape[0]]

        self.perturbations = self.perturbations.reshape(
            (self.perturbations.shape[0] * self.perturbations.shape[1],
             self.perturbations.shape[2]))
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape(
            (self.success.shape[0] * self.success.shape[1]))

        self.accuracy = numpy.repeat(self.accuracy, num_attempts, axis=0)
        self.test_images = numpy.repeat(self.test_images, num_attempts, axis=0)
        self.train_images = numpy.repeat(self.train_images,
                                         num_attempts,
                                         axis=0)
        self.test_codes = numpy.repeat(self.test_codes, num_attempts, axis=0)
        self.test_theta = numpy.repeat(self.test_theta, num_attempts, axis=0)

        max_samples = self.args.max_samples
        self.success = self.success[:max_samples]
        self.accuracy = self.accuracy[:max_samples]
        self.perturbations = self.perturbations[:max_samples]
        self.test_images = self.test_images[:max_samples]
        self.train_images = self.train_images[:max_samples]
        self.test_codes = self.test_codes[:max_samples]
        self.test_theta = self.test_theta[:max_samples]

        with logw('[Testing] read %s' % self.args.database_file):
            database = utils.read_hdf5(self.args.database_file)

            self.N_font = database.shape[0]
            self.N_class = database.shape[1]
            self.N_theta = self.test_theta.shape[1]

            database = database.reshape((database.shape[0] * database.shape[1],
                                         database.shape[2], database.shape[3]))
            database = torch.from_numpy(database)
            if self.args.use_gpu:
                database = database.cuda()
            database = torch.autograd.Variable(database, False)

            self.model = models.AlternativeOneHotDecoder(
                database, self.N_font, self.N_class, self.N_theta)
            self.model.eval()

        self.compute_images()
Example #8
    def run_model(self):
        """
        Run model.
        """
        def run(decoder, classifier, theta, codes, batch_size, use_gpu):
            """
            Run the model for given images.

            :param decoder: decoder
            :type decoder: torch.nn.Module
            :param classifier: classifier
            :type classifier: torch.nn.Module
            :param theta: transformation codes
            :type theta: numpy.ndarray
            :param codes: codes
            :type codes: numpy.ndarray
            :param batch_size: batch size
            :type batch_size: int
            :param use_gpu: whether to use GPU
            :type use_gpu: bool
            :return: representations, images
            :rtype: numpy.ndarray, numpy.ndarray
            """

            assert decoder.training is False
            assert classifier.training is False

            images = None
            representations = None
            num_batches = int(math.ceil(theta.shape[0] / batch_size))

            for b in range(num_batches):
                b_start = b * batch_size
                b_end = min((b + 1) * batch_size, theta.shape[0])

                batch_theta = common.torch.as_variable(theta[b_start:b_end],
                                                       use_gpu)
                batch_code = common.torch.as_variable(codes[b_start:b_end],
                                                      use_gpu)

                decoder.set_code(batch_code)
                output_images = decoder.forward(batch_theta)
                output_representations = torch.nn.functional.softmax(
                    classifier.forward(output_images), dim=1)

                output_images = numpy.transpose(
                    output_images.data.cpu().numpy(), (0, 2, 3, 1))
                images = common.numpy.concatenate(images, output_images)

                output_representations = output_representations.data.cpu().numpy()
                representations = common.numpy.concatenate(
                    representations, output_representations)
                log('[Visualization] %d/%d' % (b + 1, num_batches))

            return representations, images

        self.theta_representations, self.theta_images = run(
            self.decoder, self.classifier, self.test_theta, self.test_codes,
            self.args.batch_size, self.args.use_gpu)

        shape = self.perturbations.shape
        perturbations = self.perturbations.reshape(
            (shape[0] * shape[1], shape[2]))
        self.perturbation_representations, self.perturbation_images = run(
            self.decoder, self.classifier, perturbations,
            numpy.repeat(self.test_codes, shape[1], axis=0),
            self.args.batch_size, self.args.use_gpu)
        self.perturbation_representations = self.perturbation_representations.reshape(
            (shape[0], shape[1], -1))
        self.perturbation_images = self.perturbation_images.reshape(
            (shape[0], shape[1], self.perturbation_images.shape[1],
             self.perturbation_images.shape[2],
             self.perturbation_images.shape[3]))
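
common.numpy.concatenate, used to accumulate batch outputs above, is a project helper; from its use with a None accumulator it presumably behaves like this sketch:

import numpy

def concatenate(accumulator, batch, axis=0):
    # Presumed behavior: the first batch initializes the accumulator,
    # later batches are appended along `axis`.
    if accumulator is None:
        return batch
    return numpy.concatenate((accumulator, batch), axis=axis)

images = None
for _ in range(3):
    images = concatenate(images, numpy.zeros((4, 8)))
assert images.shape == (12, 8)
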
Example #9
    def load_data(self):
        """
        Load data.
        """

        assert self.args.batch_size % 4 == 0

        self.train_images = utils.read_hdf5(
            self.args.train_images_file).astype(numpy.float32)
        log('[Training] read %s' % self.args.train_images_file)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.train_images.shape) < 4:
            self.train_images = numpy.expand_dims(self.train_images, axis=3)
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Training] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Training] resolution %d' % self.resolution)

        self.train_codes = utils.read_hdf5(self.args.train_codes_file).astype(int)
        assert self.train_codes.shape[1] >= self.args.label_index + 1
        self.train_codes = self.train_codes[:, self.args.label_index]
        log('[Training] read %s' % self.args.train_codes_file)
        self.N_class = numpy.max(self.train_codes) + 1

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        assert self.test_codes.shape[1] >= self.args.label_index + 1
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Training] read %s' % self.args.test_codes_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.train_theta_file)

        assert self.test_images.shape[0] == self.test_codes.shape[0]

        self.min_bound = numpy.min(self.train_theta, axis=0)
        self.max_bound = numpy.max(self.train_theta, axis=0)
        log('[Training] min bound: %s' % ' '.join(
            ['%g' % self.min_bound[i]
             for i in range(self.min_bound.shape[0])]))
        log('[Training] max bound: %s' % ' '.join(
            ['%g' % self.max_bound[i]
             for i in range(self.max_bound.shape[0])]))

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_theta_file)

        assert self.train_codes.shape[0] == self.train_images.shape[0]
        assert self.test_codes.shape[0] == self.test_images.shape[0]
        assert self.train_theta.shape[0] == self.train_images.shape[0], '%s != %s' % (
            'x'.join(list(map(str, self.train_theta.shape))),
            'x'.join(list(map(str, self.train_images.shape))))
        assert self.test_theta.shape[0] == self.test_images.shape[0]

        # Select subset of samples
        if self.args.training_samples < 0:
            self.args.training_samples = self.train_images.shape[0]
        else:
            self.args.training_samples = min(self.args.training_samples,
                                             self.train_images.shape[0])
        log('[Training] using %d training samples' %
            self.args.training_samples)

        if self.args.test_samples < 0:
            self.args.test_samples = self.test_images.shape[0]
        else:
            self.args.test_samples = min(self.args.test_samples,
                                         self.test_images.shape[0])

        if self.args.early_stopping:
            assert self.args.validation_samples > 0
            assert self.args.training_samples + self.args.validation_samples <= self.train_images.shape[0]
            self.val_images = self.train_images[self.train_images.shape[0] -
                                                self.args.validation_samples:]
            self.val_codes = self.train_codes[self.train_codes.shape[0] -
                                              self.args.validation_samples:]
            self.train_images = self.train_images[:self.train_images.shape[0] -
                                                  self.args.validation_samples]
            self.train_codes = self.train_codes[:self.train_codes.shape[0] -
                                                self.args.validation_samples]
            assert self.val_images.shape[0] == self.args.validation_samples
            assert self.val_codes.shape[0] == self.args.validation_samples

        if self.args.random_samples:
            perm = numpy.random.permutation(self.train_images.shape[0] // 10)
            perm = perm[:self.args.training_samples // 10]
            perm = numpy.repeat(perm, self.N_class, axis=0) * 10 + numpy.tile(
                numpy.array(range(self.N_class)), (perm.shape[0]))
            self.train_images = self.train_images[perm]
            self.train_codes = self.train_codes[perm]
            self.train_theta = self.train_theta[perm]
        else:
            self.train_images = self.train_images[:self.args.training_samples]
            self.train_codes = self.train_codes[:self.args.training_samples]
            self.train_theta = self.train_theta[:self.args.training_samples]

        self.train_valid = (numpy.max(numpy.abs(self.train_theta), axis=1) <=
                            self.args.bound).astype(int)
        self.test_valid = (numpy.max(numpy.abs(self.test_theta), axis=1) <=
                           self.args.bound).astype(int)

        # Check that the dataset is balanced.
        number_samples = self.train_codes.shape[0] // self.N_class
        for c in range(self.N_class):
            number_samples_ = numpy.sum(self.train_codes == c)
            if number_samples_ != number_samples:
                log(
                    '[Training] dataset not balanced, class %d should have %d samples but has %d'
                    % (c, number_samples, number_samples_), LogLevel.WARNING)
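
The random_samples branch above only lines up if the training set is stored as consecutive blocks of one sample per class and N_class equals the hard-coded block size of 10; under that assumption the index arithmetic expands chosen block indices into one row per class:

import numpy

N_class = 10                      # must match the hard-coded 10 above
num_blocks = 5                    # suppose rows are [block 0: classes 0..9, block 1: ...]
perm = numpy.random.permutation(num_blocks)[:3]  # pick 3 random blocks

indices = numpy.repeat(perm, N_class, axis=0) * N_class + numpy.tile(
    numpy.arange(N_class), perm.shape[0])
assert indices.shape[0] == 3 * N_class
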
Example #10
    def load_data(self):
        """
        Load data and model.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Testing] read %s' % self.args.test_images_file)

        if len(self.test_images.shape) <= 3:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Testing] no color images, adjusted size')

        self.train_images = utils.read_hdf5(
            self.args.train_images_file).astype(numpy.float32)
        # Flatten train images to vectors.
        self.train_images = self.train_images.reshape(
            (self.train_images.shape[0], -1))
        log('[Testing] read %s' % self.args.train_images_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Testing] read %s' % self.args.test_theta_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
            numpy.float32)
        log('[Testing] read %s' % self.args.train_theta_file)

        self.test_codes = utils.read_hdf5(
            self.args.test_codes_file).astype(int)
        log('[Testing] read %s' % self.args.test_codes_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        log('[Testing] read %s' % self.args.accuracy_file)

        self.perturbations = utils.read_hdf5(
            self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]

        # First, repeat relevant data.
        self.perturbation_theta = numpy.repeat(
            self.test_theta[:self.perturbations.shape[1]],
            self.N_attempts,
            axis=0)
        self.perturbation_codes = numpy.repeat(
            self.test_codes[:self.perturbations.shape[1]],
            self.N_attempts,
            axis=0)
        self.accuracy = numpy.repeat(
            self.accuracy[:self.perturbations.shape[1]],
            self.N_attempts,
            axis=0)

        # Then, reshape the perturbations!
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape(
            (self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        log('[Testing] read %s' % self.args.perturbations_file)

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape(
            (self.success.shape[0] * self.success.shape[1]))
        log('[Testing] read %s' % self.args.success_file)

        database = utils.read_hdf5(self.args.database_file)
        log('[Testing] read %s' % self.args.database_file)

        self.N_font = database.shape[0]
        self.N_class = database.shape[1]
        N_theta = self.test_theta.shape[1]
        log('[Testing] using %d N_theta' % N_theta)

        database = database.reshape((database.shape[0] * database.shape[1],
                                     database.shape[2], database.shape[3]))
        database = torch.from_numpy(database)
        if self.args.use_gpu:
            database = database.cuda()
        database = torch.autograd.Variable(database, False)

        self.model = models.AlternativeOneHotDecoder(database, self.N_font,
                                                     self.N_class, N_theta)
        self.model.eval()
Example #11
    def load_data(self):
        """
        Load data.
        """

        assert self.args.batch_size % 4 == 0

        self.database = utils.read_hdf5(self.args.database_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.database_file)

        self.N_font = self.database.shape[0]
        self.N_class = self.database.shape[1]

        self.database = self.database.reshape(
            (self.database.shape[0] * self.database.shape[1],
             self.database.shape[2], self.database.shape[3]))
        self.database = torch.from_numpy(self.database)
        if self.args.use_gpu:
            self.database = self.database.cuda()
            self.database = torch.autograd.Variable(self.database, False)

        self.train_images = utils.read_hdf5(
            self.args.train_images_file).astype(numpy.float32)
        log('[Training] read %s' % self.args.train_images_file)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.train_images.shape) < 4:
            self.train_images = numpy.expand_dims(self.train_images, axis=3)
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Training] no color images, adjusted size')
        self.resolution = self.train_images.shape[2]
        log('[Training] resolution %d' % self.resolution)

        self.train_codes = utils.read_hdf5(self.args.train_codes_file).astype(int)
        assert self.train_codes.shape[1] == 3
        log('[Training] read %s' % self.args.train_codes_file)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        assert self.test_codes.shape[1] == 3
        log('[Training] read %s' % self.args.test_codes_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.train_theta_file)

        self.min_bound = numpy.min(self.train_theta, axis=0)
        self.max_bound = numpy.max(self.train_theta, axis=0)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_theta_file)

        assert self.train_codes.shape[0] == self.train_images.shape[0]
        assert self.test_codes.shape[0] == self.test_images.shape[0]
        assert self.train_theta.shape[0] == self.train_images.shape[0]
        assert self.test_theta.shape[0] == self.test_images.shape[0]

        # Select subset of samples
        if self.args.training_samples < 0:
            self.args.training_samples = self.train_images.shape[0]
        else:
            self.args.training_samples = min(self.args.training_samples,
                                             self.train_images.shape[0])
        log('[Training] found %d classes' % self.N_class)
        log('[Training] using %d training samples' %
            self.args.training_samples)

        if self.args.test_samples < 0:
            self.args.test_samples = self.test_images.shape[0]
        else:
            self.args.test_samples = min(self.args.test_samples,
                                         self.test_images.shape[0])

        if self.args.early_stopping:
            assert self.args.validation_samples > 0
            assert self.args.training_samples + self.args.validation_samples <= self.train_images.shape[0]
            self.val_images = self.train_images[self.train_images.shape[0] -
                                                self.args.validation_samples:]
            self.val_codes = self.train_codes[self.train_codes.shape[0] -
                                              self.args.validation_samples:,
                                              self.args.label_index]
            self.train_images = self.train_images[:self.train_images.shape[0] -
                                                  self.args.validation_samples]
            self.train_codes = self.train_codes[:self.train_codes.shape[0] -
                                                self.args.validation_samples]
            assert self.val_images.shape[0] == self.args.validation_samples
            assert self.val_codes.shape[0] == self.args.validation_samples

        if self.args.random_samples:
            perm = numpy.random.permutation(self.train_images.shape[0] // 10)
            perm = perm[:self.args.training_samples // 10]
            perm = numpy.repeat(perm, self.N_class, axis=0) * 10 + numpy.tile(
                numpy.array(range(self.N_class)), (perm.shape[0]))
            self.train_images = self.train_images[perm]
            self.train_codes = self.train_codes[perm]
            self.train_theta = self.train_theta[perm]
        else:
            self.train_images = self.train_images[:self.args.training_samples]
            self.train_codes = self.train_codes[:self.args.training_samples]
            self.train_theta = self.train_theta[:self.args.training_samples]

        # Check that the dataset is balanced.
        number_samples = self.train_codes.shape[0] // self.N_class
        for c in range(self.N_class):
            number_samples_ = numpy.sum(
                self.train_codes[:, self.args.label_index] == c)
            if number_samples_ != number_samples:
                log(
                    '[Training] dataset not balanced, class %d should have %d samples but has %d'
                    % (c, number_samples, number_samples_), LogLevel.WARNING)
Example #12
    def test(self):
        """
        Test the model.
        """

        self.model.eval()
        log('[Training] %d set classifier to eval' % self.epoch)

        loss = error = perturbation_loss = perturbation_error = 0
        num_batches = int(
            math.ceil(self.args.test_samples / self.args.batch_size))
        assert self.model.training is False

        for b in range(num_batches):
            perm = numpy.take(range(self.args.test_samples),
                              range(b * self.args.batch_size,
                                    (b + 1) * self.args.batch_size),
                              mode='clip')
            batch_images = common.torch.as_variable(self.test_images[perm],
                                                    self.args.use_gpu)
            batch_theta = common.torch.as_variable(self.test_theta[perm],
                                                   self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            batch_fonts = self.test_codes[perm, 1]
            batch_classes = self.test_codes[perm, self.args.label_index]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)
            batch_classes = common.torch.as_variable(batch_classes,
                                                     self.args.use_gpu)

            output_classes = self.model(batch_images)
            e = self.loss(batch_classes, output_classes)
            loss += e.item()
            a = self.error(batch_classes, output_classes)
            error += a.item()

            if self.args.strong_variant:
                min_bound = numpy.repeat(self.min_bound.reshape(1, -1),
                                         batch_theta.size(0),
                                         axis=0)
                max_bound = numpy.repeat(self.max_bound.reshape(1, -1),
                                         batch_theta.size(0),
                                         axis=0)
                random = numpy.random.uniform(
                    min_bound, max_bound,
                    (batch_theta.size(0), batch_theta.size(1)))

                batch_perturbed_theta = common.torch.as_variable(
                    random.astype(numpy.float32), self.args.use_gpu)

                self.decoder.set_code(batch_code)
                batch_perturbed_images = self.decoder(batch_perturbed_theta)
            else:
                random = common.numpy.uniform_ball(batch_theta.size(0),
                                                   batch_theta.size(1),
                                                   epsilon=self.args.epsilon,
                                                   ord=self.norm)
                batch_perturbed_theta = batch_theta + common.torch.as_variable(
                    random.astype(numpy.float32), self.args.use_gpu)
                batch_perturbed_theta = torch.min(
                    common.torch.as_variable(self.max_bound,
                                             self.args.use_gpu),
                    batch_perturbed_theta)
                batch_perturbed_theta = torch.max(
                    common.torch.as_variable(self.min_bound,
                                             self.args.use_gpu),
                    batch_perturbed_theta)

                self.decoder.set_code(batch_code)
                batch_perturbed_images = self.decoder(batch_perturbed_theta)

            output_classes = self.model(batch_perturbed_images)

            l = self.loss(batch_classes, output_classes)
            perturbation_loss += l.item()

            e = self.error(batch_classes, output_classes)
            perturbation_error += e.item()

        loss /= num_batches
        error /= num_batches
        perturbation_loss /= num_batches
        perturbation_error /= num_batches
        log('[Training] %d: test %g (%g) %g (%g)' %
            (self.epoch, loss, error, perturbation_loss, perturbation_error))

        num_batches = int(
            math.ceil(self.train_images.shape[0] / self.args.batch_size))
        iteration = self.epoch * num_batches
        self.test_statistics = numpy.vstack((
            self.test_statistics,
            numpy.array([[
                iteration,  # iterations
                iteration * (1 + self.args.max_iterations) *
                self.args.batch_size,  # samples seen
                min(num_batches, iteration) * self.args.batch_size +
                iteration * self.args.max_iterations *
                self.args.batch_size,  # unique samples seen
                loss,
                error,
                perturbation_loss,
                perturbation_error
            ]])))
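
common.numpy.one_hot, used to build batch_code in test() above and train() below, is a project helper; given how its outputs are concatenated into a font/class code vector, it presumably maps a (B,) integer array to a (B, N) one-hot float matrix, roughly:

import numpy

def one_hot(indices, N):
    # Presumed behavior of common.numpy.one_hot.
    out = numpy.zeros((indices.shape[0], N), dtype=numpy.float32)
    out[numpy.arange(indices.shape[0]), indices] = 1.0
    return out

# Concatenated font/class code, as in test() and train().
code = numpy.concatenate((one_hot(numpy.array([1, 0]), 3),
                          one_hot(numpy.array([2, 2]), 4)), axis=1)
assert code.shape == (2, 7)
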
Example #13
    def train(self):
        """
        Train with fair data augmentation.
        """

        self.model.train()
        assert self.model.training is True

        split = self.args.batch_size // 2
        num_batches = int(
            math.ceil(self.train_images.shape[0] / self.args.batch_size))
        permutation = numpy.random.permutation(self.train_images.shape[0])

        for b in range(num_batches):
            self.scheduler.update(self.epoch, float(b) / num_batches)

            perm = numpy.take(permutation,
                              range(b * self.args.batch_size,
                                    (b + 1) * self.args.batch_size),
                              mode='wrap')
            batch_images = common.torch.as_variable(self.train_images[perm],
                                                    self.args.use_gpu)
            batch_theta = common.torch.as_variable(self.train_theta[perm],
                                                   self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            batch_fonts = self.train_codes[perm, 1]
            batch_classes = self.train_codes[perm, self.args.label_index]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)
            batch_classes = common.torch.as_variable(batch_classes,
                                                     self.args.use_gpu)

            loss = error = gradient = 0
            if self.args.full_variant:
                for t in range(self.args.max_iterations):
                    if self.args.strong_variant:
                        # Here, we want to uniformly sample all allowed transformations, so that's OK.
                        min_bound = numpy.repeat(self.min_bound.reshape(1, -1),
                                                 self.args.batch_size,
                                                 axis=0)
                        max_bound = numpy.repeat(self.max_bound.reshape(1, -1),
                                                 self.args.batch_size,
                                                 axis=0)
                        random = numpy.random.uniform(
                            min_bound, max_bound,
                            (batch_theta.size(0), batch_theta.size(1)))

                        batch_perturbed_theta = common.torch.as_variable(
                            random.astype(numpy.float32), self.args.use_gpu)

                        self.decoder.set_code(batch_code)
                        batch_perturbed_images = self.decoder(
                            batch_perturbed_theta)
                    else:
                        random = common.numpy.uniform_ball(
                            batch_theta.size(0),
                            batch_theta.size(1),
                            epsilon=self.args.epsilon,
                            ord=self.norm)
                        batch_perturbed_theta = batch_theta + common.torch.as_variable(
                            random.astype(numpy.float32), self.args.use_gpu)
                        batch_perturbed_theta = torch.min(
                            common.torch.as_variable(self.max_bound,
                                                     self.args.use_gpu),
                            batch_perturbed_theta)
                        batch_perturbed_theta = torch.max(
                            common.torch.as_variable(self.min_bound,
                                                     self.args.use_gpu),
                            batch_perturbed_theta)

                        self.decoder.set_code(batch_code)
                        batch_perturbed_images = self.decoder(
                            batch_perturbed_theta)

                    output_classes = self.model(batch_perturbed_images)

                    self.scheduler.optimizer.zero_grad()
                    l = self.loss(batch_classes, output_classes)
                    l.backward()
                    self.scheduler.optimizer.step()
                    loss += l.item()

                    g = torch.mean(
                        torch.abs(list(self.model.parameters())[0].grad))
                    gradient += g.item()

                    e = self.error(batch_classes, output_classes)
                    error += e.item()

                batch_perturbations = batch_perturbed_images - batch_images
                gradient /= self.args.max_iterations
                loss /= self.args.max_iterations
                error /= self.args.max_iterations
                perturbation_loss = loss
                perturbation_error = error
            else:
                output_classes = self.model(batch_images[:split])

                self.scheduler.optimizer.zero_grad()
                l = self.loss(batch_classes[:split], output_classes)
                l.backward()
                self.scheduler.optimizer.step()
                loss = l.item()

                gradient = torch.mean(
                    torch.abs(list(self.model.parameters())[0].grad))
                gradient = gradient.item()

                e = self.error(batch_classes[:split], output_classes)
                error = e.item()

                perturbation_loss = perturbation_error = 0
                for t in range(self.args.max_iterations):
                    if self.args.strong_variant:
                        # Again, sampling all possible transformations.
                        min_bound = numpy.repeat(self.min_bound.reshape(1, -1),
                                                 split,
                                                 axis=0)
                        max_bound = numpy.repeat(self.max_bound.reshape(1, -1),
                                                 split,
                                                 axis=0)
                        random = numpy.random.uniform(
                            min_bound, max_bound, (split, batch_theta.size(1)))

                        batch_perturbed_theta = common.torch.as_variable(
                            random.astype(numpy.float32), self.args.use_gpu)

                        self.decoder.set_code(batch_code[split:])
                        batch_perturbed_images = self.decoder(
                            batch_perturbed_theta)
                    else:
                        random = common.numpy.uniform_ball(
                            split,
                            batch_theta.size(1),
                            epsilon=self.args.epsilon,
                            ord=self.norm)
                        batch_perturbed_theta = batch_theta[
                            split:] + common.torch.as_variable(
                                random.astype(numpy.float32),
                                self.args.use_gpu)
                        batch_perturbed_theta = torch.min(
                            common.torch.as_variable(self.max_bound,
                                                     self.args.use_gpu),
                            batch_perturbed_theta)
                        batch_perturbed_theta = torch.max(
                            common.torch.as_variable(self.min_bound,
                                                     self.args.use_gpu),
                            batch_perturbed_theta)

                        self.decoder.set_code(batch_code[split:])
                        batch_perturbed_images = self.decoder(
                            batch_perturbed_theta)

                    output_classes = self.model(batch_perturbed_images)

                    self.scheduler.optimizer.zero_grad()
                    l = self.loss(batch_classes[split:], output_classes)
                    l.backward()
                    self.scheduler.optimizer.step()
                    perturbation_loss += l.item()

                    g = torch.mean(
                        torch.abs(list(self.model.parameters())[0].grad))
                    gradient += g.item()

                    e = self.error(batch_classes[split:], output_classes)
                    perturbation_error += e.item()

                batch_perturbations = batch_perturbed_images - batch_images[
                    split:]
                gradient /= self.args.max_iterations + 1
                perturbation_loss /= self.args.max_iterations
                perturbation_error /= self.args.max_iterations

            iteration = self.epoch * num_batches + b + 1
            self.train_statistics = numpy.vstack((
                self.train_statistics,
                numpy.array([[
                    iteration,  # iterations
                    iteration * (1 + self.args.max_iterations) *
                    self.args.batch_size,  # samples seen
                    min(num_batches, iteration) * self.args.batch_size +
                    iteration * self.args.max_iterations *
                    self.args.batch_size,  # unique samples seen
                    loss,
                    error,
                    perturbation_loss,
                    perturbation_error,
                    gradient
                ]])))

            if b % self.args.skip == self.args.skip // 2:
                log('[Training] %d | %d: %g (%g) %g (%g) [%g]' % (
                    self.epoch,
                    b,
                    numpy.mean(self.train_statistics[
                        max(0, iteration - self.args.skip):iteration, 3]),
                    numpy.mean(self.train_statistics[
                        max(0, iteration - self.args.skip):iteration, 4]),
                    numpy.mean(self.train_statistics[
                        max(0, iteration - self.args.skip):iteration, 5]),
                    numpy.mean(self.train_statistics[
                        max(0, iteration - self.args.skip):iteration, 6]),
                    numpy.mean(self.train_statistics[
                        max(0, iteration - self.args.skip):iteration, -1]),
                ))

        self.debug('clean.%d.png' % self.epoch,
                   batch_images.permute(0, 2, 3, 1))
        self.debug('perturbed.%d.png' % self.epoch,
                   batch_perturbed_images.permute(0, 2, 3, 1))
        self.debug('perturbation.%d.png' % self.epoch,
                   batch_perturbations.permute(0, 2, 3, 1),
                   cmap='seismic')
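
common.numpy.uniform_ball, which draws the random theta perturbations in both branches above, is also project code; a rough stand-in that samples uniformly from an L2 or L-infinity ball of radius epsilon:

import numpy

def uniform_ball(batch_size, dim, epsilon=1.0, ord=2):
    # Rough stand-in for common.numpy.uniform_ball; exact for ord=inf,
    # standard direction-radius sampling for ord=2.
    if ord == float('inf'):
        return numpy.random.uniform(-epsilon, epsilon, (batch_size, dim))
    direction = numpy.random.randn(batch_size, dim)
    direction /= numpy.linalg.norm(direction, ord=2, axis=1, keepdims=True)
    radius = epsilon * numpy.random.uniform(0, 1, (batch_size, 1)) ** (1.0 / dim)
    return direction * radius

noise = uniform_ball(4, 6, epsilon=0.1)
assert numpy.all(numpy.linalg.norm(noise, axis=1) <= 0.1 + 1e-6)
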
Example #14
    def compute_local_normalized_pca(self, inclusive):
        """
        Compute local normalized PCA.
        """

        success = numpy.logical_and(self.success >= 0, self.accuracy)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
        nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]

        perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
        test_images = self.test_images.reshape(self.test_images.shape[0], -1)
        pure_perturbations = perturbations - test_images

        nearest_neighbor_images_norms = numpy.linalg.norm(nearest_neighbor_images, ord=2, axis=1)
        perturbations_norms = numpy.linalg.norm(perturbations, ord=2, axis=1)
        test_images_norms = numpy.linalg.norm(test_images, ord=2, axis=1)
        pure_perturbations_norms = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)

        success = numpy.logical_and(numpy.logical_and(self.success >= 0, self.accuracy), pure_perturbations_norms > 1e-4)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        perturbations_norms = perturbations_norms[success]
        test_images_norms = test_images_norms[success]
        pure_perturbations_norms = pure_perturbations_norms[success]

        perturbations = perturbations[success]
        test_images = test_images[success]
        pure_perturbations = pure_perturbations[success]

        nearest_neighbor_images /= numpy.repeat(nearest_neighbor_images_norms.reshape(-1, 1), nearest_neighbor_images.shape[1], axis=1)
        perturbations /= numpy.repeat(perturbations_norms.reshape(-1, 1), perturbations.shape[1], axis=1)
        test_images /= numpy.repeat(test_images_norms.reshape(-1, 1), test_images.shape[1], axis=1)
        pure_perturbations /= numpy.repeat(pure_perturbations_norms.reshape(-1, 1), pure_perturbations.shape[1], axis=1)

        nearest_neighbors_indices = self.compute_nearest_neighbors(perturbations)

        self.distances['true'] = numpy.zeros((numpy.sum(success)))
        self.distances['test'] = numpy.zeros((numpy.sum(success)))
        self.distances['perturbation'] = numpy.zeros((numpy.sum(success)))

        self.angles['true'] = numpy.zeros((numpy.sum(success)))
        self.angles['test'] = numpy.zeros((numpy.sum(success)))
        self.angles['perturbation'] = numpy.zeros((numpy.sum(success)))

        for n in range(pure_perturbations.shape[0]):
            nearest_neighbors = nearest_neighbor_images[nearest_neighbors_indices[n, :]]

            if inclusive:
                nearest_neighbors = numpy.concatenate((nearest_neighbors, test_images[n].reshape(1, -1)), axis=0)

            pca = sklearn.decomposition.IncrementalPCA(n_components=self.args.n_pca)
            pca.fit(nearest_neighbors)

            reconstructed_test_images = pca.inverse_transform(pca.transform(test_images[n].reshape(1, -1)))
            reconstructed_perturbations = pca.inverse_transform(pca.transform(perturbations[n].reshape(1, -1)))
            reconstructed_pure_perturbations = pca.inverse_transform(pca.transform(pure_perturbations[n].reshape(1, -1)))

            # Mean squared reconstruction error under the local PCA model.
            self.distances['test'][n] = numpy.mean(numpy.square(reconstructed_test_images - test_images[n]))
            self.distances['perturbation'][n] = numpy.mean(numpy.square(reconstructed_perturbations - perturbations[n]))
            self.distances['true'][n] = numpy.mean(numpy.square(reconstructed_pure_perturbations - pure_perturbations[n]))

            self.angles['test'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_test_images.T, test_images[n].T))
            self.angles['perturbation'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations[n].T))
            self.angles['true'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations[n].T))

            log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
            log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
            log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))
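The loop above fits a separate PCA model per attacked sample on its unit-normalized nearest neighbors, then scores the sample by mean squared reconstruction error and by the angle between input and reconstruction. A self-contained sketch of that scoring on random data, using numpy.arccos in place of the common.numpy.angles helper (all sizes and names are illustrative):

import numpy
import sklearn.decomposition

rng = numpy.random.RandomState(0)
neighbors = rng.rand(50, 64)  # stand-in for a sample's nearest neighbors
neighbors /= numpy.linalg.norm(neighbors, ord=2, axis=1).reshape(-1, 1)
query = rng.rand(1, 64)       # stand-in for one normalized perturbation
query /= numpy.linalg.norm(query)

pca = sklearn.decomposition.IncrementalPCA(n_components=5)
pca.fit(neighbors)
reconstruction = pca.inverse_transform(pca.transform(query))

distance = numpy.mean(numpy.square(reconstruction - query))
cosine = numpy.dot(reconstruction.ravel(), query.ravel())
cosine /= numpy.linalg.norm(reconstruction) * numpy.linalg.norm(query)
angle = numpy.rad2deg(numpy.arccos(numpy.clip(cosine, -1, 1)))
print('distance=%g angle=%g' % (distance, angle))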

    def load_data(self):
        """
        Load data and model.
        """

        with logw('[Detection] read %s' % self.args.train_images_file):
            self.nearest_neighbor_images = utils.read_hdf5(self.args.train_images_file)
            assert len(self.nearest_neighbor_images.shape) == 3

        with logw('[Detection] read %s' % self.args.test_images_file):
            self.test_images = utils.read_hdf5(self.args.test_images_file)
            if len(self.test_images.shape) < 4:
                self.test_images = numpy.expand_dims(self.test_images, axis=3)

        with logw('[Detection] read %s' % self.args.perturbations_file):
            self.perturbations = utils.read_hdf5(self.args.perturbations_file)
            assert len(self.perturbations.shape) == 4

        with logw('[Detection] read %s' % self.args.success_file):
            self.success = utils.read_hdf5(self.args.success_file)

        with logw('[Detection] read %s' % self.args.accuracy_file):
            self.accuracy = utils.read_hdf5(self.args.accuracy_file)

        # Perturbations are stored as (N_attempts, N_samples, H, W); put samples first.
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        num_attempts = self.perturbations.shape[1]
        self.test_images = self.test_images[:self.perturbations.shape[0]]
        self.train_images = self.nearest_neighbor_images[:self.perturbations.shape[0]]
        self.accuracy = self.accuracy[:self.perturbations.shape[0]]

        self.perturbations = self.perturbations.reshape((-1,) + self.perturbations.shape[2:])
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape(-1)

        self.accuracy = numpy.repeat(self.accuracy, num_attempts, axis=0)
        self.test_images = numpy.repeat(self.test_images, num_attempts, axis=0)
        self.train_images = numpy.repeat(self.train_images, num_attempts, axis=0)

        max_samples = self.args.max_samples
        self.success = self.success[:max_samples]
        self.accuracy = self.accuracy[:max_samples]
        self.perturbations = self.perturbations[:max_samples]
        self.test_images = self.test_images[:max_samples]
        self.train_images = self.train_images[:max_samples]

        if self.args.mode == 'true':
            assert self.args.database_file
            assert self.args.test_codes_file
            assert self.args.test_theta_file

            self.test_codes = utils.read_hdf5(self.args.test_codes_file)
            log('[Detection] read %s' % self.args.test_codes_file)

            self.test_theta = utils.read_hdf5(self.args.test_theta_file)
            log('[Detection] read %s' % self.args.test_theta_file)

            self.test_codes = self.test_codes[:self.perturbations.shape[0]]
            self.test_theta = self.test_theta[:self.perturbations.shape[0]]

            self.test_codes = numpy.repeat(self.test_codes, num_attempts, axis=0)
            self.test_theta = numpy.repeat(self.test_theta, num_attempts, axis=0)

            self.test_codes = self.test_codes[:max_samples]
            self.test_theta = self.test_theta[:max_samples]

            database = utils.read_hdf5(self.args.database_file)
            log('[Detection] read %s' % self.args.database_file)

            self.N_font = database.shape[0]
            self.N_class = database.shape[1]
            self.N_theta = self.test_theta.shape[1]

            database = database.reshape((database.shape[0]*database.shape[1], database.shape[2], database.shape[3]))
            database = torch.from_numpy(database)
            if self.args.use_gpu:
                database = database.cuda()
            database = torch.autograd.Variable(database, requires_grad=False)

            self.model = models.AlternativeOneHotDecoder(database, self.N_font, self.N_class, self.N_theta)
            self.model.eval()
            log('[Detection] initialized decoder')
        elif self.args.mode == 'appr':
            assert self.args.decoder_files
            assert self.args.test_codes_file
            assert self.args.test_theta_file

            self.test_codes = utils.read_hdf5(self.args.test_codes_file)
            log('[Detection] read %s' % self.args.test_codes_file)

            self.test_theta = utils.read_hdf5(self.args.test_theta_file)
            log('[Detection] read %s' % self.args.test_theta_file)

            self.test_codes = self.test_codes[:self.perturbations.shape[0]]
            self.test_theta = self.test_theta[:self.perturbations.shape[0]]

            self.test_codes = numpy.repeat(self.test_codes, num_attempts, axis=0)
            self.test_theta = numpy.repeat(self.test_theta, num_attempts, axis=0)

            self.test_codes = self.test_codes[:max_samples]
            self.test_theta = self.test_theta[:max_samples]

            decoder_files = self.args.decoder_files.split(',')
            for decoder_file in decoder_files:
                assert os.path.exists(decoder_file), 'could not find %s' % decoder_file

            resolution = [1 if len(self.test_images.shape) <= 3 else self.test_images.shape[3], self.test_images.shape[1], self.test_images.shape[2]]
            decoder_units = list(map(int, self.args.decoder_units.split(',')))

            if len(decoder_files) > 1:
                log('[Detection] loading multiple decoders')
                decoders = []
                for i in range(len(decoder_files)):
                    decoder = models.LearnedDecoder(self.args.latent_space_size,
                                                    resolution=resolution,
                                                    architecture=self.args.decoder_architecture,
                                                    start_channels=self.args.decoder_channels,
                                                    activation=self.args.decoder_activation,
                                                    batch_normalization=not self.args.decoder_no_batch_normalization,
                                                    units=decoder_units)

                    state = State.load(decoder_files[i])
                    decoder.load_state_dict(state.model)
                    if self.args.use_gpu and not cuda.is_cuda(decoder):
                        decoder = decoder.cuda()
                    decoders.append(decoder)

                    decoder.eval()
                    log('[Detection] loaded %s' % decoder_files[i])
                self.model = models.SelectiveDecoder(decoders, resolution=resolution)
            else:
                log('[Detection] loading one decoder')
                decoder = models.LearnedDecoder(self.args.latent_space_size,
                                                resolution=resolution,
                                                architecture=self.args.decoder_architecture,
                                                start_channels=self.args.decoder_channels,
                                                activation=self.args.decoder_activation,
                                                batch_normalization=not self.args.decoder_no_batch_normalization,
                                                units=decoder_units)

                state = State.load(decoder_files[0])
                decoder.load_state_dict(state.model)
                if self.args.use_gpu and not cuda.is_cuda(decoder):
                    decoder = decoder.cuda()
                decoder.eval()
                log('[Detection] read decoder')

                self.model = decoder
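The perturbation handling above relies on a fixed flattening convention: perturbations arrive as (N_attempts, N_samples, ...), are reordered so that all attempts of one sample are contiguous, and per-sample metadata is aligned with numpy.repeat. A small sketch of that convention and of recovering the sample index from a flattened index (shapes and names are illustrative):

import numpy

N_attempts, N_samples = 3, 4
perturbations = numpy.zeros((N_attempts, N_samples, 2, 2), dtype=numpy.float32)
labels = numpy.arange(N_samples)  # hypothetical per-sample labels

# Reorder and flatten: row-major order keeps each sample's attempts contiguous.
flat = numpy.swapaxes(perturbations, 0, 1).reshape(N_samples * N_attempts, -1)
flat_labels = numpy.repeat(labels, N_attempts, axis=0)

n = 7  # a flattened index: sample n // N_attempts, attempt n % N_attempts
assert flat_labels[n] == n // N_attempts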
Example #16
    def load_data(self):
        """
        Load data and model.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.test_images_file)

        # Add an explicit channel axis so grayscale and color images are handled uniformly.
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Testing] grayscale images, added channel dimension')
        self.resolution = self.test_images.shape[2]
        log('[Testing] resolution %d' % self.resolution)

        self.train_images = utils.read_hdf5(self.args.train_images_file).astype(numpy.float32)
        # Flatten each training image into a vector.
        self.train_images = self.train_images.reshape((self.train_images.shape[0], -1))
        log('[Testing] read %s' % self.args.train_images_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.test_theta_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.train_theta_file)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        self.N_class = numpy.max(self.test_codes) + 1
        log('[Testing] read %s' % self.args.test_codes_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        log('[Testing] read %s' % self.args.accuracy_file)

        self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]
        assert not numpy.any(numpy.isnan(self.perturbations)), 'NaN in perturbations'

        # First, repeat relevant data.
        self.perturbation_theta = numpy.repeat(self.test_theta[:self.perturbations.shape[1]], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.repeat(self.test_codes[:self.perturbations.shape[1]], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.squeeze(self.perturbation_codes)
        self.accuracy = numpy.repeat(self.accuracy[:self.perturbations.shape[1]], self.N_attempts, axis=0)

        # Then, reorder and flatten the perturbations.
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape((self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        log('[Testing] read %s' % self.args.perturbations_file)

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape((self.success.shape[0] * self.success.shape[1]))
        log('[Testing] read %s' % self.args.success_file)

        assert self.args.decoder_files
        decoder_files = self.args.decoder_files.split(',')
        for decoder_file in decoder_files:
            assert os.path.exists(decoder_file), 'could not find %s' % decoder_file

        log('[Testing] using %d input channels' % self.test_images.shape[3])
        decoder_units = list(map(int, self.args.decoder_units.split(',')))
        # Channels-first resolution expected by the decoders.
        resolution = (self.test_images.shape[3], self.test_images.shape[1], self.test_images.shape[2])

        if len(decoder_files) > 1:
            log('[Testing] loading multiple decoders')
            decoders = []
            for i in range(len(decoder_files)):
                decoder = models.LearnedDecoder(self.args.latent_space_size,
                                                resolution=resolution,
                                                architecture=self.args.decoder_architecture,
                                                start_channels=self.args.decoder_channels,
                                                activation=self.args.decoder_activation,
                                                batch_normalization=not self.args.decoder_no_batch_normalization,
                                                units=decoder_units)

                state = State.load(decoder_files[i])
                decoder.load_state_dict(state.model)
                if self.args.use_gpu and not cuda.is_cuda(decoder):
                    decoder = decoder.cuda()
                decoders.append(decoder)

                decoder.eval()
                log('[Testing] loaded %s' % decoder_files[i])
            self.model = models.SelectiveDecoder(decoders, resolution=resolution)
        else:
            log('[Testing] loading one decoder')
            decoder = models.LearnedDecoder(self.args.latent_space_size,
                                            resolution=resolution,
                                            architecture=self.args.decoder_architecture,
                                            start_channels=self.args.decoder_channels,
                                            activation=self.args.decoder_activation,
                                            batch_normalization=not self.args.decoder_no_batch_normalization,
                                            units=decoder_units)

            state = State.load(decoder_files[0])
            decoder.load_state_dict(state.model)
            if self.args.use_gpu and not cuda.is_cuda(decoder):
                decoder = decoder.cuda()
            decoder.eval()
            log('[Testing] read decoder')

            self.model = decoder
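Across these examples, a flattened attempt only enters the statistics when the attack succeeded (success >= 0), the underlying test image was classified correctly (accuracy), and, in the normalized variants, the perturbation norm is non-degenerate. A small sketch of that mask, assuming success stores the iteration at which the attack first succeeded with negative values marking failures (an assumption consistent with the success >= 0 checks above; all arrays are illustrative):

import numpy

success = numpy.array([3, -1, 0, 7, -1])         # first successful iteration, -1 = failed
accuracy = numpy.array([True, True, False, True, True])
norms = numpy.array([0.5, 0.2, 0.3, 1e-6, 0.4])  # hypothetical perturbation norms

valid = numpy.logical_and(numpy.logical_and(success >= 0, accuracy), norms > 1e-4)
print(valid)  # [ True False False False False]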