Exemple #1
0
def get_frame_saliency(classifier, inner_config=None, **kwargs):
    """
    Build a ``FrameSaliencyAttack`` that wraps an inner attack.

    The inner attack is created by ``config_loading.load_attack`` from
    ``inner_config`` and ``classifier``; it is then handed, together with
    ``classifier`` and any extra keyword arguments, to the
    ``FrameSaliencyAttack`` constructor.

    :param classifier: passed both to the inner-attack loader and to
        ``FrameSaliencyAttack``
    :param inner_config: configuration handed to ``load_attack``
    :param kwargs: forwarded unchanged to ``FrameSaliencyAttack``
    :return: the constructed ``FrameSaliencyAttack`` instance
    """
    # Function-scope imports: nothing is imported until the attack is requested.
    from art.attacks.evasion import FrameSaliencyAttack
    from armory.utils import config_loading

    inner_attack = config_loading.load_attack(inner_config, classifier)
    return FrameSaliencyAttack(classifier, inner_attack, **kwargs)
Exemple #2
0
 def __init__(self, estimator, **kwargs):
     """
     Load every inner attack and hand the list to the parent constructor.

     ``kwargs`` must contain "inner_configs", an iterable of attack configs.
     Each config's ``["kwargs"]["targeted"]`` entry is overwritten in place
     with this attack's own ``targeted`` flag (default ``False``) before the
     config is passed to ``load_attack``. "inner_configs" is removed from
     ``kwargs`` before the remaining keyword arguments are forwarded to
     ``super().__init__``.
     """
     self._check_kwargs(kwargs)
     self.targeted = kwargs.get("targeted", False)
     self.attacks = []
     # "inner_configs" is consumed here so it is not forwarded to the parent.
     for attack_config in kwargs.pop("inner_configs"):
         attack_config["kwargs"]["targeted"] = self.targeted
         inner_attack = load_attack(attack_config, estimator)
         self.attacks.append(inner_attack)
     super().__init__(estimator=estimator, attacks=self.attacks, **kwargs)
    def _evaluate(self, config: dict) -> dict:
        """
        Evaluate the config and return a results dict

        Stages, in order:
          1. load the model and, for "Preprocessor"/"Postprocessor" defenses,
             wrap it with the internal defense;
          2. if ``config["model"]["fit"]`` is set, train it batch by batch
             (through the "Trainer" defense wrapper when one is configured);
          3. run inference on the benign test split;
          4. generate adversarial examples for the test split and run
             inference on those adversarial examples.

        Each test sample is a stack of clips; the per-stack predictions are
        averaged over axis 0 to obtain one prediction per sample.

        :param config: evaluation configuration. Keys read here: "model",
            "dataset", "attack", "metric" and, optionally, "defense".
        :return: the dict returned by ``metrics_logger.results()``
        """

        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)

        # A missing or null "defense" entry is treated as "no defense".
        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            train_epochs = config["model"]["fit_kwargs"]["nb_epochs"]
            batch_size = config["dataset"]["batch_size"]

            logger.info(f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=train_epochs,
                split_type="train",
                preprocessing_fn=preprocessing_fn,
            )

            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], classifier)
            else:
                logger.info("Fitting classifier on clean train dataset...")

            for epoch in range(train_epochs):
                classifier.set_learning_phase(True)

                for _ in tqdm(
                    range(train_data.batches_per_epoch),
                    desc=f"Epoch: {epoch}/{train_epochs}",
                ):
                    x, y = train_data.get_batch()
                    # x_trains consists of one or more videos, each represented as an
                    # ndarray of shape (n_stacks, 3, 16, 112, 112).
                    # To train, randomly sample a batch of stacks
                    x = np.stack([x_i[np.random.randint(x_i.shape[0])] for x_i in x])
                    if defense_type == "Trainer":
                        defense.fit(x, y, batch_size=batch_size, nb_epochs=1)
                    else:
                        classifier.fit(x, y, batch_size=batch_size, nb_epochs=1)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(f"Transforming classifier with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            classifier = defense()

        classifier.set_learning_phase(False)

        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data_generator = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
        )

        logger.info("Running inference on benign examples...")
        metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

        for x_batch, y_batch in tqdm(test_data_generator, desc="Benign"):
            for x, y in zip(x_batch, y_batch):
                # combine predictions across all stacks
                y_pred = np.mean(classifier.predict(x), axis=0)
                metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating / testing adversarial examples...")

        attack = load_attack(config["attack"], classifier)
        # The test split is loaded a second time for the adversarial pass.
        test_data_generator = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
        )
        for x_batch, y_batch in tqdm(test_data_generator, desc="Attack"):
            for x, y in zip(x_batch, y_batch):
                # each x is of shape (n_stack, 3, 16, 112, 112)
                #    n_stack varies
                attack.set_params(batch_size=x.shape[0])
                x_adv = attack.generate(x=x)
                # combine predictions across all stacks; the prediction must be
                # made on the adversarial input, not on the benign one, or the
                # adversarial metrics would just repeat the benign metrics
                y_pred_adv = np.mean(classifier.predict(x_adv), axis=0)
                metrics_logger.update_task(y, y_pred_adv, adversarial=True)
                metrics_logger.update_perturbation([x], [x_adv])
        metrics_logger.log_task(adversarial=True)
        return metrics_logger.results()
    def _evaluate(
        self,
        config: dict,
        num_eval_batches: Optional[int],
        skip_benign: Optional[bool],
        skip_attack: Optional[bool],
        skip_misclassified: Optional[bool],
    ) -> dict:
        """
        Evaluate the config and return a results dict

        Stages, in order: load the estimator, optionally wrap it with an
        internal defense, optionally fit it, optionally run benign inference,
        then generate (or load preloaded) adversarial examples, run inference
        on them and record task / perturbation metrics.

        :param config: evaluation configuration. Keys read here: "model",
            "dataset", "attack", "metric", "scenario" and, optionally,
            "defense".
        :param num_eval_batches: forwarded as ``num_batches`` when loading
            the evaluation data
        :param skip_benign: if truthy, the benign inference pass is skipped
        :param skip_attack: if truthy, return right after the benign stage
        :param skip_misclassified: if truthy, a batch whose benign
            "categorical_accuracy" value is 0 is not attacked; the benign
            input is used as the "adversarial" input instead
        :return: the dict returned by ``metrics_logger.results()``
        :raises ValueError: if the attack is targeted and "use_label" is set
        :raises NotImplementedError: if fitting raises NotImplementedError
            (re-raised with an object-detector specific message)
        """
        model_config = config["model"]
        # The second element returned by load_model is not used here.
        estimator, _ = load_model(model_config)

        # A missing or null "defense" entry is treated as "no defense".
        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to estimator")
            estimator = load_defense_internal(config["defense"], estimator)

        if model_config["fit"]:
            try:
                logger.info(
                    f"Fitting model {model_config['module']}.{model_config['name']}..."
                )
                fit_kwargs = model_config["fit_kwargs"]

                logger.info(
                    f"Loading train dataset {config['dataset']['name']}...")
                train_data = load_dataset(
                    config["dataset"],
                    epochs=fit_kwargs["nb_epochs"],
                    split=config["dataset"].get("train_split", "train"),
                    shuffle_files=True,
                )
                if defense_type == "Trainer":
                    logger.info(f"Training with {defense_type} defense...")
                    defense = load_defense_wrapper(config["defense"],
                                                   estimator)
                    defense.fit_generator(train_data, **fit_kwargs)
                else:
                    logger.info("Fitting estimator on clean train dataset...")
                    estimator.fit_generator(train_data, **fit_kwargs)
            except NotImplementedError:
                # Any NotImplementedError raised inside the fitting stage is
                # replaced by one carrying this message.
                raise NotImplementedError(
                    "Training has not yet been implemented for object detectors"
                )

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming estimator with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], estimator)
            estimator = defense()

        attack_config = config["attack"]
        attack_type = attack_config.get("type")

        # "targeted" is read from the attack's own kwargs; absent means False.
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"],
            skip_benign=skip_benign,
            skip_attack=skip_attack,
            targeted=targeted,
        )

        eval_split = config["dataset"].get("eval_split", "test")
        if skip_benign:
            logger.info("Skipping benign classification...")
        else:
            # Evaluate the ART estimator on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )

            logger.info("Running inference on benign examples...")
            for x, y in tqdm(test_data, desc="Benign"):
                # Ensure that input sample isn't overwritten by estimator
                x.flags.writeable = False
                # Only the predict call is inside the resource context.
                with metrics.resource_context(
                        name="Inference",
                        profiler=config["metric"].get("profiler_type"),
                        computational_resource_dict=metrics_logger.
                        computational_resource_dict,
                ):
                    y_pred = estimator.predict(x)
                metrics_logger.update_task(y, y_pred)
            metrics_logger.log_task()

        if skip_attack:
            logger.info("Skipping attack generation...")
            return metrics_logger.results()

        # Evaluate the ART estimator on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        if skip_misclassified:
            # Look up the task named "categorical_accuracy"; list.index raises
            # ValueError when no task has that name.
            # NOTE(review): these values come from the benign pass; if
            # skip_benign is also set there may be nothing to index in the
            # attack loop below — confirm the intended combination.
            acc_task_idx = [i.name for i in metrics_logger.tasks
                            ].index("categorical_accuracy")
            benign_acc = metrics_logger.tasks[acc_task_idx].values()

        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            # Adversarial examples come from a dataset instead of an attack.
            preloaded_split = attack_config.get("kwargs", {}).get(
                "split", "adversarial")
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split=preloaded_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
        else:
            attack = load_attack(attack_config, estimator)
            # Warn (do not fail) when the config and the loaded attack
            # disagree on whether the attack is targeted.
            if targeted != getattr(attack, "targeted", False):
                logger.warning(
                    f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
                )
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            if targeted:
                label_targeter = load_label_targeter(
                    attack_config["targeted_labels"])

        # Sample export is enabled only for a positive "export_samples" value.
        export_samples = config["scenario"].get("export_samples")
        if export_samples is not None and export_samples > 0:
            sample_exporter = SampleExporter(self.scenario_output_dir,
                                             test_data.context, export_samples)
        else:
            sample_exporter = None

        for batch_idx, (x, y) in enumerate(tqdm(test_data, desc="Attack")):
            with metrics.resource_context(
                    name="Attack",
                    profiler=config["metric"].get("profiler_type"),
                    computational_resource_dict=metrics_logger.
                    computational_resource_dict,
            ):
                if attack_type == "preloaded":
                    # A length-2 x is unpacked as (benign, adversarial);
                    # otherwise x itself is used as the adversarial input.
                    if len(x) == 2:
                        x, x_adv = x
                    else:
                        x_adv = x
                    if targeted:
                        y, y_target = y
                else:
                    # Copy so per-batch additions do not leak into the config.
                    generate_kwargs = deepcopy(
                        attack_config.get("generate_kwargs", {}))
                    # Temporary workaround for ART code requirement of ndarray mask
                    if "mask" in generate_kwargs:
                        generate_kwargs["mask"] = np.array(
                            generate_kwargs["mask"])
                    if attack_config.get("use_label"):
                        generate_kwargs["y"] = y
                    elif targeted:
                        y_target = label_targeter.generate(y)
                        generate_kwargs["y"] = y_target

                    # With skip_misclassified, a batch whose benign accuracy
                    # is 0 is passed through unattacked.
                    if skip_misclassified and benign_acc[batch_idx] == 0:
                        x_adv = x
                    else:
                        x_adv = attack.generate(x=x, **generate_kwargs)

            # Ensure that input sample isn't overwritten by estimator
            x_adv.flags.writeable = False
            y_pred_adv = estimator.predict(x_adv)
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            if targeted:
                # Targeted attacks are additionally scored against y_target.
                metrics_logger.update_task(y_target,
                                           y_pred_adv,
                                           adversarial=True,
                                           targeted=True)
            metrics_logger.update_perturbation(x, x_adv)
            if sample_exporter is not None:
                sample_exporter.export(x, x_adv, y, y_pred_adv)
        metrics_logger.log_task(adversarial=True)
        if targeted:
            metrics_logger.log_task(adversarial=True, targeted=True)
        return metrics_logger.results()
    def _evaluate(self, config: dict, num_eval_batches: Optional[int],
                  skip_benign: Optional[bool]) -> dict:
        """
        Evaluate the config and return a results dict

        Stages, in order: load the classifier, optionally wrap it with an
        internal defense, optionally train it batch by batch, optionally run
        benign inference, then generate (or load preloaded) adversarial
        examples and run inference on them. Predictions for one sample are
        averaged over axis 0 of the classifier output.

        :param config: evaluation configuration. Keys read here: "model",
            "dataset", "attack", "metric" and, optionally, "defense".
        :param num_eval_batches: forwarded as ``num_batches`` when loading
            the evaluation data
        :param skip_benign: if truthy, the benign inference pass is skipped
        :return: the dict returned by ``metrics_logger.results()``
        :raises ValueError: if the attack is targeted and "use_label" is set
        """

        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)

        # A missing or null "defense" entry is treated as "no defense".
        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            train_epochs = config["model"]["fit_kwargs"]["nb_epochs"]
            batch_size = config["dataset"]["batch_size"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=train_epochs,
                split_type="train",
                preprocessing_fn=preprocessing_fn,
                shuffle_files=True,
            )

            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], classifier)
            else:
                logger.info("Fitting classifier on clean train dataset...")

            # Training is driven manually, one batch at a time, so that a
            # single stack can be sampled from every video before fitting.
            for epoch in range(train_epochs):
                classifier.set_learning_phase(True)

                for _ in tqdm(
                        range(train_data.batches_per_epoch),
                        desc=f"Epoch: {epoch}/{train_epochs}",
                ):
                    x, y = train_data.get_batch()
                    # x_trains consists of one or more videos, each represented as an
                    # ndarray of shape (n_stacks, 3, 16, 112, 112).
                    # To train, randomly sample a batch of stacks
                    x = np.stack(
                        [x_i[np.random.randint(x_i.shape[0])] for x_i in x])
                    if defense_type == "Trainer":
                        defense.fit(x, y, batch_size=batch_size, nb_epochs=1)
                    else:
                        classifier.fit(x,
                                       y,
                                       batch_size=batch_size,
                                       nb_epochs=1)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming classifier with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            classifier = defense()

        classifier.set_learning_phase(False)

        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"], skip_benign=skip_benign)
        if skip_benign:
            logger.info("Skipping benign classification...")
        else:
            # Evaluate the ART classifier on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=preprocessing_fn,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )

            logger.info("Running inference on benign examples...")

            for x_batch, y_batch in tqdm(test_data, desc="Benign"):
                for x, y in zip(x_batch, y_batch):
                    # combine predictions across all stacks
                    # (only the predict call is inside the resource context)
                    with metrics.resource_context(
                            name="Inference",
                            profiler=config["metric"].get("profiler_type"),
                            computational_resource_dict=metrics_logger.
                            computational_resource_dict,
                    ):
                        y_pred = np.mean(classifier.predict(x, batch_size=1),
                                         axis=0)
                    metrics_logger.update_task(y, y_pred)
            metrics_logger.log_task()

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        attack_config = config["attack"]
        attack_type = attack_config.get("type")
        # "targeted" is read from the attack's own kwargs; absent means False.
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            # Adversarial examples come from a dataset instead of an attack.
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split_type="adversarial",
                preprocessing_fn=preprocessing_fn,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
        else:
            attack = load_attack(attack_config, classifier)
            # Warn (do not fail) when the config and the loaded attack
            # disagree on whether the attack is targeted.
            if targeted != getattr(attack, "targeted", False):
                logger.warning(
                    f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
                )
            attack.set_params(batch_size=1)
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=preprocessing_fn,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            if targeted:
                label_targeter = load_label_targeter(
                    attack_config["targeted_labels"])
        for x_batch, y_batch in tqdm(test_data, desc="Attack"):
            if attack_type == "preloaded":
                # Transpose so that each element pairs up the entries for one
                # sample; the pairs are unpacked in the inner loop below.
                x_batch = list(zip(*x_batch))
                if targeted:
                    y_batch = list(zip(*y_batch))
            for x, y in zip(x_batch, y_batch):
                with metrics.resource_context(
                        name="Attack",
                        profiler=config["metric"].get("profiler_type"),
                        computational_resource_dict=metrics_logger.
                        computational_resource_dict,
                ):
                    if attack_type == "preloaded":
                        x, x_adv = x
                        if targeted:
                            y, y_target = y
                    else:
                        # each x is of shape (n_stack, 3, 16, 112, 112)
                        #    n_stack varies
                        if attack_config.get("use_label"):
                            # expansion required due to preprocessing
                            y_input = np.repeat(y, x.shape[0])
                            x_adv = attack.generate(x=x, y=y_input)
                        elif targeted:
                            y_target = label_targeter.generate(y)
                            # the target label is repeated once per stack
                            y_input = np.repeat(y_target, x.shape[0])
                            x_adv = attack.generate(x=x, y=y_input)
                        else:
                            x_adv = attack.generate(x=x)
                # combine predictions across all stacks
                y_pred_adv = np.mean(classifier.predict(x_adv, batch_size=1),
                                     axis=0)
                # Targeted runs are scored against the target label, untargeted
                # runs against the true label.
                if targeted:
                    metrics_logger.update_task(y_target,
                                               y_pred_adv,
                                               adversarial=True)
                else:
                    metrics_logger.update_task(y, y_pred_adv, adversarial=True)
                metrics_logger.update_perturbation([x], [x_adv])
        metrics_logger.log_task(adversarial=True, targeted=targeted)
        return metrics_logger.results()
Exemple #6
0
    def _evaluate(self, config: dict) -> dict:
        """
        Run the evaluation described by ``config`` and return its results.

        The model is loaded (and wrapped with an internal defense when the
        defense type is "Preprocessor" or "Postprocessor"), optionally fitted
        with ``fit_generator``, evaluated on the benign test split and then
        evaluated again on adversarial examples generated from the same split.

        :param config: evaluation configuration. Keys read here: "model",
            "dataset", "attack", "metric" and, optionally, "defense".
        :return: the dict returned by ``metrics_logger.results()``
        """

        def _load_test_split():
            # The benign and the adversarial pass each read their own
            # single-epoch copy of the test split.
            return load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=input_fn,
            )

        model_config = config["model"]
        model, input_fn = load_model(model_config)

        # A missing or null "defense" entry is treated as "no defense".
        defense_type = (config.get("defense") or {}).get("type")

        if defense_type in ("Preprocessor", "Postprocessor"):
            logger.info(f"Applying internal {defense_type} defense to classifier")
            model = load_defense_internal(config["defense"], model)

        if model_config["fit"]:
            model.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_options = model_config["fit_kwargs"]

            logger.info(f"Loading train dataset {config['dataset']['name']}...")
            train_batches = load_dataset(
                config["dataset"],
                epochs=fit_options["nb_epochs"],
                split_type="train",
                preprocessing_fn=input_fn,
            )
            # Either the "Trainer" defense wrapper or the model itself is fitted.
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                fit_target = load_defense_wrapper(config["defense"], model)
            else:
                logger.info("Fitting classifier on clean train dataset...")
                fit_target = model
            fit_target.fit_generator(train_batches, **fit_options)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(f"Transforming classifier with {defense_type} defense...")
            transform = load_defense_wrapper(config["defense"], model)
            model = transform()

        model.set_learning_phase(False)

        # Benign pass over the test split.
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        benign_batches = _load_test_split()
        logger.info("Running inference on benign examples...")
        metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

        for x_clean, y_true in tqdm(benign_batches, desc="Benign"):
            y_hat = model.predict(x_clean)
            metrics_logger.update_task(y_true, y_hat)
        metrics_logger.log_task()

        # Adversarial pass over the test split.
        logger.info("Generating / testing adversarial examples...")

        attack = load_attack(config["attack"], model)
        attack_batches = _load_test_split()
        for x_clean, y_true in tqdm(attack_batches, desc="Attack"):
            x_perturbed = attack.generate(x=x_clean)
            y_hat_adv = model.predict(x_perturbed)
            metrics_logger.update_task(y_true, y_hat_adv, adversarial=True)
            metrics_logger.update_perturbation(x_clean, x_perturbed)
        metrics_logger.log_task(adversarial=True)
        return metrics_logger.results()
Exemple #7
0
    def _evaluate(
        self,
        config: dict,
        num_eval_batches: Optional[int],
        skip_benign: Optional[bool],
        skip_attack: Optional[bool],
    ) -> dict:
        """
        Evaluate the config and return a results dict

        Stages, in order: validate the evaluation batch size, load the
        classifier, optionally wrap it with an internal defense, optionally
        fit it (with a separate training batch size), optionally run benign
        inference, then generate (or load preloaded) adversarial examples,
        run inference on them and record task / perturbation metrics.

        ``config["dataset"]["batch_size"]`` is temporarily replaced in place
        while the training data is loaded and restored afterwards.

        :param config: evaluation configuration. Keys read here: "model",
            "dataset", "attack", "metric", "scenario" and, optionally,
            "defense".
        :param num_eval_batches: forwarded as ``num_batches`` when loading
            the evaluation data
        :param skip_benign: if truthy, the benign inference pass is skipped
        :param skip_attack: if truthy, return right after the benign stage
        :return: the dict returned by ``metrics_logger.results()``
        :raises ValueError: if the dataset batch size is not 1, or if the
            attack is targeted and "use_label" is set
        """
        if config["dataset"]["batch_size"] != 1:
            raise ValueError(
                "batch_size must be 1 for evaluation, due to variable length inputs.\n"
                "    If training, set config['model']['fit_kwargs']['fit_batch_size']"
            )

        model_config = config["model"]
        classifier, fit_preprocessing_fn = load_model(model_config)

        # A missing or null "defense" entry is treated as "no defense".
        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(f"Loading train dataset {config['dataset']['name']}...")
            # Swap in the training batch size ("fit_batch_size", falling back
            # to the evaluation batch size) just for loading the train data.
            batch_size = config["dataset"].pop("batch_size")
            config["dataset"]["batch_size"] = fit_kwargs.get(
                "fit_batch_size", batch_size
            )
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split=config["dataset"].get("train_split", "train"),
                preprocessing_fn=fit_preprocessing_fn,
                shuffle_files=True,
            )
            # Restore the evaluation batch size.
            # NOTE(review): this line is skipped if load_dataset raises, which
            # would leave the training batch size in config — confirm whether
            # that matters to callers.
            config["dataset"]["batch_size"] = batch_size
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], classifier)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting classifier on clean train dataset...")
                classifier.fit_generator(train_data, **fit_kwargs)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(f"Transforming classifier with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            classifier = defense()

        classifier.set_learning_phase(False)

        attack_config = config["attack"]
        attack_type = attack_config.get("type")

        # "targeted" is read from the attack's own kwargs; absent means False.
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"],
            skip_benign=skip_benign,
            skip_attack=skip_attack,
            targeted=targeted,
        )

        # NOTE(review): batch_size != 1 already raises ValueError at the top of
        # this method and the value is restored after training, so this
        # warning looks unreachable — confirm.
        if config["dataset"]["batch_size"] != 1:
            logger.warning("Evaluation batch_size != 1 may not be supported.")

        eval_split = config["dataset"].get("eval_split", "test")
        if skip_benign:
            logger.info("Skipping benign classification...")
        else:
            # Evaluate the ART classifier on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )

            logger.info("Running inference on benign examples...")
            for x, y in tqdm(test_data, desc="Benign"):
                # Ensure that input sample isn't overwritten by classifier
                x.flags.writeable = False
                # Only the predict call is inside the resource context.
                with metrics.resource_context(
                    name="Inference",
                    profiler=config["metric"].get("profiler_type"),
                    computational_resource_dict=metrics_logger.computational_resource_dict,
                ):
                    y_pred = classifier.predict(x)
                metrics_logger.update_task(y, y_pred)
            metrics_logger.log_task()

        if skip_attack:
            logger.info("Skipping attack generation...")
            return metrics_logger.results()

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            # Adversarial examples come from a dataset instead of an attack.
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split="adversarial",
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
        else:
            attack = load_attack(attack_config, classifier)
            # Warn (do not fail) when the config and the loaded attack
            # disagree on whether the attack is targeted.
            if targeted != getattr(attack, "targeted", False):
                logger.warning(
                    f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
                )
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            if targeted:
                label_targeter = load_label_targeter(attack_config["targeted_labels"])

        # Sample export is enabled only for a positive "export_samples" value.
        export_samples = config["scenario"].get("export_samples")
        if export_samples is not None and export_samples > 0:
            sample_exporter = SampleExporter(
                self.scenario_output_dir, test_data.context, export_samples
            )
        else:
            sample_exporter = None

        for x, y in tqdm(test_data, desc="Attack"):
            with metrics.resource_context(
                name="Attack",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.computational_resource_dict,
            ):
                if attack_type == "preloaded":
                    # Preloaded batches are unpacked as (benign, adversarial)
                    # inputs and, when targeted, (true, target) labels.
                    x, x_adv = x
                    if targeted:
                        y, y_target = y
                else:
                    # Copy so per-batch additions do not leak into the config.
                    generate_kwargs = deepcopy(attack_config.get("generate_kwargs", {}))
                    if attack_config.get("use_label"):
                        generate_kwargs["y"] = y
                    elif targeted:
                        y_target = label_targeter.generate(y)
                        generate_kwargs["y"] = y_target
                    x_adv = attack.generate(x=x, **generate_kwargs)

            # Ensure that input sample isn't overwritten by classifier
            x_adv.flags.writeable = False
            y_pred_adv = classifier.predict(x_adv)
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            if targeted:
                # Targeted attacks are additionally scored against y_target.
                metrics_logger.update_task(
                    y_target, y_pred_adv, adversarial=True, targeted=True
                )
            metrics_logger.update_perturbation(x, x_adv)
            if sample_exporter is not None:
                sample_exporter.export(x, x_adv, y, y_pred_adv)
        metrics_logger.log_task(adversarial=True)
        if targeted:
            metrics_logger.log_task(adversarial=True, targeted=True)
        return metrics_logger.results()
    def _evaluate(
        self,
        config: dict,
        num_eval_batches: Optional[int],
        skip_benign: Optional[bool],
        skip_attack: Optional[bool],
    ) -> dict:
        """
        Evaluate the config and return a results dict

        Optionally fits the estimator, evaluates it on benign data, then on
        adversarial data. Perturbation metrics are tracked separately for the
        SAR channels (0-3) and the EO channels (4-13), depending on
        ``self.attack_modality``.

        Args:
            config: scenario configuration. The "model", "attack", "dataset",
                "metric" and "scenario" sections are read; "defense" is
                optional.
            num_eval_batches: forwarded to the dataset loaders as
                ``num_batches``.
            skip_benign: if truthy, the benign evaluation pass is skipped.
            skip_attack: if truthy, results are returned right after the
                benign pass.

        Returns:
            The task results, merged with the perturbation results of each
            attacked modality under ``sar_``- / ``eo_``-prefixed keys.

        Raises:
            AssertionError: if the configured channels are not a list or fall
                outside the channel range of the selected modality.
            NotImplementedError: if fitting raises NotImplementedError.
            ValueError: if the attack is targeted and also sets 'use_label'.
        """

        model_config = config["model"]
        estimator, _ = load_model(model_config)

        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(f"Applying internal {defense_type} defense to estimator")
            estimator = load_defense_internal(config["defense"], estimator)

        attack_config = config["attack"]
        attack_channels = attack_config.get("generate_kwargs", {}).get("channels")

        # NOTE(review): assumes self.attack_modality is one of "sar", "eo" or
        # "both" (presumably validated elsewhere); for any other value no
        # default channel set is chosen below.
        if attack_channels is None:
            if self.attack_modality == "sar":
                logger.info("No mask configured. Attacking all SAR channels")
                attack_channels = range(4)
            elif self.attack_modality == "eo":
                logger.info("No mask configured. Attacking all EO channels")
                attack_channels = range(4, 14)
            elif self.attack_modality == "both":
                logger.info("No mask configured. Attacking all SAR and EO channels")
                attack_channels = range(14)

        else:
            assert isinstance(
                attack_channels, list
            ), "Mask is specified, but incorrect format. Expected list"
            attack_channels = np.array(attack_channels)
            if self.attack_modality == "sar":
                assert np.all(
                    np.logical_and(attack_channels >= 0, attack_channels < 4)
                ), "Selected SAR-only attack modality, but specify non-SAR channels"
            elif self.attack_modality == "eo":
                assert np.all(
                    np.logical_and(attack_channels >= 4, attack_channels < 14)
                ), "Selected EO-only attack modality, but specify non-EO channels"
            elif self.attack_modality == "both":
                assert np.all(
                    np.logical_and(attack_channels >= 0, attack_channels < 14)
                ), "Selected channels are out-of-bounds"

        if model_config["fit"]:
            try:
                estimator.set_learning_phase(True)
                logger.info(
                    f"Fitting model {model_config['module']}.{model_config['name']}..."
                )
                fit_kwargs = model_config["fit_kwargs"]

                logger.info(f"Loading train dataset {config['dataset']['name']}...")
                train_data = load_dataset(
                    config["dataset"],
                    epochs=fit_kwargs["nb_epochs"],
                    split=config["dataset"].get("train_split", "train"),
                    shuffle_files=True,
                )
                if defense_type == "Trainer":
                    logger.info(f"Training with {defense_type} defense...")
                    defense = load_defense_wrapper(config["defense"], estimator)
                    defense.fit_generator(train_data, **fit_kwargs)
                else:
                    logger.info("Fitting estimator on clean train dataset...")
                    estimator.fit_generator(train_data, **fit_kwargs)
            except NotImplementedError as err:
                # Chain explicitly so the originating call stays visible.
                raise NotImplementedError(
                    "Training has not yet been implemented for object detectors"
                ) from err

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(f"Transforming estimator with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], estimator)
            estimator = defense()

        try:
            estimator.set_learning_phase(False)
        except NotImplementedError:
            logger.warning(
                "Unable to set estimator's learning phase. As of ART 1.4.1, "
                "this is not yet supported for object detectors."
            )

        attack_type = attack_config.get("type")
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))

        # Task metrics and perturbation metrics live in separate loggers, so
        # each logger gets a copy of the metric config without the other part.
        performance_metrics = deepcopy(config["metric"])
        performance_metrics.pop("perturbation")
        performance_logger = metrics.MetricsLogger.from_config(
            performance_metrics,
            skip_benign=skip_benign,
            skip_attack=skip_attack,
            targeted=targeted,
        )

        eval_split = config["dataset"].get("eval_split", "test")
        if skip_benign:
            logger.info("Skipping benign classification...")
        else:
            # Evaluate the ART estimator on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )

            logger.info("Running inference on benign examples...")
            for x, y in tqdm(test_data, desc="Benign"):
                # Ensure that input sample isn't overwritten by estimator
                x.flags.writeable = False
                with metrics.resource_context(
                    name="Inference",
                    profiler=config["metric"].get("profiler_type"),
                    computational_resource_dict=performance_logger.computational_resource_dict,
                ):
                    y_pred = estimator.predict(x)
                performance_logger.update_task(y, y_pred)
            performance_logger.log_task()

        if skip_attack:
            logger.info("Skipping attack generation...")
            return performance_logger.results()

        # Evaluate the ART estimator on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        perturbation_metrics = deepcopy(config["metric"])
        perturbation_metrics.pop("task")
        if self.attack_modality in ("sar", "both"):
            sar_perturbation_logger = metrics.MetricsLogger.from_config(
                perturbation_metrics,
                skip_benign=True,
                skip_attack=False,
                targeted=targeted,
            )
        else:
            sar_perturbation_logger = None

        if self.attack_modality in ("eo", "both"):
            eo_perturbation_logger = metrics.MetricsLogger.from_config(
                perturbation_metrics,
                skip_benign=True,
                skip_attack=False,
                targeted=targeted,
            )
        else:
            eo_perturbation_logger = None

        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            # Warn once here rather than once per batch inside the loop.
            logger.warning(
                "Specified preloaded attack. Ignoring `attack_modality` parameter"
            )
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split="adversarial",
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
        else:
            attack = load_attack(attack_config, estimator)
            if targeted != getattr(attack, "targeted", False):
                logger.warning(
                    f"targeted config {targeted} != attack field {getattr(attack, 'targeted', False)}"
                )
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            if targeted:
                label_targeter = load_label_targeter(attack_config["targeted_labels"])

        export_samples = config["scenario"].get("export_samples")
        if export_samples is not None and export_samples > 0:
            sample_exporter = SampleExporter(
                self.scenario_output_dir, test_data.context, export_samples
            )
        else:
            sample_exporter = None

        for x, y in tqdm(test_data, desc="Attack"):
            with metrics.resource_context(
                name="Attack",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=performance_logger.computational_resource_dict,
            ):
                if attack_type == "preloaded":
                    # Only unpack a real (benign, adversarial) pair. A bare
                    # len(x) == 2 test would also match a single array whose
                    # batch dimension happens to be 2 and split it in half.
                    if isinstance(x, (tuple, list)) and len(x) == 2:
                        x, x_adv = x
                    else:
                        x_adv = x
                    if targeted:
                        y, y_target = y
                else:
                    generate_kwargs = deepcopy(attack_config.get("generate_kwargs", {}))
                    generate_kwargs["mask"] = attack_channels
                    if attack_config.get("use_label"):
                        generate_kwargs["y"] = y
                    elif targeted:
                        y_target = label_targeter.generate(y)
                        generate_kwargs["y"] = y_target
                    x_adv = attack.generate(x=x, **generate_kwargs)

            # Ensure that input sample isn't overwritten by estimator
            x_adv.flags.writeable = False
            y_pred_adv = estimator.predict(x_adv)
            performance_logger.update_task(y, y_pred_adv, adversarial=True)
            if targeted:
                performance_logger.update_task(
                    y_target, y_pred_adv, adversarial=True, targeted=True
                )

            # Update perturbation metrics for SAR/EO separately; each view is
            # only built when the matching logger exists.
            if sar_perturbation_logger is not None:
                # Channel pairs (0, 1) and (2, 3) are combined into two
                # complex-valued channels before measuring the perturbation.
                x_sar = np.stack(
                    (
                        x[..., 0] + 1j * x[..., 1],
                        x[..., 2] + 1j * x[..., 3],
                    ),
                    axis=3,
                )
                x_adv_sar = np.stack(
                    (
                        x_adv[..., 0] + 1j * x_adv[..., 1],
                        x_adv[..., 2] + 1j * x_adv[..., 3],
                    ),
                    axis=3,
                )
                sar_perturbation_logger.update_perturbation(x_sar, x_adv_sar)
            if eo_perturbation_logger is not None:
                x_eo = x[..., 4:]
                x_adv_eo = x_adv[..., 4:]
                eo_perturbation_logger.update_perturbation(x_eo, x_adv_eo)

            if sample_exporter is not None:
                sample_exporter.export(x, x_adv, y, y_pred_adv)

        performance_logger.log_task(adversarial=True)
        if targeted:
            performance_logger.log_task(adversarial=True, targeted=True)

        # Merge performance, SAR, EO results
        combined_results = performance_logger.results()
        if sar_perturbation_logger is not None:
            combined_results.update(
                {f"sar_{k}": v for k, v in sar_perturbation_logger.results().items()}
            )
        if eo_perturbation_logger is not None:
            combined_results.update(
                {f"eo_{k}": v for k, v in eo_perturbation_logger.results().items()}
            )
        return combined_results
    def _evaluate(self, config: dict, num_eval_batches: Optional[int],
                  skip_benign: Optional[bool]) -> dict:
        """
        Evaluate the config and return a results dict

        Optionally fits the estimator, evaluates it on benign data, then on
        adversarial data that is either preloaded or generated by the
        configured attack.

        Args:
            config: scenario configuration. The "model", "dataset", "metric"
                and "attack" sections are read; "defense" and "adhoc" are
                optional.
            num_eval_batches: forwarded to the dataset loaders as
                ``num_batches``.
            skip_benign: if truthy, the benign evaluation pass is skipped.

        Returns:
            The results collected by the metrics logger.
        """
        model_config = config["model"]
        estimator, fit_preprocessing_fn = load_model(model_config)

        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to estimator")
            estimator = load_defense_internal(config["defense"], estimator)

        if model_config["fit"]:
            try:
                estimator.set_learning_phase(True)
            except NotImplementedError:
                logger.exception(
                    "set_learning_phase error; training may not work.")

            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            # Training may use its own batch size. The dataset config is
            # swapped temporarily and always restored, even when loading
            # fails, so the caller's config is not left modified.
            batch_size = config["dataset"].pop("batch_size")
            config["dataset"]["batch_size"] = fit_kwargs.get(
                "fit_batch_size", batch_size)
            try:
                train_data = load_dataset(
                    config["dataset"],
                    epochs=fit_kwargs["nb_epochs"],
                    split_type=config["dataset"].get("train_split",
                                                     "train_clean100"),
                    preprocessing_fn=fit_preprocessing_fn,
                    shuffle_files=True,
                )
            finally:
                config["dataset"]["batch_size"] = batch_size
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], estimator)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting estimator on clean train dataset...")
                estimator.fit_generator(train_data, **fit_kwargs)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming estimator with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], estimator)
            estimator = defense()

        try:
            estimator.set_learning_phase(False)
        except NotImplementedError:
            logger.warning(
                "Unable to set estimator's learning phase. As of ART 1.4.1, "
                "this is not yet supported for speech recognition models.")

        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"], skip_benign=skip_benign)
        if config["dataset"]["batch_size"] != 1:
            logger.warning("Evaluation batch_size != 1 may not be supported.")

        predict_kwargs = config["model"].get("predict_kwargs", {})
        eval_split = config["dataset"].get("eval_split", "test_clean")
        if skip_benign:
            logger.info("Skipping benign classification...")
        else:
            # Evaluate the ART estimator on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            logger.info("Running inference on benign examples...")
            for x, y in tqdm(test_data, desc="Benign"):
                # Ensure that input sample isn't overwritten by estimator
                x.flags.writeable = False
                with metrics.resource_context(
                        name="Inference",
                        profiler=config["metric"].get("profiler_type"),
                        computational_resource_dict=metrics_logger.
                        computational_resource_dict,
                ):
                    y_pred = estimator.predict(x, **predict_kwargs)
                metrics_logger.update_task(y, y_pred)
            metrics_logger.log_task()

        # Imperceptible attack still WIP
        if (config.get("adhoc") or {}).get("skip_adversarial"):
            logger.info("Skipping adversarial classification...")
            return metrics_logger.results()

        # Evaluate the ART estimator on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        attack_config = config["attack"]
        attack_type = attack_config.get("type")

        targeted = bool(attack_config.get("targeted"))
        if attack_type == "preloaded":
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split_type="adversarial",
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
        else:
            attack = load_attack(attack_config, estimator)
            # Not every attack object exposes `targeted`; treat a missing
            # attribute as untargeted instead of raising AttributeError.
            attack_targeted = getattr(attack, "targeted", False)
            if targeted != attack_targeted:
                logger.warning(
                    f"targeted config {targeted} != attack field {attack_targeted}"
                )
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            if targeted:
                label_targeter = load_label_targeter(
                    attack_config["targeted_labels"])
        for x, y in tqdm(test_data, desc="Attack"):
            with metrics.resource_context(
                    name="Attack",
                    profiler=config["metric"].get("profiler_type"),
                    computational_resource_dict=metrics_logger.
                    computational_resource_dict,
            ):
                if attack_type == "preloaded":
                    x, x_adv = x
                    if targeted:
                        y, y_target = y
                elif attack_config.get("use_label"):
                    x_adv = attack.generate(x=x, y=y)
                elif targeted:
                    y_target = label_targeter.generate(y)
                    x_adv = attack.generate(x=x, y=y_target)
                else:
                    x_adv = attack.generate(x=x)

            # Ensure that input sample isn't overwritten by estimator
            x_adv.flags.writeable = False
            y_pred_adv = estimator.predict(x_adv, **predict_kwargs)
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            metrics_logger.update_perturbation(x, x_adv)
        # Report with the configured flag; a hard-coded targeted=True would
        # present untargeted runs as targeted.
        metrics_logger.log_task(adversarial=True, targeted=targeted)
        return metrics_logger.results()
Exemple #10
0
    def _evaluate(
        self,
        config: dict,
        num_eval_batches: Optional[int],
        skip_benign: Optional[bool],
        skip_attack: Optional[bool],
        skip_misclassified: Optional[bool],
    ) -> dict:
        """
        Evaluate the config and return a results dict

        Optionally inserts an audio channel in front of the estimator's
        preprocessing defences, optionally fits the estimator, evaluates it
        on benign data, then on adversarial data that is either preloaded or
        generated by the configured attack.

        Args:
            config: scenario configuration. The "model", "attack", "dataset",
                "metric" and "scenario" sections are read; "defense" and
                "adhoc" are optional and may be null.
            num_eval_batches: forwarded to the dataset loaders as
                ``num_batches``.
            skip_benign: if truthy, the benign evaluation pass is skipped.
            skip_attack: if truthy, results are returned right after the
                benign pass.
            skip_misclassified: must be falsy for this scenario.

        Returns:
            The results collected by the metrics logger.

        Raises:
            ValueError: if ``skip_misclassified`` is set, or if the
                "audio_channel" config lacks "delay" or "attenuation".
        """
        if skip_misclassified:
            raise ValueError(
                "skip_misclassified shouldn't be set for ASR scenario")
        model_config = config["model"]
        estimator, fit_preprocessing_fn = load_model(model_config)

        # "adhoc" may be present but null; `config.get("adhoc", {})` would
        # then return None and the chained `.get` would fail.
        adhoc_config = config.get("adhoc") or {}
        audio_channel_config = adhoc_config.get("audio_channel")
        if audio_channel_config is not None:
            logger.info("loading audio channel")
            for k in "delay", "attenuation":
                if k not in audio_channel_config:
                    raise ValueError(f"audio_channel must have key {k}")
            audio_channel = load_audio_channel(**audio_channel_config)
            # The channel goes first so it runs before any other
            # preprocessing defence.
            if estimator.preprocessing_defences:
                estimator.preprocessing_defences.insert(0, audio_channel)
            else:
                estimator.preprocessing_defences = [audio_channel]
            estimator._update_preprocessing_operations()

        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to estimator")
            estimator = load_defense_internal(config["defense"], estimator)

        if model_config["fit"]:
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            # Training may use its own batch size. The dataset config is
            # swapped temporarily and always restored, even when loading
            # fails, so the caller's config is not left modified.
            batch_size = config["dataset"].pop("batch_size")
            config["dataset"]["batch_size"] = fit_kwargs.get(
                "fit_batch_size", batch_size)
            try:
                train_data = load_dataset(
                    config["dataset"],
                    epochs=fit_kwargs["nb_epochs"],
                    split=config["dataset"].get("train_split",
                                                "train_clean100"),
                    preprocessing_fn=fit_preprocessing_fn,
                    shuffle_files=True,
                )
            finally:
                config["dataset"]["batch_size"] = batch_size
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], estimator)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting estimator on clean train dataset...")
                estimator.fit_generator(train_data, **fit_kwargs)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming estimator with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], estimator)
            estimator = defense()

        attack_config = config["attack"]
        attack_type = attack_config.get("type")

        targeted = bool(attack_config.get("targeted"))
        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"],
            skip_benign=skip_benign,
            skip_attack=skip_attack,
            targeted=targeted,
        )

        if config["dataset"]["batch_size"] != 1:
            logger.warning("Evaluation batch_size != 1 may not be supported.")

        predict_kwargs = config["model"].get("predict_kwargs", {})
        eval_split = config["dataset"].get("eval_split", "test_clean")
        if skip_benign:
            logger.info("Skipping benign classification...")
        else:
            # Evaluate the ART estimator on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            logger.info("Running inference on benign examples...")
            for x, y in tqdm(test_data, desc="Benign"):
                # Ensure that input sample isn't overwritten by estimator
                x.flags.writeable = False
                with metrics.resource_context(
                        name="Inference",
                        profiler=config["metric"].get("profiler_type"),
                        computational_resource_dict=metrics_logger.
                        computational_resource_dict,
                ):
                    y_pred = estimator.predict(x, **predict_kwargs)
                metrics_logger.update_task(y, y_pred)
            metrics_logger.log_task()

        if skip_attack:
            logger.info("Skipping attack generation...")
            return metrics_logger.results()

        # Imperceptible attack still WIP
        if adhoc_config.get("skip_adversarial"):
            logger.info("Skipping adversarial classification...")
            return metrics_logger.results()

        # Evaluate the ART estimator on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        if attack_type == "preloaded":
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split="adversarial",
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
        else:
            attack = load_attack(attack_config, estimator)
            # Not every attack object exposes `targeted`; treat a missing
            # attribute as untargeted instead of raising AttributeError.
            attack_targeted = getattr(attack, "targeted", False)
            if targeted != attack_targeted:
                logger.warning(
                    f"targeted config {targeted} != attack field {attack_targeted}"
                )
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split=eval_split,
                num_batches=num_eval_batches,
                shuffle_files=False,
            )
            if targeted:
                label_targeter = load_label_targeter(
                    attack_config["targeted_labels"])

        export_samples = config["scenario"].get("export_samples")
        if export_samples is not None and export_samples > 0:
            sample_exporter = SampleExporter(self.scenario_output_dir,
                                             test_data.context, export_samples)
        else:
            sample_exporter = None

        for x, y in tqdm(test_data, desc="Attack"):
            with metrics.resource_context(
                    name="Attack",
                    profiler=config["metric"].get("profiler_type"),
                    computational_resource_dict=metrics_logger.
                    computational_resource_dict,
            ):
                if attack_type == "preloaded":
                    x, x_adv = x
                    if targeted:
                        y, y_target = y
                elif attack_config.get("use_label"):
                    x_adv = attack.generate(x=x, y=y)
                elif targeted:
                    y_target = label_targeter.generate(y)
                    x_adv = attack.generate(x=x, y=y_target)
                else:
                    x_adv = attack.generate(x=x)

            # Ensure that input sample isn't overwritten by estimator
            x_adv.flags.writeable = False
            y_pred_adv = estimator.predict(x_adv, **predict_kwargs)
            metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            if targeted:
                metrics_logger.update_task(
                    y_target,
                    y_pred_adv,
                    adversarial=True,
                    targeted=True,
                )
            metrics_logger.update_perturbation(x, x_adv)
            if sample_exporter is not None:
                sample_exporter.export(x, x_adv, y, y_pred_adv)
        metrics_logger.log_task(adversarial=True)
        if targeted:
            metrics_logger.log_task(adversarial=True, targeted=True)
        return metrics_logger.results()
Exemple #11
0
    def _evaluate(
        self,
        config: dict,
        num_eval_batches: Optional[int],
        skip_benign: Optional[bool],
        skip_attack: Optional[bool],
        skip_misclassified: Optional[bool],
    ) -> dict:
        """
        Run the D-APRICOT evaluation described by ``config`` and return its results.

        The scenario is adversarial-only and targeted: there is no benign
        pass, a label targeter is always loaded, and the dataset batch size
        must be 1.

        Args:
            config: scenario configuration. The "attack", "dataset", "model",
                "metric" and "scenario" sections are read; "defense" is
                optional.
            num_eval_batches: forwarded to the dataset loader as
                ``num_batches``.
            skip_benign: ignored with a warning when truthy.
            skip_attack: must be falsy.
            skip_misclassified: must be falsy.

        Returns:
            The results collected by the metrics logger.

        Raises:
            ValueError: for unsupported flags, a preloaded attack, a missing
                'targeted_labels' key, a truthy 'use_label', or a batch size
                other than 1.
            NotImplementedError: if fitting raises NotImplementedError.
        """
        # Reject unsupported options before loading anything.
        if skip_misclassified:
            raise ValueError(
                "skip_misclassified shouldn't be set for D-APRICOT scenario"
            )
        if skip_attack:
            raise ValueError(
                "--skip-attack should not be set for D-APRICOT scenario."
            )
        if skip_benign:
            logger.warning(
                "--skip-benign is being ignored since the D-APRICOT"
                " scenario doesn't include benign evaluation."
            )

        attack_cfg = config["attack"]
        if attack_cfg.get("type") == "preloaded":
            raise ValueError(
                "D-APRICOT scenario should not have preloaded set to True in attack config"
            )
        if "targeted_labels" not in attack_cfg:
            raise ValueError(
                "Attack config must have 'targeted_labels' key, as the "
                "D-APRICOT threat model is targeted."
            )
        if attack_cfg.get("use_label"):
            raise ValueError(
                "The D-APRICOT scenario threat model is targeted, and"
                " thus 'use_label' should be set to false."
            )

        if config["dataset"].get("batch_size") != 1:
            raise ValueError("batch_size of 1 is required for D-APRICOT scenario")

        model_cfg = config["model"]
        estimator, _ = load_model(model_cfg)

        defense_type = (config.get("defense") or {}).get("type")

        label_targeter = load_label_targeter(attack_cfg["targeted_labels"])

        if defense_type in ("Preprocessor", "Postprocessor"):
            logger.info(f"Applying internal {defense_type} defense to estimator")
            estimator = load_defense_internal(config["defense"], estimator)

        if model_cfg["fit"]:
            try:
                logger.info(
                    f"Fitting model {model_cfg['module']}.{model_cfg['name']}..."
                )
                fit_kwargs = model_cfg["fit_kwargs"]

                logger.info(f"Loading train dataset {config['dataset']['name']}...")
                train_data = load_dataset(
                    config["dataset"],
                    epochs=fit_kwargs["nb_epochs"],
                    split=config["dataset"].get("train_split", "train"),
                    shuffle_files=True,
                )
                if defense_type != "Trainer":
                    logger.info("Fitting estimator on clean train dataset...")
                    estimator.fit_generator(train_data, **fit_kwargs)
                else:
                    logger.info(f"Training with {defense_type} defense...")
                    trainer = load_defense_wrapper(config["defense"], estimator)
                    trainer.fit_generator(train_data, **fit_kwargs)
            except NotImplementedError:
                raise NotImplementedError(
                    "Training has not yet been implemented for object detectors"
                )

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(f"Transforming estimator with {defense_type} defense...")
            transform = load_defense_wrapper(config["defense"], estimator)
            estimator = transform()

        # Only targeted adversarial metrics are tracked in this scenario.
        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"],
            skip_benign=True,
            skip_attack=False,
            targeted=True,
        )

        eval_split = config["dataset"].get("eval_split", "test")

        # Evaluate the ART estimator on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        attack = load_attack(attack_cfg, estimator)
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split=eval_split,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )

        # Exporting is enabled only for a positive "export_samples" count.
        sample_exporter = None
        n_export = config["scenario"].get("export_samples")
        if n_export is not None and n_export > 0:
            sample_exporter = SampleExporter(
                self.scenario_output_dir, test_data.context, n_export
            )

        for x, y in tqdm(test_data, desc="Attack"):
            with metrics.resource_context(
                name="Attack",
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.computational_resource_dict,
            ):
                if x.shape[0] != 1:
                    raise ValueError("D-APRICOT batch size must be set to 1")
                # (nb=1, num_cameras, h, w, c) --> (num_cameras, h, w, c)
                x = x[0]
                y_object, y_patch_metadata = y

                gen_kwargs = deepcopy(attack_cfg.get("generate_kwargs", {}))
                gen_kwargs["y_patch_metadata"] = y_patch_metadata
                y_target = label_targeter.generate(y_object)
                gen_kwargs["y_object"] = y_target

                x_adv = attack.generate(x=x, **gen_kwargs)

            # Ensure that input sample isn't overwritten by estimator
            x_adv.flags.writeable = False
            y_pred_adv = estimator.predict(x_adv)
            # Score each image against its target label one at a time.
            for idx in range(len(y_object)):
                metrics_logger.update_task(
                    [y_target[idx]],
                    [y_pred_adv[idx]],
                    adversarial=True,
                    targeted=True,
                )

            metrics_logger.update_perturbation(x, x_adv)
            if sample_exporter is not None:
                sample_exporter.export(x, x_adv, y_object, y_pred_adv)

        metrics_logger.log_task(adversarial=True, targeted=True)
        return metrics_logger.results()
Exemple #12
0
    def _evaluate(self, config: dict) -> dict:
        """
        Evaluate the config and return a results dict

        The method loads the model described by ``config["model"]``, applies
        an optional defense, optionally fits the model and saves its weights,
        then scores the model on the benign test split and on adversarial
        examples (preloaded, or generated by the configured attack).

        :param config: scenario configuration; the "model", "dataset",
            "metric" and "attack" entries are read, "defense" is optional.
        :return: the value of ``metrics_logger.results()``.
        :raises ValueError: if the attack is targeted and also sets
            "use_label".
        :raises NotImplementedError: if a targeted attack would have to be
            generated here (no target labels are produced by this method).
        """

        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)

        # A missing or null "defense" entry is treated as "no defense".
        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split_type="train",
                preprocessing_fn=preprocessing_fn,
            )
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], classifier)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting classifier on clean train dataset...")
                classifier.fit_generator(train_data, **fit_kwargs)

            # Save weights at the end of training.
            # The "pretrained" flag only decorates the file name, so it is
            # looked up tolerantly: a config without it must not raise after
            # the (potentially long) fit and lose the trained weights.
            model_kwargs = model_config.get("model_kwargs") or {}
            ckpt_name = model_config['module'].replace('.', '_')
            if model_kwargs.get("pretrained"):
                ckpt_name += '_pretrained'
            ckpt_name += '_epochs%d.pth' % fit_kwargs['nb_epochs']
            classifier.save(
                osp.join(paths.runtime_paths().saved_model_dir, ckpt_name))
            logger.info(f"Saved classifier {ckpt_name} ...")

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming classifier with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            classifier = defense()

        classifier.set_learning_phase(False)

        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
        )
        logger.info("Running inference on benign examples...")
        metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

        for x, y in tqdm(test_data, desc="Benign"):
            y_pred = classifier.predict(x)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        attack_config = config["attack"]
        attack_type = attack_config.get("type")
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            # Adversarial examples come from a dataset; no attack is built.
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split_type="adversarial",
                preprocessing_fn=preprocessing_fn,
            )
        else:
            attack = load_attack(attack_config, classifier)
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=preprocessing_fn,
            )
        for x, y in tqdm(test_data, desc="Attack"):
            if attack_type == "preloaded":
                # Preloaded batches pair the benign input with its
                # adversarial counterpart (and, when targeted, the true
                # label with the target label).
                x, x_adv = x
                if targeted:
                    y, y_target = y
            elif attack_config.get("use_label"):
                x_adv = attack.generate(x=x, y=y)
            elif targeted:
                raise NotImplementedError(
                    "Requires generation of target labels")
                # x_adv = attack.generate(x=x, y=y_target)
            else:
                x_adv = attack.generate(x=x)

            y_pred_adv = classifier.predict(x_adv)
            if targeted:
                # NOTE: does not remove data points where y == y_target
                metrics_logger.update_task(y_target,
                                           y_pred_adv,
                                           adversarial=True)
            else:
                metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            metrics_logger.update_perturbation(x, x_adv)
        metrics_logger.log_task(adversarial=True, targeted=targeted)
        return metrics_logger.results()
    def _evaluate(self, config: dict, num_eval_batches: Optional[int],
                  skip_benign: Optional[bool]) -> dict:
        """
        Run the scenario described by ``config`` and return its metrics.

        Steps, in order: load the model, apply an optional defense, fit the
        model when ``config["model"]["fit"]`` is set, score benign test
        batches (unless ``skip_benign``), then score adversarial batches that
        are either preloaded or produced by the configured attack.

        :param config: scenario configuration dict
        :param num_eval_batches: forwarded as ``num_batches`` to the
            evaluation dataset loaders
        :param skip_benign: when truthy, the benign evaluation pass is skipped
        :return: ``metrics_logger.results()``
        :raises ValueError: if the attack is targeted and sets "use_label"
        """

        def tracked(stage):
            # Resource-measuring context used around both inference and
            # attack generation; arguments are read at call time.
            return metrics.resource_context(
                name=stage,
                profiler=config["metric"].get("profiler_type"),
                computational_resource_dict=metrics_logger.
                computational_resource_dict,
            )

        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)

        # Missing or null "defense" means no defense type at all.
        defense_type = (config.get("defense") or {}).get("type")
        if defense_type in ("Preprocessor", "Postprocessor"):
            logger.info(
                f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            module_name = model_config['module']
            model_name = model_config['name']
            logger.info(f"Fitting model {module_name}.{model_name}...")
            fit_kwargs = model_config["fit_kwargs"]

            dataset_name = config['dataset']['name']
            logger.info(f"Loading train dataset {dataset_name}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split_type="train",
                preprocessing_fn=preprocessing_fn,
                shuffle_files=True,
            )
            if defense_type != "Trainer":
                logger.info("Fitting classifier on clean train dataset...")
                classifier.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info(f"Training with {defense_type} defense...")
                trainer = load_defense_wrapper(config["defense"], classifier)
                trainer.fit_generator(train_data, **fit_kwargs)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming classifier with {defense_type} defense...")
            transform = load_defense_wrapper(config["defense"], classifier)
            classifier = transform()

        classifier.set_learning_phase(False)

        # Loader arguments shared by every evaluation split; only the
        # split type differs for the preloaded adversarial dataset.
        test_kwargs = dict(
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            num_batches=num_eval_batches,
            shuffle_files=False,
        )

        metrics_logger = metrics.MetricsLogger.from_config(
            config["metric"], skip_benign=skip_benign)
        if not skip_benign:
            # Evaluate the ART classifier on benign test examples
            logger.info(f"Loading test dataset {config['dataset']['name']}...")
            benign_data = load_dataset(config["dataset"], **test_kwargs)

            logger.info("Running inference on benign examples...")
            for x, y in tqdm(benign_data, desc="Benign"):
                with tracked("Inference"):
                    y_pred = classifier.predict(x)
                metrics_logger.update_task(y, y_pred)
            metrics_logger.log_task()
        else:
            logger.info("Skipping benign classification...")

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        attack_config = config["attack"]
        attack_type = attack_config.get("type")
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")

        preloaded = attack_type == "preloaded"
        if preloaded:
            adversarial_data = load_adversarial_dataset(
                attack_config, **{**test_kwargs, "split_type": "adversarial"})
        else:
            attack = load_attack(attack_config, classifier)
            attack_targeted = getattr(attack, "targeted", False)
            if targeted != attack_targeted:
                logger.warning(
                    f"targeted config {targeted} != attack field {attack_targeted}"
                )
            adversarial_data = load_dataset(config["dataset"], **test_kwargs)
            if targeted:
                label_targeter = load_label_targeter(
                    attack_config["targeted_labels"])

        for x, y in tqdm(adversarial_data, desc="Attack"):
            with tracked("Attack"):
                if preloaded:
                    # Each preloaded batch carries (benign, adversarial)
                    # inputs and, when targeted, (true, target) labels.
                    x, x_adv = x
                    if targeted:
                        y, y_target = y
                elif attack_config.get("use_label"):
                    x_adv = attack.generate(x=x, y=y)
                elif targeted:
                    y_target = label_targeter.generate(y)
                    x_adv = attack.generate(x=x, y=y_target)
                else:
                    x_adv = attack.generate(x=x)

            y_pred_adv = classifier.predict(x_adv)
            # Targeted runs are scored against the target labels.
            reference_labels = y_target if targeted else y
            metrics_logger.update_task(reference_labels,
                                       y_pred_adv,
                                       adversarial=True)
            metrics_logger.update_perturbation(x, x_adv)
        metrics_logger.log_task(adversarial=True, targeted=targeted)
        return metrics_logger.results()
    def _evaluate(self, config: dict) -> dict:
        """
        Evaluate a config file for classification robustness against attack.

        Every batch is passed through ``segment(x, y, n_tbins)`` before it is
        used for fitting, prediction or attack generation. When
        ``config["model"]["weights_file"]`` is falsy the classifier is trained
        first, with a validation pass after each full epoch of batches.

        :param config: scenario configuration; the "model", "dataset" and
            "attack" entries are read.
        :return: dict with "mean_benign_accuracy" and
            "mean_adversarial_accuracy".
        :raises ZeroDivisionError: if an evaluated split yields no segments.
        """
        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)

        n_tbins = 100  # number of time bins in spectrogram input to model

        task_metric = metrics.categorical_accuracy

        def load_split(split_type, epochs=1):
            # Load one split of the configured dataset.
            return load_dataset(
                config["dataset"],
                epochs=epochs,
                split_type=split_type,
                preprocessing_fn=preprocessing_fn,
            )

        def segmented_accuracy(data_generator, desc=None, attack=None):
            # Mean of task_metric over all segments of a generator; when an
            # attack is given, the segments are perturbed before prediction.
            n_segments = 0
            accuracies = []
            for x, y in tqdm(data_generator, desc=desc):
                x_seg, y_seg = segment(x, y, n_tbins)
                if attack is not None:
                    x_seg = attack.generate(x=x_seg)
                y_pred = classifier.predict(x_seg)
                accuracies.extend(task_metric(y_seg, y_pred))
                n_segments += len(y_seg)
            return sum(accuracies) / n_segments

        # Train ART classifier
        if not model_config["weights_file"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]
            train_data_generator = load_split(
                "train", epochs=fit_kwargs["nb_epochs"])

            # batch_idx is used only for epoch-boundary detection; segment
            # counting happens inside segmented_accuracy with its own
            # counter, so the loop index is never overwritten.
            for batch_idx, (x, y) in tqdm(enumerate(train_data_generator)):
                x_seg, y_seg = segment(x, y, n_tbins)
                classifier.fit(
                    x_seg,
                    y_seg,
                    batch_size=config["dataset"]["batch_size"],
                    nb_epochs=1,
                    verbose=True,
                )

                if (batch_idx + 1) % train_data_generator.batches_per_epoch == 0:
                    # evaluate on validation examples
                    validation_accuracy = segmented_accuracy(
                        load_split("validation"))
                    logger.info(
                        "Validation accuracy: {}".format(validation_accuracy))

        classifier.set_learning_phase(False)
        # Evaluate ART classifier on test examples
        logger.info(f"Loading testing dataset {config['dataset']['name']}...")
        test_data_generator = load_split("test")

        logger.info("Running inference on benign test examples...")
        benign_accuracy = segmented_accuracy(test_data_generator,
                                             desc="Benign")
        logger.info(f"Accuracy on benign test examples: {benign_accuracy:.2%}")

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating / testing adversarial examples...")
        attack = load_attack(config["attack"], classifier)

        # The test split is loaded again for the attack pass.
        test_data_generator = load_split("test")
        adversarial_accuracy = segmented_accuracy(test_data_generator,
                                                  desc="Attack",
                                                  attack=attack)
        logger.info(
            f"Accuracy on adversarial test examples: {adversarial_accuracy:.2%}"
        )

        results = {
            "mean_benign_accuracy": benign_accuracy,
            "mean_adversarial_accuracy": adversarial_accuracy,
        }
        return results
Exemple #15
0
    def _evaluate(self, config: dict) -> dict:
        """
        Evaluate the config and return a results dict

        Differences from a plain evaluation that are visible in this method:
        ``load_model`` may return a ``(fit, predict)`` pair of preprocessing
        functions; training may use the batch size given by
        ``config["adhoc"]["fit_batch_size"]``; and the mean signal-to-noise
        ratio of the adversarial examples is logged at the end.

        :param config: scenario configuration; the "model", "dataset",
            "metric" and "attack" entries are read, "defense" and "adhoc"
            are optional.
        :return: the value of ``metrics_logger.results()``.
        :raises ValueError: if the attack is targeted and also sets
            "use_label".
        :raises NotImplementedError: if a targeted attack would have to be
            generated here (no target labels are produced by this method).
        """

        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)
        # A tuple means separate preprocessing for fitting and predicting.
        if isinstance(preprocessing_fn, tuple):
            fit_preprocessing_fn, predict_preprocessing_fn = preprocessing_fn
        else:
            fit_preprocessing_fn = predict_preprocessing_fn = preprocessing_fn

        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            # Temporarily override the batch size for the training split.
            # The override value is computed before the shared config is
            # touched, and the original is restored even if loading fails,
            # so later evaluation never sees the fit batch size.
            dataset_config = config["dataset"]
            batch_size = dataset_config["batch_size"]
            fit_batch_size = (config.get("adhoc") or {}).get(
                "fit_batch_size", batch_size)
            dataset_config["batch_size"] = fit_batch_size
            try:
                train_data = load_dataset(
                    dataset_config,
                    epochs=fit_kwargs["nb_epochs"],
                    split_type="train",
                    preprocessing_fn=fit_preprocessing_fn,
                )
            finally:
                dataset_config["batch_size"] = batch_size
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], classifier)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting classifier on clean train dataset...")
                classifier.fit_generator(train_data, **fit_kwargs)

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming classifier with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            classifier = defense()

        #HACK: to save model -- currently commenting it out
        #SAIL-JATI ----------------------------------
        #ts = time.time()
        #st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')
        #model_save_dir_ = os.path.join("/nas/home/ajati/work/codes/SAIL_ALR_models/", st+"/")
        #os.system("mkdir -p "+model_save_dir_)
        #torch.save(classifier._model._model.state_dict(), model_save_dir_+"/sail_alr_model_state_dict.pt")
        #torch.save(classifier._model._model, model_save_dir_+"/sail_alr_model.pt")
        #torch.save(classifier._optimizer.state_dict(), model_save_dir_+"/sail_alr_optim_state_dict.pt")
        #torch.save(classifier._optimizer, model_save_dir_+"/sail_alr_optim.pt")
        ##-------------------------------------------

        classifier.set_learning_phase(False)

        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=predict_preprocessing_fn,
        )
        logger.info("Running inference on benign examples...")
        metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

        for x, y in tqdm(test_data, desc="Benign"):
            y_pred = classifier.predict(x)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")
        attack_config = config["attack"]
        attack_type = attack_config.get("type")
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split_type="adversarial",
                preprocessing_fn=predict_preprocessing_fn,
            )
        else:
            attack = load_attack(attack_config, classifier)
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=predict_preprocessing_fn,
            )

        # Per-batch signal-to-noise ratios of the adversarial perturbation.
        snrs = []
        for x, y in tqdm(test_data, desc="Attack"):
            if attack_type == "preloaded":
                # Preloaded batches pair the benign input with its
                # adversarial counterpart (and, when targeted, the true
                # label with the target label).
                x, x_adv = x
                if targeted:
                    y, y_target = y
            elif attack_config.get("use_label"):
                x_adv = attack.generate(x=x, y=y)
            elif targeted:
                raise NotImplementedError(
                    "Requires generation of target labels")
                # x_adv = attack.generate(x=x, y=y_target)
            else:
                x_adv = attack.generate(x=x)

            # SNR = 10 * log10(mean signal power / mean perturbation power)
            noise = x_adv - x
            snr = 10 * np.log10(np.mean(x**2) / np.mean(noise**2))
            snrs.append(snr)

            y_pred_adv = classifier.predict(x_adv)
            if targeted:
                # NOTE: does not remove data points where y == y_target
                metrics_logger.update_task(y_target,
                                           y_pred_adv,
                                           adversarial=True)
            else:
                metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            metrics_logger.update_perturbation(x, x_adv)
        metrics_logger.log_task(adversarial=True, targeted=targeted)

        # With no attack batches there is nothing to average; report nan
        # directly instead of letting np.mean warn about an empty slice.
        mean_snr = np.mean(snrs) if snrs else float("nan")
        # Use the same logger as the rest of the method rather than the
        # root logger.
        logger.info(f"MEAN SNR of adversarial samples = {mean_snr}")

        return metrics_logger.results()