Example #1
def _check_and_download_file(uri: str, basename: str, *paths: str) -> str:
    """Check and download the file from given URI."""
    dir_path = os.path.join(*paths)
    file_path = os.path.join(dir_path, basename)
    if not os.path.isdir(dir_path):
        make_directory(dir_path)
    if not os.path.isfile(file_path):
        logger.info("Could not find %s. Downloading it now...", basename)
        get_file(basename, uri, path=dir_path)
    return file_path
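
# Usage sketch for the helper above (not part of the original example; the URI and
# cache directory below are hypothetical placeholders):
if __name__ == "__main__":
    weights_path = _check_and_download_file(
        "https://example.com/models/demo_model.h5",  # hypothetical download URI
        "demo_model.h5",                              # basename stored on disk
        os.path.expanduser("~"), ".art", "data",      # *paths joined into the cache directory
    )
    print("Model weights cached at:", weights_path)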
def GetCifar10WithModel():
    """
    Load the CIFAR-10 dataset and a pre-trained CIFAR-10 classifier model.
    """
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
    num_samples_train = 100
    num_samples_test = 100
    x_train = x_train[0:num_samples_train]
    y_train = y_train[0:num_samples_train]
    x_test = x_test[0:num_samples_test]
    y_test = y_test[0:num_samples_test]

    class_descr = [
        'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
        'horse', 'ship', 'truck'
    ]

    path = get_file(
        'cifar_resnet.h5',
        extract=False,
        path=DATA_PATH,
        url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
    classifier_model = load_model(path)

    # classifier_model.summary()
    return x_train, y_train, x_test, y_test, classifier_model, min_, max_
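
# Usage sketch (not part of the original function; the KerasClassifier import path assumes
# a recent ART release, and the wrapping mirrors the later examples on this page):
from art.estimators.classification import KerasClassifier

x_train, y_train, x_test, y_test, classifier_model, min_, max_ = GetCifar10WithModel()
classifier = KerasClassifier(clip_values=(min_, max_),
                             model=classifier_model,
                             use_logits=False,
                             preprocessing=(0.5, 1))
preds = classifier.predict(x_test)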
Example #3
    def test_loss_gradient(self, art_warning):
        try:
            transcripts = list()
            audios = list()
            for filename, sample in self.samples.items():
                file_path = get_file(filename, sample["uri"])
                _, audio = read(file_path)
                audios.append(audio)
                transcripts.append(sample["transcript"])

            audio_batch = np.array(audios, dtype=object)
            target_batch = np.array(transcripts)

            lingvo = TensorFlowLingvoASR()
            gradient_batch = lingvo._loss_gradient_per_batch(
                audio_batch, target_batch)
            gradient_sequence = lingvo._loss_gradient_per_sequence(
                audio_batch, target_batch)

            gradient_batch_sum = np.array(
                [np.abs(gb).sum() for gb in gradient_batch], dtype=object)
            gradient_sequence_sum = np.array(
                [np.abs(gs).sum() for gs in gradient_sequence], dtype=object)

            # test loss gradients per batch and per sequence are the same
            assert_allclose(gradient_sequence_sum,
                            gradient_batch_sum,
                            rtol=1e-05)
            # test gradient_batch, gradient_sequence and audios items have same shapes
            assert ([gb.shape for gb in gradient_batch] ==
                    [gs.shape
                     for gs in gradient_sequence] == [a.shape for a in audios])
        except ARTTestException as e:
            art_warning(e)
Example #4
    def test_predict(self, art_warning):
        try:
            transcripts = list()
            audios = list()
            for filename, sample in self.samples.items():
                file_path = get_file(filename, sample["uri"])
                _, audio = read(file_path)
                audios.append(audio)
                transcripts.append(sample["transcript"])

            audio_batch = np.array(audios, dtype=object)

            lingvo = TensorFlowLingvoASR()
            prediction = lingvo.predict(audio_batch, batch_size=1)
            assert prediction[0] == transcripts[0]
        except ARTTestException as e:
            art_warning(e)
Example #5
    def _load_model(
        images: "tf.Tensor",
        filename: Optional[str] = None,
        url: Optional[str] = None,
        obj_detection_model: Optional["FasterRCNNMetaArch"] = None,
        is_training: bool = False,
        groundtruth_boxes_list: Optional[List["tf.Tensor"]] = None,
        groundtruth_classes_list: Optional[List["tf.Tensor"]] = None,
        groundtruth_weights_list: Optional[List["tf.Tensor"]] = None,
    ) -> Tuple[Dict[str, "tf.Tensor"], ...]:
        """
        Download, extract and load a model from a URL if it is not already in the cache. The file indicated by `url`
        is downloaded to the path ~/.art/data and given the name `filename`. Files in tar, tar.gz, tar.bz, and zip
        formats will also be extracted. Then the model is loaded and pipelined, and its outputs are returned together
        with the model as a tuple of (model, predictions, losses, detections).

        :param images: Input samples of shape (nb_samples, height, width, nb_channels).
        :param filename: Name of the file.
        :param url: Download URL.
        :param is_training: A boolean indicating whether the training version of the computation graph should be
                            constructed.
        :param groundtruth_boxes_list: A list of 2-D tf.float32 tensors of shape [num_boxes, 4] containing
                                       coordinates of the groundtruth boxes. Groundtruth boxes are provided in
                                       [y_min, x_min, y_max, x_max] format and also assumed to be normalized and
                                       clipped relative to the image window with conditions y_min <= y_max and
                                       x_min <= x_max.
        :param groundtruth_classes_list: A list of 1-D tf.float32 tensors of shape [num_boxes] containing the class
                                         targets with the zero index which is assumed to map to the first
                                         non-background class.
        :param groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape [num_boxes] containing weights for
                                         groundtruth boxes.
        :return: A tuple of (model, predictions, losses, detections):

                    - model: the loaded object detection model.
                    - predictions: a dictionary holding "raw" prediction tensors.
                    - losses: a dictionary mapping loss keys (`Loss/RPNLoss/localization_loss`,
                              `Loss/RPNLoss/objectness_loss`, `Loss/BoxClassifierLoss/localization_loss`,
                              `Loss/BoxClassifierLoss/classification_loss`) to scalar tensors representing
                              corresponding loss values.
                    - detections: a dictionary containing final detection results.
        """
        import tensorflow.compat.v1 as tf  # lgtm [py/repeated-import]
        from object_detection.utils import variables_helper

        if obj_detection_model is None:
            from object_detection.utils import config_util
            from object_detection.builders import model_builder

            # If obj_detection_model is None, then we need to have parameters filename and url to download, extract
            # and load the object detection model
            if filename is None or url is None:  # pragma: no cover
                raise ValueError(
                    "Need input parameters `filename` and `url` to download, "
                    "extract and load the object detection model."
                )

            # Download and extract
            path = get_file(filename=filename, path=config.ART_DATA_PATH, url=url, extract=True)

            # Load model config
            pipeline_config = path + "/pipeline.config"
            configs = config_util.get_configs_from_pipeline_file(pipeline_config)
            configs["model"].faster_rcnn.second_stage_batch_size = configs[
                "model"
            ].faster_rcnn.first_stage_max_proposals

            # Load model
            obj_detection_model = model_builder.build(
                model_config=configs["model"], is_training=is_training, add_summaries=False
            )

        # Provide groundtruth
        if groundtruth_classes_list is not None:
            groundtruth_classes_list = [
                tf.one_hot(groundtruth_class, obj_detection_model.num_classes)
                for groundtruth_class in groundtruth_classes_list
            ]

        obj_detection_model.provide_groundtruth(
            groundtruth_boxes_list=groundtruth_boxes_list,
            groundtruth_classes_list=groundtruth_classes_list,
            groundtruth_weights_list=groundtruth_weights_list,
        )

        # Create model pipeline
        images *= 255.0
        preprocessed_images, true_image_shapes = obj_detection_model.preprocess(images)
        predictions = obj_detection_model.predict(preprocessed_images, true_image_shapes)
        losses = obj_detection_model.loss(predictions, true_image_shapes)
        detections = obj_detection_model.postprocess(predictions, true_image_shapes)

        # Initialize variables from checkpoint
        # Get variables to restore
        variables_to_restore = obj_detection_model.restore_map(
            fine_tune_checkpoint_type="detection", load_all_detection_checkpoint_vars=True
        )

        # Get variables from checkpoint
        fine_tune_checkpoint_path = path + "/model.ckpt"
        vars_in_ckpt = variables_helper.get_variables_available_in_checkpoint(
            variables_to_restore, fine_tune_checkpoint_path, include_global_step=False
        )

        # Initialize from checkpoint
        tf.train.init_from_checkpoint(fine_tune_checkpoint_path, vars_in_ckpt)

        return obj_detection_model, predictions, losses, detections
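
    # Usage sketch for _load_model (graph-mode TF1; the checkpoint name and URL below are
    # illustrative placeholders, not taken from the original code):
    #
    #     import tensorflow.compat.v1 as tf
    #     tf.disable_eager_execution()
    #     images = tf.placeholder(tf.float32, shape=[1, 640, 640, 3])
    #     model, predictions, losses, detections = _load_model(
    #         images=images,
    #         filename="faster_rcnn_inception_v2_coco_2017_11_08",  # hypothetical checkpoint name
    #         url="http://download.tensorflow.org/models/object_detection/"
    #             "faster_rcnn_inception_v2_coco_2017_11_08.tar.gz",  # hypothetical URL
    #     )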
Example #6
from deepfool import DeepFool
from dynamic_deepfool import DynamicDeepFool

import numpy as np
import timeit
# %matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
# tf.compat.v1.disable_eager_execution()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
print(tf.__version__)

(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')
path = get_file(
    'mnist_cnn_original.h5',
    extract=False,
    path=ART_DATA_PATH,
    url='https://www.dropbox.com/s/p2nyzne9chcerid/mnist_cnn_original.h5?dl=1')
classifier_model = load_model(path)

# (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
# path = get_file('cifar_resnet.h5',extract=False, path=ART_DATA_PATH,
#                 url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
# classifier_model = load_model(path)

# # Discarded iris and stl10 dataset
# (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('stl10')
# (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('iris')
# # path = get_file('cifar_resnet.h5',extract=False, path=ART_DATA_PATH,
# #                 url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
# # classifier_model = load_model(path)
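
# Continuation sketch (not part of the original script; assumes ART's KerasClassifier and
# mirrors the wrapping pattern used in the other examples on this page):
from art.estimators.classification import KerasClassifier

classifier = KerasClassifier(clip_values=(min_, max_),
                             model=classifier_model,
                             use_logits=False)
preds = classifier.predict(x_test[:100])
acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test[:100], axis=1)) / 100.0
print("Clean accuracy on 100 test samples: {:.2%}".format(acc))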
Example #7
num_samples_train = 1000
num_samples_test = 1000
x_train = x_train[0:num_samples_train]
y_train = y_train[0:num_samples_train]
x_test = x_test[0:num_samples_test]
y_test = y_test[0:num_samples_test]

class_descr = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
    'ship', 'truck'
]

path = get_file(
    'cifar_alexnet.h5',
    extract=False,
    path=config.ART_DATA_PATH,
    url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_alexnet.h5?dl=1')
classifier_model = load_model(path)
classifier = KerasClassifier(clip_values=(min_, max_),
                             model=classifier_model,
                             use_logits=False,
                             preprocessing=(0.5, 1))

target_class = "bird"  # one of ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
target_label = np.zeros(len(class_descr))
target_label[class_descr.index(target_class)] = 1
target_instance = np.expand_dims(
    x_test[np.argmax(y_test, axis=1) == class_descr.index(target_class)][3],
    axis=0)
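
# Sanity-check sketch (not part of the original snippet): inspect the classifier's
# prediction for the selected target instance before using it further.
pred = classifier.predict(target_instance)
print("Predicted class of the target instance:", class_descr[int(np.argmax(pred))])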
Example #8
    def __init__(
        self,
        espresso_config_filepath: Optional[str] = None,
        model: Optional[str] = None,
        clip_values: Optional["CLIP_VALUES_TYPE"] = None,
        preprocessing_defences: Union["Preprocessor", List["Preprocessor"],
                                      None] = None,
        postprocessing_defences: Union["Postprocessor", List["Postprocessor"],
                                       None] = None,
        preprocessing: "PREPROCESSING_TYPE" = None,
        device_type: str = "gpu",
        verbose: bool = True,
    ):
        """
        Initialization of an instance of PyTorchEspresso.

        :param espresso_config_filepath: The path of the espresso config file (yaml)
        :param model: The choice of pretrained model if a pretrained model is required.
        :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
               maximum values allowed for features. If floats are provided, these will be used as the range of all
               features. If arrays are provided, each value will be considered the bound for a feature, thus
               the shape of clip values needs to match the total number of features.
        :param preprocessing_defences: Preprocessing defence(s) to be applied by the estimator.
        :param postprocessing_defences: Postprocessing defence(s) to be applied by the estimator.
        :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
               used for data preprocessing. The first value will be subtracted from the input. The input will then
               be divided by the second one.
        :param device_type: Type of device to be used for model and tensors, if `cpu` run on CPU, if `gpu` run on GPU
                            if available otherwise run on CPU.
        """
        import torch  # lgtm [py/repeated-import]
        import yaml
        from fairseq import checkpoint_utils, tasks, utils
        from fairseq.data import encoders
        import sentencepiece as spm

        # Super initialization
        super().__init__(
            model=None,
            clip_values=clip_values,
            channels_first=None,
            preprocessing_defences=preprocessing_defences,
            postprocessing_defences=postprocessing_defences,
            preprocessing=preprocessing,
        )
        self.verbose = verbose

        # Check clip values
        if self.clip_values is not None:
            if not np.all(self.clip_values[0] == -1):  # pragma: no cover
                raise ValueError(
                    "This estimator requires normalized input audios with clip_vales=(-1, 1)."
                )
            if not np.all(self.clip_values[1] == 1):  # pragma: no cover
                raise ValueError(
                    "This estimator requires normalized input audios with clip_vales=(-1, 1)."
                )

        # Check postprocessing defences
        if self.postprocessing_defences is not None:  # pragma: no cover
            raise ValueError(
                "This estimator does not support `postprocessing_defences`.")

        # Set cpu/gpu device
        self._device: torch.device
        if device_type == "cpu" or not torch.cuda.is_available():
            self._device = torch.device("cpu")
        else:  # pragma: no cover
            cuda_idx = torch.cuda.current_device()
            self._device = torch.device("cuda:{}".format(cuda_idx))

        # Load config/model
        if espresso_config_filepath is None:
            if model == "librispeech_transformer":
                config_filename, config_url = (
                    "libri960_transformer.yaml",
                    "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/"
                    "libri960_transformer.yaml",
                )
                model_filename, model_url = (
                    "checkpoint_best.pt",
                    "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/checkpoint_best.pt",
                )
                sp_filename, sp_url = (
                    "train_960_unigram5000.model",
                    "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/"
                    "train_960_unigram5000.model",
                )
                dict_filename, dict_url = (
                    "train_960_unigram5000_units.txt",
                    "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/"
                    "train_960_unigram5000_units.txt",
                )
            else:  # pragma: no cover
                raise ValueError("Model not recognised.")

            # Download files
            config_path = get_file(filename=config_filename,
                                   path=config.ART_DATA_PATH,
                                   url=config_url,
                                   extract=False,
                                   verbose=self.verbose)
            model_path = get_file(filename=model_filename,
                                  path=config.ART_DATA_PATH,
                                  url=model_url,
                                  extract=False,
                                  verbose=self.verbose)
            sp_path = get_file(filename=sp_filename,
                               path=config.ART_DATA_PATH,
                               url=sp_url,
                               extract=False,
                               verbose=self.verbose)
            dict_path = get_file(filename=dict_filename,
                                 path=config.ART_DATA_PATH,
                                 url=dict_url,
                                 extract=False,
                                 verbose=self.verbose)
        else:
            config_path = espresso_config_filepath

        # construct espresso args
        with open(config_path) as file:
            esp_args_dict = yaml.load(file, Loader=yaml.FullLoader)
            esp_args = Namespace(**esp_args_dict)
            if espresso_config_filepath is None:  # overwrite paths in downloaded config with the actual ones
                esp_args.path = model_path
                esp_args.sentencepiece_model = sp_path
                esp_args.dict = dict_path
        self.esp_args = esp_args

        # setup espresso/fairseq task
        self.task = tasks.setup_task(self.esp_args)
        self.task.feat_dim = self.esp_args.feat_dim

        # load_model_ensemble
        self._models, self._model_args = checkpoint_utils.load_model_ensemble(
            utils.split_paths(self.esp_args.path),
            arg_overrides=ast.literal_eval(self.esp_args.model_overrides),
            task=self.task,
            suffix=getattr(self.esp_args, "checkpoint_suffix", ""),
        )
        for m in self._models:
            m.to(self._device)

        self._model = self._models[0]

        self.dictionary = self.task.target_dictionary
        self.generator = self.task.build_generator(self._models, self.esp_args)
        self.tokenizer = encoders.build_tokenizer(self.esp_args)
        self.bpe = encoders.build_bpe(self.esp_args)  # bpe encoder
        self.spp = spm.SentencePieceProcessor()  # sentence piece model
        self.spp.Load(self.esp_args.sentencepiece_model)

        self.criterion = self.task.build_criterion(self.esp_args)
        self._sampling_rate = self.esp_args.sampling_rate
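
    # Usage sketch for the constructor above (not part of the original code; assumes the
    # espresso/fairseq dependencies are installed and uses the pretrained model name
    # documented in the docstring):
    #
    #     asr = PyTorchEspresso(model="librispeech_transformer", device_type="cpu")
    #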
    def __init__(
        self,
        model: Optional["DeepSpeech"] = None,
        pretrained_model: Optional[str] = None,
        filename: Optional[str] = None,
        url: Optional[str] = None,
        use_half: bool = False,
        optimizer: Optional["torch.optim.Optimizer"] = None,  # type: ignore
        use_amp: bool = False,
        opt_level: str = "O1",
        decoder_type: str = "greedy",
        lm_path: str = "",
        top_paths: int = 1,
        alpha: float = 0.0,
        beta: float = 0.0,
        cutoff_top_n: int = 40,
        cutoff_prob: float = 1.0,
        beam_width: int = 10,
        lm_workers: int = 4,
        clip_values: Optional["CLIP_VALUES_TYPE"] = None,
        preprocessing_defences: Union["Preprocessor", List["Preprocessor"],
                                      None] = None,
        postprocessing_defences: Union["Postprocessor", List["Postprocessor"],
                                       None] = None,
        preprocessing: "PREPROCESSING_TYPE" = None,
        device_type: str = "gpu",
        verbose: bool = True,
    ):
        """
        Initialization of an instance of PyTorchDeepSpeech.

        :param model: DeepSpeech model.
        :param pretrained_model: The choice of pretrained model if a pretrained model is required. Currently this
                                 estimator supports 3 different pretrained models consisting of `an4`, `librispeech`
                                 and `tedlium`.
        :param filename: Name of the file.
        :param url: Download URL.
        :param use_half: Whether to use FP16 for pretrained model.
        :param optimizer: The optimizer used to train the estimator.
        :param use_amp: Whether to use the automatic mixed precision tool to enable mixed precision training or
                        gradient computation, e.g. with loss gradient computation. When set to True, this option is
                        only triggered if there are GPUs available.
        :param opt_level: Specify a pure or mixed precision optimization level. Used when use_amp is True. Accepted
                          values are `O0`, `O1`, `O2`, and `O3`.
        :param decoder_type: Decoder type. Either `greedy` or `beam`. This parameter is only used when users want
                             transcription outputs.
        :param lm_path: Path to an (optional) kenlm language model for use with beam search. This parameter is only
                        used when users want transcription outputs.
        :param top_paths: Number of beams to be returned. This parameter is only used when users want transcription
                          outputs.
        :param alpha: The weight used for the language model. This parameter is only used when users want transcription
                      outputs.
        :param beta: Language model word bonus (all words). This parameter is only used when users want transcription
                     outputs.
        :param cutoff_top_n: Cutoff_top_n characters with highest probs in vocabulary will be used in beam search. This
                             parameter is only used when users want transcription outputs.
        :param cutoff_prob: Cutoff probability in pruning. This parameter is only used when users want transcription
                            outputs.
        :param beam_width: The width of beam to be used. This parameter is only used when users want transcription
                           outputs.
        :param lm_workers: Number of language model processes to use. This parameter is only used when users want
                           transcription outputs.
        :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
               maximum values allowed for features. If floats are provided, these will be used as the range of all
               features. If arrays are provided, each value will be considered the bound for a feature, thus
               the shape of clip values needs to match the total number of features.
        :param preprocessing_defences: Preprocessing defence(s) to be applied by the estimator.
        :param postprocessing_defences: Postprocessing defence(s) to be applied by the estimator.
        :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
               used for data preprocessing. The first value will be subtracted from the input. The input will then
               be divided by the second one.
        :param device_type: Type of device to be used for model and tensors, if `cpu` run on CPU, if `gpu` run on GPU
                            if available otherwise run on CPU.
        """
        import torch  # lgtm [py/repeated-import]

        from deepspeech_pytorch.configs.inference_config import LMConfig
        from deepspeech_pytorch.enums import DecoderType
        from deepspeech_pytorch.utils import load_decoder, load_model

        # Super initialization
        super().__init__(
            model=None,
            clip_values=clip_values,
            channels_first=None,
            preprocessing_defences=preprocessing_defences,
            postprocessing_defences=postprocessing_defences,
            preprocessing=preprocessing,
        )

        self.verbose = verbose

        # Check clip values
        if self.clip_values is not None:
            if not np.all(self.clip_values[0] == -1):
                raise ValueError(
                    "This estimator requires normalized input audios with clip_vales=(-1, 1)."
                )
            if not np.all(self.clip_values[1] == 1):
                raise ValueError(
                    "This estimator requires normalized input audios with clip_vales=(-1, 1)."
                )

        # Check postprocessing defences
        if self.postprocessing_defences is not None:
            raise ValueError(
                "This estimator does not support `postprocessing_defences`.")

        # Set cpu/gpu device
        self._device: torch.device
        if device_type == "cpu" or not torch.cuda.is_available():
            self._device = torch.device("cpu")
        else:
            cuda_idx = torch.cuda.current_device()
            self._device = torch.device("cuda:{}".format(cuda_idx))

        self._input_shape = None

        # Load model
        if model is None:
            if pretrained_model == "an4":
                filename, url = (
                    "an4_pretrained_v2.pth",
                    "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/an4_pretrained_v2.pth",
                )

            elif pretrained_model == "librispeech":
                filename, url = (
                    "librispeech_pretrained_v2.pth",
                    "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/"
                    "librispeech_pretrained_v2.pth",
                )

            elif pretrained_model == "tedlium":
                filename, url = (
                    "ted_pretrained_v2.pth",
                    "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/ted_pretrained_v2.pth",
                )

            elif pretrained_model is None:
                # If model is None and no pretrained model is selected, then we need to have parameters filename and
                # url to download, extract and load the automatic speech recognition model
                if filename is None or url is None:
                    filename, url = (
                        "librispeech_pretrained_v2.pth",
                        "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/"
                        "librispeech_pretrained_v2.pth",
                    )

            else:
                raise ValueError(
                    "The input pretrained model %s is not supported." %
                    pretrained_model)

            # Download model
            model_path = get_file(filename=filename,
                                  path=config.ART_DATA_PATH,
                                  url=url,
                                  extract=False,
                                  verbose=self.verbose)

            # Then load model
            self._model = load_model(device=self._device,
                                     model_path=model_path,
                                     use_half=use_half)

        else:
            self._model = model

            # Push model to the corresponding device
            self._model.to(self._device)

        # Save first version of the optimizer
        self._optimizer = optimizer
        self._use_amp = use_amp

        # Now create a decoder
        # Create the language model config first
        lm_config = LMConfig()

        # Then setup the config
        if decoder_type == "greedy":
            lm_config.decoder_type = DecoderType.greedy
        elif decoder_type == "beam":
            lm_config.decoder_type = DecoderType.beam
        else:
            raise ValueError("Decoder type %s currently not supported." %
                             decoder_type)

        lm_config.lm_path = lm_path
        lm_config.top_paths = top_paths
        lm_config.alpha = alpha
        lm_config.beta = beta
        lm_config.cutoff_top_n = cutoff_top_n
        lm_config.cutoff_prob = cutoff_prob
        lm_config.beam_width = beam_width
        lm_config.lm_workers = lm_workers

        # Create the decoder with the lm config
        self.decoder = load_decoder(labels=self._model.labels, cfg=lm_config)

        # Setup for AMP use
        if self._use_amp:
            from apex import amp

            if self._optimizer is None:
                logger.warning(
                    "An optimizer is needed to use the automatic mixed precision tool, but none for provided. "
                    "A default optimizer is used.")

                # Create the optimizers
                parameters = self._model.parameters()
                self._optimizer = torch.optim.SGD(parameters, lr=0.01)

            if self._device.type == "cpu":
                enabled = False
            else:
                enabled = True

            self._model, self._optimizer = amp.initialize(
                models=self._model,
                optimizers=self._optimizer,
                enabled=enabled,
                opt_level=opt_level,
                loss_scale=1.0,
            )
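
    # Usage sketch for the constructor above (not part of the original code; assumes
    # deepspeech_pytorch and the pretrained checkpoints described in the docstring are
    # available, and that audio_batch is a np.ndarray of per-sample waveforms):
    #
    #     asr = PyTorchDeepSpeech(pretrained_model="librispeech", device_type="cpu")
    #     predictions = asr.predict(audio_batch, batch_size=1)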
Example #10
def main(args):
    batch_status_message = {'status':'Ready','modelurl':args.model}
    batch_count = 0
    model_filename = 'base_model.h5'

    logging.info('model={}'.format(args.model))
    location = os.path.join(ART_DATA_PATH, model_filename)
    try:
        os.remove(location)
    except OSError:
        pass
    path = get_file(model_filename, extract=False, path=ART_DATA_PATH, url=args.model)
    kmodel = load_model(path) 
    model = KerasClassifier(kmodel, use_logits=False, clip_values=[float(args.min),float(args.max)]) 
    logging.info('finished acquiring model')
    logging.info('creating attack {}'.format(args.attack))

    if args.attack == 'FGM':
        attack = FastGradientMethod(model, eps=0.3, eps_step=0.01, targeted=False) 
        logging.info('created FGM attack')
    elif args.attack == 'PGD':
        attack = ProjectedGradientDescent(model, eps=8, eps_step=2, max_iter=13, targeted=False, num_random_init=True)
        logging.info('created PGD attack')
    else:
        logging.error('Invalid attack provided: {}. Must be one of FGM, PGD'.format(args.attack))
        exit(1)

    logging.info('finished creating attack')
    logging.info('brokers={}'.format(args.brokers))
    logging.info('readtopic={}'.format(args.readtopic))
    logging.info('creating kafka consumer')

    consumer = KafkaConsumer(
        args.readtopic,
        bootstrap_servers=args.brokers,
        value_deserializer=lambda val: loads(val.decode('utf-8')))
    logging.info("finished creating kafka consumer")

    if args.dbxtoken != '':
        dbx = dropbox.Dropbox(args.dbxtoken)
        logging.info('creating kafka producer')    
        producer = KafkaProducer(bootstrap_servers=args.brokers,
                                 value_serializer=lambda x: 
                                 dumps(x).encode('utf-8'))
        logging.info('finished creating kafka producer')    
    else:
        dbx = None

    while True:
        for message in consumer:
            if message.value['url']:
                conn = psycopg2.connect(
                    host = args.dbhost,
                    port = 5432,
                    dbname = args.dbname,
                    user = args.dbusername,
                    password = args.dbpassword)
                cur = conn.cursor()
                image_url = message.value['url']
                query = 'UPDATE images SET STATUS=%s where URL=%s'
                cur.execute(query, ('Processed', image_url))
                logging.info('updated database for {}'.format(image_url))
                cur.close()
                conn.close()
                batch_count = batch_count+1
                response = requests.get(image_url)
                img = Image.open(BytesIO(response.content))
                label = message.value['label']
                infilename = message.value['filename'].rpartition('.')[0]
                logging.info('received URL {}'.format(image_url))
                logging.info('received label {}'.format(label))
                logging.info('received filename {}'.format(infilename))
                logging.info('downloading image')
                image = np.array(img.getdata()).reshape(1,img.size[0], img.size[1], 3).astype('float32')
                logging.info('downloaded image {} and {}'.format(image.shape,image.dtype))
                images = np.ndarray(shape=(2,32,32,3))
                logging.info('created images storage')
                images[0] = image
                logging.info('assigned image to images')
                adversarial = attack.generate(image)
                logging.info('adversarial image generated')
                images[1] = adversarial
                logging.info('adversarial image assigned')
                preds = model.predict(images)
                orig_inf = np.argmax(preds[0])
                adv_inf = np.argmax(preds[1])
                logging.info('original inference: {}  adversarial inference: {}'.format(orig_inf, adv_inf))
                if (orig_inf != adv_inf) and (dbx is not None):
                    fs=BytesIO()
                    imout=Image.fromarray(np.uint8(adversarial[0]))
                    imout.save(fs, format='jpeg')
                    outfilename = '/images/{}_{}_adv.jpg'.format(infilename,adv_inf) 
                    logging.info('Uploading file')
                    dbx.files_upload(f=fs.getvalue(), path=outfilename,mode=dropbox.files.WriteMode('overwrite', None))
                if (batch_count == int(args.batchsize)) and (dbx is not None):
                    logging.info('Sending message {} to topic {}'.format(batch_status_message,args.writetopic))
                    producer.send(args.writetopic,batch_status_message)
                    batch_count=0
Example #11
def main(args):
    logging.info('brokers={}'.format(args.brokers))
    logging.info('readtopic={}'.format(args.readtopic))
    logging.info('creating kafka consumer')

    consumer = KafkaConsumer(
        args.readtopic,
        bootstrap_servers=args.brokers,
        value_deserializer=lambda val: loads(val.decode('utf-8')))
    logging.info("finished creating kafka consumer")

    if args.dbxtoken != '':
        dbx = dropbox.Dropbox(args.dbxtoken)
    else:
        dbx = None
        logging.info('No Dropbox token provided')

    while True:
        for message in consumer:
            if (message.value['status']
                    == 'Ready') and (message.value['modelurl']):
                logging.info('Received {}'.format(message.value))
                (X_train, y_train), (X_test, y_test) = cifar10.load_data()
                X_train = X_train.reshape(X_train.shape[0], 32, 32,
                                          3).astype('float32')
                X_test = X_test.reshape(X_test.shape[0], 32, 32,
                                        3).astype('float32')
                y_train = to_categorical(y_train, 10)
                y_test = to_categorical(y_test, 10)
                modelurl = message.value['modelurl']
                logging.info('model={}'.format(modelurl))
                model_filename = 'base_model.h5'
                location = os.path.join(ART_DATA_PATH, model_filename)
                try:
                    os.remove(location)
                except OSError:
                    pass
                path = get_file(model_filename,
                                extract=False,
                                path=ART_DATA_PATH,
                                url=modelurl)
                kmodel = load_model(path)
                model = KerasClassifier(
                    kmodel,
                    use_logits=False,
                    clip_values=[float(args.min),
                                 float(args.max)])
                logging.info('finished acquiring model')
                imagefiles = dbx.files_list_folder('/images')
                adversaries = False
                for dbximage in imagefiles.entries:
                    filepath = '/images/' + dbximage.name
                    filename = dbximage.name
                    label = filename.split('_')[-3]
                    response = dbx.files_download(filepath)[1]
                    img = Image.open(BytesIO(response.content))
                    logging.info('downloaded file {}'.format(dbximage.name))
                    image = np.array(img.getdata()).reshape(
                        1, img.size[0], img.size[1], 3).astype('float32')
                    if adversaries is False:
                        X_adv = image
                        y_adv = [label]
                        adversaries = True
                    else:
                        X_adv = np.append(X_adv, image, axis=0)
                        y_adv = np.append(y_adv, [label], axis=0)
                y_adv = to_categorical(y_adv, 10)
                X_train = np.append(X_train, X_adv, axis=0)
                y_train = np.append(y_train, y_adv, axis=0)
                if args.testmode == '0':
                    model.fit(X_train, y_train, nb_epochs=83,
                              batch_size=50)  # Per ART 360 example
                else:
                    model.fit(X_train, y_train, nb_epochs=1,
                              batch_size=50)  # Testing only
                model_basename = model_filename.split('.')[0]
                adv_model_name = model_basename + '_adv'
                adv_model_filename = adv_model_name + '.h5'
                model.save(adv_model_filename)
                outfilename = '/models/{}'.format(adv_model_filename)
                adv_model_fullpath = '/opt/app-root/src/.art/data/' + adv_model_filename
                mfile = open(adv_model_fullpath, 'rb')
                dbx.files_upload(f=mfile.read(),
                                 path=outfilename,
                                 mode=dropbox.files.WriteMode(
                                     'overwrite', None))
                mfile.close()
                share_link = dbx.sharing_create_shared_link_with_settings(
                    outfilename)

                conn = psycopg2.connect(host=args.dbhost,
                                        port=5432,
                                        dbname=args.dbname,
                                        user=args.dbusername,
                                        password=args.dbpassword)
                cur = conn.cursor()
                query = 'INSERT into models(URL, FILENAME, MODELNAME) VALUES(%s, %s, %s)'
                cur.execute(query,
                            (share_link, adv_model_filename, adv_model_name))
                conn.commit()
                logging.info('updated database with new model')
                cur.close()
                conn.close()
Example #12
x_train = x_train[0:num_samples_train]  # take the first 100 of the 50,000 training images
y_train = y_train[0:num_samples_train]  # 100
x_test = x_test[0:num_samples_test]  # take the first 100 of the 10,000 test images
y_test = y_test[0:num_samples_test]  # 100
# 10 classes
class_descr = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
    'ship', 'truck'
]

# 2. Evaluate the classifier

# 2.1 Load the pre-trained classifier (a ResNet architecture)
path = get_file(
    'cifar_resnet.h5',
    extract=False,
    path=DATA_PATH,
    url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
classifier_model = load_model(path)
classifier = KerasClassifier(clip_values=(min_, max_),
                             model=classifier_model,
                             use_logits=False,
                             preprocessing=(0.5, 1))
classifier_model.summary()
"""
Total params: 470,218
Trainable params: 467,946
Non-trainable params: 2,272
"""

# 2.2 Evaluate the classifier on the first 100 test images
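
# A minimal evaluation sketch (not part of the original snippet; follows the standard
# ART pattern of comparing argmax predictions against the one-hot labels):
x_test_pred = np.argmax(classifier.predict(x_test[:100]), axis=1)
nb_correct_pred = np.sum(x_test_pred == np.argmax(y_test[:100], axis=1))
print("Correctly classified: {}".format(nb_correct_pred))
print("Incorrectly classified: {}".format(100 - nb_correct_pred))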