def _check_and_download_file(uri: str, basename: str, *paths: str) -> str:
    """Check for the file and download it from the given URI if it is missing."""
    dir_path = os.path.join(*paths)
    file_path = os.path.join(dir_path, basename)
    if not os.path.isdir(dir_path):
        make_directory(dir_path)
    if not os.path.isfile(file_path):
        logger.info("Could not find %s. Downloading it now...", basename)
        get_file(basename, uri, path=dir_path)
    return file_path
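# Usage sketch (illustrative only): cache a file under a nested directory.
# The URI and path components below are hypothetical placeholders.
vocab_path = _check_and_download_file(
    "https://example.com/assets/vocab.txt",    # hypothetical URI
    "vocab.txt",                               # basename stored on disk
    os.path.expanduser("~"), ".art", "data",   # *paths joined into the cache dir
)
print(vocab_path)  # e.g. ~/.art/data/vocab.txt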
def GetCifar10WithModel():
    """Load the CIFAR-10 dataset and a pre-trained CIFAR-10 model."""
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')

    num_samples_train = 100
    num_samples_test = 100
    x_train = x_train[0:num_samples_train]
    y_train = y_train[0:num_samples_train]
    x_test = x_test[0:num_samples_test]
    y_test = y_test[0:num_samples_test]

    class_descr = [
        'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
        'horse', 'ship', 'truck'
    ]

    path = get_file(
        'cifar_resnet.h5',
        extract=False,
        path=DATA_PATH,
        url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
    classifier_model = load_model(path)
    # classifier_model.summary()
    return x_train, y_train, x_test, y_test, classifier_model, min_, max_
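# Illustrative call of the loader above; shapes follow ART's load_dataset('cifar10'),
# truncated to 100 samples per split.
x_train, y_train, x_test, y_test, classifier_model, min_, max_ = GetCifar10WithModel()
print(x_train.shape, x_test.shape)  # (100, 32, 32, 3) (100, 32, 32, 3)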
def test_loss_gradient(self, art_warning):
    try:
        transcripts = list()
        audios = list()
        for filename, sample in self.samples.items():
            file_path = get_file(filename, sample["uri"])
            _, audio = read(file_path)
            audios.append(audio)
            transcripts.append(sample["transcript"])

        audio_batch = np.array(audios, dtype=object)
        target_batch = np.array(transcripts)

        lingvo = TensorFlowLingvoASR()
        gradient_batch = lingvo._loss_gradient_per_batch(audio_batch, target_batch)
        gradient_sequence = lingvo._loss_gradient_per_sequence(audio_batch, target_batch)

        gradient_batch_sum = np.array(
            [np.abs(gb).sum() for gb in gradient_batch], dtype=object)
        gradient_sequence_sum = np.array(
            [np.abs(gs).sum() for gs in gradient_sequence], dtype=object)

        # Test that loss gradients per batch and per sequence are the same
        assert_allclose(gradient_sequence_sum, gradient_batch_sum, rtol=1e-05)
        # Test that gradient_batch, gradient_sequence and audios items have the same shapes
        assert (
            [gb.shape for gb in gradient_batch]
            == [gs.shape for gs in gradient_sequence]
            == [a.shape for a in audios]
        )
    except ARTTestException as e:
        art_warning(e)
def test_predict(self, art_warning):
    try:
        transcripts = list()
        audios = list()
        for filename, sample in self.samples.items():
            file_path = get_file(filename, sample["uri"])
            _, audio = read(file_path)
            audios.append(audio)
            transcripts.append(sample["transcript"])

        audio_batch = np.array(audios, dtype=object)

        lingvo = TensorFlowLingvoASR()
        prediction = lingvo.predict(audio_batch, batch_size=1)
        assert prediction[0] == transcripts[0]
    except ARTTestException as e:
        art_warning(e)
def _load_model(
    images: "tf.Tensor",
    filename: Optional[str] = None,
    url: Optional[str] = None,
    obj_detection_model: Optional["FasterRCNNMetaArch"] = None,
    is_training: bool = False,
    groundtruth_boxes_list: Optional[List["tf.Tensor"]] = None,
    groundtruth_classes_list: Optional[List["tf.Tensor"]] = None,
    groundtruth_weights_list: Optional[List["tf.Tensor"]] = None,
) -> Tuple[Dict[str, "tf.Tensor"], ...]:
    """
    Download, extract and load a model from a URL if it is not already in the cache. The file indicated by
    `url` is downloaded to the path ~/.art/data and given the name `filename`. Files in tar, tar.gz, tar.bz,
    and zip formats will also be extracted. Then the model is loaded, pipelined and its outputs are returned
    as a tuple of (model, predictions, losses, detections).

    :param images: Input samples of shape (nb_samples, height, width, nb_channels).
    :param filename: Name of the file.
    :param url: Download URL.
    :param obj_detection_model: A pre-built object detection model to reuse; if `None`, one is downloaded and
                                built from `filename` and `url`.
    :param is_training: A boolean indicating whether the training version of the computation graph should be
                        constructed.
    :param groundtruth_boxes_list: A list of 2-D tf.float32 tensors of shape [num_boxes, 4] containing
                                   coordinates of the groundtruth boxes. Groundtruth boxes are provided in
                                   [y_min, x_min, y_max, x_max] format and are assumed to be normalized and
                                   clipped relative to the image window with conditions y_min <= y_max and
                                   x_min <= x_max.
    :param groundtruth_classes_list: A list of 1-D tf.float32 tensors of shape [num_boxes] containing the class
                                     targets, with the zero index assumed to map to the first non-background
                                     class.
    :param groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape [num_boxes] containing weights
                                     for the groundtruth boxes.
    :return: A tuple of (model, predictions, losses, detections):

                - model: the loaded object detection model.
                - predictions: a dictionary holding "raw" prediction tensors.
                - losses: a dictionary mapping loss keys (`Loss/RPNLoss/localization_loss`,
                          `Loss/RPNLoss/objectness_loss`, `Loss/BoxClassifierLoss/localization_loss`,
                          `Loss/BoxClassifierLoss/classification_loss`) to scalar tensors representing
                          corresponding loss values.
                - detections: a dictionary containing final detection results.
    """
    import tensorflow.compat.v1 as tf  # lgtm [py/repeated-import]
    from object_detection.utils import variables_helper

    if obj_detection_model is None:
        from object_detection.utils import config_util
        from object_detection.builders import model_builder

        # If obj_detection_model is None, then we need the parameters filename and url to download, extract
        # and load the object detection model
        if filename is None or url is None:  # pragma: no cover
            raise ValueError(
                "Need input parameters `filename` and `url` to download, "
                "extract and load the object detection model."
            )

        # Download and extract
        path = get_file(filename=filename, path=config.ART_DATA_PATH, url=url, extract=True)

        # Load model config
        pipeline_config = path + "/pipeline.config"
        configs = config_util.get_configs_from_pipeline_file(pipeline_config)
        configs["model"].faster_rcnn.second_stage_batch_size = configs[
            "model"
        ].faster_rcnn.first_stage_max_proposals

        # Load model
        obj_detection_model = model_builder.build(
            model_config=configs["model"], is_training=is_training, add_summaries=False
        )

    # Provide groundtruth
    if groundtruth_classes_list is not None:
        groundtruth_classes_list = [
            tf.one_hot(groundtruth_class, obj_detection_model.num_classes)
            for groundtruth_class in groundtruth_classes_list
        ]

        obj_detection_model.provide_groundtruth(
            groundtruth_boxes_list=groundtruth_boxes_list,
            groundtruth_classes_list=groundtruth_classes_list,
            groundtruth_weights_list=groundtruth_weights_list,
        )

    # Create model pipeline
    images *= 255.0
    preprocessed_images, true_image_shapes = obj_detection_model.preprocess(images)
    predictions = obj_detection_model.predict(preprocessed_images, true_image_shapes)
    losses = obj_detection_model.loss(predictions, true_image_shapes)
    detections = obj_detection_model.postprocess(predictions, true_image_shapes)

    # Initialize variables from checkpoint
    # Get variables to restore
    variables_to_restore = obj_detection_model.restore_map(
        fine_tune_checkpoint_type="detection", load_all_detection_checkpoint_vars=True
    )

    # Get variables from the checkpoint downloaded above (note: `path` is only defined
    # when the model was downloaded in this call)
    fine_tune_checkpoint_path = path + "/model.ckpt"
    vars_in_ckpt = variables_helper.get_variables_available_in_checkpoint(
        variables_to_restore, fine_tune_checkpoint_path, include_global_step=False
    )

    # Initialize from checkpoint
    tf.train.init_from_checkpoint(fine_tune_checkpoint_path, vars_in_ckpt)

    return obj_detection_model, predictions, losses, detections
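# A minimal usage sketch for _load_model (TF1 graph mode). The checkpoint name
# and URL follow the TensorFlow object-detection model zoo and are an assumption;
# adjust them to the model actually used.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
images = tf.placeholder(tf.float32, shape=[1, 300, 300, 3])
model, predictions, losses, detections = _load_model(
    images=images,
    filename="faster_rcnn_inception_v2_coco_2017_11_08",
    url="http://download.tensorflow.org/models/object_detection/"
        "faster_rcnn_inception_v2_coco_2017_11_08.tar.gz",
)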
from deepfool import DeepFool
from dynamic_deepfool import DynamicDeepFool
import numpy as np
import timeit

# %matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf

# Assumed imports for the helpers used below (ART utilities and Keras);
# adjust to the ART version in use.
from art.utils import load_dataset, get_file
from art.config import ART_DATA_PATH
from tensorflow.keras.models import load_model

# tf.compat.v1.disable_eager_execution()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
print(tf.__version__)

(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')

path = get_file(
    'mnist_cnn_original.h5',
    extract=False,
    path=ART_DATA_PATH,
    url='https://www.dropbox.com/s/p2nyzne9chcerid/mnist_cnn_original.h5?dl=1')
classifier_model = load_model(path)

# (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
# path = get_file('cifar_resnet.h5', extract=False, path=ART_DATA_PATH,
#                 url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
# classifier_model = load_model(path)

# Discarded iris and stl10 datasets
# (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('stl10')
# (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('iris')
# # path = get_file('cifar_resnet.h5', extract=False, path=ART_DATA_PATH,
# #                 url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
# # classifier_model = load_model(path)
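# A minimal attack sketch, assuming the local deepfool.DeepFool follows ART's
# attack interface (wrap the Keras model in a classifier, then call generate()).
# In TF2 this may additionally require tf.compat.v1.disable_eager_execution().
from art.estimators.classification import KerasClassifier

classifier = KerasClassifier(model=classifier_model, clip_values=(min_, max_))
attack = DeepFool(classifier)
x_test_adv = attack.generate(x_test[:10])  # craft adversarial examples for 10 test images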
num_samples_train = 1000
num_samples_test = 1000
x_train = x_train[0:num_samples_train]
y_train = y_train[0:num_samples_train]
x_test = x_test[0:num_samples_test]
y_test = y_test[0:num_samples_test]

class_descr = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
    'horse', 'ship', 'truck'
]

path = get_file(
    'cifar_alexnet.h5',
    extract=False,
    path=config.ART_DATA_PATH,
    url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
classifier_model = load_model(path)
classifier = KerasClassifier(clip_values=(min_, max_),
                             model=classifier_model,
                             use_logits=False,
                             preprocessing=(0.5, 1))

# One of: 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'
target_class = "bird"
target_label = np.zeros(len(class_descr))
target_label[class_descr.index(target_class)] = 1
target_instance = np.expand_dims(
    x_test[np.argmax(y_test, axis=1) == class_descr.index(target_class)][3], axis=0)
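# Sanity check (illustrative): confirm the classifier's current prediction on the
# chosen target instance before mounting an attack against it.
pred = classifier.predict(target_instance)
print('predicted class:', class_descr[int(np.argmax(pred))])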
def __init__(
    self,
    espresso_config_filepath: Optional[str] = None,
    model: Optional[str] = None,
    clip_values: Optional["CLIP_VALUES_TYPE"] = None,
    preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None,
    postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None,
    preprocessing: "PREPROCESSING_TYPE" = None,
    device_type: str = "gpu",
    verbose: bool = True,
):
    """
    Initialize an instance of PyTorchEspresso.

    :param espresso_config_filepath: The path of the Espresso config file (yaml).
    :param model: The choice of pretrained model if a pretrained model is required.
    :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
           maximum values allowed for features. If floats are provided, these will be used as the range of all
           features. If arrays are provided, each value will be considered the bound for a feature, thus the
           shape of clip values needs to match the total number of features.
    :param preprocessing_defences: Preprocessing defence(s) to be applied by the estimator.
    :param postprocessing_defences: Postprocessing defence(s) to be applied by the estimator.
    :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
           used for data preprocessing. The first value will be subtracted from the input. The input will then
           be divided by the second one.
    :param device_type: Type of device to be used for model and tensors: if `cpu`, run on CPU; if `gpu`, run
           on GPU if available, otherwise run on CPU.
    """
    import torch  # lgtm [py/repeated-import]
    import yaml
    from fairseq import checkpoint_utils, tasks, utils
    from fairseq.data import encoders
    import sentencepiece as spm

    # Super initialization
    super().__init__(
        model=None,
        clip_values=clip_values,
        channels_first=None,
        preprocessing_defences=preprocessing_defences,
        postprocessing_defences=postprocessing_defences,
        preprocessing=preprocessing,
    )
    self.verbose = verbose

    # Check clip values
    if self.clip_values is not None:
        if not np.all(self.clip_values[0] == -1):  # pragma: no cover
            raise ValueError("This estimator requires normalized input audios with clip_values=(-1, 1).")
        if not np.all(self.clip_values[1] == 1):  # pragma: no cover
            raise ValueError("This estimator requires normalized input audios with clip_values=(-1, 1).")

    # Check postprocessing defences
    if self.postprocessing_defences is not None:  # pragma: no cover
        raise ValueError("This estimator does not support `postprocessing_defences`.")

    # Set cpu/gpu device
    self._device: torch.device
    if device_type == "cpu" or not torch.cuda.is_available():
        self._device = torch.device("cpu")
    else:  # pragma: no cover
        cuda_idx = torch.cuda.current_device()
        self._device = torch.device("cuda:{}".format(cuda_idx))

    # Load config/model
    if espresso_config_filepath is None:
        if model == "librispeech_transformer":
            config_filename, config_url = (
                "libri960_transformer.yaml",
                "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/"
                "libri960_transformer.yaml",
            )
            model_filename, model_url = (
                "checkpoint_best.pt",
                "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/checkpoint_best.pt",
            )
            sp_filename, sp_url = (
                "train_960_unigram5000.model",
                "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/"
                "train_960_unigram5000.model",
            )
            dict_filename, dict_url = (
                "train_960_unigram5000_units.txt",
                "https://github.com/YiwenShaoStephen/espresso/releases/download/v0.1-alpha/"
                "train_960_unigram5000_units.txt",
            )
        else:  # pragma: no cover
            raise ValueError("Model not recognised.")

        # Download files
        config_path = get_file(
            filename=config_filename, path=config.ART_DATA_PATH, url=config_url, extract=False,
            verbose=self.verbose
        )
        model_path = get_file(
            filename=model_filename, path=config.ART_DATA_PATH, url=model_url, extract=False,
            verbose=self.verbose
        )
        sp_path = get_file(
            filename=sp_filename, path=config.ART_DATA_PATH, url=sp_url, extract=False, verbose=self.verbose
        )
        dict_path = get_file(
            filename=dict_filename, path=config.ART_DATA_PATH, url=dict_url, extract=False, verbose=self.verbose
        )
    else:
        # Use the user-provided config file directly
        config_path = espresso_config_filepath

    # Construct espresso args
    with open(config_path) as file:
        esp_args_dict = yaml.load(file, Loader=yaml.FullLoader)
        esp_args = Namespace(**esp_args_dict)
    if espresso_config_filepath is None:
        # Overwrite paths in the downloaded config with the actual ones
        esp_args.path = model_path
        esp_args.sentencepiece_model = sp_path
        esp_args.dict = dict_path
    self.esp_args = esp_args

    # Set up the espresso/fairseq task
    self.task = tasks.setup_task(self.esp_args)
    self.task.feat_dim = self.esp_args.feat_dim

    # Load the model ensemble
    self._models, self._model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(self.esp_args.path),
        arg_overrides=ast.literal_eval(self.esp_args.model_overrides),
        task=self.task,
        suffix=getattr(self.esp_args, "checkpoint_suffix", ""),
    )
    for m in self._models:
        m.to(self._device)
    self._model = self._models[0]

    self.dictionary = self.task.target_dictionary
    self.generator = self.task.build_generator(self._models, self.esp_args)
    self.tokenizer = encoders.build_tokenizer(self.esp_args)
    self.bpe = encoders.build_bpe(self.esp_args)  # BPE encoder
    self.spp = spm.SentencePieceProcessor()  # SentencePiece model
    self.spp.Load(self.esp_args.sentencepiece_model)
    self.criterion = self.task.build_criterion(self.esp_args)
    self._sampling_rate = self.esp_args.sampling_rate
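# A minimal construction sketch, assuming the pretrained Espresso LibriSpeech
# transformer release referenced above is reachable.
asr = PyTorchEspresso(
    model="librispeech_transformer",
    clip_values=(-1, 1),  # required normalization, per the check above
    device_type="cpu",
)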
def __init__(
    self,
    model: Optional["DeepSpeech"] = None,
    pretrained_model: Optional[str] = None,
    filename: Optional[str] = None,
    url: Optional[str] = None,
    use_half: bool = False,
    optimizer: Optional["torch.optim.Optimizer"] = None,  # type: ignore
    use_amp: bool = False,
    opt_level: str = "O1",
    decoder_type: str = "greedy",
    lm_path: str = "",
    top_paths: int = 1,
    alpha: float = 0.0,
    beta: float = 0.0,
    cutoff_top_n: int = 40,
    cutoff_prob: float = 1.0,
    beam_width: int = 10,
    lm_workers: int = 4,
    clip_values: Optional["CLIP_VALUES_TYPE"] = None,
    preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None,
    postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None,
    preprocessing: "PREPROCESSING_TYPE" = None,
    device_type: str = "gpu",
    verbose: bool = True,
):
    """
    Initialize an instance of PyTorchDeepSpeech.

    :param model: DeepSpeech model.
    :param pretrained_model: The choice of pretrained model if a pretrained model is required. Currently this
           estimator supports 3 different pretrained models consisting of `an4`, `librispeech` and `tedlium`.
    :param filename: Name of the file.
    :param url: Download URL.
    :param use_half: Whether to use FP16 for the pretrained model.
    :param optimizer: The optimizer used to train the estimator.
    :param use_amp: Whether to use the automatic mixed precision tool to enable mixed precision training or
           gradient computation, e.g. with loss gradient computation. When set to True, this option is only
           triggered if there are GPUs available.
    :param opt_level: Specify a pure or mixed precision optimization level. Used when use_amp is True. Accepted
           values are `O0`, `O1`, `O2`, and `O3`.
    :param decoder_type: Decoder type. Either `greedy` or `beam`. This parameter is only used when users want
           transcription outputs.
    :param lm_path: Path to an (optional) kenlm language model for use with beam search. This parameter is only
           used when users want transcription outputs.
    :param top_paths: Number of beams to be returned. This parameter is only used when users want transcription
           outputs.
    :param alpha: The weight used for the language model. This parameter is only used when users want
           transcription outputs.
    :param beta: Language model word bonus (all words). This parameter is only used when users want
           transcription outputs.
    :param cutoff_top_n: The cutoff_top_n characters with the highest probabilities in the vocabulary will be
           used in beam search. This parameter is only used when users want transcription outputs.
    :param cutoff_prob: Cutoff probability in pruning. This parameter is only used when users want transcription
           outputs.
    :param beam_width: The width of the beam to be used. This parameter is only used when users want
           transcription outputs.
    :param lm_workers: Number of language model processes to use. This parameter is only used when users want
           transcription outputs.
    :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
           maximum values allowed for features. If floats are provided, these will be used as the range of all
           features. If arrays are provided, each value will be considered the bound for a feature, thus the
           shape of clip values needs to match the total number of features.
    :param preprocessing_defences: Preprocessing defence(s) to be applied by the estimator.
    :param postprocessing_defences: Postprocessing defence(s) to be applied by the estimator.
    :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
           used for data preprocessing. The first value will be subtracted from the input. The input will then
           be divided by the second one.
    :param device_type: Type of device to be used for model and tensors: if `cpu`, run on CPU; if `gpu`, run
           on GPU if available, otherwise run on CPU.
    """
    import torch  # lgtm [py/repeated-import]

    from deepspeech_pytorch.configs.inference_config import LMConfig
    from deepspeech_pytorch.enums import DecoderType
    from deepspeech_pytorch.utils import load_decoder, load_model

    # Super initialization
    super().__init__(
        model=None,
        clip_values=clip_values,
        channels_first=None,
        preprocessing_defences=preprocessing_defences,
        postprocessing_defences=postprocessing_defences,
        preprocessing=preprocessing,
    )
    self.verbose = verbose

    # Check clip values
    if self.clip_values is not None:
        if not np.all(self.clip_values[0] == -1):
            raise ValueError("This estimator requires normalized input audios with clip_values=(-1, 1).")
        if not np.all(self.clip_values[1] == 1):
            raise ValueError("This estimator requires normalized input audios with clip_values=(-1, 1).")

    # Check postprocessing defences
    if self.postprocessing_defences is not None:
        raise ValueError("This estimator does not support `postprocessing_defences`.")

    # Set cpu/gpu device
    self._device: torch.device
    if device_type == "cpu" or not torch.cuda.is_available():
        self._device = torch.device("cpu")
    else:
        cuda_idx = torch.cuda.current_device()
        self._device = torch.device("cuda:{}".format(cuda_idx))

    self._input_shape = None

    # Load model
    if model is None:
        if pretrained_model == "an4":
            filename, url = (
                "an4_pretrained_v2.pth",
                "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/an4_pretrained_v2.pth",
            )
        elif pretrained_model == "librispeech":
            filename, url = (
                "librispeech_pretrained_v2.pth",
                "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/"
                "librispeech_pretrained_v2.pth",
            )
        elif pretrained_model == "tedlium":
            filename, url = (
                "ted_pretrained_v2.pth",
                "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/ted_pretrained_v2.pth",
            )
        elif pretrained_model is None:
            # If model is None and no pretrained model is selected, the parameters filename and url are needed
            # to download, extract and load the automatic speech recognition model; fall back to the LibriSpeech
            # pretrained model if they are missing
            if filename is None or url is None:
                filename, url = (
                    "librispeech_pretrained_v2.pth",
                    "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/"
                    "librispeech_pretrained_v2.pth",
                )
        else:
            raise ValueError("The input pretrained model %s is not supported." % pretrained_model)

        # Download model
        model_path = get_file(
            filename=filename, path=config.ART_DATA_PATH, url=url, extract=False, verbose=self.verbose
        )

        # Then load model
        self._model = load_model(device=self._device, model_path=model_path, use_half=use_half)
    else:
        self._model = model

    # Push model to the corresponding device
    self._model.to(self._device)

    # Save first version of the optimizer
    self._optimizer = optimizer
    self._use_amp = use_amp

    # Now create a decoder
    # Create the language model config first
    lm_config = LMConfig()

    # Then set up the config
    if decoder_type == "greedy":
        lm_config.decoder_type = DecoderType.greedy
    elif decoder_type == "beam":
        lm_config.decoder_type = DecoderType.beam
    else:
        raise ValueError("Decoder type %s currently not supported." % decoder_type)

    lm_config.lm_path = lm_path
    lm_config.top_paths = top_paths
    lm_config.alpha = alpha
    lm_config.beta = beta
    lm_config.cutoff_top_n = cutoff_top_n
    lm_config.cutoff_prob = cutoff_prob
    lm_config.beam_width = beam_width
    lm_config.lm_workers = lm_workers

    # Create the decoder with the lm config
    self.decoder = load_decoder(labels=self._model.labels, cfg=lm_config)

    # Setup for AMP use
    if self._use_amp:
        from apex import amp

        if self._optimizer is None:
            logger.warning(
                "An optimizer is needed to use the automatic mixed precision tool, but none was provided. "
                "A default optimizer is used."
            )

            # Create the optimizer
            parameters = self._model.parameters()
            self._optimizer = torch.optim.SGD(parameters, lr=0.01)

        if self._device.type == "cpu":
            enabled = False
        else:
            enabled = True

        self._model, self._optimizer = amp.initialize(
            models=self._model,
            optimizers=self._optimizer,
            enabled=enabled,
            opt_level=opt_level,
            loss_scale=1.0,
        )
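# A minimal construction sketch, assuming the deepspeech.pytorch v2 pretrained
# weights referenced above are reachable.
asr = PyTorchDeepSpeech(
    pretrained_model="librispeech",
    clip_values=(-1, 1),   # required normalization, per the check above
    decoder_type="greedy",
    device_type="cpu",
)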
def main(args):
    batch_status_message = {'status': 'Ready', 'modelurl': args.model}
    batch_count = 0
    model_filename = 'base_model.h5'

    logging.info('model={}'.format(args.model))
    location = os.path.join(ART_DATA_PATH, model_filename)
    try:
        os.remove(location)
    except OSError:
        pass
    path = get_file(model_filename, extract=False, path=ART_DATA_PATH, url=args.model)
    kmodel = load_model(path)
    model = KerasClassifier(kmodel, use_logits=False,
                            clip_values=[float(args.min), float(args.max)])
    logging.info('finished acquiring model')

    logging.info('creating attack {}'.format(args.attack))
    if args.attack == 'FGM':
        attack = FastGradientMethod(model, eps=0.3, eps_step=0.01, targeted=False)
        logging.info('created FGM attack')
    elif args.attack == 'PGD':
        # num_random_init=True is treated as 1 random restart
        attack = ProjectedGradientDescent(model, eps=8, eps_step=2, max_iter=13,
                                          targeted=False, num_random_init=True)
        logging.info('created PGD attack')
    else:
        logging.error('Invalid attack provided: {} must be one of {{FGM, PGD}}'.format(args.attack))
        exit(1)
    logging.info('finished creating attack')

    logging.info('brokers={}'.format(args.brokers))
    logging.info('readtopic={}'.format(args.readtopic))

    logging.info('creating kafka consumer')
    consumer = KafkaConsumer(
        args.readtopic,
        bootstrap_servers=args.brokers,
        value_deserializer=lambda val: loads(val.decode('utf-8')))
    logging.info("finished creating kafka consumer")

    if args.dbxtoken != '':
        dbx = dropbox.Dropbox(args.dbxtoken)
        logging.info('creating kafka producer')
        producer = KafkaProducer(bootstrap_servers=args.brokers,
                                 value_serializer=lambda x: dumps(x).encode('utf-8'))
        logging.info('finished creating kafka producer')
    else:
        dbx = None

    while True:
        for message in consumer:
            if message.value['url']:
                conn = psycopg2.connect(host=args.dbhost,
                                        port=5432,
                                        dbname=args.dbname,
                                        user=args.dbusername,
                                        password=args.dbpassword)
                cur = conn.cursor()
                image_url = message.value['url']
                query = 'UPDATE images SET STATUS=%s where URL=%s'
                cur.execute(query, ('Processed', image_url))
                logging.info('updated database for {}'.format(image_url))
                cur.close()
                conn.close()

                batch_count = batch_count + 1
                response = requests.get(image_url)
                img = Image.open(BytesIO(response.content))
                label = message.value['label']
                infilename = message.value['filename'].rpartition('.')[0]
                logging.info('received URL {}'.format(image_url))
                logging.info('received label {}'.format(label))
                logging.info('received filename {}'.format(infilename))
                logging.info('downloading image')
                image = np.array(img.getdata()).reshape(
                    1, img.size[0], img.size[1], 3).astype('float32')
                logging.info('downloaded image {} and {}'.format(image.shape, image.dtype))
                images = np.ndarray(shape=(2, 32, 32, 3))
                logging.info('created images storage')
                images[0] = image
                logging.info('assigned image to images')
                adversarial = attack.generate(image)
                logging.info('adversarial image generated')
                images[1] = adversarial
                logging.info('adversarial image assigned')
                preds = model.predict(images)
                orig_inf = np.argmax(preds[0])
                adv_inf = np.argmax(preds[1])
                logging.info('original inference: {} adversarial inference: {}'.format(orig_inf, adv_inf))

                if (orig_inf != adv_inf) and (dbx is not None):
                    fs = BytesIO()
                    imout = Image.fromarray(np.uint8(adversarial[0]))
                    imout.save(fs, format='jpeg')
                    outfilename = '/images/{}_{}_adv.jpg'.format(infilename, adv_inf)
                    logging.info('Uploading file')
                    dbx.files_upload(f=fs.getvalue(),
                                     path=outfilename,
                                     mode=dropbox.files.WriteMode('overwrite', None))

                if (batch_count == int(args.batchsize)) and (dbx is not None):
                    logging.info('Sending message {} to topic {}'.format(
                        batch_status_message, args.writetopic))
                    producer.send(args.writetopic, batch_status_message)
                    batch_count = 0
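# Hypothetical argument wiring for main(); the flag names mirror the attributes
# the function reads and are illustrative, not the original CLI.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='URL of the base model to attack')
    parser.add_argument('--min', default='0.0', help='minimum clip value')
    parser.add_argument('--max', default='255.0', help='maximum clip value')
    parser.add_argument('--attack', default='FGM', help='one of FGM, PGD')
    parser.add_argument('--brokers', default='localhost:9092')
    parser.add_argument('--readtopic', default='images')
    parser.add_argument('--writetopic', default='models')
    parser.add_argument('--batchsize', default='10')
    parser.add_argument('--dbxtoken', default='')
    parser.add_argument('--dbhost')
    parser.add_argument('--dbname')
    parser.add_argument('--dbusername')
    parser.add_argument('--dbpassword')
    main(parser.parse_args())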
def main(args):
    logging.info('brokers={}'.format(args.brokers))
    logging.info('readtopic={}'.format(args.readtopic))

    logging.info('creating kafka consumer')
    consumer = KafkaConsumer(
        args.readtopic,
        bootstrap_servers=args.brokers,
        value_deserializer=lambda val: loads(val.decode('utf-8')))
    logging.info("finished creating kafka consumer")

    if args.dbxtoken != '':
        dbx = dropbox.Dropbox(args.dbxtoken)
    else:
        dbx = None
        logging.info('No Dropbox token provided')

    while True:
        for message in consumer:
            if (message.value['status'] == 'Ready') and (message.value['modelurl']):
                logging.info('Received {}'.format(message.value))
                (X_train, y_train), (X_test, y_test) = cifar10.load_data()
                X_train = X_train.reshape(X_train.shape[0], 32, 32, 3).astype('float32')
                X_test = X_test.reshape(X_test.shape[0], 32, 32, 3).astype('float32')
                y_train = to_categorical(y_train, 10)
                y_test = to_categorical(y_test, 10)

                modelurl = message.value['modelurl']
                logging.info('model={}'.format(modelurl))
                model_filename = 'base_model.h5'
                location = os.path.join(ART_DATA_PATH, model_filename)
                try:
                    os.remove(location)
                except OSError:
                    pass
                path = get_file(model_filename, extract=False, path=ART_DATA_PATH, url=modelurl)
                kmodel = load_model(path)
                model = KerasClassifier(kmodel,
                                        use_logits=False,
                                        clip_values=[float(args.min), float(args.max)])
                logging.info('finished acquiring model')

                imagefiles = dbx.files_list_folder('/images')
                adversaries = False
                for dbximage in imagefiles.entries:
                    filepath = '/images/' + dbximage.name
                    filename = dbximage.name
                    label = filename.split('_')[-3]
                    response = dbx.files_download(filepath)[1]
                    img = Image.open(BytesIO(response.content))
                    logging.info('downloaded file {}'.format(dbximage.name))
                    image = np.array(img.getdata()).reshape(
                        1, img.size[0], img.size[1], 3).astype('float32')
                    if adversaries is False:
                        X_adv = image
                        y_adv = [label]
                        adversaries = True
                    else:
                        X_adv = np.append(X_adv, image, axis=0)
                        y_adv = np.append(y_adv, [label], axis=0)

                y_adv = to_categorical(y_adv, 10)
                X_train = np.append(X_train, X_adv, axis=0)
                y_train = np.append(y_train, y_adv, axis=0)

                if args.testmode == '0':
                    model.fit(X_train, y_train, nb_epochs=83, batch_size=50)  # Per ART 360 example
                else:
                    model.fit(X_train, y_train, nb_epochs=1, batch_size=50)  # Testing only

                model_basename = model_filename.split('.')[0]
                adv_model_name = model_basename + '_adv'
                adv_model_filename = adv_model_name + '.h5'
                model.save(adv_model_filename)
                outfilename = '/models/{}'.format(adv_model_filename)
                adv_model_fullpath = '/opt/app-root/src/.art/data/' + adv_model_filename
                mfile = open(adv_model_fullpath, 'rb')
                dbx.files_upload(f=mfile.read(),
                                 path=outfilename,
                                 mode=dropbox.files.WriteMode('overwrite', None))
                mfile.close()
                share_link = dbx.sharing_create_shared_link_with_settings(outfilename)

                conn = psycopg2.connect(host=args.dbhost,
                                        port=5432,
                                        dbname=args.dbname,
                                        user=args.dbusername,
                                        password=args.dbpassword)
                cur = conn.cursor()
                query = 'INSERT into models(URL, FILENAME, MODELNAME) VALUES(%s, %s, %s)'
                cur.execute(query, (share_link, adv_model_filename, adv_model_name))
                conn.commit()
                logging.info('updated database with new model')
                cur.close()
                conn.close()
x_train = x_train[0:num_samples_train]  # take the first 100 of the 50,000 training images
y_train = y_train[0:num_samples_train]  # 100 labels
x_test = x_test[0:num_samples_test]     # take the first 100 of the 10,000 test images
y_test = y_test[0:num_samples_test]     # 100 labels

# The 10 classes
class_descr = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
    'horse', 'ship', 'truck'
]

# 2. Evaluate the classifier
# 2.1 Load the pre-trained classifier (a ResNet architecture)
path = get_file(
    'cifar_resnet.h5',
    extract=False,
    path=DATA_PATH,
    url='https://www.dropbox.com/s/ta75pl4krya5djj/cifar_resnet.h5?dl=1')
classifier_model = load_model(path)
classifier = KerasClassifier(clip_values=(min_, max_),
                             model=classifier_model,
                             use_logits=False,
                             preprocessing=(0.5, 1))
classifier_model.summary()
"""
Total params: 470,218
Trainable params: 467,946
Non-trainable params: 2,272
"""

# 2.2 Evaluate the classifier on the first 100 test images
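# A minimal sketch of step 2.2 (assumed from the trailing comment above):
# accuracy of the pretrained classifier on the first 100 test images.
import numpy as np

preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.mean(preds == np.argmax(y_test, axis=1))
print('Accuracy on {} test images: {:.2%}'.format(len(x_test), acc))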