Example #1
    def _gather_env_variables(self):
        """
        Update the extra env variable dictionary to pass into container or run on host
        """
        self.extra_env_vars["ARMORY_GITHUB_TOKEN"] = os.getenv(
            "ARMORY_GITHUB_TOKEN", default="")
        self.extra_env_vars["ARMORY_PRIVATE_S3_ID"] = os.getenv(
            "ARMORY_PRIVATE_S3_ID", default="")
        self.extra_env_vars["ARMORY_PRIVATE_S3_KEY"] = os.getenv(
            "ARMORY_PRIVATE_S3_KEY", default="")
        self.extra_env_vars["ARMORY_INCLUDE_SUBMISSION_BUCKETS"] = os.getenv(
            "ARMORY_INCLUDE_SUBMISSION_BUCKETS", default="")

        if not self.armory_global_config["verify_ssl"]:
            self.extra_env_vars["VERIFY_SSL"] = "false"

        if self.config["sysconfig"].get("use_gpu", None):
            gpus = self.config["sysconfig"].get("gpus")
            if gpus is not None:
                self.extra_env_vars["NVIDIA_VISIBLE_DEVICES"] = gpus
        if self.config["sysconfig"].get("set_pythonhashseed"):
            self.extra_env_vars["PYTHONHASHSEED"] = "0"

        # Because we may want to allow specification of ARMORY_TORCH_HOME
        # this constant path is placed here among the other imports
        self.extra_env_vars["TORCH_HOME"] = paths.runtime_paths().pytorch_dir

        self.extra_env_vars[environment.ARMORY_VERSION] = armory.__version__
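A minimal sketch of how the collected extra_env_vars might be passed into a container, assuming the docker Python SDK; the image tag and command below are placeholders, not Armory's actual values:

# Hypothetical usage sketch: pass the gathered environment variables into a
# container via the docker SDK. Image tag and command are placeholders.
import docker

def run_with_extra_env(extra_env_vars: dict, image: str = "twosixarmory/pytorch:placeholder"):
    client = docker.from_env()
    return client.containers.run(
        image,
        command="python -c \"import os; print(os.environ.get('TORCH_HOME'))\"",
        environment=extra_env_vars,  # dict of VAR_NAME -> value, as built above
        detach=True,
    )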
Example #2
File: base.py Project: ajati/armory
def _scenario_setup(config: dict):
    """
    Creates scenario-specific tmp and output directories.

    Also pulls external repositories ahead of running the scenario in case the scenario
    itself is found in the external repository.
    """

    runtime_paths = paths.runtime_paths()

    scenario_output_dir = os.path.join(runtime_paths.output_dir,
                                       config["eval_id"])
    # scenario_tmp_dir = os.path.join(runtime_paths.tmp_dir, config["eval_id"])
    scenario_tmp_dir = os.path.join(runtime_paths.tmp_dir)
    os.makedirs(scenario_output_dir, exist_ok=True)
    os.makedirs(scenario_tmp_dir, exist_ok=True)
    logger.warning(f"Outputs will be written to {scenario_output_dir}")

    # Download any external repositories and add them to the sys path for use
    if config["sysconfig"].get("external_github_repo", None):
        external_repo_dir = os.path.join(scenario_tmp_dir, "external")
        external_repo.download_and_extract_repos(
            config["sysconfig"]["external_github_repo"],
            external_repo_dir=external_repo_dir,
        )
Example #3
 def fit(self, *args, save_weights_file=None, **kwargs):
     super(SmoothedDeepSpeech, self).fit(*args, **kwargs)
     if save_weights_file:
         saved_model_dir = paths.runtime_paths().saved_model_dir
         save_weights_path = os.path.join(saved_model_dir,
                                          save_weights_file)
         dic = self._model.state_dict()
         torch.save(dic, save_weights_path)
Example #4
    def _run_interactive_bash(
        self,
        runner: ArmoryInstance,
        check_run=False,
        num_eval_batches=None,
        skip_benign=None,
        skip_attack=None,
        validate_config=None,
    ) -> None:
        user_group_id = self.get_id()
        lines = [
            "Container ready for interactive use.",
            bold(
                "*** In a new terminal, run the following to attach to the container:"
            ),
            bold(
                red(
                    f"    docker exec -it -u {user_group_id} {runner.docker_container.short_id} bash"
                )
            ),
        ]
        if self.config.get("scenario"):
            options = self._build_options(
                check_run=check_run,
                num_eval_batches=num_eval_batches,
                skip_benign=skip_benign,
                skip_attack=skip_attack,
                validate_config=validate_config,
            )
            tmp_dir = os.path.join(self.host_paths.tmp_dir, self.config["eval_id"])
            os.makedirs(tmp_dir)
            self.tmp_config = os.path.join(tmp_dir, "interactive-config.json")
            docker_config_path = os.path.join(
                paths.runtime_paths().tmp_dir,
                self.config["eval_id"],
                "interactive-config.json",
            )
            with open(self.tmp_config, "w") as f:
                f.write(json.dumps(self.config, sort_keys=True, indent=4) + "\n")

            lines.extend(
                [
                    bold("*** To run your scenario in the container:"),
                    bold(
                        red(
                            f"    python -m armory.scenarios.base {docker_config_path}{options} --load-config-from-file"
                        )
                    ),
                    bold("*** To gracefully shut down container, press: Ctrl-C"),
                    "",
                ]
            )
        logger.info("\n".join(lines))
        while True:
            time.sleep(1)
Example #5
def maybe_download_weights_from_s3(weights_file: str) -> str:
    """

    :param weights_file:
    :return:
    """
    saved_model_dir = paths.runtime_paths().saved_model_dir
    filepath = os.path.join(saved_model_dir, weights_file)

    if os.path.isfile(filepath):
        logger.info(f"Using available {weights_file} in Armory `saved_model_dir`")
    else:
        logger.info(
            f"{weights_file} not found in Armory `saved_model_dir`. Attempting to pull weights from S3"
        )
        try:
            download_file_from_s3(
                "armory-public-data",
                f"model-weights/{weights_file}",
                f"{saved_model_dir}/{weights_file}",
            )
        except KeyError:
            if (
                "ARMORY_INCLUDE_SUBMISSION_BUCKETS" in os.environ
                and os.getenv("ARMORY_INCLUDE_SUBMISSION_BUCKETS") != ""
            ):
                try:
                    download_private_file_from_s3(
                        "armory-submission-data",
                        f"model-weights/{weights_file}",
                        f"{saved_model_dir}/{weights_file}",
                    )

                except KeyError:
                    raise ValueError(
                        (
                            f"{weights_file} was not found in the armory public & submission S3 buckets."
                        )
                    )
            else:
                raise ValueError(
                    (
                        f"{weights_file} was not found in the armory S3 bucket. If "
                        "you're attempting to load a custom set of weights for "
                        "your model be sure that they are available in the armory "
                        "`saved_model_dir` directory on your host environment."
                    )
                )
    return filepath
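A hedged usage sketch for the function above; the weights file name is illustrative, and the commented model load assumes a compatible torch module exists:

# Hypothetical usage of maybe_download_weights_from_s3 (file name is illustrative).
import torch

weights_path = maybe_download_weights_from_s3("example_model_weights.pt")
state_dict = torch.load(weights_path, map_location="cpu")
# model.load_state_dict(state_dict)  # assuming a compatible torch.nn.Module named `model`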
Example #6
 def __init__(self, scheme, exec_path, return_all=False):
     self.rover_path = exec_path
     self.rover_directory = os.path.join(paths.runtime_paths().tmp_dir,
                                         "rover")
     if not os.path.exists(self.rover_directory):
         os.makedirs(self.rover_directory)
     self.outfile = os.path.join(self.rover_directory, 'out.txt')
     if scheme == 'freq':
         self.rover_options = ['-m', "avgconf", "-a", "1.0", "-c", '0.0']
     elif scheme == 'conf':
         self.rover_options = ['-m', "avgconf"]
     else:
         assert scheme == 'max'
         self.rover_options = ['-m', 'maxconf']
     self.return_all = return_all
Example #7
def get_art_model(model_kwargs, wrapper_kwargs, weights_file=None):
    input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    labels_ph = tf.placeholder(tf.int32, shape=[None, 10])
    training_ph = tf.placeholder(tf.bool, shape=())

    x = tf.layers.conv2d(input_ph,
                         filters=4,
                         kernel_size=(5, 5),
                         activation=tf.nn.relu)
    x = tf.layers.max_pooling2d(x, 2, 2)
    x = tf.layers.conv2d(x,
                         filters=10,
                         kernel_size=(5, 5),
                         activation=tf.nn.relu)
    x = tf.layers.max_pooling2d(x, 2, 2)
    x = tf.layers.flatten(x)
    x = tf.layers.dense(x, 100, activation=tf.nn.relu)
    logits = tf.layers.dense(x, 10)

    loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=logits,
                                        onehot_labels=labels_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if weights_file:
        # Load Model using preferred save/restore method
        filepath = maybe_download_weights_from_s3(weights_file)
        tar = tarfile.open(filepath)
        tar.extractall(path=paths.runtime_paths().saved_model_dir)
        tar.close()
        # Restore variables...

    wrapped_model = TFClassifier(clip_values=(0.0, 1.0),
                                 input_ph=input_ph,
                                 output=logits,
                                 labels_ph=labels_ph,
                                 train=train_op,
                                 loss=loss,
                                 learning=training_ph,
                                 sess=sess,
                                 **wrapper_kwargs)

    return wrapped_model
Example #8
    def _save(self, output: dict):
        """
        Save json-formattable output to a file
        """

        runtime_paths = paths.runtime_paths()
        scenario_output_dir = os.path.join(runtime_paths.output_dir,
                                           output["config"]["eval_id"])

        override_name = output["config"]["sysconfig"].get(
            "output_filename", None)
        scenario_name = (override_name if override_name else
                         output["config"]["scenario"]["name"])
        filename = f"{scenario_name}_{output['timestamp']}.json"
        logger.info(
            f"Saving evaluation results to <output_dir>/{filename}")
        with open(os.path.join(scenario_output_dir, filename), "w") as f:
            f.write(json.dumps(output, sort_keys=True, indent=4) + "\n")
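A small sketch, using only the standard library plus armory.paths, of reading a saved results file back; the eval_id and filename arguments are placeholders:

# Hypothetical sketch: load a results file written by _save above, which stores
# <output_dir>/<eval_id>/<scenario_name>_<timestamp>.json
import json
import os

from armory import paths

def load_results(eval_id: str, filename: str) -> dict:
    output_dir = paths.runtime_paths().output_dir
    with open(os.path.join(output_dir, eval_id, filename)) as f:
        return json.load(f)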
Example #9
    def _run_interactive_bash(self, runner: ArmoryInstance) -> None:
        user_id = os.getuid() if os.name != "nt" else 0
        group_id = os.getgid() if os.name != "nt" else 0
        lines = [
            "Container ready for interactive use.",
            bold(
                "*** In a new terminal, run the following to attach to the container:"
            ),
            bold(
                red(
                    f"    docker exec -it -u {user_id}:{group_id} {runner.docker_container.short_id} bash"
                )
            ),
        ]
        if self.config.get("scenario"):
            tmp_dir = os.path.join(self.host_paths.tmp_dir, self.config["eval_id"])
            os.makedirs(tmp_dir)
            self.tmp_config = os.path.join(tmp_dir, "interactive-config.json")
            docker_config_path = os.path.join(
                paths.runtime_paths().tmp_dir,
                self.config["eval_id"],
                "interactive-config.json",
            )
            with open(self.tmp_config, "w") as f:
                f.write(json.dumps(self.config, sort_keys=True, indent=4) + "\n")

            lines.extend(
                [
                    bold("*** To run your scenario in the container:"),
                    bold(
                        red(
                            f"    python -m armory.scenarios.base {docker_config_path} --load-config-from-file"
                        )
                    ),
                    bold("*** To gracefully shut down container, press: Ctrl-C"),
                    "",
                ]
            )
        logger.info("\n".join(lines))
        while True:
            time.sleep(1)
Example #10
def _download_weights(weights_file, force_download=False):
    if not weights_file:
        return

    saved_model_dir = paths.runtime_paths().saved_model_dir
    filepath = os.path.join(saved_model_dir, weights_file)

    if os.path.isfile(filepath) and not force_download:
        logger.info(f"Model weights file {filepath} found, skipping.")
    else:
        if os.path.isfile(filepath):
            logger.info("Forcing overwrite of old file.")
            os.remove(filepath)

        logger.info(f"Downloading weights file {weights_file} from s3...")

        download_file_from_s3(
            "armory-public-data",
            f"model-weights/{weights_file}",
            f"{saved_model_dir}/{weights_file}",
        )
Example #11
    def __init__(self,
                 load_weights_file,
                 cfg_file,
                 apply_fit: bool = True,
                 apply_predict: bool = True):
        from segan.models import *
        saved_model_dir = paths.runtime_paths().saved_model_dir
        model_path = os.path.join(saved_model_dir, load_weights_file)
        opts_path = os.path.join(saved_model_dir, cfg_file)
        with open(opts_path, 'r') as cfg_f:
            args = ArgParser(json.load(cfg_f))
        args.cuda = torch.cuda.is_available()
        self.device = "cuda" if args.cuda else "cpu"
        if hasattr(args, 'wsegan') and args.wsegan:
            self.model = WSEGAN(args)
        else:
            self.model = SEGAN(args)

        self.model.G.load_pretrained(model_path, True)
        self._apply_fit = apply_fit
        self._apply_predict = apply_predict
Example #12
def get_art_model(model_kwargs, wrapper_kwargs, weights_file=None):
    input_ph = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    labels_ph = tf.placeholder(tf.int32, shape=[None, 10])
    training_ph = tf.placeholder(tf.bool, shape=())

    # Conditional for handling training phase or inference phase
    output = tf.cond(
        training_ph,
        true_fn=lambda: _training_pass(input_ph),
        false_fn=lambda: _inference_pass(input_ph),
    )

    loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=output,
                                        onehot_labels=labels_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.003)
    train_op = optimizer.minimize(loss)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if weights_file:
        # Load Model using preferred save/restore method
        filepath = maybe_download_weights_from_s3(weights_file)
        tar = tarfile.open(filepath)
        tar.extractall(path=paths.runtime_paths().saved_model_dir)
        tar.close()
        # Restore variables...

    wrapped_model = TFClassifier(clip_values=(0.0, 1.0),
                                 input_ph=input_ph,
                                 output=output,
                                 labels_ph=labels_ph,
                                 train=train_op,
                                 loss=loss,
                                 learning=training_ph,
                                 sess=sess,
                                 **wrapper_kwargs)

    return wrapped_model
Example #13
def _scenario_setup(config: dict):
    """
    Creates scenario-specific tmp and output directories.

    Also pulls external repositories ahead of running the scenario in case the scenario
    itself is found in the external repository.
    """

    runtime_paths = paths.runtime_paths()

    scenario_output_dir = os.path.join(runtime_paths.output_dir,
                                       config["eval_id"])
    scenario_tmp_dir = os.path.join(runtime_paths.tmp_dir, config["eval_id"])
    os.makedirs(scenario_output_dir, exist_ok=True)
    os.makedirs(scenario_tmp_dir, exist_ok=True)
    logger.warning(f"Outputs will be written to {scenario_output_dir}")

    # Download any external repositories and add them to the sys path for use
    if config["sysconfig"].get("external_github_repo", None):
        external_repo_dir = os.path.join(scenario_tmp_dir, "external")
        external_repo.download_and_extract_repos(
            config["sysconfig"]["external_github_repo"],
            external_repo_dir=external_repo_dir,
        )
    pythonpaths = config["sysconfig"].get("external_github_repo_pythonpath")
    if isinstance(pythonpaths, str):
        pythonpaths = [pythonpaths]
    elif pythonpaths is None:
        pythonpaths = []
    for pythonpath in pythonpaths:
        external_repo.add_pythonpath(pythonpath,
                                     external_repo_dir=external_repo_dir)
    local_paths = config["sysconfig"].get("local_repo_path")
    if isinstance(local_paths, str):
        local_paths = [local_paths]
    elif local_paths is None:
        local_paths = []
    for local_path in local_paths:
        external_repo.add_local_repo(local_path)
Example #14
 def __init__(self,
              *args,
              voting_kwargs,
              niters_forward=1,
              niters_backward=1,
              batch_backward=0,
              batch_forward=0,
              load_weights_file=None,
              use_half=False,
              random_init=False,
              **kwargs):
     filename = load_weights_file if load_weights_file else "librispeech_pretrained_v2.pth"
     saved_model_dir = paths.runtime_paths().saved_model_dir
     model_path = os.path.join(saved_model_dir, filename)
     model = load_model(device="cpu",
                        model_path=model_path,
                        use_half=use_half)
     optimizer = torch.optim.AdamW(model.parameters(),
                                   lr=1e-4,
                                   weight_decay=1e-5,
                                   amsgrad=False)
     super(SmoothedDeepSpeech, self).__init__(model,
                                              *args,
                                              optimizer=optimizer,
                                              **kwargs)
     self.model_path = model_path
     self.use_half = use_half
     self.niters_forward = niters_forward
     self.niters_backward = niters_backward
     if random_init:
         for p in self._model.parameters():
             if p.dim() > 1:
                  torch.nn.init.xavier_uniform_(p)  # in-place init; non-underscore form is deprecated
             else:
                 torch.nn.init.zeros_(p)
     self.decoder = load_decoder_with_scores(self.decoder)
     self.set_voting_module(**voting_kwargs, **kwargs)
     self.batch_backward = batch_backward
     self.batch_forward = batch_forward
Example #15
from importlib import import_module
import os

import numpy as np
import pytest

from armory.data import datasets, adversarial_datasets
from armory import paths
from armory.utils.metrics import (
    object_detection_AP_per_class,
    apricot_patch_targeted_AP_per_class,
)

DATASET_DIR = paths.runtime_paths().dataset_dir


@pytest.mark.usefixtures("ensure_armory_dirs")
def test_tf1_mnist():
    classifier_module = import_module("armory.baseline_models.tf_graph.mnist")
    classifier_fn = getattr(classifier_module, "get_art_model")
    classifier = classifier_fn(model_kwargs={}, wrapper_kwargs={})

    train_dataset = datasets.mnist(
        split="train",
        epochs=1,
        batch_size=600,
        dataset_dir=DATASET_DIR,
    )
    test_dataset = datasets.mnist(
        split="test",
        epochs=1,
Example #16
 def _set_output_dir(self, config):
     runtime_paths = paths.runtime_paths()
     self.scenario_output_dir = os.path.join(runtime_paths.output_dir,
                                             config["eval_id"])
Example #17
def download_and_extract_repo(external_repo_name: str,
                              external_repo_dir: str = None) -> None:
    """
    Downloads and extracts an external repository for use within ARMORY. The external
    repositories project root will be added to the sys path.

    Private repositories require an `ARMORY_GITHUB_TOKEN` environment variable.
    :param external_repo_name: String name of "organization/repo-name" or "organization/repo-name@branch"
    """
    verify_ssl = get_verify_ssl()

    if external_repo_dir is None:
        external_repo_dir = paths.runtime_paths().external_repo_dir

    os.makedirs(external_repo_dir, exist_ok=True)
    headers = {}

    if "@" in external_repo_name:
        org_repo_name, branch = external_repo_name.split("@")
    else:
        org_repo_name = external_repo_name
        branch = "master"
    repo_name = org_repo_name.split("/")[-1]

    if "ARMORY_GITHUB_TOKEN" in os.environ and os.getenv(
            "ARMORY_GITHUB_TOKEN") != "":
        headers = {
            "Authorization": f'token {os.getenv("ARMORY_GITHUB_TOKEN")}'
        }

    response = requests.get(
        f"https://api.github.com/repos/{org_repo_name}/tarball/{branch}",
        headers=headers,
        stream=True,
        verify=verify_ssl,
    )

    if response.status_code == 200:
        logger.info(f"Downloading external repo: {external_repo_name}")

        tar_filename = os.path.join(external_repo_dir, repo_name + ".tar.gz")
        with open(tar_filename, "wb") as f:
            f.write(response.raw.read())
        tar = tarfile.open(tar_filename, "r:gz")
        dl_directory_name = tar.getnames()[0]
        tar.extractall(path=external_repo_dir)

        # Always overwrite existing repositories to keep them at HEAD
        final_dir_name = os.path.join(external_repo_dir, repo_name)
        if os.path.isdir(final_dir_name):
            shutil.rmtree(final_dir_name)
        os.rename(
            os.path.join(external_repo_dir, dl_directory_name),
            final_dir_name,
        )
        add_path(final_dir_name, include_parent=True)

    else:
        raise ConnectionError(
            "Unable to download repository. If it's private make sure "
            "`ARMORY_GITHUB_TOKEN` environment variable is set\n"
            f"status_code is {response.status_code}\n"
            f"full response is {response.text}")
Example #18
0
def add_pythonpath(subpath: str, external_repo_dir: str = None) -> None:
    if external_repo_dir is None:
        external_repo_dir = paths.runtime_paths().external_repo_dir

    path = os.path.join(external_repo_dir, subpath)
    add_path(path, include_parent=True)
Example #19
def add_local_repo(local_repo_name: str) -> None:
    local_repo_dir = paths.runtime_paths().local_git_dir
    path = os.path.join(local_repo_dir, local_repo_name)
    add_path(path, include_parent=True)
Example #20
Model contributed by: MITRE Corporation
"""

# BEGIN hacks
# Save deep speech model to armory
# This can be made less hacky after this ART issue:
# https://github.com/Trusted-AI/adversarial-robustness-toolbox/issues/693
import os
import logging
from typing import Optional

logger = logging.getLogger(__name__)

from armory import paths

ART_DATA_PATH = os.path.join(paths.runtime_paths().saved_model_dir, "art")
os.makedirs(ART_DATA_PATH, exist_ok=True)
from art.estimators.speech_recognition import pytorch_deep_speech

pytorch_deep_speech.ART_DATA_PATH = ART_DATA_PATH
logger.warning(f"Saving art deep speech model weights to {ART_DATA_PATH}")
# END hacks

from art.estimators.speech_recognition import PyTorchDeepSpeech


def get_art_model(
    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
) -> PyTorchDeepSpeech:
    return PyTorchDeepSpeech(**wrapper_kwargs)
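A minimal usage sketch mirroring the pattern in Example #15; the empty kwargs are placeholders that would normally come from the scenario config's "model" section:

# Hypothetical usage of get_art_model defined above.
estimator = get_art_model(model_kwargs={}, wrapper_kwargs={}, weights_path=None)
# `estimator` is a PyTorchDeepSpeech instance ready for use in a scenario.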
Example #21
def _generator_from_tfds(
    dataset_name: str,
    split_type: str,
    batch_size: int,
    epochs: int,
    dataset_dir: str,
    preprocessing_fn: Callable,
    as_supervised: bool = True,
    supervised_xy_keys=None,
    download_and_prepare_kwargs=None,
    variable_length=False,
    shuffle_files=True,
    cache_dataset: bool = True,
    framework: str = "numpy",
    lambda_map: Callable = None,
) -> Union[ArmoryDataGenerator, tf.data.Dataset]:
    """
    If as_supervised=False, must designate keys as a tuple in supervised_xy_keys:
        supervised_xy_keys=('video', 'label')  # ucf101 dataset
    if variable_length=True and batch_size > 1:
        output batches are 1D np.arrays of objects
    lambda_map - if not None, mapping function to apply to dataset elements
    """
    if not dataset_dir:
        dataset_dir = paths.runtime_paths().dataset_dir

    if cache_dataset:
        _cache_dataset(
            dataset_dir,
            dataset_name=dataset_name,
        )

    default_graph = tf.compat.v1.keras.backend.get_session().graph

    ds, ds_info = tfds.load(
        dataset_name,
        split=split_type,
        as_supervised=as_supervised,
        data_dir=dataset_dir,
        with_info=True,
        download_and_prepare_kwargs=download_and_prepare_kwargs,
        shuffle_files=shuffle_files,
    )
    if not as_supervised:
        try:
            x_key, y_key = supervised_xy_keys
        except (TypeError, ValueError):
            raise ValueError(
                f"When as_supervised=False, supervised_xy_keys must be a (x_key, y_key)"
                f" tuple, not {supervised_xy_keys}")
        if not isinstance(x_key, str) or not isinstance(y_key, str):
            raise ValueError(f"supervised_xy_keys be a tuple of strings,"
                             f" not {type(x_key), type(y_key)}")
        ds = ds.map(lambda x: (x[x_key], x[y_key]))
    if lambda_map is not None:
        ds = ds.map(lambda_map)

    ds = ds.repeat(epochs)
    if shuffle_files:
        ds = ds.shuffle(batch_size * 10, reshuffle_each_iteration=True)
    if variable_length and batch_size > 1:
        ds = ds.batch(1, drop_remainder=False)
    else:
        ds = ds.batch(batch_size, drop_remainder=False)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)

    if framework == "numpy":
        ds = tfds.as_numpy(ds, graph=default_graph)
        generator = ArmoryDataGenerator(
            ds,
            size=ds_info.splits[split_type].num_examples,
            batch_size=batch_size,
            epochs=epochs,
            preprocessing_fn=preprocessing_fn,
            variable_length=bool(variable_length and batch_size > 1),
        )

    elif framework == "tf":
        generator = ds

    elif framework == "pytorch":
        torch_ds = _get_pytorch_dataset(ds)
        generator = torch.utils.data.DataLoader(torch_ds,
                                                batch_size=None,
                                                collate_fn=lambda x: x,
                                                num_workers=0)

    else:
        raise ValueError(
            f"`framework` must be one of ['tf', 'pytorch', 'numpy']. Found {framework}"
        )

    return generator
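A hedged sketch of how a dataset-specific wrapper might call _generator_from_tfds; the TFDS name:version string and the default arguments below are assumptions, not Armory's actual values:

def mnist_example(
    split_type: str = "train",
    epochs: int = 1,
    batch_size: int = 64,
    dataset_dir: str = None,
    preprocessing_fn: Callable = None,
):
    # Hypothetical wrapper; "mnist:3.0.1" is an assumed TFDS name:version.
    return _generator_from_tfds(
        "mnist:3.0.1",
        split_type=split_type,
        batch_size=batch_size,
        epochs=epochs,
        dataset_dir=dataset_dir,
        preprocessing_fn=preprocessing_fn,
        framework="numpy",  # returns an ArmoryDataGenerator
    )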
Example #22
def _generator_from_tfds(
    dataset_name: str,
    split: str,
    batch_size: int,
    epochs: int,
    dataset_dir: str,
    preprocessing_fn: Callable,
    label_preprocessing_fn: Callable = None,
    as_supervised: bool = True,
    supervised_xy_keys=None,
    download_and_prepare_kwargs=None,
    variable_length=False,
    variable_y=False,
    shuffle_files=True,
    cache_dataset: bool = True,
    framework: str = "numpy",
    lambda_map: Callable = None,
    context=None,
    class_ids=None,
    index=None,
) -> Union[ArmoryDataGenerator, tf.data.Dataset]:
    """
    If as_supervised=False, must designate keys as a tuple in supervised_xy_keys:
        supervised_xy_keys=('video', 'label')  # ucf101 dataset
        supervised_xy_keys=('speech', 'text')  # librispeech-dev-clean with ASR
    if variable_length=True and batch_size > 1:
        output batches are 1D np.arrays of objects
    lambda_map - if not None, mapping function to apply to dataset elements
    """
    if not dataset_dir:
        dataset_dir = paths.runtime_paths().dataset_dir

    if cache_dataset:
        _cache_dataset(
            dataset_dir, dataset_name=dataset_name,
        )

    default_graph = tf.compat.v1.keras.backend.get_session().graph

    if not isinstance(split, str):
        raise ValueError(f"split must be str, not {type(split)}")

    try:
        ds, ds_info = tfds.load(
            dataset_name,
            split=split,
            as_supervised=as_supervised,
            data_dir=dataset_dir,
            with_info=True,
            download_and_prepare_kwargs=download_and_prepare_kwargs,
            shuffle_files=shuffle_files,
        )
    except AssertionError as e:
        if not str(e).startswith("Unrecognized instruction format: "):
            raise
        logger.warning(f"Caught AssertionError in TFDS load split argument: {e}")
        logger.warning(f"Attempting to parse split {split}")
        split = parse_split_index(split)
        logger.warning(f"Replacing split with {split}")
        ds, ds_info = tfds.load(
            dataset_name,
            split=split,
            as_supervised=as_supervised,
            data_dir=dataset_dir,
            with_info=True,
            download_and_prepare_kwargs=download_and_prepare_kwargs,
            shuffle_files=shuffle_files,
        )

    if not as_supervised:
        try:
            x_key, y_key = supervised_xy_keys
        except (TypeError, ValueError):
            raise ValueError(
                f"When as_supervised=False, supervised_xy_keys must be a (x_key, y_key)"
                f" tuple, not {supervised_xy_keys}"
            )
        for key in [x_key, y_key]:
            if not (isinstance(key, str) or isinstance(key, tuple)):
                raise ValueError(
                    f"supervised_xy_keys must be a tuple of strings or a tuple of tuple of strings"
                    f" not {type(x_key), type(y_key)}"
                )
        if isinstance(x_key, tuple):
            if isinstance(y_key, tuple):
                raise ValueError(
                    "Only one of (x_key, y_key) can be a tuple while the other must be a string."
                )
            for k in x_key:
                if not (isinstance(k, str)):
                    raise ValueError(
                        "supervised_xy_keys must be a tuple of strings or a tuple of tuple of strings"
                    )
            ds = ds.map(lambda x: (tuple(x[k] for k in x_key), x[y_key]))
        elif isinstance(y_key, tuple):
            for k in y_key:
                if not (isinstance(k, str)):
                    raise ValueError(
                        "supervised_xy_keys must be a tuple of strings or a tuple of tuple of strings"
                    )
            ds = ds.map(lambda x: (x[x_key], tuple(x[k] for k in y_key)))
        else:
            ds = ds.map(lambda x: (x[x_key], x[y_key]))
    if lambda_map is not None:
        ds = ds.map(lambda_map)

    dataset_size = ds_info.splits[split].num_examples

    # Add class-based filtering
    if class_ids is not None:
        if split == "train":
            logger.warning(
                "Filtering by class entails iterating over the whole dataset and thus "
                "can be very slow if using the 'train' split"
            )
        if isinstance(class_ids, list):
            ds, dataset_size = filter_by_class(ds, class_ids=class_ids)
        elif isinstance(class_ids, int):
            ds, dataset_size = filter_by_class(ds, class_ids=[class_ids])
        else:
            raise ValueError(
                f"class_ids must be a list, int, or None, not {type(class_ids)}"
            )

    # Add index-based filtering
    if isinstance(index, list):
        ds, dataset_size = filter_by_index(ds, index, dataset_size)
    elif isinstance(index, str):
        ds, dataset_size = filter_by_str_slice(ds, index, dataset_size)
    elif index is not None:
        raise ValueError(f"index must be a list, str, or None, not {type(index)}")

    ds = ds.repeat(epochs)
    if shuffle_files:
        ds = ds.shuffle(batch_size * 10, reshuffle_each_iteration=True)
    if variable_length and batch_size > 1:
        ds = ds.batch(1, drop_remainder=False)
    else:
        ds = ds.batch(batch_size, drop_remainder=False)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)

    if framework != "numpy" and (
        preprocessing_fn is not None or label_preprocessing_fn is not None
    ):
        raise ValueError(
            f"Data/label preprocessing functions only supported for numpy framework.  Selected {framework} framework"
        )

    if framework == "numpy":
        ds = tfds.as_numpy(ds, graph=default_graph)
        generator = ArmoryDataGenerator(
            ds,
            size=dataset_size,
            batch_size=batch_size,
            epochs=epochs,
            preprocessing_fn=preprocessing_fn,
            label_preprocessing_fn=label_preprocessing_fn,
            variable_length=bool(variable_length and batch_size > 1),
            variable_y=bool(variable_y and batch_size > 1),
            context=context,
        )

    elif framework == "tf":
        generator = ds

    elif framework == "pytorch":
        torch_ds = _get_pytorch_dataset(ds)
        generator = torch.utils.data.DataLoader(
            torch_ds, batch_size=None, collate_fn=lambda x: x, num_workers=0
        )

    else:
        raise ValueError(
            f"`framework` must be one of ['tf', 'pytorch', 'numpy']. Found {framework}"
        )

    return generator
Example #23
def maybe_download_weights_from_s3(weights_file: str,
                                   *,
                                   auto_expand_tars: bool = False) -> str:
    """

    :param weights_file:
    :param auto_expand_tars:
    :return:
    """
    saved_model_dir = paths.runtime_paths().saved_model_dir
    filepath = os.path.join(saved_model_dir, weights_file)

    if os.path.isfile(filepath):
        logger.info(
            f"Using available {weights_file} in Armory `saved_model_dir`")
    else:
        logger.info(
            f"{weights_file} not found in Armory `saved_model_dir`. Attempting to pull weights from S3"
        )
        try:
            download_file_from_s3(
                "armory-public-data",
                f"model-weights/{weights_file}",
                f"{saved_model_dir}/{weights_file}",
            )
        except KeyError:
            if ("ARMORY_INCLUDE_SUBMISSION_BUCKETS" in os.environ
                    and os.getenv("ARMORY_INCLUDE_SUBMISSION_BUCKETS") != ""):
                try:
                    download_private_file_from_s3(
                        "armory-submission-data",
                        f"model-weights/{weights_file}",
                        f"{saved_model_dir}/{weights_file}",
                    )

                except KeyError:
                    raise ValueError((
                        f"{weights_file} was not found in the armory public & submission S3 buckets."
                    ))
            else:
                raise ValueError((
                    f"{weights_file} was not found in the armory S3 bucket. If "
                    "you're attempting to load a custom set of weights for "
                    "your model be sure that they are available in the armory "
                    "`saved_model_dir` directory on your host environment."))

    if auto_expand_tars:
        if tarfile.is_tarfile(filepath):
            logger.debug(
                f"Detected model weights file {weights_file} as a tar archive")
            with tarfile.open(filepath) as tar:
                # check if the tarfile contains a directory containing all its members
                # ie if the tarfile expands out entirely into a subdirectory
                dirs = [fi.name for fi in tar.getmembers() if fi.isdir()]
                commonpath = os.path.commonpath(tar.getnames())
                if not commonpath or commonpath not in dirs:
                    raise PermissionError((
                        f"{weights_file} does not expand into a subdirectory."
                        f" Weights files submitted as tarballs must expand into a subdirectory."
                    ))
                full_path = os.path.join(saved_model_dir, commonpath)
                if os.path.exists(full_path):
                    logger.warning(
                        f"Model weights folder {commonpath} from {weights_file} already exists"
                    )
                    logger.warning(
                        f"Skipping auto-unpacking of {weights_file}")
                    logger.warning(
                        f"Delete {commonpath} manually to force unpacking")
                else:
                    logger.info(
                        f"Auto-unpacking model weights from {weights_file}")
                    tar.extractall(path=saved_model_dir)
            filepath = commonpath

    return filepath
Example #24
    def _evaluate(self, config: dict) -> dict:
        """
        Evaluate the config and return a results dict
        """

        model_config = config["model"]
        classifier, preprocessing_fn = load_model(model_config)

        defense_config = config.get("defense") or {}
        defense_type = defense_config.get("type")

        if defense_type in ["Preprocessor", "Postprocessor"]:
            logger.info(
                f"Applying internal {defense_type} defense to classifier")
            classifier = load_defense_internal(config["defense"], classifier)

        if model_config["fit"]:
            classifier.set_learning_phase(True)
            logger.info(
                f"Fitting model {model_config['module']}.{model_config['name']}..."
            )
            fit_kwargs = model_config["fit_kwargs"]

            logger.info(
                f"Loading train dataset {config['dataset']['name']}...")
            train_data = load_dataset(
                config["dataset"],
                epochs=fit_kwargs["nb_epochs"],
                split_type="train",
                preprocessing_fn=preprocessing_fn,
            )
            if defense_type == "Trainer":
                logger.info(f"Training with {defense_type} defense...")
                defense = load_defense_wrapper(config["defense"], classifier)
                defense.fit_generator(train_data, **fit_kwargs)
            else:
                logger.info("Fitting classifier on clean train dataset...")
                classifier.fit_generator(train_data, **fit_kwargs)

            ################################################################
            #### Save weights at the end of training
            ################################################################
            ckpt_name = model_config['module'].replace('.', '_')
            ckpt_name += '_pretrained' if model_config['model_kwargs'][
                'pretrained'] else ''
            ckpt_name += '_epochs%d.pth' % model_config['fit_kwargs'][
                'nb_epochs']
            classifier.save(
                osp.join(paths.runtime_paths().saved_model_dir, ckpt_name))
            logger.info(f"Saved classifier {ckpt_name} ...")

        if defense_type == "Transform":
            # NOTE: Transform currently not supported
            logger.info(
                f"Transforming classifier with {defense_type} defense...")
            defense = load_defense_wrapper(config["defense"], classifier)
            classifier = defense()

        classifier.set_learning_phase(False)

        # Evaluate the ART classifier on benign test examples
        logger.info(f"Loading test dataset {config['dataset']['name']}...")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
        )
        logger.info("Running inference on benign examples...")
        metrics_logger = metrics.MetricsLogger.from_config(config["metric"])

        for x, y in tqdm(test_data, desc="Benign"):
            y_pred = classifier.predict(x)
            metrics_logger.update_task(y, y_pred)
        metrics_logger.log_task()

        # Evaluate the ART classifier on adversarial test examples
        logger.info("Generating or loading / testing adversarial examples...")

        attack_config = config["attack"]
        attack_type = attack_config.get("type")
        targeted = bool(attack_config.get("kwargs", {}).get("targeted"))
        if targeted and attack_config.get("use_label"):
            raise ValueError("Targeted attacks cannot have 'use_label'")
        if attack_type == "preloaded":
            test_data = load_adversarial_dataset(
                attack_config,
                epochs=1,
                split_type="adversarial",
                preprocessing_fn=preprocessing_fn,
            )
        else:
            attack = load_attack(attack_config, classifier)
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=preprocessing_fn,
            )
        for x, y in tqdm(test_data, desc="Attack"):
            if attack_type == "preloaded":
                x, x_adv = x
                if targeted:
                    y, y_target = y
            elif attack_config.get("use_label"):
                x_adv = attack.generate(x=x, y=y)
            elif targeted:
                raise NotImplementedError(
                    "Requires generation of target labels")
                # x_adv = attack.generate(x=x, y=y_target)
            else:
                x_adv = attack.generate(x=x)

            y_pred_adv = classifier.predict(x_adv)
            if targeted:
                # NOTE: does not remove data points where y == y_target
                metrics_logger.update_task(y_target,
                                           y_pred_adv,
                                           adversarial=True)
            else:
                metrics_logger.update_task(y, y_pred_adv, adversarial=True)
            metrics_logger.update_perturbation(x, x_adv)
        metrics_logger.log_task(adversarial=True, targeted=targeted)
        return metrics_logger.results()
Example #25
def locate_data(dataset_name, dataset_ver, split):
    data_dir = paths.runtime_paths().dataset_dir
    ds_dir = os.path.join(data_dir, dataset_name, dataset_ver)

    return list(glob.glob(f"{ds_dir}/*{split}*.tfrecord*"))
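A brief sketch of consuming the located TFRecord shards with tf.data; the dataset name, version, and split are placeholders:

# Hypothetical usage of locate_data with a TFRecord reader.
import tensorflow as tf

files = locate_data("mnist", "3.0.1", "test")  # placeholder name/version/split
if files:
    raw_ds = tf.data.TFRecordDataset(files)
    # Each element is a serialized tf.train.Example; parse it with the dataset's feature spec.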