import torch

# default_device is assumed to come from sparseml.pytorch.utils
from sparseml.pytorch.utils import default_device


def test_default_device():
    default = default_device()

    if torch.cuda.is_available():
        assert "cuda" in default
    else:
        assert "cpu" in default
Example #2
from torch.nn import Module
from torch.optim import SGD, Optimizer

# clean_path, default_device, and train are assumed to come from the
# surrounding sparseml script and its utilities
from sparseml.pytorch.utils import default_device
from sparseml.utils import clean_path


def train_setup():
    def _create_optim(_model: Module) -> Optimizer:
        return SGD(
            _model.parameters(),
            lr=0.1,
            momentum=0.9,
            nesterov=True,
            weight_decay=0.0001,
        )

    # fill in the appropriate values below for your training flow
    train(
        working_dir=clean_path("."),
        config_path="/PATH/TO/CONFIG.yaml",
        model=None,
        train_dataset=None,
        val_dataset=None,
        batch_size=64,
        optim_const=_create_optim,
        loss=None,
        devices=default_device(),
    )
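
As a concrete illustration, the None placeholders above might be filled in
with a torchvision model and dataset. Everything below is hypothetical, and
the expected type of the loss argument depends on the train() helper's
signature, which is not shown here:

from torch.nn import CrossEntropyLoss
from torchvision.datasets import CIFAR10
from torchvision.models import resnet50
from torchvision.transforms import ToTensor


def train_setup_filled():
    # hypothetical stand-ins for the None placeholders in train_setup above
    transform = ToTensor()
    train(
        working_dir=clean_path("."),
        config_path="/PATH/TO/CONFIG.yaml",
        model=resnet50(num_classes=10),
        train_dataset=CIFAR10("./data", train=True, download=True,
                              transform=transform),
        val_dataset=CIFAR10("./data", train=False, download=True,
                            transform=transform),
        batch_size=64,
        optim_const=_create_optim,  # the constructor defined in train_setup
        loss=CrossEntropyLoss(),
        devices=default_device(),
    )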
Example #3
import json
from dataclasses import dataclass, field

# default_device is assumed to come from sparseml.pytorch.utils
from sparseml.pytorch.utils import default_device


@dataclass
class LRAnalysisArguments:
    """
    Represents the arguments we use in our PyTorch integration scripts for
    learning rate analysis.
    Using :class:`NmArgumentParser` we can turn this class into `argparse
    <https://docs.python.org/3/library/argparse.html#module-argparse>`__
    arguments that can be specified on the command line.

    :param batch_size: The batch size to use for analysis.
    :param arch_key: A str key representing the type of model to use,
        ex: resnet50.
    :param dataset: The dataset to use for analysis, ex: imagenet,
        imagenette, etc. Set to `imagefolder` for a custom dataset.
    :param dataset_path: Root path to dataset location.
    :param pretrained: The type of pretrained weights to use; default is true,
        which loads the default pretrained weights for the model. Otherwise,
        should be set to the desired weights type: [base, optim, optim-perf].
        To not load any weights, set to one of [none, false].
    :param pretrained_dataset: str representing the dataset to load pretrained
        weights for, if pretrained is set. Default is None, which loads the
        default dataset for the architecture. Ex: can be set to imagenet,
        cifar10, etc.
    :param model_kwargs: json object containing keyword arguments to be passed
        to the model constructor.
    :param dataset_kwargs: json object containing keyword arguments to be
        passed to the dataset constructor.
    :param model_tag: A str tag to use for the model for saving results under
        save-dir, defaults to the model arch and dataset used.
    :param save_dir: The path to the directory for saving results,
        default="pytorch_vision".
    :param device: str representing the device to run on (can also include ids
        for data parallel), ex: {cpu, cuda, cuda:0,1}.
    :param loader_num_workers: int number of workers to use for data loading,
        default=4.
    :param loader_pin_memory: bool to use pinned memory for data loading,
        default=True.
    :param checkpoint_path: A path to a previous checkpoint to load the state
        from and resume the state for. Also works with SparseZoo recipes;
        set to 'zoo' to automatically download and load weights associated
        with a recipe.
    :param init_lr: float representing the initial learning rate for analysis,
        default=1e-5.
    :param optim_args: Additional arguments to be passed in to the optimizer as
        a json object.
    :param final_lr: The final learning rate to use for the sensitivity
        analysis.
    :param steps_per_measurement: The number of steps (batches) to run for each
        measurement.
    """

    batch_size: int = field(
        metadata={"help": "The batch size to use for analysis"})
    arch_key: str = field(
        metadata={
            "help":
            "The type of model to use, ex: resnet50, vgg16, mobilenet "
            "put as help to see the full list"
            "(will raise an exception with the list)",
        })

    dataset: str = field(
        metadata={
            "help":
            "The dataset to use for analysis, "
            "ex: imagenet, imagenette, cifar10, etc. "
            "Set to imagefolder for a generic dataset setup "
            "with an image folder structure setup like imagenet or "
            "loadable by a dataset in sparseml.pytorch.datasets"
        })

    dataset_path: str = field(
        metadata={
            "help": "The root path to where the dataset is stored",
        })
    pretrained: str = field(
        default="true",
        metadata={
            "help":
            "The type of pretrained weights to use, "
            "default is true to load the default pretrained weights "
            "for the model. "
            "Otherwise should be set to the desired weights type: "
            "[base, optim, optim-perf]. "
            "To not load any weights set to one of [none, false]"
        },
    )

    pretrained_dataset: str = field(
        default=None,
        metadata={
            "help":
            "The dataset to load pretrained weights for if pretrained is"
            "set. Default is None which will load the default dataset for "
            "the architecture. Ex can be set to imagenet, cifar10, etc.",
        },
    )

    model_kwargs: json.loads = field(
        default_factory=lambda: {},
        metadata={
            "help":
            "Keyword arguments to be passed to model constructor, should "
            "be given as a json object"
        },
    )

    dataset_kwargs: json.loads = field(
        default_factory=lambda: {},
        metadata={
            "help":
            "Keyword arguments to be passed to dataset constructor,"
            " should be given as a json object",
        },
    )

    model_tag: str = field(
        default=None,
        metadata={
            "help":
            "A tag to use for the model for saving results under save-dir, "
            "defaults to the model arch and dataset used",
        },
    )

    save_dir: str = field(
        default="pytorch_vision",
        metadata={
            "help": "The path to the directory for saving results",
        },
    )

    device: str = field(
        default=default_device(),
        metadata={
            "help":
            "The device to run on (can also include ids for "
            "data parallel), ex:cpu, cuda, cuda:0,1"
        },
    )

    loader_num_workers: int = field(
        default=4,
        metadata={"help": "The number of workers to use for data loading"})

    loader_pin_memory: bool = field(
        default=True, metadata={"help": "Use pinned memory for data loading"})

    checkpoint_path: str = field(
        default=None,
        metadata={
            "help":
            "A path to a previous checkpoint to load the state from "
            "and resume the state for. If provided, pretrained will be"
            "ignored. If using a SparseZoo recipe, can also provide "
            "'zoo' to load the base weights associated with that recipe"
        },
    )

    init_lr: float = field(
        default=1e-5,
        metadata={
            "help":
            "The initial learning rate to use for the sensitivity analysis"
        },
    )

    optim_args: json.loads = field(
        default_factory=lambda: {},
        metadata={
            "help":
            "Additional args to be passed to the optimizer passed in"
            " as a json object"
        },
    )

    final_lr: float = field(
        default=0.5,
        metadata={
            "help":
            "The final learning rate to use for the sensitivity analysis",
        },
    )

    steps_per_measurement: int = field(
        default=20,
        metadata={
            "help": "The number of steps (batches) to run for each measurement"
        },
    )

    def __post_init__(self):
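        # for coco/voc datasets, default preprocessing_type to match the
        # architecture (ssd or yolo) when the caller didn't specify one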
        if "preprocessing_type" not in self.dataset_kwargs and (
                "coco" in self.dataset.lower()
                or "voc" in self.dataset.lower()):
            if "ssd" in self.arch_key.lower():
                self.dataset_kwargs["preprocessing_type"] = "ssd"
            elif "yolo" in self.arch_key.lower():
                self.dataset_kwargs["preprocessing_type"] = "yolo"

        self.is_main_process = True
        self.local_rank = -1
        self.rank = -1
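
The docstring above notes that :class:`NmArgumentParser` turns this dataclass
into argparse arguments. Below is a minimal usage sketch, assuming
NmArgumentParser exposes an HfArgumentParser-style API (the import path and
method name here are assumptions); under that model, the json.loads
annotations on model_kwargs, dataset_kwargs, and optim_args double as the CLI
type converters:

# minimal sketch, assuming an HfArgumentParser-style API for NmArgumentParser
from sparseml.pytorch.utils import NmArgumentParser


def parse_lr_analysis_args() -> LRAnalysisArguments:
    parser = NmArgumentParser(LRAnalysisArguments)
    # e.g. python lr_analysis.py --batch-size 64 --arch-key resnet50 \
    #     --dataset imagenette --dataset-path /PATH/TO/DATASET
    (args,) = parser.parse_args_into_dataclasses()
    return args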
Example #4
import argparse
import json
import os
from dataclasses import dataclass, field
from typing import List, Optional

import torch

# these helpers are assumed to come from sparseml.pytorch.utils
from sparseml.pytorch.utils import default_device, set_deterministic_seeds


@dataclass
class TrainingArguments:
    """
    Represents the arguments we use in our PyTorch integration scripts for
    training tasks.

    Using :class:`NmArgumentParser` we can turn this class into `argparse
    <https://docs.python.org/3/library/argparse.html#module-argparse>`__
    arguments that can be specified on the command line.

    :param train_batch_size: An int representing the training batch size.
    :param test_batch_size: An int representing the test batch size.
    :param arch_key: A str key representing the type of model to use,
        ex: resnet50.
    :param dataset: The dataset to use for training, ex: imagenet, imagenette,
        etc. Set to `imagefolder` for a custom dataset.
    :param dataset_path: Root path to dataset location.
    :param local_rank: DDP argument set by PyTorch in DDP mode, default=-1.
    :param checkpoint_path: A path to a previous checkpoint to load the state
        from and resume the state for. Also works with SparseZoo recipes;
        set to 'zoo' to automatically download and load weights associated
        with a recipe.
    :param init_lr: float representing the initial learning rate for training,
        default=1e-9.
    :param optim_args: Additional arguments to be passed in to the optimizer as
        a json object.
    :param recipe_path: The path to the yaml file containing the modifiers and
        schedule to apply them with. Can also provide a SparseZoo stub prefixed
        with 'zoo:'.
    :param sparse_transfer_learn: Boolean to enable sparse transfer learning
        modifiers to enforce the sparsity for already sparse layers. The
        modifiers are added to the ones to be loaded from the recipe-path.
    :param eval_mode: bool to start evaluation mode so that the model can be
        evaluated on the desired dataset.
    :param optim: str representing the optimizer type to use, one of
        [SGD, Adam, RMSprop].
    :param logs_dir: The path to the directory for saving logs.
    :param save_best_after: int epoch number to start saving the best
        validation result after until the end of training.
    :param save_epochs: int epochs to save checkpoints at.
    :param use_mixed_precision: bool to train model using mixed precision.
        Supported environments are single GPU and multiple GPUs using
        DistributedDataParallel with one GPU per process.
    :param debug_steps: int representing the number of steps to run for
        training and testing in debug mode, default=-1.
    :param pretrained: The type of pretrained weights to use; default is true,
        which loads the default pretrained weights for the model. Otherwise,
        should be set to the desired weights type: [base, optim, optim-perf].
        To not load any weights, set to one of [none, false].
    :param pretrained_dataset: str representing the dataset to load pretrained
        weights for, if pretrained is set. Default is None, which loads the
        default dataset for the architecture. Ex: can be set to imagenet,
        cifar10, etc.
    :param model_kwargs: json object containing keyword arguments to be
        passed to model constructor.
    :param dataset_kwargs: json object containing keyword arguments to be
        passed to the dataset constructor.
    :param model_tag: A str tag to use for the model for saving results
        under save-dir, defaults to the model arch and dataset used.
    :param save_dir: The path to the directory for saving results,
        default="pytorch_vision".
    :param device: str representing the device to run on (can also include ids
        for data parallel), ex: {cpu, cuda, cuda:0,1}.
    :param loader_num_workers: int number of workers to use for data loading,
        default=4.
    :param loader_pin_memory: bool to use pinned memory for data loading,
        default=True.
    """

    train_batch_size: int = field(
        metadata={"help": "The batch size to use while training"})

    test_batch_size: int = field(
        metadata={"help": "The batch size to use while testing"})
    arch_key: str = field(
        metadata={
            "help":
            "The type of model to use, ex: resnet50, vgg16, mobilenet "
            "put as help to see the full list (will raise an exception"
            "with the list)",
        })

    dataset: str = field(
        metadata={
            "help":
            "The dataset to use for training, "
            "ex: imagenet, imagenette, cifar10, etc. "
            "Set to imagefolder for a generic dataset setup "
            "with an image folder structure setup like imagenet or"
            " loadable by a dataset in sparseml.pytorch.datasets"
        })

    dataset_path: str = field(
        metadata={
            "help": "The root path to where the dataset is stored",
        })
    local_rank: int = field(
        default=-1,
        metadata={
            "keep_underscores": True,
            "help": argparse.SUPPRESS,
        },
    )

    checkpoint_path: str = field(
        default=None,
        metadata={
            "help":
            "A path to a previous checkpoint to load the state from "
            "and resume the state for. If provided, pretrained will "
            "be ignored . If using a SparseZoo recipe, can also "
            "provide 'zoo' to load the base weights associated with "
            "that recipe"
        },
    )

    init_lr: float = field(
        default=1e-9,
        metadata={
            "help":
            "The initial learning rate to use while training, "
            "the actual initial value used should be set by the"
            " sparseml recipe"
        },
    )

    optim_args: json.loads = field(
        default_factory=lambda: {
            "momentum": 0.9,
            "nesterov": True,
            "weight_decay": 0.0001,
        },
        metadata={
            "help":
            "Additional args to be passed to the optimizer passed in"
            " as a json object",
        },
    )

    recipe_path: str = field(
        default=None,
        metadata={
            "help":
            "The path to the yaml file containing the modifiers and "
            "schedule to apply them with. Can also provide a "
            "SparseZoo stub prefixed with 'zoo:' with an optional "
            "'?recipe_type=' argument"
        },
    )

    sparse_transfer_learn: Optional[bool] = field(
        default=False,
        metadata={
            "help":
            "Enable sparse transfer learning modifiers to enforce the "
            "sparsity for already sparse layers. The modifiers are "
            "added to the ones to be loaded from the recipe-path"
        },
    )

    eval_mode: Optional[bool] = field(
        default=False,
        metadata={
            "help":
            "Puts into evaluation mode so that the model can be "
            "evaluated on the desired dataset"
        },
    )

    optim: str = field(
        default="SGD",
        metadata={
            "help": "The optimizer type to use, one of [SGD, Adam, RMSprop]"
        },
    )

    logs_dir: str = field(
        default=os.path.join("pytorch_vision_train", "tensorboard-logs"),
        metadata={
            "help": "The path to the directory for saving logs",
        },
    )

    save_best_after: int = field(
        default=-1,
        metadata={
            "help":
            "start saving the best validation result after the given "
            "epoch completes until the end of training"
        },
    )
    save_epochs: List[int] = field(
        default_factory=lambda: [],
        metadata={"help": "epochs to save checkpoints at"})

    use_mixed_precision: Optional[bool] = field(
        default=False,
        metadata={
            "help":
            "Trains model using mixed precision. Supported "
            "environments are single GPU and multiple GPUs using "
            "DistributedDataParallel with one GPU per process"
        },
    )

    debug_steps: int = field(
        default=-1,
        metadata={
            "help":
            "Amount of steps to run for training and testing for a "
            "debug mode"
        },
    )

    pretrained: str = field(
        default="true",
        metadata={
            "help":
            "The type of pretrained weights to use, "
            "default is true to load the default pretrained weights for "
            "the model. Otherwise should be set to the desired weights "
            "type: [base, optim, optim-perf]. To not load any weights set "
            "to one of [none, false]"
        },
    )

    pretrained_dataset: str = field(
        default=None,
        metadata={
            "help":
            "The dataset to load pretrained weights for if pretrained is "
            "set. Default is None which will load the default dataset for "
            "the architecture. Ex can be set to imagenet, cifar10, etc",
        },
    )

    model_kwargs: json.loads = field(
        default_factory=lambda: {},
        metadata={
            "help":
            "Keyword arguments to be passed to model constructor, should "
            "be given as a json object"
        },
    )

    dataset_kwargs: json.loads = field(
        default_factory=lambda: {},
        metadata={
            "help":
            "Keyword arguments to be passed to dataset constructor, "
            "should be given as a json object",
        },
    )

    model_tag: str = field(
        default=None,
        metadata={
            "help":
            "A tag to use for the model for saving results under save-dir, "
            "defaults to the model arch and dataset used",
        },
    )

    save_dir: str = field(
        default="pytorch_vision",
        metadata={
            "help": "The path to the directory for saving results",
        },
    )

    device: str = field(
        default=default_device(),
        metadata={
            "help":
            "The device to run on (can also include ids for data "
            "parallel), ex: cpu, cuda, cuda:0,1"
        },
    )

    loader_num_workers: int = field(
        default=4,
        metadata={"help": "The number of workers to use for data loading"})

    loader_pin_memory: bool = field(
        default=True, metadata={"help": "Use pinned memory for data loading"})

    def __post_init__(self):
        # add ddp args
        env_world_size = int(os.environ.get("WORLD_SIZE", 1))
        self.world_size = env_world_size

        env_rank = int(os.environ.get("RANK", -1))
        self.rank = env_rank

        self.is_main_process = self.rank in [
            -1,
            0,
        ]  # non DDP execution or 0th DDP process

        # modify training batch size for the given world size
        assert self.train_batch_size % self.world_size == 0, (
            f"Invalid training batch size for world size {self.world_size} "
            f"given batch size {self.train_batch_size}. "
            f"World size must divide training batch size evenly.")

        self.train_batch_size = self.train_batch_size // self.world_size

        if "preprocessing_type" not in self.dataset_kwargs and (
                "coco" in self.dataset.lower()
                or "voc" in self.dataset.lower()):
            if "ssd" in self.arch_key.lower():
                self.dataset_kwargs["preprocessing_type"] = "ssd"
            elif "yolo" in self.arch_key.lower():
                self.dataset_kwargs["preprocessing_type"] = "yolo"

        if self.local_rank != -1:
            torch.distributed.init_process_group(backend="nccl",
                                                 init_method="env://")
            set_deterministic_seeds(0)

        self.approximate = False
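
__post_init__ reads WORLD_SIZE and RANK, environment variables set by
PyTorch's distributed launchers (e.g. torchrun), and divides train_batch_size
by the world size, so the configured value is the global batch size. A small
illustration with hypothetical values:

import os

# simulate a 4-process DDP launch; real launchers set these variables
os.environ["WORLD_SIZE"] = "4"
os.environ["RANK"] = "0"

args = TrainingArguments(
    train_batch_size=256,  # global batch size, must divide evenly by 4
    test_batch_size=256,
    arch_key="resnet50",
    dataset="imagenette",
    dataset_path="/PATH/TO/DATASET",
)
assert args.train_batch_size == 64  # per-process batch size after division
assert args.is_main_process  # rank 0 (and -1, non-DDP) is the main process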