def test_default_device():
    """Check that ``default_device()`` names CUDA when available, else the CPU."""
    device = default_device()
    # The expected substring depends on whether torch can see a CUDA device.
    expected = "cuda" if torch.cuda.is_available() else "cpu"
    assert expected in device
def train_setup():
    """
    Template for launching a training run.

    Builds an SGD optimizer factory and forwards it, together with placeholder
    values, to ``train``. The ``None`` entries and the config path are
    placeholders that must be replaced before this is usable.
    """
    sgd_settings = {
        "lr": 0.1,
        "momentum": 0.9,
        "nesterov": True,
        "weight_decay": 0.0001,
    }

    def _create_optim(_model: Module) -> Optimizer:
        # Build the optimizer over the given model's parameters.
        return SGD(_model.parameters(), **sgd_settings)

    # fill in the appropriate values below for your training flow
    train_settings = {
        "working_dir": clean_path("."),
        "config_path": "/PATH/TO/CONFIG.yaml",
        "model": None,
        "train_dataset": None,
        "val_dataset": None,
        "batch_size": 64,
        "optim_const": _create_optim,
        "loss": None,
        "devices": default_device(),
    }
    train(**train_settings)
class LRAnalysisArguments:
    """
    Represents the arguments we use in our PyTorch integration scripts for
    learning rate analysis.

    Using :class:`NmArgumentParser` we can turn this class into `argparse
    <https://docs.python.org/3/library/argparse.html#module-argparse>`__
    arguments that can be specified on the command line.

    :param batch_size: The batch size to use for analysis.
    :param arch_key: A str key representing the type of model to use,
        ex:resnet50.
    :param dataset: The dataset to use for analysis, ex imagenet, imagenette,
        etc; Set to `imagefolder` for a custom dataset.
    :param dataset_path: Root path to dataset location.
    :param pretrained: The type of pretrained weights to use default is true
        to load the default pretrained weights for the model. Otherwise should
        be set to the desired weights type: [base, optim, optim-perf]; To not
        load any weights set to one of [none, false].
    :param pretrained_dataset: str representing the dataset to load pretrained
        weights for; if pretrained is set; Default is None which will load the
        default dataset for the architecture; Ex can be set to imagenet,
        cifar10, etc.
    :param model_kwargs: json object containing keyword arguments to be passed
        to the model constructor.
    :param dataset_kwargs: json object to load keyword arguments to be passed
        to dataset constructor.
    :param model_tag: A str tag to use for the model for saving results under
        save-dir, defaults to the model arch and dataset used.
    :param save_dir: The path to the directory for saving results,
        default="pytorch_vision".
    :param device: str representing the device to run on (can also include ids
        for data parallel), ex:{cpu, cuda, cuda:0,1}.
    :param loader_num_workers: int number of workers to use for data loading,
        default=4.
    :param loader_pin_memory: bool to use pinned memory for data loading,
        default=True.
    :param checkpoint_path: A path to a previous checkpoint to load the state
        from and resume the state for; Also works with SparseZoo recipes; Set
        to zoo to automatically download and load weights associated with a
        recipe.
    :param init_lr: float representing the initial learning for analysis,
        default=1e-5.
    :param optim_args: Additional arguments to be passed in to the optimizer
        as a json object.
    :param final_lr: The final learning rate to use for the sensitivity
        analysis.
    :param steps_per_measurement: The number of steps (batches) to run for
        each measurement.

    ``__post_init__`` additionally sets ``is_main_process`` (True),
    ``local_rank`` (-1) and ``rank`` (-1) on the instance.
    """

    # NOTE(review): ``field(...)`` and ``__post_init__`` only take effect when
    # the class is processed by ``dataclasses.dataclass``; no decorator is
    # visible on this class -- confirm one is applied.

    batch_size: int = field(
        metadata={"help": "The batch size to use for analysis"},
    )
    arch_key: str = field(
        metadata={
            "help": "The type of model to use, ex: resnet50, vgg16, mobilenet "
            "put as help to see the full list "
            "(will raise an exception with the list)",
        },
    )
    dataset: str = field(
        metadata={
            "help": "The dataset to use for analysis, "
            "ex: imagenet, imagenette, cifar10, etc. "
            "Set to imagefolder for a generic dataset setup "
            "with an image folder structure setup like imagenet or "
            "loadable by a dataset in sparseml.pytorch.datasets"
        },
    )
    dataset_path: str = field(
        metadata={
            "help": "The root path to where the dataset is stored",
        },
    )
    # The default is the bool True although the annotation is str; the
    # annotation is left untouched because the argument parser may read it.
    pretrained: str = field(
        default=True,
        metadata={
            "help": "The type of pretrained weights to use, "
            "default is true to load the default pretrained weights "
            "for the model. "
            "Otherwise should be set to the desired weights type: "
            "[base, optim, optim-perf]. "
            "To not load any weights set to one of [none, false]"
        },
    )
    pretrained_dataset: str = field(
        default=None,
        metadata={
            "help": "The dataset to load pretrained weights for if pretrained is "
            "set. Default is None which will load the default dataset for "
            "the architecture. Ex can be set to imagenet, cifar10, etc.",
        },
    )
    model_kwargs: json.loads = field(
        default_factory=dict,
        metadata={
            "help": "Keyword arguments to be passed to model constructor, should "
            "be given as a json object"
        },
    )
    dataset_kwargs: json.loads = field(
        default_factory=dict,
        metadata={
            "help": "Keyword arguments to be passed to dataset constructor,"
            " should be given as a json object",
        },
    )
    model_tag: str = field(
        default=None,
        metadata={
            "help": "A tag to use for the model for saving results under save-dir, "
            "defaults to the model arch and dataset used",
        },
    )
    save_dir: str = field(
        default="pytorch_vision",
        metadata={
            "help": "The path to the directory for saving results",
        },
    )
    # default_device() is evaluated once, when the class body is executed.
    device: str = field(
        default=default_device(),
        metadata={
            "help": "The device to run on (can also include ids for "
            "data parallel), ex:cpu, cuda, cuda:0,1"
        },
    )
    loader_num_workers: int = field(
        default=4,
        metadata={"help": "The number of workers to use for data loading"},
    )
    loader_pin_memory: bool = field(
        default=True,
        metadata={"help": "Use pinned memory for data loading"},
    )
    checkpoint_path: str = field(
        default=None,
        metadata={
            "help": "A path to a previous checkpoint to load the state from "
            "and resume the state for. If provided, pretrained will be "
            "ignored. If using a SparseZoo recipe, can also provide "
            "'zoo' to load the base weights associated with that recipe"
        },
    )
    init_lr: float = field(
        default=1e-5,
        metadata={
            "help": "The initial learning rate to use for the sensitivity analysis"
        },
    )
    optim_args: json.loads = field(
        default_factory=dict,
        metadata={
            "help": "Additional args to be passed to the optimizer passed in"
            " as a json object"
        },
    )
    final_lr: float = field(
        default=0.5,
        metadata={
            "help": "The final learning rate to use for the sensitivity analysis",
        },
    )
    steps_per_measurement: int = field(
        default=20,
        metadata={
            "help": "The number of steps (batches) to run for each measurement"
        },
    )

    def __post_init__(self):
        """
        Fill in a default ``preprocessing_type`` dataset kwarg and set the
        process-related attributes to their fixed values.
        """
        # Only infer a preprocessing type when the caller did not supply one.
        if "preprocessing_type" not in self.dataset_kwargs:
            dataset_name = self.dataset.lower()
            if "coco" in dataset_name or "voc" in dataset_name:
                arch_name = self.arch_key.lower()
                if "ssd" in arch_name:
                    self.dataset_kwargs["preprocessing_type"] = "ssd"
                elif "yolo" in arch_name:
                    self.dataset_kwargs["preprocessing_type"] = "yolo"

        self.is_main_process = True
        self.local_rank = -1
        self.rank = -1
class TrainingArguments:
    """
    Represents the arguments we use in our PyTorch integration scripts for
    training tasks.

    Using :class:`NmArgumentParser` we can turn this class into `argparse
    <https://docs.python.org/3/library/argparse.html#module-argparse>`__
    arguments that can be specified on the command line.

    :param train_batch_size: An int representing the training batch size.
    :param test_batch_size: An int representing the test batch size.
    :param arch_key: A str key representing the type of model to use,
        ex:resnet50.
    :param dataset: The dataset to use for training, ex imagenet, imagenette,
        etc; Set to `imagefolder` for a custom dataset.
    :param dataset_path: Root path to dataset location.
    :param local_rank: DDP argument set by PyTorch in DDP mode, default -1.
    :param checkpoint_path: A path to a previous checkpoint to load the state
        from and resume the state for; Also works with SparseZoo recipes; Set
        to zoo to automatically download and load weights associated with a
        recipe.
    :param init_lr: float representing the initial learning for training,
        default=1e-9.
    :param optim_args: Additional arguments to be passed in to the optimizer
        as a json object.
    :param recipe_path: The path to the yaml file containing the modifiers and
        schedule to apply them with; Can also provide a SparseZoo stub
        prefixed with 'zoo:'.
    :param sparse_transfer_learn: Boolean to enable sparse transfer learning
        modifiers to enforce the sparsity for already sparse layers. The
        modifiers are added to the ones to be loaded from the recipe-path.
    :param eval_mode: bool to start evaluation mode so that the model can be
        evaluated on the desired dataset.
    :param optim: str representing the optimizer type to use, one of
        [SGD, Adam, RMSprop].
    :param logs_dir: The path to the directory for saving logs.
    :param save_best_after: int epoch number to start saving the best
        validation result after until the end of training.
    :param save_epochs: int epochs to save checkpoints at.
    :param use_mixed_precision: bool to train model using mixed precision.
        Supported environments are single GPU and multiple GPUs using
        DistributedDataParallel with one GPU per process.
    :param debug_steps: int representing amount of steps to run for training
        and testing for debug mode default=-1.
    :param pretrained: The type of pretrained weights to use default is true
        to load the default pretrained weights for the model. Otherwise should
        be set to the desired weights type: [base, optim, optim-perf]; To not
        load any weights set to one of [none, false].
    :param pretrained_dataset: str representing the dataset to load pretrained
        weights for if pretrained is set; Default is None which will load the
        default dataset for the architecture; Ex can be set to imagenet,
        cifar10, etc.
    :param model_kwargs: json object containing keyword arguments to be passed
        to model constructor.
    :param dataset_kwargs: json object to load keyword arguments to be passed
        to dataset constructor.
    :param model_tag: A str tag to use for the model for saving results under
        save-dir, defaults to the model arch and dataset used.
    :param save_dir: The path to the directory for saving results,
        default="pytorch_vision".
    :param device: str representing the device to run on (can also include ids
        for data parallel), ex:{cpu, cuda, cuda:0,1}.
    :param loader_num_workers: int number of workers to use for data loading,
        default=4.
    :param loader_pin_memory: bool to use pinned memory for data loading,
        default=True.

    ``__post_init__`` additionally sets ``world_size`` and ``rank`` (read from
    the ``WORLD_SIZE`` / ``RANK`` environment variables), ``is_main_process``
    and ``approximate``, and divides ``train_batch_size`` by ``world_size``.
    """

    # NOTE(review): ``field(...)`` and ``__post_init__`` only take effect when
    # the class is processed by ``dataclasses.dataclass``; no decorator is
    # visible on this class -- confirm one is applied.

    train_batch_size: int = field(
        metadata={"help": "The batch size to use while training"},
    )
    test_batch_size: int = field(
        metadata={"help": "The batch size to use while testing"},
    )
    arch_key: str = field(
        metadata={
            "help": "The type of model to use, ex: resnet50, vgg16, mobilenet "
            "put as help to see the full list (will raise an exception "
            "with the list)",
        },
    )
    dataset: str = field(
        metadata={
            "help": "The dataset to use for training, "
            "ex: imagenet, imagenette, cifar10, etc. "
            "Set to imagefolder for a generic dataset setup "
            "with an image folder structure setup like imagenet or"
            " loadable by a dataset in sparseml.pytorch.datasets"
        },
    )
    dataset_path: str = field(
        metadata={
            "help": "The root path to where the dataset is stored",
        },
    )
    local_rank: int = field(
        default=-1,
        metadata={
            "keep_underscores": True,
            "help": argparse.SUPPRESS,
        },
    )
    checkpoint_path: str = field(
        default=None,
        metadata={
            "help": "A path to a previous checkpoint to load the state from "
            "and resume the state for. If provided, pretrained will "
            "be ignored. If using a SparseZoo recipe, can also "
            "provide 'zoo' to load the base weights associated with "
            "that recipe"
        },
    )
    init_lr: float = field(
        default=1e-9,
        metadata={
            "help": "The initial learning rate to use while training, "
            "the actual initial value used should be set by the"
            " sparseml recipe"
        },
    )
    optim_args: json.loads = field(
        default_factory=lambda: {
            "momentum": 0.9,
            "nesterov": True,
            "weight_decay": 0.0001,
        },
        metadata={
            "help": "Additional args to be passed to the optimizer passed in"
            " as a json object",
        },
    )
    recipe_path: str = field(
        default=None,
        metadata={
            "help": "The path to the yaml file containing the modifiers and "
            "schedule to apply them with. Can also provide a "
            "SparseZoo stub prefixed with 'zoo:' with an optional "
            "'?recipe_type=' argument"
        },
    )
    sparse_transfer_learn: Optional[bool] = field(
        default=False,
        metadata={
            "help": "Enable sparse transfer learning modifiers to enforce the "
            "sparsity for already sparse layers. The modifiers are "
            "added to the ones to be loaded from the recipe-path"
        },
    )
    eval_mode: Optional[bool] = field(
        default=False,
        metadata={
            "help": "Puts into evaluation mode so that the model can be "
            "evaluated on the desired dataset"
        },
    )
    optim: str = field(
        default="SGD",
        metadata={
            "help": "The optimizer type to use, one of [SGD, Adam, RMSprop]"
        },
    )
    logs_dir: str = field(
        default=os.path.join("pytorch_vision_train", "tensorboard-logs"),
        metadata={
            "help": "The path to the directory for saving logs",
        },
    )
    save_best_after: int = field(
        default=-1,
        metadata={
            "help": "start saving the best validation result after the given "
            "epoch completes until the end of training"
        },
    )
    save_epochs: List[int] = field(
        default_factory=list,
        metadata={"help": "epochs to save checkpoints at"},
    )
    use_mixed_precision: Optional[bool] = field(
        default=False,
        metadata={
            "help": "Trains model using mixed precision. Supported "
            "environments are single GPU and multiple GPUs using "
            "DistributedDataParallel with one GPU per process"
        },
    )
    debug_steps: int = field(
        default=-1,
        metadata={
            "help": "Amount of steps to run for training and testing for a "
            "debug mode"
        },
    )
    # The default is the bool True although the annotation is str; the
    # annotation is left untouched because the argument parser may read it.
    pretrained: str = field(
        default=True,
        metadata={
            "help": "The type of pretrained weights to use, "
            "default is true to load the default pretrained weights for "
            "the model. Otherwise should be set to the desired weights "
            "type: [base, optim, optim-perf]. To not load any weights set "
            "to one of [none, false]"
        },
    )
    pretrained_dataset: str = field(
        default=None,
        metadata={
            "help": "The dataset to load pretrained weights for if pretrained is "
            "set. Default is None which will load the default dataset for "
            "the architecture. Ex can be set to imagenet, cifar10, etc",
        },
    )
    model_kwargs: json.loads = field(
        default_factory=dict,
        metadata={
            "help": "Keyword arguments to be passed to model constructor, should "
            "be given as a json object"
        },
    )
    dataset_kwargs: json.loads = field(
        default_factory=dict,
        metadata={
            "help": "Keyword arguments to be passed to dataset constructor, "
            "should be given as a json object",
        },
    )
    model_tag: str = field(
        default=None,
        metadata={
            "help": "A tag to use for the model for saving results under save-dir, "
            "defaults to the model arch and dataset used",
        },
    )
    save_dir: str = field(
        default="pytorch_vision",
        metadata={
            "help": "The path to the directory for saving results",
        },
    )
    # default_device() is evaluated once, when the class body is executed.
    device: str = field(
        default=default_device(),
        metadata={
            "help": "The device to run on (can also include ids for data "
            "parallel), ex: cpu, cuda, cuda:0,1"
        },
    )
    loader_num_workers: int = field(
        default=4,
        metadata={"help": "The number of workers to use for data loading"},
    )
    loader_pin_memory: bool = field(
        default=True,
        metadata={"help": "Use pinned memory for data loading"},
    )

    def __post_init__(self):
        """
        Derive the distributed-run attributes, split the training batch size
        across processes, fill in a default ``preprocessing_type`` dataset
        kwarg and, when ``local_rank`` is set, initialise the process group.

        :raises AssertionError: if ``train_batch_size`` is not evenly
            divisible by the world size.
        """
        # add ddp args
        self.world_size = int(os.environ.get("WORLD_SIZE", 1))
        self.rank = int(os.environ.get("RANK", -1))
        # non DDP execution or 0th DDP process
        self.is_main_process = self.rank in (-1, 0)

        # modify training batch size for given world size
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O`` (which would silently floor-divide the
        # batch size below); the exception type and message are unchanged.
        if self.train_batch_size % self.world_size != 0:
            raise AssertionError(
                f"Invalid training batch size for world size {self.world_size} "
                f"given batch size {self.train_batch_size}. "
                f"world size must divide training batch size evenly."
            )
        self.train_batch_size = self.train_batch_size // self.world_size

        # Only infer a preprocessing type when the caller did not supply one.
        if "preprocessing_type" not in self.dataset_kwargs:
            dataset_name = self.dataset.lower()
            if "coco" in dataset_name or "voc" in dataset_name:
                arch_name = self.arch_key.lower()
                if "ssd" in arch_name:
                    self.dataset_kwargs["preprocessing_type"] = "ssd"
                elif "yolo" in arch_name:
                    self.dataset_kwargs["preprocessing_type"] = "yolo"

        if self.local_rank != -1:
            torch.distributed.init_process_group(
                backend="nccl", init_method="env://"
            )
            # NOTE(review): seeding is assumed to belong to the DDP branch;
            # confirm against the intended nesting.
            set_deterministic_seeds(0)

        self.approximate = False