def __init__(self, config: Config, model: torch.nn.Module):
    """Build the trainer's optimizer, scheduler, and sparsifier from config.

    Args:
        config: trainer config; when ``early_stop_after > 0`` evaluation
            must be enabled, since early stopping needs eval metrics.
        model: the model whose parameters will be optimized.
    """
    if config.early_stop_after > 0:
        # Fixed typo in the message ("evalution" -> "evaluation").
        assert config.do_eval, "can't do early stopping when not running evaluation"
    # FP16 training routes through a wrapper that also needs the fp16 args
    # and the gradient-accumulation count.
    if precision.FP16_ENABLED:
        self.optimizer: torch.optim.Optimizer = create_optimizer(
            config.fp16_args,
            model,
            config.optimizer,
            config.num_accumulated_batches,
        )
    else:
        self.optimizer: torch.optim.Optimizer = create_optimizer(
            config.optimizer, model
        )
    # Fall back to the no-op base Scheduler/Sparsifier when not configured.
    self.scheduler: torch.optim.lr_scheduler = (
        create_scheduler(config.scheduler, self.optimizer)
        if config.scheduler
        else Scheduler()
    )
    self.sparsifier: Sparsifier = (
        create_sparsifier(config.sparsifier) if config.sparsifier else Sparsifier()
    )
    self.config = config
def __init__(self, config: Config, model: torch.nn.Module):
    """Create the optimizer and scheduler, then hand both the model and
    optimizer to the precision layer for (possible) wrapping."""
    base_optimizer: torch.optim.Optimizer = create_optimizer(config.optimizer, model)
    if config.scheduler:
        self.scheduler: torch.optim.lr_scheduler = create_scheduler(
            config.scheduler, base_optimizer
        )
    else:
        # No scheduler configured: use the no-op base Scheduler.
        self.scheduler: torch.optim.lr_scheduler = Scheduler()
    # NOTE(review): precision.initialize returns a (model, optimizer) pair —
    # presumably wrapping them for mixed precision; confirm in precision module.
    model, self.optimizer = precision.initialize(model, base_optimizer)
    self.config = config
def from_config(cls, task_config, metadata=None, model_state=None):
    """
    Create the task from config, and optionally load metadata/model_state

    This function will create components including :class:`~DataHandler`,
    :class:`~Trainer`, :class:`~Optimizer`, :class:`~Scheduler`,
    :class:`~MetricReporter`, :class:`~Exporter`, and wire them up.

    Args:
        task_config (Task.Config): the config of the current task
        metadata: saved global context of this task, e.g: vocabulary, will be
            generated by :class:`~DataHandler` if it's None
        model_state: saved model parameters, will be loaded into model when given
    """
    featurizer = create_featurizer(task_config.featurizer, task_config.features)

    # Build the data handler; reuse saved metadata when provided, otherwise
    # let the handler derive it from the data.
    data_handler = create_data_handler(
        task_config.data_handler,
        task_config.features,
        task_config.labels,
        featurizer=featurizer,
    )
    print("\nLoading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
    metadata = data_handler.metadata

    # Build the model, optionally restore saved weights, move to GPU if enabled.
    model = create_model(task_config.model, task_config.features, metadata)
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()

    metric_reporter = create_metric_reporter(task_config.metric_reporter, metadata)
    optimizer = create_optimizer(task_config.optimizer, model)
    scheduler = (
        create_scheduler(task_config.scheduler, optimizer)
        if task_config.scheduler
        else None
    )
    if task_config.exporter:
        exporter = create_exporter(
            task_config.exporter,
            task_config.features,
            task_config.labels,
            data_handler.metadata,
            task_config.model,
        )
    else:
        exporter = None
    return cls(
        trainer=create_trainer(task_config.trainer),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        optimizer=optimizer,
        lr_scheduler=scheduler,
        exporter=exporter,
    )
def __init__(self, config: Config, model: torch.nn.Module):
    """Create the optimizer and (optionally) a learning-rate scheduler."""
    self.optimizer: torch.optim.Optimizer = create_optimizer(config.optimizer, model)
    if config.scheduler:
        self.scheduler: torch.optim.lr_scheduler = create_scheduler(
            config.scheduler, self.optimizer
        )
    else:
        # No scheduler configured: fall back to the no-op base Scheduler.
        self.scheduler: torch.optim.lr_scheduler = Scheduler()
    self.config = config
def test_load_checkpoint(self):
    """Round-trip a training checkpoint: save it, load it back, and verify
    the optimizer, scheduler, config, and model outputs all survive intact."""
    with tempfile.NamedTemporaryFile() as checkpoint_file:
        train_data = tests_module.test_file("train_data_tiny.tsv")
        eval_data = tests_module.test_file("test_data_tiny.tsv")
        config = PyTextConfig(
            task=DocumentClassificationTask.Config(
                data=Data.Config(
                    source=TSVDataSource.Config(
                        train_filename=train_data,
                        eval_filename=eval_data,
                        field_names=["label", "slots", "text"],
                    )
                )
            ),
            version=LATEST_VERSION,
            save_snapshot_path=checkpoint_file.name,
        )
        task = create_task(config.task)
        model = task.model

        # test checkpoint saving and loading
        optimizer = create_optimizer(Adam.Config(), model)
        scheduler = create_scheduler(Scheduler.Config(), optimizer)
        training_state = TrainingState(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            start_time=0,
            epoch=0,
            rank=0,
            stage=Stage.TRAIN,
            epochs_since_last_improvement=0,
            best_model_state=None,
            best_model_metric=None,
            tensorizers=None,
        )
        checkpoint_path = checkpoint_file.name
        # NOTE(review): save() is handed the open file object while load()
        # uses the path — assumed save() accepts a file-like; confirm.
        save(
            config,
            model,
            None,
            task.data.tensorizers,
            training_state,
            checkpoint_file,
        )
        task_restored, config_restored, training_state_restored = load(
            checkpoint_path
        )
        optimizer_restored = training_state_restored.optimizer
        scheduler_restored = training_state_restored.scheduler
        self.assertOptimizerEqual(optimizer, optimizer_restored)
        # Bug fix: unittest provides assertIsNotNone, not assertNotNone.
        self.assertIsNotNone(scheduler_restored)
        self.assertEqual(config, config_restored)
        self.assertModulesEqual(model, task_restored.model)
        model.eval()
        task_restored.model.eval()

        inputs = torch.LongTensor([[1, 2, 3]]), torch.LongTensor([3])
        self.assertEqual(
            model(*inputs).tolist(), task_restored.model(*inputs).tolist()
        )
def __init__(self, config: Config, model: torch.nn.Module):
    """Build optimizer, scheduler, and sparsifier, then run precision init.

    Args:
        config: trainer config; early stopping requires eval to be enabled.
        model: the model whose parameters will be optimized.
    """
    if config.early_stop_after > 0:
        # Fixed typo in the message ("evalution" -> "evaluation").
        assert config.do_eval, "can't do early stopping when not running evaluation"
    optimizer: torch.optim.Optimizer = create_optimizer(config.optimizer, model)
    # Fall back to no-op Scheduler/Sparsifier when not configured.
    self.scheduler: torch.optim.lr_scheduler = (
        create_scheduler(config.scheduler, optimizer)
        if config.scheduler
        else Scheduler()
    )
    self.sparsifier: Sparsifier = (
        create_sparsifier(config.sparsifier) if config.sparsifier else Sparsifier()
    )
    # NOTE(review): precision.initialize returns a (model, optimizer) pair —
    # presumably wrapping them for mixed precision; the wrapped optimizer is kept.
    model, self.optimizer = precision.initialize(model, optimizer)
    self.config = config
def test_load_checkpoint_in_dist_training(self):
    """Verify a checkpoint saved at one rank restores correctly when loaded
    with a different rank/world_size (distributed-training resume)."""
    with tempfile.NamedTemporaryFile() as checkpoint_file:
        train_data = tests_module.test_file("train_data_tiny.tsv")
        eval_data = tests_module.test_file("test_data_tiny.tsv")
        config = PyTextConfig(
            task=DocumentClassificationTask.Config(
                data=Data.Config(
                    source=BlockShardedTSVDataSource.Config(
                        train_filename=train_data,
                        eval_filename=eval_data,
                        field_names=["label", "slots", "text"],
                    )
                )
            ),
            version=LATEST_VERSION,
            save_snapshot_path=checkpoint_file.name,
        )
        task = create_task(config.task)
        model = task.model
        # test checkpoint saving and loading
        optimizer = create_optimizer(Adam.Config(), model)
        scheduler = create_scheduler(Scheduler.Config(), optimizer)
        training_state = TrainingState(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            start_time=0,
            epoch=0,
            rank=0,
            stage=Stage.TRAIN,
            epochs_since_last_improvement=0,
            best_model_state=None,
            best_model_metric=None,
            tensorizers=task.data.tensorizers,
        )
        # Renamed from `id`, which shadowed the builtin of the same name.
        checkpoint_id = "epoch-1"
        saved_path = save(
            config, model, None, task.data.tensorizers, training_state, checkpoint_id
        )

        # Reload under a different distributed configuration.
        new_rank = 2
        new_world_size = 4
        task_restored, config_restored, training_state_restored = load(
            saved_path, rank=new_rank, world_size=new_world_size
        )
        self.assertCheckpointEqual(
            model,
            config,
            training_state,
            task_restored.model,
            config_restored,
            training_state_restored,
        )
        self.assertEqual(task_restored.data.data_source.rank, new_rank)
        self.assertEqual(
            task_restored.data.data_source.world_size, new_world_size
        )
def __init__(self, config: Config, model: torch.nn.Module):
    """Build optimizer (with optional frozen / discriminative-LR parameter
    groups), scheduler, and sparsifier for training.

    Args:
        config: trainer config; when ``early_stop_after > 0`` evaluation
            must be enabled, since early stopping needs eval metrics.
        model: model whose parameters will be optimized.
    """
    if config.early_stop_after > 0:
        # Fixed typo in the message ("evalution" -> "evaluation").
        assert config.do_eval, "can't do early stopping when not running evaluation"
    if (config.discriminative_lr is not None
            or config.freeze_params_pattern is not None):
        # Partition parameters into optimizer groups with per-group LRs.
        # `optimizer_parameters_covered` tracks names already assigned to a
        # group so later groups don't claim them twice.
        optimizer_grouped_parameters = []
        optimizer_parameters_covered = []
        if config.freeze_params_pattern is not None:
            # Params whose name matches any freeze pattern get lr=0.0,
            # effectively freezing them.
            tmp_param = {
                n: p
                for n, p in model.named_parameters()
                if any(nd in n for nd in config.freeze_params_pattern)
            }
            if len(tmp_param) > 0:
                optimizer_parameters_covered.extend(list(tmp_param.keys()))
                optimizer_grouped_parameters.append({
                    "params": list(tmp_param.values()),
                    "lr": 0.0,
                })
        if config.discriminative_lr is not None:
            assert (
                config.discriminative_lr_params_pattern is not None
            ), "Missing discriminative_lr_params_pattern"
            # Matching params (unless already frozen) use the discriminative LR.
            tmp_param = {
                n: p
                for n, p in model.named_parameters()
                if any(nd in n for nd in config.discriminative_lr_params_pattern)
                and n not in optimizer_parameters_covered
            }
            if len(tmp_param) > 0:
                optimizer_parameters_covered.extend(list(tmp_param.keys()))
                optimizer_grouped_parameters.append({
                    "params": list(tmp_param.values()),
                    "lr": config.discriminative_lr,
                })
        # Everything not claimed above falls into the default group, which
        # inherits the optimizer's base learning rate.
        optimizer_grouped_parameters.append({
            "params": [
                p
                for n, p in model.named_parameters()
                if n not in optimizer_parameters_covered
            ]
        })
        if precision.FP16_ENABLED:
            self.optimizer: torch.optim.Optimizer = create_optimizer(
                config.fp16_args,
                model,
                config.optimizer,
                config.num_accumulated_batches,
                optimizer_grouped_parameters,
            )
        else:
            self.optimizer: torch.optim.Optimizer = create_optimizer(
                config.optimizer, model, optimizer_grouped_parameters
            )
    else:
        if precision.FP16_ENABLED:
            self.optimizer: torch.optim.Optimizer = create_optimizer(
                config.fp16_args,
                model,
                config.optimizer,
                config.num_accumulated_batches,
            )
        else:
            self.optimizer: torch.optim.Optimizer = create_optimizer(
                config.optimizer, model
            )
    # Fall back to the no-op base Scheduler/Sparsifier when not configured.
    self.scheduler: torch.optim.lr_scheduler = (
        create_scheduler(config.scheduler, self.optimizer)
        if config.scheduler
        else Scheduler()
    )
    self.sparsifier: Sparsifier = (
        create_sparsifier(config.sparsifier) if config.sparsifier else Sparsifier()
    )
    self.config = config
def from_config(cls, config: Config, optimizer: Optimizer):
    """Build the composite scheduler: a warmup phase followed by the main
    scheduler, switching over after ``warmup_steps`` steps."""
    warmup = create_scheduler(config.warmup_scheduler, optimizer)
    main_scheduler = create_scheduler(config.scheduler, optimizer)
    warmup_steps = config.warmup_scheduler.warmup_steps
    return cls(optimizer, warmup, main_scheduler, warmup_steps)