def __init__(self, config: Config, model: torch.nn.Module):
    """Build the optimizer, scheduler and sparsifier described by ``config``.

    Args:
        config: trainer configuration; this method reads ``early_stop_after``,
            ``do_eval``, ``fp16_args``, ``optimizer``,
            ``num_accumulated_batches``, ``scheduler`` and ``sparsifier``.
        model: module handed to ``create_optimizer``.

    Raises:
        AssertionError: if early stopping is requested while ``do_eval`` is
            falsy.
    """
    early_stopping_requested = config.early_stop_after > 0
    if early_stopping_requested:
        assert config.do_eval, "can't do early stopping when not running evalution"

    # The FP16 path passes the fp16 settings first, followed by the regular
    # optimizer config and the gradient-accumulation count.
    if precision.FP16_ENABLED:
        optimizer_args = (
            config.fp16_args,
            model,
            config.optimizer,
            config.num_accumulated_batches,
        )
    else:
        optimizer_args = (config.optimizer, model)
    self.optimizer: torch.optim.Optimizer = create_optimizer(*optimizer_args)

    # Fall back to default-constructed components when none is configured.
    if config.scheduler:
        self.scheduler: torch.optim.lr_scheduler = create_scheduler(
            config.scheduler, self.optimizer
        )
    else:
        self.scheduler = Scheduler()

    if config.sparsifier:
        self.sparsifier: Sparsifier = create_sparsifier(config.sparsifier)
    else:
        self.sparsifier = Sparsifier()

    self.config = config
def __init__(self, config: Config, model: torch.nn.Module):
    """Create the scheduler and the precision-initialized optimizer.

    Args:
        config: configuration providing ``optimizer`` and ``scheduler``.
        model: module handed to ``create_optimizer`` and
            ``precision.initialize``.
    """
    base_optimizer: torch.optim.Optimizer = create_optimizer(config.optimizer, model)

    # The scheduler is bound to the optimizer as it exists *before*
    # precision.initialize is applied.
    if config.scheduler:
        self.scheduler: torch.optim.lr_scheduler = create_scheduler(
            config.scheduler, base_optimizer
        )
    else:
        self.scheduler = Scheduler()

    # precision.initialize hands back a (model, optimizer) pair; only the
    # optimizer is kept on the instance.
    _, self.optimizer = precision.initialize(model, base_optimizer)
    self.config = config
def from_config(cls, task_config, metadata=None, model_state=None):
    """
    Assemble a task from its config, optionally restoring saved state.

    Builds the featurizer, data handler, model, metric reporter, optimizer,
    exporter, trainer and learning-rate scheduler from ``task_config`` and
    passes them to the class constructor.

    Args:
        task_config: the config of the task being created
        metadata: previously saved metadata; when falsy, the data handler
            initializes its own and that is used instead
        model_state: state dict loaded into the model when truthy
    """
    print("Task parameters:\n")
    pprint(config_to_json(type(task_config), task_config))

    features = task_config.features
    labels = task_config.labels

    # load data
    data_handler = create_data_handler(
        task_config.data_handler,
        features,
        labels,
        featurizer=create_featurizer(task_config.featurizer, features),
    )

    print("\nLoading data...")
    if not metadata:
        data_handler.init_metadata()
        metadata = data_handler.metadata
    else:
        data_handler.load_metadata(metadata)

    model = create_model(task_config.model, features, metadata)
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()

    metric_reporter = create_metric_reporter(task_config.metric_reporter, metadata)
    optimizer = create_optimizer(task_config.optimizer, model)

    exporter = None
    if task_config.exporter:
        exporter = create_exporter(
            task_config.exporter,
            features,
            labels,
            data_handler.metadata,
            task_config.model,
        )

    trainer = create_trainer(task_config.trainer)
    lr_scheduler = Scheduler(
        optimizer, task_config.scheduler, metric_reporter.lower_is_better
    )
    return cls(
        trainer=trainer,
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        exporter=exporter,
    )
def from_config(
    cls,
    fp16_config: Config,
    model: torch.nn.Module,
    fp32_config: Optimizer.Config,
    num_accumulated_batches: int,
):
    """Build the FP16 optimizer wrapper around an optimizer from ``fp32_config``.

    Args:
        fp16_config: supplies the loss-scaling settings forwarded to ``cls``.
        model: converted with ``model.half()`` before its parameters are read.
        fp32_config: config passed to ``create_optimizer`` for the wrapped
            optimizer.
        num_accumulated_batches: forwarded unchanged to ``cls``.
    """
    model = model.half()
    # Only parameters that require gradients are handed to the wrapper.
    fp16_params = [param for param in model.parameters() if param.requires_grad]
    fp32_optimizer = create_optimizer(fp32_config, model)

    banner = "| Fairseq FP16Optimizer with init_loss_scale={}".format(
        fp16_config.init_loss_scale
    )
    print(banner)

    # Loss-scaling options are copied from the config under the same names.
    loss_scale_options = {
        option: getattr(fp16_config, option)
        for option in (
            "init_loss_scale",
            "scale_window",
            "scale_tolerance",
            "threshold_loss_scale",
            "min_loss_scale",
        )
    }
    return cls(
        fp16_params=fp16_params,
        fp32_optimizer=fp32_optimizer,
        num_accumulated_batches=num_accumulated_batches,
        **loss_scale_options,
    )
def __init__(self, config: Config, model: torch.nn.Module):
    """Create the optimizer and scheduler described by ``config``.

    Args:
        config: configuration providing ``optimizer`` and ``scheduler``.
        model: module handed to ``create_optimizer``.
    """
    self.optimizer: torch.optim.Optimizer = create_optimizer(config.optimizer, model)

    # Use a default-constructed Scheduler when none is configured.
    if config.scheduler:
        self.scheduler: torch.optim.lr_scheduler = create_scheduler(
            config.scheduler, self.optimizer
        )
    else:
        self.scheduler = Scheduler()

    self.config = config
def test_load_checkpoint(self):
    """Save a checkpoint to a temporary file and check that loading restores it.

    Verifies that the restored optimizer, config and model match the saved
    ones, that a scheduler is restored, and that the original and restored
    models produce the same output on a fixed input.
    """
    with tempfile.NamedTemporaryFile() as checkpoint_file:
        train_data = tests_module.test_file("train_data_tiny.tsv")
        eval_data = tests_module.test_file("test_data_tiny.tsv")
        config = PyTextConfig(
            task=DocumentClassificationTask.Config(
                data=Data.Config(
                    source=TSVDataSource.Config(
                        train_filename=train_data,
                        eval_filename=eval_data,
                        field_names=["label", "slots", "text"],
                    )
                )
            ),
            version=LATEST_VERSION,
            save_snapshot_path=checkpoint_file.name,
        )
        task = create_task(config.task)
        model = task.model

        # test checkpoint saving and loading
        optimizer = create_optimizer(Adam.Config(), model)
        scheduler = create_scheduler(Scheduler.Config(), optimizer)
        training_state = TrainingState(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            start_time=0,
            epoch=0,
            rank=0,
            stage=Stage.TRAIN,
            epochs_since_last_improvement=0,
            best_model_state=None,
            best_model_metric=None,
            tensorizers=None,
        )

        checkpoint_path = checkpoint_file.name
        save(
            config,
            model,
            None,
            task.data.tensorizers,
            training_state,
            checkpoint_file,
        )
        task_restored, config_restored, training_state_restored = load(
            checkpoint_path
        )
        optimizer_restored = training_state_restored.optimizer
        scheduler_restored = training_state_restored.scheduler

        self.assertOptimizerEqual(optimizer, optimizer_restored)
        # unittest.TestCase provides assertIsNotNone; there is no
        # assertNotNone, so the previous call failed with AttributeError
        # before the scheduler was ever checked.
        self.assertIsNotNone(scheduler_restored)
        self.assertEqual(config, config_restored)
        self.assertModulesEqual(model, task_restored.model)

        # Put both models in eval mode before comparing their outputs.
        model.eval()
        task_restored.model.eval()

        inputs = torch.LongTensor([[1, 2, 3]]), torch.LongTensor([3])
        self.assertEqual(
            model(*inputs).tolist(), task_restored.model(*inputs).tolist()
        )
def __init__(self, config: Config, model: torch.nn.Module):
    """Build the scheduler, sparsifier and precision-initialized optimizer.

    Args:
        config: trainer configuration; this method reads ``early_stop_after``,
            ``do_eval``, ``optimizer``, ``scheduler`` and ``sparsifier``.
        model: module handed to ``create_optimizer`` and
            ``precision.initialize``.

    Raises:
        AssertionError: if early stopping is requested while ``do_eval`` is
            falsy.
    """
    early_stopping_requested = config.early_stop_after > 0
    if early_stopping_requested:
        assert config.do_eval, "can't do early stopping when not running evalution"

    base_optimizer: torch.optim.Optimizer = create_optimizer(config.optimizer, model)

    # The scheduler is bound to the optimizer as it exists *before*
    # precision.initialize is applied.
    if config.scheduler:
        self.scheduler: torch.optim.lr_scheduler = create_scheduler(
            config.scheduler, base_optimizer
        )
    else:
        self.scheduler = Scheduler()

    if config.sparsifier:
        self.sparsifier: Sparsifier = create_sparsifier(config.sparsifier)
    else:
        self.sparsifier = Sparsifier()

    # precision.initialize hands back a (model, optimizer) pair; only the
    # optimizer is kept on the instance.
    _, self.optimizer = precision.initialize(model, base_optimizer)
    self.config = config
def from_config(
    cls,
    fp16_config: Config,
    model: torch.nn.Module,
    fp32_config: Optimizer.Config,
    num_accumulated_batches: int,
    optimizer_grouped_parameters: List = None,
    *unused,
):
    """Wrap an optimizer built from ``fp32_config`` using the FP16 settings.

    Args:
        fp16_config: supplies ``opt_level``, ``init_loss_scale`` and
            ``min_loss_scale`` for ``cls``.
        model: module handed to ``create_optimizer`` and to ``cls``.
        fp32_config: config passed to ``create_optimizer``.
        num_accumulated_batches: accepted but not used by this method.
        optimizer_grouped_parameters: when not ``None``, forwarded to
            ``create_optimizer`` as a third positional argument.
        *unused: extra positional arguments, ignored.
    """
    # Pass the grouped parameters only when the caller supplied them, so the
    # default case remains the plain two-argument call.
    optimizer_args = [fp32_config, model]
    if optimizer_grouped_parameters is not None:
        optimizer_args.append(optimizer_grouped_parameters)
    fp32_optimizer = create_optimizer(*optimizer_args)

    return cls(
        fp32_optimizer,
        model,
        fp16_config.opt_level,
        fp16_config.init_loss_scale,
        fp16_config.min_loss_scale,
    )
def test_load_checkpoint_in_dist_training(self):
    """Save a checkpoint, reload it with a new rank/world size, and compare.

    Checks that the restored model, config and training state equal the saved
    ones and that the restored data source carries the rank and world size
    passed to ``load``.
    """
    with tempfile.NamedTemporaryFile() as checkpoint_file:
        source_config = BlockShardedTSVDataSource.Config(
            train_filename=tests_module.test_file("train_data_tiny.tsv"),
            eval_filename=tests_module.test_file("test_data_tiny.tsv"),
            field_names=["label", "slots", "text"],
        )
        config = PyTextConfig(
            task=DocumentClassificationTask.Config(
                data=Data.Config(source=source_config)
            ),
            version=LATEST_VERSION,
            save_snapshot_path=checkpoint_file.name,
        )
        task = create_task(config.task)
        model = task.model

        # test checkpoint saving and loading
        optimizer = create_optimizer(Adam.Config(), model)
        scheduler = create_scheduler(Scheduler.Config(), optimizer)
        training_state = TrainingState(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            start_time=0,
            epoch=0,
            rank=0,
            stage=Stage.TRAIN,
            epochs_since_last_improvement=0,
            best_model_state=None,
            best_model_metric=None,
            tensorizers=task.data.tensorizers,
        )

        snapshot_id = "epoch-1"
        saved_path = save(
            config, model, None, task.data.tensorizers, training_state, snapshot_id
        )

        # Reload under a different rank / world size than the one saved.
        new_rank = 2
        new_world_size = 4
        task_restored, config_restored, training_state_restored = load(
            saved_path, rank=new_rank, world_size=new_world_size
        )

        self.assertCheckpointEqual(
            model,
            config,
            training_state,
            task_restored.model,
            config_restored,
            training_state_restored,
        )
        restored_source = task_restored.data.data_source
        self.assertEqual(restored_source.rank, new_rank)
        self.assertEqual(restored_source.world_size, new_world_size)
def from_config(
    cls,
    fp16_config: Config,
    model: torch.nn.Module,
    fp32_config: Optimizer.Config,
    *unused,
):
    """Wrap an optimizer built from ``fp32_config`` using the FP16 settings.

    Args:
        fp16_config: supplies ``opt_level``, ``init_loss_scale`` and
            ``min_loss_scale`` for ``cls``.
        model: module handed to ``create_optimizer`` and to ``cls``.
        fp32_config: config passed to ``create_optimizer``.
        *unused: extra positional arguments, ignored.
    """
    wrapped_optimizer = create_optimizer(fp32_config, model)
    wrapper_args = (
        wrapped_optimizer,
        model,
        fp16_config.opt_level,
        fp16_config.init_loss_scale,
        fp16_config.min_loss_scale,
    )
    return cls(*wrapper_args)
def from_config(cls, config: Config, model: torch.nn.Module):
    """Wrap an optimizer built from ``config.optimizer`` with this class.

    Args:
        config: supplies ``optimizer``, ``start``, ``frequency`` and
            ``swa_learning_rate``.
        model: module handed to ``create_optimizer``.
    """
    wrapped_optimizer = create_optimizer(config.optimizer, model)
    return cls(
        wrapped_optimizer,
        config.start,
        config.frequency,
        config.swa_learning_rate,
    )
def from_config(cls, task_config, metadata=None, model_state=None):
    """Assemble a disjoint multitask task from its config.

    Builds one data handler, exporter, metric reporter and model per entry in
    ``task_config.tasks``, combines them into the multitask wrappers, and
    passes everything to the class constructor.

    Args:
        task_config: multitask config; this method reads ``tasks``,
            ``data_handler``, ``target_task_name``, ``task_weights``,
            ``optimizer``, ``trainer`` and ``scheduler``.
        metadata: previously saved metadata; when falsy, the data handler
            initializes its own and that is used instead. It is indexed by
            task name when building the per-task reporters and models.
        model_state: state dict loaded into the combined model when truthy.
    """
    print("Task parameters:\n")
    pprint(config_to_json(type(task_config), task_config))

    data_handlers = OrderedDict()
    for name, task in task_config.tasks.items():
        featurizer = create_featurizer(task.featurizer, task.features)
        data_handlers[name] = create_data_handler(
            task.data_handler, task.features, task.labels, featurizer=featurizer
        )
    data_handler = DisjointMultitaskDataHandler(
        task_config.data_handler,
        data_handlers,
        target_task_name=task_config.target_task_name,
    )

    print("\nLoading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
        metadata = data_handler.metadata

    # Tasks without an exporter config map to None. The earlier
    # `exporters = OrderedDict()` placeholder was never read before being
    # replaced by this dict, so it has been dropped.
    exporters = {
        name: (
            create_exporter(
                task.exporter,
                task.features,
                task.labels,
                data_handler.data_handlers[name].metadata,
                task.model,
            )
            if task.exporter
            else None
        )
        for name, task in task_config.tasks.items()
    }

    # Tasks with no explicit weight default to 1.
    task_weights = {
        task_name: task_config.task_weights.get(task_name, 1)
        for task_name in task_config.tasks.keys()
    }

    metric_reporter = DisjointMultitaskMetricReporter(
        OrderedDict(
            (name, create_metric_reporter(task.metric_reporter, metadata[name]))
            for name, task in task_config.tasks.items()
        ),
        loss_weights=task_weights,
        target_task_name=task_config.target_task_name,
    )
    model = DisjointMultitaskModel(
        OrderedDict(
            (name, create_model(task.model, task.features, metadata[name]))
            for name, task in task_config.tasks.items()
        ),
        loss_weights=task_weights,
    )
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()

    optimizer = create_optimizer(task_config.optimizer, model)
    return cls(
        exporters=exporters,
        trainer=create_trainer(task_config.trainer),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        optimizer=optimizer,
        lr_scheduler=Scheduler(
            optimizer, task_config.scheduler, metric_reporter.lower_is_better
        ),
    )
def __init__(self, config: Config, model: torch.nn.Module):
    """Build the optimizer (optionally with parameter groups), scheduler and sparsifier.

    When ``config.discriminative_lr`` or ``config.freeze_params_pattern`` is
    set, the model's named parameters are split into up to three groups, in
    this order:

    1. parameters whose name contains any ``freeze_params_pattern`` substring,
       with ``"lr": 0.0``;
    2. remaining parameters whose name contains any
       ``discriminative_lr_params_pattern`` substring, with
       ``"lr": config.discriminative_lr``;
    3. every parameter not covered above, with no per-group options.

    Groups 1 and 2 are omitted when they would be empty; group 3 is always
    appended. The group list is passed to ``create_optimizer`` as an extra
    trailing positional argument. When neither option is set, no group list
    is passed at all.

    Args:
        config: trainer configuration; besides the grouping options above this
            method reads ``early_stop_after``, ``do_eval``, ``fp16_args``,
            ``optimizer``, ``num_accumulated_batches``, ``scheduler`` and
            ``sparsifier``.
        model: module whose named parameters are grouped and which is handed
            to ``create_optimizer``.

    Raises:
        AssertionError: if early stopping is requested while ``do_eval`` is
            falsy, or if ``discriminative_lr`` is set without
            ``discriminative_lr_params_pattern``.
    """
    if config.early_stop_after > 0:
        assert config.do_eval, "can't do early stopping when not running evalution"

    freeze_patterns = config.freeze_params_pattern
    discriminative_lr = config.discriminative_lr

    # Extra trailing positional arguments for create_optimizer. Left empty
    # when no grouping is configured so that call stays exactly the ungrouped
    # one (no explicit None is passed).
    grouping_args = ()
    if discriminative_lr is not None or freeze_patterns is not None:
        named_params = list(model.named_parameters())
        param_groups = []
        # Names already assigned to a group; a set keeps the membership
        # checks below constant-time.
        covered_names = set()

        if freeze_patterns is not None:
            frozen = {
                name: param
                for name, param in named_params
                if any(pattern in name for pattern in freeze_patterns)
            }
            if frozen:
                covered_names.update(frozen)
                param_groups.append({"params": list(frozen.values()), "lr": 0.0})

        if discriminative_lr is not None:
            lr_patterns = config.discriminative_lr_params_pattern
            assert lr_patterns is not None, "Missing discriminative_lr_params_pattern"
            # Frozen parameters take precedence: names already covered are
            # excluded here.
            discriminative = {
                name: param
                for name, param in named_params
                if any(pattern in name for pattern in lr_patterns)
                and name not in covered_names
            }
            if discriminative:
                covered_names.update(discriminative)
                param_groups.append(
                    {"params": list(discriminative.values()), "lr": discriminative_lr}
                )

        # Everything left over goes into a final group without overrides.
        param_groups.append(
            {
                "params": [
                    param for name, param in named_params if name not in covered_names
                ]
            }
        )
        grouping_args = (param_groups,)

    if precision.FP16_ENABLED:
        self.optimizer: torch.optim.Optimizer = create_optimizer(
            config.fp16_args,
            model,
            config.optimizer,
            config.num_accumulated_batches,
            *grouping_args,
        )
    else:
        self.optimizer: torch.optim.Optimizer = create_optimizer(
            config.optimizer, model, *grouping_args
        )

    self.scheduler: torch.optim.lr_scheduler = (
        create_scheduler(config.scheduler, self.optimizer)
        if config.scheduler
        else Scheduler()
    )
    self.sparsifier: Sparsifier = (
        create_sparsifier(config.sparsifier) if config.sparsifier else Sparsifier()
    )
    self.config = config
def from_config(cls, task_config, metadata=None, model_state=None):
    """
    Assemble a task from its config, optionally restoring saved state.

    Builds the featurizer, data handler, model, metric reporter, optimizer,
    scheduler, exporter and trainer from ``task_config`` and passes them to
    the class constructor. When the labels config has a ``target_prob``
    attribute, it must equal whether the model's output-layer loss config is
    one of the knowledge-distillation loss configs.

    Args:
        task_config: the config of the task being created
        metadata: previously saved metadata; when falsy, the data handler
            initializes its own and that is used instead
        model_state: state dict loaded into the model when truthy
    """
    labels = task_config.labels
    features = task_config.features

    if hasattr(labels, "target_prob"):
        target_prob = labels.target_prob
        kd_loss_configs = (
            KLDivergenceBCELoss.Config,
            KLDivergenceCELoss.Config,
            SoftHardBCELoss.Config,
        )
        uses_kd_loss = isinstance(
            task_config.model.output_layer.loss, kd_loss_configs
        )
        assert (
            target_prob == uses_kd_loss
        ), "target_prob must be set to True for KD losses"

    # load data
    data_handler = create_data_handler(
        task_config.data_handler,
        features,
        labels,
        featurizer=create_featurizer(task_config.featurizer, features),
    )

    print("\nLoading data...")
    if not metadata:
        data_handler.init_metadata()
        metadata = data_handler.metadata
    else:
        data_handler.load_metadata(metadata)

    model = create_model(task_config.model, features, metadata)
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()

    metric_reporter = create_metric_reporter(task_config.metric_reporter, metadata)
    optimizer = create_optimizer(task_config.optimizer, model)

    # No scheduler object is created when the config leaves it unset.
    scheduler = (
        create_scheduler(task_config.scheduler, optimizer)
        if task_config.scheduler
        else None
    )

    exporter = None
    if task_config.exporter:
        exporter = create_exporter(
            task_config.exporter,
            features,
            labels,
            data_handler.metadata,
            task_config.model,
        )

    trainer = create_trainer(task_config.trainer)
    return cls(
        trainer=trainer,
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        optimizer=optimizer,
        lr_scheduler=scheduler,
        exporter=exporter,
    )