def from_params(params: Params, serialization_dir: str, recover: bool = False) -> 'TrainerPieces':
    """
    Build the objects needed for training from an experiment configuration.

    The datasets are read, a vocabulary is obtained (loaded from
    ``serialization_dir`` when ``recover`` is set and a saved vocabulary exists
    there, otherwise created from the selected datasets), the model is
    constructed, the vocabulary is written to disk, the iterators are created
    and indexed, and every model parameter whose name matches one of the
    trainer's ``no_grad`` regexes is frozen.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration; the keys consumed here are popped from it.
    serialization_dir : ``str``
        Directory whose ``vocabulary`` sub-directory is read from / written to.
    recover : ``bool``, optional (default=False)
        If ``True`` and a ``vocabulary`` entry exists in ``serialization_dir``,
        the vocabulary is loaded from there instead of being rebuilt.

    Returns
    -------
    A ``TrainerPieces`` holding the model, iterator, train / validation / test
    data, validation iterator and the remaining trainer parameters.
    """
    all_datasets = training_util.datasets_from_params(params)
    vocab_dataset_names = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset_name in vocab_dataset_names:
        if dataset_name not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset_name}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(vocab_dataset_names))

    def _instances_for_vocab():
        # Lazily yield the instances of every dataset selected for vocabulary creation.
        for key, instances in all_datasets.items():
            for instance in instances:
                if key in vocab_dataset_names:
                    yield instance

    if not (recover and os.path.exists(os.path.join(serialization_dir, "vocabulary"))):
        vocab = Vocabulary.from_params(params.pop("vocabulary", {}), _instances_for_vocab())
    else:
        vocab = Vocabulary.from_files(os.path.join(serialization_dir, "vocabulary"))
        # The key is still consumed so that it does not linger in ``params``.
        params.pop("vocabulary", {})

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)

    validation_iterator = None
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(model.vocab)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for param_name, param in model.named_parameters():
        # Freeze the parameter as soon as one pattern matches its name.
        for pattern in no_grad_regexes:
            if re.search(pattern, param_name):
                param.requires_grad_(False)
                break

    frozen_names, tunable_names = get_frozen_and_tunable_parameter_names(model)

    logger.info("Following parameters are Frozen (without gradient):")
    for frozen_name in frozen_names:
        logger.info(frozen_name)

    logger.info("Following parameters are Tunable (with gradient):")
    for tunable_name in tunable_names:
        logger.info(tunable_name)

    return TrainerPieces(model, iterator,
                         train_data, validation_data, test_data,
                         validation_iterator, trainer_params)
def dry_run_from_params(params: Params, serialization_dir: str) -> None:
    """
    Build the vocabulary and model for an experiment without training it.

    The datasets are read, a vocabulary is created from the selected datasets,
    dataset and vocabulary statistics are printed, the vocabulary is saved under
    ``serialization_dir/vocabulary``, the model is constructed, and the names of
    its frozen and tunable parameters are logged.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration; the keys consumed here are popped from it.
    serialization_dir : ``str``
        Directory to create (if needed) and write the vocabulary into.

    Raises
    ------
    ConfigurationError
        If ``serialization_dir/vocabulary`` already exists and contains entries,
        or if ``datasets_for_vocab_creation`` names an unknown dataset.
    """
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    # ``os.listdir`` always returns a list, never ``None``; test its truthiness
    # so that an existing but *empty* vocabulary directory is accepted.
    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation))

    # Filter on the dataset key *before* iterating its instances so that
    # datasets excluded from vocabulary creation are never read.
    instances = [
        instance
        for key, dataset in all_datasets.items()
        if key in datasets_for_vocab_creation
        for instance in dataset
    ]

    vocab = Vocabulary.from_params(vocab_params, instances)
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)

    stdout_handler = prepare_global_logging(serialization_dir, False)
    try:
        logger.info("Following parameters are Frozen (without gradient):")
        for name in frozen_parameter_names:
            logger.info(name)
        logger.info("Following parameters are Tunable (with gradient):")
        for name in tunable_parameter_names:
            logger.info(name)
    finally:
        # Always undo the global logging setup, even if logging fails.
        cleanup_global_logging(stdout_handler)
def test_get_frozen_and_tunable_parameter_names(self):
    """Parameters with ``requires_grad`` disabled are reported as frozen, the rest as tunable."""
    layers = OrderedDict()
    layers["conv"] = torch.nn.Conv1d(5, 5, 5)
    layers["linear"] = torch.nn.Linear(5, 10)
    model = torch.nn.Sequential(layers)

    # Freeze both parameters of the linear layer; the conv layer stays trainable.
    parameters_by_name = dict(model.named_parameters())
    for frozen_name in ("linear.weight", "linear.bias"):
        parameters_by_name[frozen_name].requires_grad_(False)

    frozen, tunable = util.get_frozen_and_tunable_parameter_names(model)

    assert set(frozen) == {"linear.weight", "linear.bias"}
    assert set(tunable) == {"conv.weight", "conv.bias"}
def test_get_frozen_and_tunable_parameter_names(self):
    """Check that frozen and tunable parameter names are separated correctly."""
    conv_layer = torch.nn.Conv1d(5, 5, 5)
    linear_layer = torch.nn.Linear(5, 10)
    model = torch.nn.Sequential(OrderedDict([('conv', conv_layer),
                                             ('linear', linear_layer)]))

    # Disable gradients on the linear layer only.
    lookup = dict(model.named_parameters())
    lookup['linear.weight'].requires_grad_(False)
    lookup['linear.bias'].requires_grad_(False)

    names = util.get_frozen_and_tunable_parameter_names(model)
    frozen_parameter_names, tunable_parameter_names = names

    expected_frozen = {'linear.weight', 'linear.bias'}
    expected_tunable = {'conv.weight', 'conv.bias'}
    assert set(frozen_parameter_names) == expected_frozen
    assert set(tunable_parameter_names) == expected_tunable
def test_get_frozen_and_tunable_parameter_names(self):
    """A model with a frozen linear layer reports only the conv parameters as tunable."""
    named_layers = [
        ('conv', torch.nn.Conv1d(5, 5, 5)),
        ('linear', torch.nn.Linear(5, 10)),
    ]
    model = torch.nn.Sequential(OrderedDict(named_layers))

    to_freeze = ['linear.weight', 'linear.bias']
    by_name = dict(model.named_parameters())
    for parameter_name in to_freeze:
        # ``requires_grad_`` switches gradient tracking off in place.
        by_name[parameter_name].requires_grad_(False)

    frozen_names, tunable_names = \
        util.get_frozen_and_tunable_parameter_names(model)

    assert set(frozen_names) == {'linear.weight', 'linear.bias'}
    assert set(tunable_names) == {'conv.weight', 'conv.bias'}
def dry_run_from_params(params: Params, serialization_dir: str) -> None:
    """
    Create the vocabulary and model for an experiment without running training.

    Reads the datasets, builds a vocabulary from the selected datasets, prints
    dataset and vocabulary statistics, saves the vocabulary under
    ``serialization_dir/vocabulary``, constructs the model and logs which of
    its parameters are frozen and which are tunable.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration; the keys consumed here are popped from it.
    serialization_dir : ``str``
        Directory to create (if needed) and write the vocabulary into.

    Raises
    ------
    ConfigurationError
        If ``serialization_dir/vocabulary`` already exists and contains entries,
        or if ``datasets_for_vocab_creation`` names an unknown dataset.
    """
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    # ``os.listdir`` returns a (possibly empty) list and never ``None``, so the
    # emptiness check has to rely on the list's truthiness.
    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    # Check the dataset key first so excluded datasets are not iterated at all.
    instances = [instance
                 for key, dataset in all_datasets.items()
                 if key in datasets_for_vocab_creation
                 for instance in dataset]

    vocab = Vocabulary.from_params(vocab_params, instances)
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        Forwarded unchanged to ``create_serialization_dir``.
        NOTE(review): presumably allows overwriting an existing serialization
        directory -- confirm against that helper.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        # Filter on the dataset key before iterating so that datasets excluded
        # from vocabulary creation are never read.
        (instance
         for key, dataset in all_datasets.items()
         if key in datasets_for_vocab_creation
         for instance in dataset)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer = Trainer.by_name(trainer_choice).from_params(model=model,
                                                          serialization_dir=serialization_dir,
                                                          iterator=iterator,
                                                          train_data=train_data,
                                                          validation_data=validation_data,
                                                          params=trainer_params,
                                                          validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger, like every other message in this function,
            # rather than the root ``logging`` module.
            logger.info("Training interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    # ``best_model`` is the same object as ``model``; its weights are replaced in place.
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model, test_data, validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    return best_model
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    extend_vocab: bool = False,
                    file_friendly_logging: bool = False,
                    batch_weight_key: str = "") -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Parameters
    ----------
    model : ``Model``
        The model to fine tune.  It is trained in place and returned.
    params : ``Params``
        The experiment configuration.  A ``model`` section and a vocabulary
        ``directory_path`` are ignored with a warning.
    serialization_dir : ``str``
        The directory in which to save results and logs.  It must not already
        exist with content.
    extend_vocab: ``bool``, optional (default=False)
        If ``True``, we use the new instances to extend your vocabulary.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    batch_weight_key : ``str``, optional (default="")
        Forwarded to ``evaluate`` when the model is evaluated on the test set.

    Returns
    -------
    The fine-tuned ``model``.

    Raises
    ------
    ConfigurationError
        If ``serialization_dir`` exists and is not empty, if
        ``datasets_for_vocab_creation`` names an unknown dataset, or if a
        trainer type other than ``"default"`` is requested.
    """
    prepare_environment(params)
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(
            f"Serialization directory ({serialization_dir}) "
            f"already exists and is not empty.")

    os.makedirs(serialization_dir, exist_ok=True)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Save the full configuration before any key is popped from ``params``.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning(
            "You passed parameters for the model in your configuration file, but we "
            "are ignoring them, using instead the model parameters in the archive."
        )

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning(
            "You passed `directory_path` in parameters for the vocabulary in "
            "your configuration file, but it will be ignored. ")

    all_datasets = datasets_from_params(params)
    vocab = model.vocab

    if extend_vocab:
        datasets_for_vocab_creation = set(
            params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(
                    f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("Extending model vocabulary using %s data.",
                    ", ".join(datasets_for_vocab_creation))
        vocab.extend_from_instances(
            vocabulary_params,
            # Filter on the dataset key before iterating so that datasets
            # excluded from vocabulary extension are never read.
            (instance
             for key, dataset in all_datasets.items()
             if key in datasets_for_vocab_creation
             for instance in dataset))

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_type = trainer_params.pop("type", "default")
    if trainer_type == "default":
        trainer = Trainer.from_params(model=model,
                                      serialization_dir=serialization_dir,
                                      iterator=iterator,
                                      train_data=train_data,
                                      validation_data=validation_data,
                                      params=trainer_params,
                                      validation_iterator=validation_iterator)
    else:
        raise ConfigurationError(
            "currently fine-tune only works with the default Trainer")

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger, like every other message in this function,
            # rather than the root ``logging`` module.
            logger.info(
                "Fine-tuning interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if test_data and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access,
            batch_weight_key=batch_weight_key)

        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"),
              "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    extend_vocab: bool = False,
                    file_friendly_logging: bool = False,
                    batch_weight_key: str = "",
                    embedding_sources_mapping: Dict[str, str] = None,
                    in_fold=None,
                    num_folds=None,
                    ewc_weight=None) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Two modes are supported.  When ``num_folds`` is given, the training
    instances are shuffled with a fixed seed and split into folds; a deep copy
    of ``model`` is trained per fold and the ``*.th`` state files in
    ``serialization_dir`` are deleted after each fold.  Otherwise ``model``
    itself is trained on the full training data and then archived.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment
    serialization_dir : ``str``
        The directory in which to save results and logs.
    extend_vocab: ``bool``, optional (default=False)
        If ``True``, we use the new instances to extend your vocabulary.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.
        NOTE(review): this argument is not read anywhere in the function body.
    embedding_sources_mapping: ``Dict[str, str]``, optional (default=None)
        mapping from model paths to the pretrained embedding filepaths
        used during fine-tuning.
    in_fold : optional (default=None)
        When given together with ``num_folds``, only the fold with this index
        is trained; otherwise every fold is trained in turn.
    num_folds : optional (default=None)
        Number of splits passed to ``KFold``.  ``None`` selects the single-run
        (non cross-validated) mode.
    ewc_weight : optional (default=None)
        When given, the model's ``forward`` is wrapped so that
        ``ewc_weight * ewc.penalty(...)`` is added to the returned ``"loss"``
        while the model is in training mode.

    Returns
    -------
    The ``model`` that was passed in.

    Raises
    ------
    ConfigurationError
        If ``serialization_dir`` exists and is not empty, if
        ``datasets_for_vocab_creation`` names an unknown dataset, or if
        ``num_folds`` / ``ewc_weight`` is used without a ``test`` dataset.
    """
    prepare_environment(params)
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f"Serialization directory ({serialization_dir}) "
                                 f"already exists and is not empty.")

    os.makedirs(serialization_dir, exist_ok=True)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Save the full configuration before any key is popped from ``params``.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. ")

    all_datasets = datasets_from_params(params)
    vocab = model.vocab

    if extend_vocab:
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("Extending model vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
        vocab.extend_from_instances(vocabulary_params,
                                    # Filter on the dataset key before iterating so
                                    # that excluded datasets are never read.
                                    (instance
                                     for key, dataset in all_datasets.items()
                                     if key in datasets_for_vocab_creation
                                     for instance in dataset))

        model.extend_embedder_vocab(embedding_sources_mapping)

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    # The fold callbacks and the EWC penalty both use loaders built from the
    # test data.  Without this check the function would fail later with a
    # ``NameError`` on ``test_loader`` / ``extra_test_loader``.
    if test_data is None and (num_folds is not None or ewc_weight is not None):
        raise ConfigurationError("a 'test' dataset is required when 'num_folds' "
                                 "or 'ewc_weight' is given")

    dl_params = params.pop("data_loader")
    if test_data is not None:
        rand = random.Random(1234)
        test_data.index_with(vocab)
        shuffled_test = copy(test_data.instances)
        rand.shuffle(shuffled_test)
        # A fixed-seed sample of at most 2000 test instances, used for EWC.
        extra_test = shuffled_test[:2000]

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": AllennlpDataset(extra_test, vocab)})
        extra_test_loader = DataLoader.from_params(params.pop("test_data_loader", keys))

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": test_data})
        test_loader = DataLoader.from_params(params.pop("test_data_loader", keys))

    master_model = model
    global_metrics = {}
    training_metrics = []
    final_metrics = {}
    # Plain-dict snapshot so each trainer gets its own fresh ``Params`` copy.
    master_trainer = trainer_params.as_dict()

    if num_folds is not None:
        rand = random.Random(1234)

        fold_train = []
        fold_test = []
        fold_train_loader = []
        fold_test_loader = []

        shuffled_instances = copy(train_data.instances)
        rand.shuffle(shuffled_instances)

        kfold = KFold(n_splits=num_folds, random_state=None, shuffle=False)
        computed_folds = list(kfold.split(shuffled_instances))

        for fold in range(num_folds):
            train_indexes, test_indexes = computed_folds[fold]
            new_train = [shuffled_instances[i] for i in train_indexes]
            new_test = [shuffled_instances[i] for i in test_indexes]
            fold_train.append(AllennlpDataset(new_train, vocab=vocab))
            fold_test.append(AllennlpDataset(new_test, vocab=vocab))

            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": fold_test[-1]})
            fold_test_loader.append(DataLoader.from_params(params.pop("fold_test_data_loader", keys)))

            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": fold_train[-1]})
            fold_train_loader.append(DataLoader.from_params(params.pop("fold_train_data_loader", keys)))

        for fold in ([in_fold] if in_fold is not None else range(num_folds)):
            # Every fold starts from an untouched copy of the given model.
            fold_model = deepcopy(master_model)
            eval_epoch_callback = EvalEpochCallback(fold, fold_test_loader[fold], test_loader, global_metrics)
            callbacks = [eval_epoch_callback]
            if ewc_weight is not None:
                ewc = EWC(extra_test_loader)

                def ewc_forward(*args, **kwargs) -> Dict[str, torch.Tensor]:
                    # The penalty is only computed while training.
                    ewc_loss = 0
                    if ewc.model.training:
                        ewc_loss = ewc.penalty(ewc.model)
                    ret = ewc.model.old_forward(*args, **kwargs)
                    ret["loss"] += ewc_weight * ewc_loss
                    return ret

                fold_model.old_forward = fold_model.forward
                fold_model.forward = ewc_forward
                callbacks.append(CallLossCallback(ewc))

            trainer = Trainer.from_params(model=fold_model,
                                          serialization_dir=serialization_dir,
                                          data_loader=fold_train_loader[fold],
                                          train_data=train_data,
                                          validation_data=None,
                                          params=Params(deepcopy(master_trainer)),
                                          validation_data_loader=None,
                                          epoch_callbacks=callbacks)

            training_metrics.append(trainer.train())
            del fold_model
            del trainer
            del eval_epoch_callback

            # Remove the state files written for this fold.
            state = glob(serialization_dir + "/*.th")
            for file in state:
                logger.info("deleting state - {}".format(file))
                os.unlink(file)
    else:
        callbacks = []
        if ewc_weight is not None:
            ewc = EWC(extra_test_loader)

            def ewc_forward(*args, **kwargs) -> Dict[str, torch.Tensor]:
                # The penalty is only computed while training.
                ewc_loss = 0
                if ewc.model.training:
                    ewc_loss = ewc.penalty(ewc.model)
                ret = ewc.model.old_forward(*args, **kwargs)
                ret["loss"] += ewc_weight * ewc_loss
                return ret

            model.old_forward = model.forward
            model.forward = ewc_forward
            callbacks.append(CallLossCallback(ewc))

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": train_data})
        train_data.index_with(vocab)
        train_data_loader = DataLoader.from_params(params.pop("train_loader", keys))

        if validation_data is not None:
            validation_data.index_with(vocab)
            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": validation_data})

            validation_data_loader = DataLoader.from_params(params.pop("validation_loader", keys))
        else:
            validation_data_loader = None

        # Models may expose an optional ``finetune`` hook; call it when present.
        if "finetune" in dir(model):
            model.finetune()
        logger.info("Fine tuning model")
        trainer = Trainer.from_params(model=model,
                                      serialization_dir=serialization_dir,
                                      data_loader=train_data_loader,
                                      train_data=train_data,
                                      validation_data=None,
                                      params=Params(deepcopy(master_trainer)),
                                      validation_data_loader=validation_data_loader,
                                      epoch_callbacks=callbacks)

        training_metrics = trainer.train()
        archive_model(serialization_dir)

    final_metrics["fine_tune"] = global_metrics
    final_metrics["training"] = training_metrics

    metrics_json = json.dumps(final_metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)
    return model
def train_model(params: Params,
                serialization_dir: str,
                results_fn: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Tuple[Model, Dict[str, Any]]:
    """
    Trains the model specified in the given ``Params`` object and returns it
    together with the collected metrics.

    The datasets are read, a vocabulary is created and saved under
    ``serialization_dir/vocabulary``, the model, iterators and trainer are
    built, training is run, the results are archived, the ``best.th`` weights
    are loaded back into the model and, if requested, the model is evaluated on
    the test set.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration; the keys consumed here are popped from it.
    serialization_dir : ``str``
        The directory in which the configuration, vocabulary, weights and
        archive are saved.
    results_fn : ``str``
        File name, joined onto ``results_dir``, that the metrics are dumped to.
    file_friendly_logging : ``bool``, optional (default=False)
        Forwarded to ``prepare_global_logging``.
    recover : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``.
    force : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``.

    Returns
    -------
    A tuple ``(best_model, metrics)``: the model with the ``best.th`` weights
    loaded, and the metrics returned by the trainer (extended with ``test_*``
    entries when the test set was evaluated).

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names an unknown dataset.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        # Filter on the dataset key before iterating so that datasets excluded
        # from vocabulary creation are never read.
        (instance
         for key, dataset in all_datasets.items()
         if key in datasets_for_vocab_creation
         for instance in dataset))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None
    held_out_iterator_params = params.pop("held_out_iterator", None)
    if held_out_iterator_params:
        held_out_iterator = DataIterator.from_params(held_out_iterator_params)
        held_out_iterator.index_with(vocab)
    else:
        held_out_iterator = None

    train_data = all_datasets['train']
    held_out_train_data = all_datasets.get('held_out_train')
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer = Trainer.by_name(trainer_choice).from_params(
        model=model,
        serialization_dir=serialization_dir,
        iterator=iterator,
        train_data=train_data,
        held_out_train_data=held_out_train_data,
        validation_data=validation_data,
        params=trainer_params,
        validation_iterator=validation_iterator,
        held_out_iterator=held_out_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger, like every other message in this function,
            # rather than the root ``logging`` module.
            logger.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    # ``best_model`` is the same object as ``model``; its weights are replaced in place.
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # NOTE(review): ``results_dir`` is neither a parameter nor a local of this
    # function, so it has to be a module-level name -- confirm it is defined.
    dump_metrics(os.path.join(results_dir, results_fn), metrics, log=True)

    return best_model, metrics
def dry_run_from_params(params: Params,
                        serialization_dir: str,
                        force: bool = False,
                        cache_directory: str = None,
                        cache_prefix: str = None) -> None:
    """
    Build the vocabulary and the model described by ``params`` without training.

    The datasets are loaded, the vocabulary is created from the selected
    datasets and written to ``<serialization_dir>/vocabulary``, dataset and
    vocabulary statistics are printed, the model is instantiated, and the
    frozen / tunable parameter names are logged.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration.  The ``vocabulary``,
        ``datasets_for_vocab_creation``, ``model`` and ``trainer`` keys are
        popped from it here.
    serialization_dir : ``str``
        The directory in which the vocabulary is saved.  It is created if
        it does not exist.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it
        already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See
        :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See
        :func:`allennlp.training.util.datasets_from_params`.

    Raises
    ------
    ConfigurationError
        If the vocabulary directory already contains files, or if
        ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    if os.path.exists(serialization_dir) and force:
        shutil.rmtree(serialization_dir)

    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    # ``os.listdir`` always returns a list, so comparing it with ``None`` was
    # always true and rejected even an *empty* vocabulary directory.  Use the
    # list's truthiness so that only a non-empty directory is refused.
    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params, cache_directory, cache_prefix)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    # Filter on the dataset key *before* iterating its instances so that
    # datasets that were not selected are never read.
    instances = [instance
                 for key, dataset in all_datasets.items()
                 if key in datasets_for_vocab_creation
                 for instance in dataset]

    vocab = Vocabulary.from_params(vocab_params, instances)
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info("writing the vocabulary to %s.", vocab_dir)
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
def train_model(params: Params,
                serialization_dir: str,
                selector: str,
                num_ensemble_models: Optional[int],
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    selector : ``str``
        When equal to ``'qbc'``, ``num_ensemble_models`` independent models are
        built from the same model configuration and wrapped in a
        ``CorefEnsemble``; its first submodel is used as the primary model.
        For any other value a single model is built and no ensemble is used.
    num_ensemble_models : ``Optional[int]``
        Number of models in the ensemble.  Required when ``selector`` is
        ``'qbc'``; ignored otherwise.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``.

    Returns
    -------
    best_model : ``Model``
        The model with the best epoch weights.
    metrics
        The metrics returned by the trainer, extended with ``test_*`` entries
        when the test set is evaluated.
    query_info
        The second value returned by ``trainer.train()``.

    NOTE(review): the return annotation says ``Model`` but a 3-tuple is
    returned; the annotation is left untouched here to avoid requiring a new
    ``typing`` import.

    Raises
    ------
    ConfigurationError
        If ``selector`` is ``'qbc'`` and ``num_ensemble_models`` is ``None``, or
        if ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset
             if key in datasets_for_vocab_creation)
    )

    model_params = params.pop('model')
    if selector == 'qbc':
        # Validate explicitly instead of asserting: asserts are stripped under
        # ``python -O`` and would surface later as an opaque ``range(None)`` error.
        if num_ensemble_models is None:
            raise ConfigurationError("'num_ensemble_models' must be provided "
                                     "when the selector is 'qbc'")
        # Each member gets its own copy of the params because ``from_params``
        # consumes the object it is given.
        models_list = [Model.from_params(vocab=vocab, params=model_params.duplicate())
                       for _ in range(num_ensemble_models)]
        ensemble_model = CorefEnsemble(models_list)
        model = ensemble_model.submodels[0]
    else:
        model = Model.from_params(vocab=vocab, params=model_params)
        ensemble_model = None

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    held_out_iterator_params = params.pop("held_out_iterator", None)
    if held_out_iterator_params:
        held_out_iterator = DataIterator.from_params(held_out_iterator_params)
        held_out_iterator.index_with(vocab)
    else:
        held_out_iterator = None

    train_data = all_datasets['train']
    held_out_train_data = all_datasets.get('held_out_train')
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop("type")
    trainer = ALCorefTrainer.by_name(trainer_choice).from_params(
            model=model,
            serialization_dir=serialization_dir,
            iterator=iterator,
            train_data=train_data,
            held_out_train_data=held_out_train_data,
            validation_data=validation_data,
            params=trainer_params,
            validation_iterator=validation_iterator,
            held_out_iterator=held_out_iterator,
            ensemble_model=ensemble_model)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics, query_info = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger like the rest of this function rather
            # than the root-logger convenience function.
            logger.info("Training interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
                best_model, test_data, validation_iterator or iterator,
                cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
                batch_weight_key="",
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    return best_model, metrics, query_info
def from_params(
    cls,
    params: Params,
    serialization_dir: str,
    recover: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
) -> "TrainerPieces":
    """
    Assemble the pieces needed for training from a meta-dataset configuration.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration.  The ``datasets_for_vocab_creation``,
        ``vocabulary``, ``model``, ``iterator``, ``validation_iterator`` and
        ``trainer`` keys are popped from it here.
    serialization_dir : ``str``
        Directory in which the vocabulary is saved (and from which it is
        reloaded when recovering).
    recover : ``bool``, optional (default=False)
        If ``True`` and a saved vocabulary exists in ``serialization_dir``,
        load it instead of building a new one.
    cache_directory : ``str``, optional
        Forwarded to ``training_util.meta_dataset_from_params``.
    cache_prefix : ``str``, optional
        Forwarded to ``training_util.meta_dataset_from_params``.

    Returns
    -------
    An instance of ``cls`` holding the model, the iterator, the train /
    validation / test data, the validation iterator and the remaining
    trainer params.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    all_datasets = training_util.meta_dataset_from_params(params, cache_directory, cache_prefix)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation),
    )

    if recover and os.path.exists(os.path.join(serialization_dir, "vocabulary")):
        vocab_params = params.pop("vocabulary", {})
        vocab = Vocabulary.from_files(
            os.path.join(serialization_dir, "vocabulary"),
            vocab_params.get("padding_token", None),
            vocab_params.get("oov_token", None),
        )
    else:
        # The 'train' entry is iterated as a collection of sub-datasets, the
        # other entries as flat collections of instances.  Both generators
        # honour ``datasets_for_vocab_creation``: it used to be validated and
        # logged above but then ignored, so every dataset fed the vocabulary.
        train_instances = (
            instance
            for key, dataset in all_datasets.items()
            if key == 'train' and key in datasets_for_vocab_creation
            for subdata in dataset
            for instance in subdata
        )
        other_instances = (
            instance
            for key, dataset in all_datasets.items()
            if key != 'train' and key in datasets_for_vocab_creation
            for instance in dataset
        )
        # Using lazy generators here is important because it allows us to not
        # iterate over the dataset when directory_path is specified.
        instances = chain(train_instances, other_instances)
        vocab = Vocabulary.from_params(params.pop("vocabulary", {}), instances)

    model = Model.from_params(vocab=vocab, params=params.pop("model"))

    # If vocab extension is ON for training, embedding extension should also be
    # done. If vocab and embeddings are already in sync, it would be a no-op.
    model.extend_embedder_vocab()

    # Initializing the model can have side effect of expanding the vocabulary
    # Save the vocab only in the master
    if not is_distributed() or is_master():
        vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)

    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(model.vocab)
    else:
        validation_iterator = None

    train_data = all_datasets["train"]
    validation_data = all_datasets.get("validation")
    test_data = all_datasets.get("test")

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = get_frozen_and_tunable_parameter_names(
        model
    )
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    return cls(
        model,
        iterator,
        train_data,
        validation_data,
        test_data,
        validation_iterator,
        trainer_params,
    )
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                pretrained_state_path: str = "/home/ubuntu/Hurricanes/model/bestS.th") -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Before training, the freshly built model is warm-started from the state
    dict stored at ``pretrained_state_path``: every parameter whose name is
    present in that state dict is overwritten, except for the classification
    layers listed in the body, which keep their fresh initialisation.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``.
    pretrained_state_path : ``str``, optional
        Path of the state dict used for the warm start.  The default is the
        location that used to be hard-coded in this function.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset
             if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Warm start: copy matching weights from the saved state dict, leaving the
    # layers below untouched.
    layers_kept_fresh = frozenset((
            'linear.weight',
            'linear.bias',
            'classifier_feedforward._linear_layers.1.weight',
            'classifier_feedforward._linear_layers.1.bias',
            'classifier_feedforward._linear_layers.0.weight',
            'classifier_feedforward._linear_layers.0.bias',
    ))
    prev_state_dict = torch.load(pretrained_state_path, map_location='cpu')
    for n, p in model.named_parameters():
        if n in prev_state_dict and n not in layers_kept_fresh:
            p.data.copy_(prev_state_dict[n].data)

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer = Trainer.by_name(trainer_choice).from_params(
            model=model,
            serialization_dir=serialization_dir,
            iterator=iterator,
            train_data=train_data,
            validation_data=validation_data,
            params=trainer_params,
            validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger like the rest of this function rather
            # than the root-logger convenience function.
            logger.info("Training interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
                best_model, test_data, validation_iterator or iterator,
                cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    return best_model
def from_params(cls,
                params: Params,
                serialization_dir: str,
                recover: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> 'PtDistTrainer':
    """
    Build a distributed trainer from an experiment configuration.

    The vocabulary is loaded from ``params.vocabulary.directory_path``, the
    model is created and moved to the GPU whose index equals this process's
    ``dist.get_rank()``, optional pretrained weights are loaded, and the
    optimizer, learning-rate scheduler and checkpointer are constructed from
    the ``trainer`` section of ``params``.  Only the master rank writes the
    vocabulary to ``serialization_dir``.

    ``recover`` is accepted but not used by this implementation.
    """
    datasets = training_util.datasets_from_params(
        params, cache_directory, cache_prefix)

    vocab = Vocabulary.from_files(params.vocabulary.directory_path)
    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    model.extend_embedder_vocab()

    if is_master_rank():
        vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    train_data = datasets['train']
    validation_data = datasets.get('validation')

    batch_size = params.iterator.batch_size

    # Keep the trainer section and drain every other remaining top-level key.
    trainer_params = params.pop("trainer")
    for leftover_key in list(params):
        params.pop(leftover_key)

    validation_metric = trainer_params.pop("validation_metric", "-loss")
    shuffle = trainer_params.pop_bool("shuffle", True)
    num_epochs = trainer_params.pop_int("num_epochs", 20)
    cuda_device = parse_cuda_device(trainer_params.pop("cuda_device", -1))
    grad_clipping = trainer_params.pop_float("grad_clipping", None)
    scheduler_config = trainer_params.pop("learning_rate_scheduler", None)
    pretrain_file = trainer_params.pop("pretrain_file", None)

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    frozen_patterns = trainer_params.pop("no_grad", ())
    for param_name, param in model.named_parameters():
        for pattern in frozen_patterns:
            if re.search(pattern, param_name):
                param.requires_grad_(False)
                break

    frozen_names, tunable_names = get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for param_name in frozen_names:
        logger.info(param_name)
    logger.info("Following parameters are Tunable (with gradient):")
    for param_name in tunable_names:
        logger.info(param_name)

    model = model.cuda(dist.get_rank())

    if pretrain_file:
        device_map = nn_util.device_mapping(dist.get_rank())
        model.load_state_dict(torch.load(pretrain_file, map_location=device_map))

    # Only parameters that still require gradients are handed to the optimizer.
    trainable = [[param_name, param]
                 for param_name, param in model.named_parameters()
                 if param.requires_grad]
    optimizer = Optimizer.from_params(trainable, trainer_params.pop("optimizer"))

    lr_scheduler = None
    if scheduler_config:
        lr_scheduler = LearningRateScheduler.from_params(optimizer, scheduler_config)

    models_to_keep = trainer_params.pop_int("num_serialized_models_to_keep", 20)
    checkpointer = Checkpointer(
        serialization_dir=serialization_dir,
        num_serialized_models_to_keep=models_to_keep,
        keep_serialized_model_every_num_seconds=None)

    return cls(model,
               optimizer,
               train_data,
               validation_data,
               batch_size=batch_size,
               validation_metric=validation_metric,
               shuffle=shuffle,
               num_epochs=num_epochs,
               serialization_dir=serialization_dir,
               cuda_device=cuda_device,
               grad_clipping=grad_clipping,
               learning_rate_scheduler=lr_scheduler,
               checkpointer=checkpointer)
def from_params(cls,
                params: Params,
                serialization_dir: str,
                recover: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> 'TrainerPieces':
    """
    Assemble the pieces needed for training from an experiment configuration.

    Loads the datasets, builds (or, when recovering, reloads) the vocabulary,
    instantiates the model and the iterators, freezes the parameters selected
    by the trainer's ``no_grad`` regexes and logs which parameters are frozen
    and which are tunable.

    The ``train``, ``validation`` and ``test`` entries are popped from the
    loaded datasets; whatever remains in that mapping is passed to ``cls`` as
    the last positional argument.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    all_datasets = training_util.datasets_from_params(
        params, cache_directory, cache_prefix)

    vocab_sources = set(params.pop("datasets_for_vocab_creation", all_datasets))
    for source_name in vocab_sources:
        if source_name not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {source_name}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(vocab_sources))

    vocab_dir = os.path.join(serialization_dir, "vocabulary")
    if recover and os.path.exists(vocab_dir):
        vocab = Vocabulary.from_files(vocab_dir)
        # The vocabulary section is not needed when reloading; consume it anyway.
        params.pop("vocabulary", {})
    else:
        # Using a generator comprehension here is important
        # because, being lazy, it allows us to not iterate over the
        # dataset when directory_path is specified.
        vocab_instances = (instance
                           for key, dataset in all_datasets.items()
                           if key in vocab_sources
                           for instance in dataset)
        vocab = Vocabulary.from_params(params.pop("vocabulary", {}), vocab_instances)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # If vocab extension is ON for training, embedding extension should also be
    # done. If vocab and embeddings are already in sync, it would be a no-op.
    model.extend_embedder_vocab()

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(vocab_dir)

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)

    validation_iterator = None
    validation_iterator_config = params.pop("validation_iterator", None)
    if validation_iterator_config:
        validation_iterator = DataIterator.from_params(validation_iterator_config)
        validation_iterator.index_with(model.vocab)

    train_data = all_datasets.pop('train')
    validation_data = all_datasets.pop('validation', None)
    test_data = all_datasets.pop('test', None)

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    frozen_patterns = trainer_params.pop("no_grad", ())
    for param_name, param in model.named_parameters():
        for pattern in frozen_patterns:
            if re.search(pattern, param_name):
                param.requires_grad_(False)
                break

    frozen_names, tunable_names = get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for param_name in frozen_names:
        logger.info(param_name)
    logger.info("Following parameters are Tunable (with gradient):")
    for param_name in tunable_names:
        logger.info(param_name)

    return cls(model, iterator, train_data, validation_data, test_data,
               validation_iterator, trainer_params, all_datasets)
def from_params(
        cls,  # type: ignore
        params: Params,
        serialization_dir: str,
        recover: bool = False) -> 'Trainer':
    """
    Build a trainer, including a second ``train_low`` training set, from a
    full experiment configuration.

    The first half mirrors the ``TrainerPieces`` construction (datasets,
    vocabulary, model, iterators, parameter freezing); the second half reads
    the ``trainer`` section to build the optimizer, schedulers, moving
    average and checkpointer.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration.
    serialization_dir : ``str``
        Directory in which the vocabulary and checkpoints are stored.
    recover : ``bool``, optional (default=False)
        If ``True`` and a saved vocabulary exists in ``serialization_dir``,
        load it instead of building a new one.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not
        loaded, or if the checkpointer is configured both through the
        ``checkpointer`` key and through the legacy keys.
    """
    # modified for second training_data
    all_datasets = datasets_from_params(params)

    # copied from allennlp.training.trainer.TrainingPieces
    # modified for second training_data
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    # Reject unknown dataset names instead of silently ignoring them, as the
    # sibling implementations of this construction do.
    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    if recover and os.path.exists(
            os.path.join(serialization_dir, "vocabulary")):
        vocab = Vocabulary.from_files(
            os.path.join(serialization_dir, "vocabulary"))
        params.pop("vocabulary", {})
    else:
        # Filter on the dataset key *before* iterating its instances so that
        # datasets that were not selected are never read.
        vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             if key in datasets_for_vocab_creation
             for instance in dataset))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    model.extend_embedder_vocab()
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)

    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(model.vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    train_low_data = all_datasets.get('train_low')

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
    # END OF TrainerPieces code

    # From here on only the trainer section is consumed.
    params = trainer_params

    # pylint: disable=arguments-differ
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)
    momentum_scheduler_params = params.pop("momentum_scheduler", None)

    if isinstance(cuda_device, list):
        model_device = cuda_device[0]
    else:
        model_device = cuda_device
    if model_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(model_device)

    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    if "moving_average" in params:
        moving_average = MovingAverage.from_params(
            params.pop("moving_average"), parameters=parameters)
    else:
        moving_average = None

    if lr_scheduler_params:
        lr_scheduler = LearningRateScheduler.from_params(
            optimizer, lr_scheduler_params)
    else:
        lr_scheduler = None

    if momentum_scheduler_params:
        momentum_scheduler = MomentumScheduler.from_params(
            optimizer, momentum_scheduler_params)
    else:
        momentum_scheduler = None

    if 'checkpointer' in params:
        if 'keep_serialized_model_every_num_seconds' in params or \
                'num_serialized_models_to_keep' in params:
            raise ConfigurationError(
                "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                " but the passed config uses both methods.")
        checkpointer = Checkpointer.from_params(params.pop("checkpointer"))
    else:
        num_serialized_models_to_keep = params.pop_int(
            "num_serialized_models_to_keep", 20)
        keep_serialized_model_every_num_seconds = params.pop_int(
            "keep_serialized_model_every_num_seconds", None)
        checkpointer = Checkpointer(
            serialization_dir=serialization_dir,
            num_serialized_models_to_keep=num_serialized_models_to_keep,
            keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds)

    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)
    should_log_parameter_statistics = params.pop_bool(
        "should_log_parameter_statistics", True)
    should_log_learning_rate = params.pop_bool("should_log_learning_rate",
                                               False)
    log_batch_size_period = params.pop_int("log_batch_size_period", None)

    epoch_low_start = params.pop_int("epoch_low_start", None)
    epoch_without_improvement_low_start = params.pop_int(
        "epoch_without_improvement_low_start", None)

    params.assert_empty(cls.__name__)

    return cls(
        model,
        optimizer,
        iterator,
        train_data,
        validation_data,
        train_low_dataset=train_low_data,
        patience=patience,
        validation_metric=validation_metric,
        validation_iterator=validation_iterator,
        shuffle=shuffle,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=lr_scheduler,
        momentum_scheduler=momentum_scheduler,
        checkpointer=checkpointer,
        model_save_interval=model_save_interval,
        summary_interval=summary_interval,
        histogram_interval=histogram_interval,
        should_log_parameter_statistics=should_log_parameter_statistics,
        should_log_learning_rate=should_log_learning_rate,
        log_batch_size_period=log_batch_size_period,
        moving_average=moving_average,
        epoch_low_start=epoch_low_start,
        epoch_without_improvement_low_start=epoch_without_improvement_low_start,
    )
def from_params(params: Params,
                serialization_dir: str,
                recover: bool = False) -> 'TrainerPieces':
    """
    Assemble the pieces needed for training from a corpus-based configuration.

    The data comes from a ``Corpus`` built from the ``corpus`` section of
    ``params``.  The vocabulary is obtained in one of three ways, depending
    on the ``type`` entry of the ``vocabulary`` section (``"default"`` when
    absent):

    * ``"default"`` and a saved vocabulary exists in ``serialization_dir``:
      the saved vocabulary is loaded;
    * ``"empty"``: an empty ``Vocabulary`` is created;
    * otherwise: the environment is seeded and the vocabulary is built from
      ``corpus.train``.

    ``recover`` is accepted but not used by this implementation.
    """
    corpus = Corpus.from_params(params.pop('corpus'))

    seed = params.pop_int("seed", 5678)
    vocab_config = params.pop("vocabulary", {})
    vocab_kind = vocab_config.get("type", "default")
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    if vocab_kind == 'default' and os.path.exists(vocab_dir):
        vocab = Vocabulary.from_files(vocab_dir)
    elif vocab_kind == 'empty':
        vocab = Vocabulary()
    else:
        seed_environment(seed)
        vocab = Vocabulary.from_params(vocab_config, corpus.train)

    # Need to reset the seed. Otherwise loading existing vocab and creating
    # vocab from scratch will lead to different behavior.
    seed_environment(seed)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # If vocab extension is ON for training, embedding extension should also be
    # done. If vocab and embeddings are already in sync, it would be a no-op.
    model.extend_embedder_vocab()

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(vocab_dir)

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)

    validation_iterator = None
    validation_iterator_config = params.pop("validation_iterator", None)
    if validation_iterator_config:
        validation_iterator = DataIterator.from_params(validation_iterator_config)
        validation_iterator.index_with(model.vocab)

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    frozen_patterns = trainer_params.pop("no_grad", ())
    for param_name, param in model.named_parameters():
        for pattern in frozen_patterns:
            if re.search(pattern, param_name):
                param.requires_grad_(False)
                break

    frozen_names, tunable_names = get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for param_name in frozen_names:
        logger.info(param_name)
    logger.info("Following parameters are Tunable (with gradient):")
    for param_name in tunable_names:
        logger.info(param_name)

    batch_weight_key = params.pop('batch_weight_key', '')

    return TrainerPieces(model, iterator, corpus, validation_iterator,
                         batch_weight_key, trainer_params)
<<<<<<< HEAD trainer = Trainer.from_params(model, selector, serialization_dir, iterator, train_data, validation_data, trainer_params) ======= no_grad_regexes = trainer_params.pop("no_grad", ()) for name, parameter in model.named_parameters(): if any(re.search(regex, name) for regex in no_grad_regexes): parameter.requires_grad_(False) frozen_parameter_names, tunable_parameter_names = \ get_frozen_and_tunable_parameter_names(model) logger.info("Following parameters are Frozen (without gradient):") for name in frozen_parameter_names: logger.info(name) logger.info("Following parameters are Tunable (with gradient):") for name in tunable_parameter_names: logger.info(name) trainer_choice = trainer_params.pop_choice("type", Trainer.list_available(), default_to_first_choice=True) trainer = Trainer.by_name(trainer_choice).from_params(model=model, serialization_dir=serialization_dir, iterator=iterator, train_data=train_data, validation_data=validation_data,
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False) -> Model:
    """
    Trains the multi-task model specified in the given :class:`Params` object, using the
    primary and auxiliary data and training parameters also specified in that object, and
    saves the results in ``serialization_dir``.

    Besides the usual ``model`` / ``iterator`` / ``trainer`` sections, this function reads
    ``iterator_aux``, ``iterator_aux2``, ``mixing_ratio``, ``mixing_ratio2`` (both read with
    ``pop_float`` and no default), ``cutoff_epoch`` (default ``-1``),
    ``auxiliary_datasets_for_vocab_creation``, ``auxiliary_datasets_for_vocab_creation_2``,
    ``evaluate_on_test`` and ``evaluate_aux_on_test`` from ``params``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    check_for_gpu(params.get('trainer').get('cuda_device', -1))

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets, all_datasets_aux, all_datasets_aux2 = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))
    datasets_for_vocab_creation_aux = set(params.pop("auxiliary_datasets_for_vocab_creation",
                                                     all_datasets_aux))
    datasets_for_vocab_creation_aux2 = set(params.pop("auxiliary_datasets_for_vocab_creation_2",
                                                      all_datasets_aux2))

    mixing_ratio = params.pop_float("mixing_ratio")
    mixing_ratio2 = params.pop_float("mixing_ratio2")

    cutoff_epoch = params.pop("cutoff_epoch", -1)

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    # Filter on the dataset key before descending into the instances, so datasets that
    # are not selected for vocabulary creation are never iterated at all.
    vocab_instances_aux = [
            instance
            for key, dataset in all_datasets_aux.items()
            if key in datasets_for_vocab_creation_aux
            for instance in dataset
    ]
    vocab_instances_aux.extend(
            instance
            for key, dataset in all_datasets_aux2.items()
            if key in datasets_for_vocab_creation_aux2
            for instance in dataset
    )
    vocab = VocabularyMultitask.from_params(
            params.pop("vocabulary", {}),
            (instance
             for key, dataset in all_datasets.items()
             if key in datasets_for_vocab_creation
             for instance in dataset),
            instances_aux=vocab_instances_aux
    )
    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    iterator_aux = DataIterator.from_params(params.pop("iterator_aux"))
    iterator_aux.index_with(vocab)

    iterator_aux2 = DataIterator.from_params(params.pop("iterator_aux2"))
    iterator_aux2.index_with(vocab)

    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    # TODO: if validation in multi-task need to add validation iterator as above

    train_data = all_datasets.get('train')
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    train_data_aux = all_datasets_aux.get('train_aux')
    validation_data_aux = all_datasets_aux.get('validation_aux')
    test_data_aux = all_datasets_aux.get('test_aux')

    train_data_aux2 = all_datasets_aux2.get('train_aux')
    validation_data_aux2 = all_datasets_aux2.get('validation_aux')
    test_data_aux2 = all_datasets_aux2.get('test_aux')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer = MultiTaskTrainer2.from_params(model=model,
                                            serialization_dir=serialization_dir,
                                            iterator=iterator,
                                            iterator_aux=iterator_aux,
                                            iterator_aux2=iterator_aux2,
                                            train_data=train_data,
                                            train_data_aux=train_data_aux,
                                            train_data_aux2=train_data_aux2,
                                            mixing_ratio=mixing_ratio,
                                            mixing_ratio2=mixing_ratio2,
                                            cutoff_epoch=cutoff_epoch,
                                            validation_data_aux=validation_data_aux,
                                            validation_data_aux2=validation_data_aux2,
                                            validation_data=validation_data,
                                            params=trainer_params,
                                            validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    evaluate_aux_on_test = params.pop_bool("evaluate_aux_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger (not the root ``logging`` functions) so this
            # message is attributed like every other message in this function.
            logger.info("Training interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
                best_model, test_data, validation_iterator or iterator,
                cuda_device=trainer._cuda_devices[0] # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    if test_data_aux and evaluate_aux_on_test:
        test_metrics_aux = evaluate(best_model, test_data_aux, iterator_aux,
                                    cuda_device=trainer._cuda_devices[0]) # pylint: disable=protected-access
        for key, value in test_metrics_aux.items():
            metrics["test_aux_" + key] = value

        # The second auxiliary test set comes from ``.get()`` and may be absent;
        # only evaluate it when it was actually loaded.
        if test_data_aux2:
            test_metrics_aux2 = evaluate(best_model, test_data_aux2, iterator_aux2,
                                         cuda_device=trainer._cuda_devices[0]) # pylint: disable=protected-access
            for key, value in test_metrics_aux2.items():
                metrics["test_aux2_" + key] = value

    elif test_data_aux:
        logger.info("To evaluate on the auxiliary test set after training, pass the "
                    "'evaluate_aux_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    return best_model
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    file_friendly_logging: bool = False) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not create the model object, and instead of building a new vocabulary we extend
    the vocabulary the model already has.  Everything else is the same.

    Parameters
    ----------
    model : ``Model``
        The model to fine-tune.  Its vocabulary (``model.vocab``) is extended in place with
        the instances of the datasets selected for vocabulary creation.
    params : ``Params``
        The experiment configuration.  A ``model`` section is dropped with a warning, and a
        ``directory_path`` inside the ``vocabulary`` section is ignored with a warning.
    serialization_dir : ``str``
        The directory in which to save results and logs.  It is created with ``os.makedirs``
        (without ``exist_ok``), so it must not exist yet.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.

    Returns
    -------
    model : ``Model``
        The same model object that was passed in, after training.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    prepare_environment(params)
    os.makedirs(serialization_dir)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Snapshot the configuration before any section is popped from ``params``.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. "
                       "Vocabulary from the saved model will be extended with current data.")

    all_datasets = datasets_from_params(params)

    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Extending model vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
    vocab = model.vocab
    # Filter on the dataset key before descending into the instances, so datasets that
    # are not selected for vocabulary creation are never iterated at all.
    vocab.extend_from_instances(vocabulary_params,
                                (instance
                                 for key, dataset in all_datasets.items()
                                 if key in datasets_for_vocab_creation
                                 for instance in dataset))
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger (not the root ``logging`` functions) so this
            # message is attributed like every other message in this function.
            logger.info("Fine-tuning interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model, test_data, iterator, cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    check_for_gpu(params.params.get('trainer').get('cuda_device', -1))

    # Snapshot the configuration before any section is popped from ``params``.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Creating a vocabulary using %s data.",
                ", ".join(datasets_for_vocab_creation))
    # Filter on the dataset key before descending into the instances, so datasets that
    # are not selected for vocabulary creation are never iterated at all.
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        (instance
         for key, dataset in all_datasets.items()
         if key in datasets_for_vocab_creation
         for instance in dataset))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Save the vocabulary only after the model has been built: initializing the model
    # can have the side effect of expanding the vocabulary, and the copy on disk must
    # match what the model was actually constructed with.
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params,
                                  validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger (not the root ``logging`` functions) so this
            # message is attributed like every other message in this function.
            logger.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"),
              "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return best_model
def from_params(cls,
                params: Params,
                serialization_dir: str,
                recover: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> 'PtTrainer':
    """
    Assemble a trainer from a full experiment configuration.

    The method loads the datasets, builds (or, when recovering, reloads) the
    vocabulary, constructs the model and the data iterators, freezes the
    parameters selected by the trainer's ``no_grad`` regexes, and finally reads
    the remaining ``trainer`` options to build the optimizer, schedulers and
    checkpointer before calling ``cls(...)``.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration.  The ``vocabulary``, ``model``, ``iterator``,
        ``validation_iterator`` and ``trainer`` sections are popped from it.
    serialization_dir : ``str``
        Directory the vocabulary is saved to (under ``vocabulary``) and that is
        handed on to the checkpointer and the trainer.
    recover : ``bool``, optional (default=False)
        If ``True`` and ``<serialization_dir>/vocabulary`` exists, the vocabulary
        is loaded from there instead of being built from the datasets.
    cache_directory : ``str``, optional
        Forwarded to ``training_util.datasets_from_params``.
    cache_prefix : ``str``, optional
        Forwarded to ``training_util.datasets_from_params``.

    Returns
    -------
    The object produced by ``cls(...)``.

    Raises
    ------
    ConfigurationError
        If ``datasets_for_vocab_creation`` names a dataset that was not loaded, or
        if the trainer section configures the checkpointer both through
        ``checkpointer`` and through the legacy keys.
    """
    # NOTE(review): this reads the dataset reader section through attribute access
    # on ``params`` -- confirm the Params class in use supports that.
    max_src_len = params.dataset_reader.get('max_src_len', None)

    all_datasets = training_util.datasets_from_params(
        params, cache_directory, cache_prefix)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    # Report the first requested dataset name that was not actually loaded.
    unknown_names = [name for name in datasets_for_vocab_creation
                     if name not in all_datasets]
    if unknown_names:
        raise ConfigurationError(
            f"invalid 'dataset_for_vocab_creation' {unknown_names[0]}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation))

    vocabulary_dir = os.path.join(serialization_dir, "vocabulary")
    if recover and os.path.exists(vocabulary_dir):
        vocab = Vocabulary.from_files(vocabulary_dir)
        # Still consume the section so it does not linger in ``params``.
        params.pop("vocabulary", {})
    else:
        vocabulary_params = params.pop("vocabulary", {})
        selected_instances = (
            instance
            for key, dataset in all_datasets.items()
            if key in datasets_for_vocab_creation
            for instance in dataset)
        vocab = Vocabulary.from_params(vocabulary_params, selected_instances)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # If vocab extension is ON for training, embedding extension should also be
    # done. If vocab and embeddings are already in sync, it would be a no-op.
    model.extend_embedder_vocab()

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)

    validation_iterator = None
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(model.vocab)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    # Looked up as in the original code; the test split is not passed to the trainer.
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for param_name, parameter in model.named_parameters():
        if any(re.search(regex, param_name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for param_name in frozen_parameter_names:
        logger.info(param_name)
    logger.info("Following parameters are Tunable (with gradient):")
    for param_name in tunable_parameter_names:
        logger.info(param_name)

    # From here on only the ``trainer`` section is consumed.
    patience = trainer_params.pop_int("patience", None)
    validation_metric = trainer_params.pop("validation_metric", "-loss")
    shuffle = trainer_params.pop_bool("shuffle", True)
    num_epochs = trainer_params.pop_int("num_epochs", 20)
    cuda_device = parse_cuda_device(trainer_params.pop("cuda_device", -1))
    grad_norm = trainer_params.pop_float("grad_norm", None)
    grad_clipping = trainer_params.pop_float("grad_clipping", None)
    lr_scheduler_params = trainer_params.pop("learning_rate_scheduler", None)
    momentum_scheduler_params = trainer_params.pop("momentum_scheduler", None)

    # With a list of devices, the model lives on the first one.
    model_device = cuda_device[0] if isinstance(cuda_device, list) else cuda_device
    if model_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(model_device)

    parameters = [[param_name, parameter]
                  for param_name, parameter in model.named_parameters()
                  if parameter.requires_grad]
    optimizer = Optimizer.from_params(parameters, trainer_params.pop("optimizer"))

    moving_average = None
    if "moving_average" in trainer_params:
        moving_average = MovingAverage.from_params(
            trainer_params.pop("moving_average"), parameters=parameters)

    lr_scheduler = None
    if lr_scheduler_params:
        lr_scheduler = LearningRateScheduler.from_params(
            optimizer, lr_scheduler_params)

    momentum_scheduler = None
    if momentum_scheduler_params:
        momentum_scheduler = MomentumScheduler.from_params(
            optimizer, momentum_scheduler_params)

    if 'checkpointer' not in trainer_params:
        # Legacy configuration: build the checkpointer from the two flat keys.
        num_serialized_models_to_keep = trainer_params.pop_int(
            "num_serialized_models_to_keep", 20)
        keep_serialized_model_every_num_seconds = trainer_params.pop_int(
            "keep_serialized_model_every_num_seconds", None)
        checkpointer = Checkpointer(
            serialization_dir=serialization_dir,
            num_serialized_models_to_keep=num_serialized_models_to_keep,
            keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds)
    else:
        uses_legacy_keys = ('keep_serialized_model_every_num_seconds' in trainer_params
                            or 'num_serialized_models_to_keep' in trainer_params)
        if uses_legacy_keys:
            raise ConfigurationError(
                "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                " but the passed config uses both methods.")
        checkpointer = Checkpointer.from_params(trainer_params.pop("checkpointer"))

    model_save_interval = trainer_params.pop_float("model_save_interval", None)
    summary_interval = trainer_params.pop_int("summary_interval", 100)
    histogram_interval = trainer_params.pop_int("histogram_interval", None)
    should_log_parameter_statistics = trainer_params.pop_bool(
        "should_log_parameter_statistics", True)
    should_log_learning_rate = trainer_params.pop_bool(
        "should_log_learning_rate", False)
    log_batch_size_period = trainer_params.pop_int("log_batch_size_period", None)

    return cls(
        model,
        optimizer,
        iterator,
        train_data,
        validation_data,
        patience=patience,
        validation_metric=validation_metric,
        validation_iterator=validation_iterator,
        max_src_len=max_src_len,
        shuffle=shuffle,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=lr_scheduler,
        momentum_scheduler=momentum_scheduler,
        checkpointer=checkpointer,
        model_save_interval=model_save_interval,
        summary_interval=summary_interval,
        histogram_interval=histogram_interval,
        should_log_parameter_statistics=should_log_parameter_statistics,
        should_log_learning_rate=should_log_learning_rate,
        log_batch_size_period=log_batch_size_period,
        moving_average=moving_average,
        batch_size=iterator._batch_size)  # pylint: disable=protected-access