Exemple #1
0
def main(args):
    params = Params.from_file(args.config_path)
    stdout_handler = prepare_global_logging(args.output_dir, False)
    prepare_environment(params)

    reader = DatasetReader.from_params(params["dataset_reader"])
    train_dataset = reader.read(params.pop("train_data_path", None))
    valid_dataset = reader.read(params.pop("validation_data_path", None))
    test_data_path = params.pop("test_data_path", None)
    if test_data_path:
        test_dataset = reader.read(test_data_path)
        vocab = Vocabulary.from_instances(train_dataset + valid_dataset +
                                          test_dataset)
    else:
        test_dataset = None
        vocab = Vocabulary.from_instances(train_dataset + valid_dataset)

    model_params = params.pop("model", None)
    model = Model.from_params(model_params.duplicate(), vocab=vocab)
    vocab.save_to_files(os.path.join(args.output_dir, "vocabulary"))
    # copy config file
    with open(args.config_path, "r", encoding="utf-8") as f_in:
        with open(os.path.join(args.output_dir, "config.json"),
                  "w",
                  encoding="utf-8") as f_out:
            f_out.write(f_in.read())

    iterator = DataIterator.from_params(params.pop("iterator", None))
    iterator.index_with(vocab)

    trainer_params = params.pop("trainer", None)
    trainer = Trainer.from_params(model=model,
                                  serialization_dir=args.output_dir,
                                  iterator=iterator,
                                  train_data=train_dataset,
                                  validation_data=valid_dataset,
                                  params=trainer_params.duplicate())
    trainer.train()

    # evaluate on the test set
    if test_dataset:
        logging.info("Evaluating on the test set")
        import torch  # import here to ensure the republication of the experiment
        model.load_state_dict(
            torch.load(os.path.join(args.output_dir, "best.th")))
        test_metrics = evaluate(model,
                                test_dataset,
                                iterator,
                                cuda_device=trainer_params.pop(
                                    "cuda_device", 0),
                                batch_weight_key=None)
        logging.info(f"Metrics on the test set: {test_metrics}")
        with open(os.path.join(args.output_dir, "test_metrics.txt"),
                  "w",
                  encoding="utf-8") as f_out:
            f_out.write(f"Metrics on the test set: {test_metrics}")

    cleanup_global_logging(stdout_handler)
Exemple #2
0
def dry_run_from_params(params: Params, serialization_dir: str) -> None:
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir) is not None:
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation))

    instances = [
        instance for key, dataset in all_datasets.items()
        for instance in dataset if key in datasets_for_vocab_creation
    ]

    vocab = Vocabulary.from_params(vocab_params, instances)
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)

    stdout_handler = prepare_global_logging(serialization_dir, False)

    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    cleanup_global_logging(stdout_handler)
Exemple #3
0
            params.to_file(serialize_config_file)
    dist.barrier()
    params = ConstParams.from_file(serialize_config_file)

    log_dir = os.path.join(serialization_dir, str(dist.get_rank()))
    os.makedirs(log_dir, exist_ok=True)
    stdout_handler = prepare_global_logging(log_dir,
                                            file_friendly_logging=False)
    prepare_environment(params)

    cuda_device = params.trainer.get('cuda_device', -1)
    check_for_gpu(cuda_device)

    trainer_type = params.trainer.type

    trainer = TrainerBase.from_params(params, serialization_dir, recover)
    params_cnt, params_trainable_cnt = count_parameters(trainer.model)
    print("all params cnt: ", params_cnt)
    print("all trainable params cnt: ", params_trainable_cnt)

    metrics = trainer.train()

    cleanup_global_logging(stdout_handler)

    if is_master_rank:
        archive_model(serialization_dir,
                      files_to_archive=params.files_to_archive)
        dump_metrics(os.path.join(serialization_dir, "metrics.json"),
                     metrics,
                     log=True)
Exemple #4
0
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    create_serialization_dir(params, serialization_dir, recover, force)
    stdout_handler = prepare_global_logging(serialization_dir,
                                            file_friendly_logging)
    prepare_environment(params)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(
            params,  # pylint: disable=no-member
            serialization_dir,
            recover,
            cache_directory,
            cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset

    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")

        trainer = TrainerBase.from_params(params, serialization_dir, recover,
                                          cache_directory, cache_prefix)
        evaluation_dataset = None

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access,
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key="")

        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif evaluation_dataset:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"),
                 metrics,
                 log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
Exemple #5
0
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
) -> Model:
    create_serialization_dir(params, serialization_dir, recover, force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    stdout_handler = prepare_global_logging(serialization_dir,
                                            file_friendly_logging)
    prepare_environment(params)

    cuda_device = params.params.get("trainer").get("cuda_device", -1)
    check_for_gpu(cuda_device)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if True:
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params, serialization_dir, recover,
                                           cache_directory, cache_prefix)
        logger.info("Using MultiTrainer")
        from lm.trainining.MultiTaskTrainer import MultiTaskTrainer
        # MultiTrainer
        trainer = MultiTaskTrainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator,
        )

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset

    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")
        """
        The only main difference
        """
        print("Using MultuTrainer")
        logger.info("Using MultiTrainer")
        trainer = MultiTrainer.from_params(params, serialization_dir, recover,
                                           cache_directory, cache_prefix)
        evaluation_dataset = None

    params.assert_empty("base train command")

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key="",
        )

        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif evaluation_dataset:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"),
                 metrics,
                 log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model