Code Example #1
def train_model(params: Union[Params, Dict[str, Any]], cuda_device: int,
                serialization_dir: str, filtering: str) -> Model:
    """
    This function can be used as an entry point to running models in AllenNLP
    directly from a JSON specification using a :class:`Driver`. Note that if
    you care about reproducibility, you should avoid running code using Pytorch
    or numpy which affect the reproducibility of your experiment before you
    import and use this function, these libraries rely on random seeds which
    can be set in this function via a JSON specification file. Note that this
    function performs training and will also evaluate the trained model on
    development and test sets if provided in the parameter json.

    Parameters
    ----------
    params: Params, required.
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: str, required
        The directory in which to save results and logs.
    """

    SimpleRandom.set_seeds()

    os.makedirs(serialization_dir, exist_ok=True)
    # Fall back for TeeLogger versions that do not accept the third
    # (file-friendly) argument.
    try:
        sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"),
                               sys.stdout, True)  # type: ignore
        sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"),
                               sys.stderr, True)  # type: ignore
    except TypeError:
        sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"),
                               sys.stdout)  # type: ignore
        sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"),
                               sys.stderr)  # type: ignore
    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)
    serialization_params = deepcopy(params).as_dict(quiet=True)

    with open(os.path.join(serialization_dir, "model_params.json"),
              "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    # Now we begin assembling the required parts for the Trainer.
    ds_params = params.pop('dataset_reader', {})
    read_settings = ds_params.pop('read_settings', {})
    dataset_reader = FEVERReader.from_params(ds_params)

    train_data_path = params.pop('train_data_path')
    logger.info("Reading training data from %s", train_data_path)
    train_data = dataset_reader.read(
        train_data_path,
        include_metadata=True,
        replace_with_gold=read_settings.pop('replace_gold', False),
        pad_with_nearest=read_settings.pop('pad_with_nearest', 0))

    validation_data_path = params.pop('validation_data_path', None)
    if validation_data_path is not None:
        logger.info("Reading validation data from %s", validation_data_path)
        validation_data = dataset_reader.read(validation_data_path,
                                              include_metadata=True)
    else:
        validation_data = None

    vocab_params = params.pop("vocabulary", {})
    # This variant loads a pre-built vocabulary from disk, so no dataset is
    # passed to Vocabulary.from_params.
    assert 'directory_path' in vocab_params, \
        "this configuration requires a pre-built vocabulary directory"
    vocab = Vocabulary.from_params(vocab_params, None)
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    trainer_params = params.pop("trainer")
    if cuda_device is not None:
        trainer_params["cuda_device"] = cuda_device
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    trainer.train()

    # Now tar up results
    archive_model(serialization_dir)

    return model
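
A minimal usage sketch for the entry point above, assuming a hypothetical
specification file "fever_train.json"; all paths and argument values below
are illustrative rather than taken from a real experiment.

# Hypothetical driver for train_model (paths and values illustrative).
from allennlp.common import Params

params = Params.from_file("fever_train.json")
model = train_model(params,
                    cuda_device=0,                     # -1 selects the CPU
                    serialization_dir="out/fever_run",
                    filtering="")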
Code Example #2
def train_model(db: FeverDocDB, params: Union[Params, Dict[str, Any]],
                cuda_device: int, serialization_dir: str,
                filtering: str) -> Model:
    """
    This function can be used as an entry point to running models in AllenNLP
    directly from a JSON specification using a :class:`Driver`. Note that if
    you care about reproducibility, you should avoid running code using Pytorch
    or numpy which affect the reproducibility of your experiment before you
    import and use this function, these libraries rely on random seeds which
    can be set in this function via a JSON specification file. Note that this
    function performs training and will also evaluate the trained model on
    development and test sets if provided in the parameter json.

    Parameters
    ----------
    params: Params, required.
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: str, required
        The directory in which to save results and logs.
    """

    SimpleRandom.set_seeds()

    os.makedirs(serialization_dir, exist_ok=True)
    sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"),
                           sys.stdout)  # type: ignore
    sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"),
                           sys.stderr)  # type: ignore
    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)
    serialization_params = deepcopy(params).as_dict(quiet=True)

    with open(os.path.join(serialization_dir, "model_params.json"),
              "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    # Now we begin assembling the required parts for the Trainer.
    ds_params = params.pop('dataset_reader', {})
    dataset_reader = FEVERReader(db,
                                 sentence_level=ds_params.pop(
                                     "sentence_level", False),
                                 wiki_tokenizer=Tokenizer.from_params(
                                     ds_params.pop('wiki_tokenizer', {})),
                                 claim_tokenizer=Tokenizer.from_params(
                                     ds_params.pop('claim_tokenizer', {})),
                                 token_indexers=TokenIndexer.dict_from_params(
                                     ds_params.pop('token_indexers', {})),
                                 filtering=filtering)

    train_data_path = params.pop('train_data_path')
    logger.info("Reading training data from %s", train_data_path)
    train_data = dataset_reader.read(train_data_path)

    all_datasets = [train_data]
    datasets_in_vocab = ["train"]

    validation_data_path = params.pop('validation_data_path', None)
    if validation_data_path is not None:
        logger.info("Reading validation data from %s", validation_data_path)
        validation_data = dataset_reader.read(validation_data_path)
        all_datasets.append(validation_data)
        datasets_in_vocab.append("validation")
    else:
        validation_data = None

    logger.info("Creating a vocabulary using %s data.",
                ", ".join(datasets_in_vocab))
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        Dataset([
            instance for dataset in all_datasets
            for instance in dataset.instances
        ]))
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab, params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))

    train_data.index_instances(vocab)
    if validation_data:
        validation_data.index_instances(vocab)

    trainer_params = params.pop("trainer")
    if cuda_device is not None:
        trainer_params["cuda_device"] = cuda_device
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    trainer.train()

    # Now tar up results
    archive_model(serialization_dir)

    return model
Code Example #3
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    extend_vocab: bool = False,
                    file_friendly_logging: bool = False,
                    batch_weight_key: str = "",
                    embedding_sources_mapping: Dict[str, str] = None,
                    in_fold=None,
                    num_folds=None,
                    ewc_weight=None) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment
    serialization_dir : ``str``
        The directory in which to save results and logs.
    extend_vocab: ``bool``, optional (default=False)
        If ``True``, we use the new instances to extend your vocabulary.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.
    embedding_sources_mapping : ``Dict[str, str]``, optional (default=None)
        Mapping from model paths to the pretrained embedding filepaths
        used during fine-tuning.
    in_fold : ``int``, optional (default=None)
        If given, train only this single cross-validation fold.
    num_folds : ``int``, optional (default=None)
        If given, run k-fold cross-validation over the training data with
        this many folds.
    ewc_weight : ``float``, optional (default=None)
        If given, add an Elastic Weight Consolidation penalty with this
        weight to the training loss.
    """
    prepare_environment(params)
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f"Serialization directory ({serialization_dir}) "
                                 f"already exists and is not empty.")

    os.makedirs(serialization_dir, exist_ok=True)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. ")

    all_datasets = datasets_from_params(params)
    vocab = model.vocab

    if extend_vocab:
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("Extending model vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
        vocab.extend_from_instances(vocabulary_params,
                                    (instance for key, dataset in all_datasets.items()
                                     for instance in dataset
                                     if key in datasets_for_vocab_creation))

        model.extend_embedder_vocab(embedding_sources_mapping)

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    dl_params = params.pop("data_loader")
    # Note: the EWC option below relies on extra_test_loader, which is only
    # built when a test set is provided.
    if test_data is not None:
        rand = random.Random(1234)
        test_data.index_with(vocab)
        shuffled_test = copy(test_data.instances)
        rand.shuffle(shuffled_test)
        extra_test = shuffled_test[:2000]

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": AllennlpDataset(extra_test, vocab)})
        extra_test_loader = DataLoader.from_params(params.pop("test_data_loader", keys))

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": test_data})
        test_loader = DataLoader.from_params(params.pop("test_data_loader", keys))

    master_model = model
    global_metrics = {}
    training_metrics = []
    final_metrics = {}
    master_trainer = trainer_params.as_dict()

    if num_folds is not None:

        rand = random.Random(1234)

        fold_train = []
        fold_test = []

        fold_train_loader = []
        fold_test_loader = []

        shuffled_instances = copy(train_data.instances)
        rand.shuffle(shuffled_instances)

        kfold = KFold(n_splits=num_folds, random_state=None, shuffle=False)
        computed_folds = list(kfold.split(shuffled_instances))

        for fold in range(num_folds):
            train_indexes, test_indexes = computed_folds[fold]
            new_train = [shuffled_instances[i] for i in train_indexes]
            new_test = [shuffled_instances[i] for i in test_indexes]
            fold_train.append(AllennlpDataset(new_train, vocab=vocab))
            fold_test.append(AllennlpDataset(new_test, vocab=vocab))

            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": fold_test[-1]})
            fold_test_loader.append(DataLoader.from_params(params.pop("fold_test_data_loader", keys)))

            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": fold_train[-1]})
            fold_train_loader.append(DataLoader.from_params(params.pop("fold_train_data_loader", keys)))

        for fold in ([in_fold] if in_fold is not None else range(num_folds)):
            fold_model = deepcopy(master_model)
            eval_epoch_callback = EvalEpochCallback(fold, fold_test_loader[fold], test_loader, global_metrics)
            callbacks = [eval_epoch_callback]
            if ewc_weight is not None:
                ewc = EWC(extra_test_loader)

                def ewc_forward(*args, **kwargs) -> Dict[str, torch.Tensor]:
                    ewc_loss = 0
                    if ewc.model.training:
                        ewc_loss = ewc.penalty(ewc.model)
                    ret = ewc.model.old_forward(*args, **kwargs)
                    ret["loss"] += ewc_weight * ewc_loss
                    return ret

                fold_model.old_forward = fold_model.forward
                fold_model.forward = ewc_forward
                callbacks.append(CallLossCallback(ewc))

            trainer = Trainer.from_params(model=fold_model,
                                          serialization_dir=serialization_dir,
                                          data_loader=fold_train_loader[fold],
                                          train_data=train_data,
                                          validation_data=None,
                                          params=Params(deepcopy(master_trainer)),
                                          validation_data_loader=None,
                                          epoch_callbacks=callbacks)

            training_metrics.append(trainer.train())
            del fold_model
            del trainer
            del eval_epoch_callback

            state = glob(serialization_dir+"/*.th")
            for file in state:
                logger.info("deleting state - {}".format(file))
                os.unlink(file)
    else:
        callbacks = []
        if ewc_weight is not None:
            ewc = EWC(extra_test_loader)

            def ewc_forward(*args, **kwargs) -> Dict[str, torch.Tensor]:
                ewc_loss = 0
                if ewc.model.training:
                    ewc_loss = ewc.penalty(ewc.model)
                ret = ewc.model.old_forward(*args, **kwargs)
                ret["loss"] += ewc_weight * ewc_loss
                return ret

            model.old_forward = model.forward
            model.forward = ewc_forward
            callbacks.append(CallLossCallback(ewc))

        keys = deepcopy(dl_params.as_dict())
        keys.update({"dataset": train_data})
        train_data.index_with(vocab)
        train_data_loader = DataLoader.from_params(params.pop("train_loader", keys))

        if validation_data is not None:
            validation_data.index_with(vocab)
            keys = deepcopy(dl_params.as_dict())
            keys.update({"dataset": validation_data})

            validation_data_loader = DataLoader.from_params(params.pop("validation_loader", keys))
        else:
            validation_data_loader = None

        if "finetune" in dir(model):
            model.finetune()
            logger.info("Fine tuning model")
        trainer = Trainer.from_params(model=model,
                                      serialization_dir=serialization_dir,
                                      data_loader=train_data_loader,
                                      train_data=train_data,
                                      validation_data=None,
                                      params=Params(deepcopy(master_trainer)),
                                      validation_data_loader=validation_data_loader,
                                      epoch_callbacks=callbacks)

        training_metrics = trainer.train()
        archive_model(serialization_dir)

    final_metrics["fine_tune"] = global_metrics
    final_metrics["training"] = training_metrics

    metrics_json = json.dumps(final_metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)
    return model
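
A sketch of how the cross-validated variant above might be invoked, assuming
a previously trained archive and a hypothetical fine-tuning config
"fine_tune.json"; load_archive is the standard AllenNLP helper, everything
else is illustrative.

# Hypothetical invocation (paths and values illustrative).
from allennlp.common import Params
from allennlp.models.archival import load_archive

archive = load_archive("out/base_model/model.tar.gz")
params = Params.from_file("fine_tune.json")
fine_tune_model(model=archive.model,
                params=params,
                serialization_dir="out/folds",
                num_folds=5,      # train every fold in sequence
                ewc_weight=0.1)   # optional EWC penalty on the loss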
Code Example #4
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    extend_vocab: bool = False,
                    file_friendly_logging: bool = False,
                    batch_weight_key: str = "") -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    extend_vocab : ``bool``, optional (default=False)
        If ``True``, we use the new instances to extend your vocabulary.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.
    """
    prepare_environment(params)
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(
            f"Serialization directory ({serialization_dir}) "
            f"already exists and is not empty.")

    os.makedirs(serialization_dir, exist_ok=True)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning(
            "You passed parameters for the model in your configuration file, but we "
            "are ignoring them, using instead the model parameters in the archive."
        )

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning(
            "You passed `directory_path` in parameters for the vocabulary in "
            "your configuration file, but it will be ignored. ")

    all_datasets = datasets_from_params(params)
    vocab = model.vocab

    if extend_vocab:
        datasets_for_vocab_creation = set(
            params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(
                    f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("Extending model vocabulary using %s data.",
                    ", ".join(datasets_for_vocab_creation))
        vocab.extend_from_instances(
            vocabulary_params,
            (instance for key, dataset in all_datasets.items()
             for instance in dataset if key in datasets_for_vocab_creation))

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_type = trainer_params.pop("type", "default")
    if trainer_type == "default":
        trainer = Trainer.from_params(model=model,
                                      serialization_dir=serialization_dir,
                                      iterator=iterator,
                                      train_data=train_data,
                                      validation_data=validation_data,
                                      params=trainer_params,
                                      validation_iterator=validation_iterator)
    else:
        raise ConfigurationError(
            "currently fine-tune only works with the default Trainer")

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Fine-tuning interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if test_data and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            batch_weight_key=batch_weight_key)

        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"),
              "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Code Example #5
File: fine_tune.py (project: sensecollective/allennlp)
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    file_friendly_logging: bool = False) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    """
    prepare_environment(params)

    os.makedirs(serialization_dir)

    # TODO(mattg): pull this block out into a separate function (maybe just add this to
    # `prepare_environment`?)
    Tqdm.set_slower_interval(file_friendly_logging)
    sys.stdout = TeeLogger(
        os.path.join(serialization_dir, "stdout.log"),  # type: ignore
        sys.stdout,
        file_friendly_logging)
    sys.stderr = TeeLogger(
        os.path.join(serialization_dir, "stderr.log"),  # type: ignore
        sys.stderr,
        file_friendly_logging)
    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning(
            "You passed parameters for the model in your configuration file, but we "
            "are ignoring them, using instead the model parameters in the archive."
        )

    if params.pop('vocabulary', None):
        logger.warning(
            "You passed parameters for the vocabulary in your configuration file, but "
            "we are ignoring them, using instead the vocabulary from the saved model."
        )

    vocab = model.vocab
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    all_datasets = datasets_from_params(params)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')
    metrics = trainer.train()

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model,
                                test_data,
                                iterator,
                                cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"),
              "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Code Example #6
File: fine_tune.py (project: sxdkxgwan/allennlp)
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    file_friendly_logging: bool = False) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    """
    prepare_environment(params)
    os.makedirs(serialization_dir)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning(
            "You passed parameters for the model in your configuration file, but we "
            "are ignoring them, using instead the model parameters in the archive."
        )

    if params.pop('vocabulary', None):
        logger.warning(
            "You passed parameters for the vocabulary in your configuration file, but "
            "we are ignoring them, using instead the vocabulary from the saved model."
        )

    vocab = model.vocab
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    all_datasets = datasets_from_params(params)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Fine-tuning interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model,
                                test_data,
                                iterator,
                                cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"),
              "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Code Example #7
    def save_best_model(self):
        """Packages the best model as a tar.gz archive."""
        archive_model(self._output_dir)
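
A minimal sketch of the class context this method assumes: nothing beyond an
instance attribute holding the output directory. The class name and
constructor are illustrative.

# Hypothetical surrounding class (name and constructor illustrative).
class TrainingSession:
    def __init__(self, output_dir: str):
        self._output_dir = output_dir

    def save_best_model(self):
        """Packages the best model as a tar.gz archive."""
        archive_model(self._output_dir)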
Code Example #8
def main(file, embeddings, model, emb_wt_key, namespace, output_dir):
    archive = load_archive(model)
    config = archive.config
    os.makedirs(output_dir, exist_ok=True)
    config.to_file(os.path.join(output_dir, CONFIG_NAME))

    model = archive.model
    # first expand the vocabulary
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    instances = dataset_reader.read(file)
    vocab = model.vocab

    # get all the tokens in the new file
    namespace_token_counts: Dict[str, Dict[str, int]] = defaultdict(
        lambda: defaultdict(int))
    for instance in Tqdm.tqdm(instances):
        instance.count_vocab_items(namespace_token_counts)
    old_token_size = vocab.get_vocab_size(namespace)
    print("Before expansion: Number of instances in {} namespace: {}".format(
        namespace, old_token_size))
    if namespace not in namespace_token_counts:
        logger.error(
            "No tokens found for namespace: {} in the new input file".format(
                namespace))
    # identify the new tokens in the new instances
    token_to_add = set()
    token_hits = 0
    for token, count in namespace_token_counts[namespace].items():
        if token not in vocab._token_to_index[namespace]:
            # new token, must add
            token_to_add.add(token)
        else:
            token_hits += 1
    print("Found {} existing tokens and {} new tokens in {}".format(
        token_hits, len(token_to_add), file))

    # add the new tokens to the vocab
    for token in token_to_add:
        vocab.add_token_to_namespace(token=token, namespace=namespace)
    archived_parameters = dict(model.named_parameters())

    # second, expand the embedding matrix
    for name, weights in archived_parameters.items():
        # find the wt matrix for the embeddings
        if name == emb_wt_key:
            if weights.dim() != 2:
                logger.error(
                    "Expected an embedding matrix for the parameter: {}, "
                    "instead found a tensor of shape {}".format(
                        emb_wt_key, weights.shape))
            emb_dim = weights.shape[-1]
            print("Before expansion: Size of emb matrix: {}".format(
                weights.shape))
            # Loading embeddings for old and new tokens since that is cleaner than copying all
            # the embedding loading logic here
            all_embeddings = _read_pretrained_embeddings_file(
                embeddings, emb_dim, vocab, namespace)
            # concatenate the new entries i.e last token_to_add embeddings to the original weights
            if len(token_to_add) > 0:
                weights.data = torch.cat(
                    [weights.data, all_embeddings[-len(token_to_add):, :]])
            print("After expansion: Size of emb matrix: {}".format(
                weights.shape))

    # save the files needed by the model archiver
    model_path = os.path.join(output_dir, "weight.th")
    model_state = model.state_dict()
    torch.save(model_state, model_path)
    vocab.save_to_files(os.path.join(output_dir, "vocabulary"))
    archive_model(output_dir, weights="weight.th")

    # more debug messages
    new_token_size = vocab.get_vocab_size(namespace)
    for name, weights in archived_parameters.items():
        if name == emb_wt_key:
            print("Size of emb matrix: {}".format(weights.shape))
    print("After expansion: Number of instances in {} namespace: {}".format(
        namespace, new_token_size))
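
A sketch of calling the expansion utility above, assuming a hypothetical
new-domain data file and pretrained embedding file; the emb_wt_key shown is a
typical parameter path for a basic AllenNLP text-field embedder, but the
correct key depends on the archived model.

# Hypothetical invocation (all paths and the namespace illustrative).
main(file="data/new_domain.jsonl",
     embeddings="glove.840B.300d.txt.gz",
     model="out/base_model/model.tar.gz",
     emb_wt_key="_text_field_embedder.token_embedder_tokens.weight",
     namespace="tokens",
     output_dir="out/expanded_model")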
Code Example #9
File: fine_tune.py (project: pyknife/allennlp)
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    file_friendly_logging: bool = False) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  Everything
    else is the same.

    Parameters
    ----------
    model : ``Model``
        A model to fine tune.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    """
    prepare_environment(params)
    os.makedirs(serialization_dir)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. "
                       "Vocabulary from the saved model will be extended with current data.")

    all_datasets = datasets_from_params(params)

    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Extending model vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
    vocab = model.vocab
    vocab.extend_from_instances(vocabulary_params,
                                (instance for key, dataset in all_datasets.items()
                                 for instance in dataset
                                 if key in datasets_for_vocab_creation))
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Fine-tuning interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model, test_data, iterator, cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Code Example #10
def train_model(data_path,
                params,
                serialization_dir,
                cuda_device=-1,
                use_validation_data=True):

    os.makedirs(serialization_dir, exist_ok=True)

    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)

    serialization_params = deepcopy(params).as_dict(quiet=True)

    with open(os.path.join(serialization_dir, "model_params.json"),
              "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    ds_params = params.pop('dataset_reader', {})
    data_params = ds_params.pop('data', {})
    dataset_reader = CMVReader.from_params(ds_params)

    logger.info('Reading training data...')

    train_data = dataset_reader.read('train', **data_params)

    all_datasets = [train_data]
    datasets_in_vocab = ['train']

    if use_validation_data:
        logger.info('Reading validation data...')
        validation_data = dataset_reader.read('val', **data_params)
        all_datasets.append(validation_data)
        datasets_in_vocab.append('val')
    else:
        validation_data = None

    logger.info('Creating a vocabulary using %s data.',
                ', '.join(datasets_in_vocab))
    vocab_params = params.pop('vocabulary', {})
    dataset = None
    if 'directory_path' not in vocab_params:
        dataset = Batch([
            instance for dataset in all_datasets
            for instance in dataset.instances
        ])

    vocab = Vocabulary.from_params(vocab_params, dataset)
    vocab.save_to_files(os.path.join(serialization_dir, 'vocabulary'))

    model = Model.from_params(params=params.pop('model'), vocab=vocab)
    iterator = DataIterator.from_params(params.pop('iterator'))

    train_data.index_instances(vocab)
    if validation_data:
        validation_data.index_instances(vocab)

    trainer_params = params.pop("trainer")
    if cuda_device is not None:
        trainer_params["cuda_device"] = cuda_device
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    trainer.train()

    # Now tar up results
    archive_model(serialization_dir)

    return model
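
A hypothetical call to the CMV training entry point above; the config path
and directories are illustrative, and the keyword names simply mirror the
signature.

# Hypothetical invocation (paths illustrative).
from allennlp.common import Params

params = Params.from_file("cmv_experiment.json")
model = train_model(data_path="data/cmv/",
                    params=params,
                    serialization_dir="out/cmv_run",
                    cuda_device=0,
                    use_validation_data=True)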
Code Example #11
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Model:
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation))

    if recover and os.path.exists(os.path.join(serialization_dir,
                                               "vocabulary")):
        vocab = Vocabulary.from_files(
            os.path.join(serialization_dir, "vocabulary"))
    else:
        vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset if key in datasets_for_vocab_creation))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer = Trainer.by_name(trainer_choice).from_params(
        model=model,
        serialization_dir=serialization_dir,
        iterator=iterator,
        train_data=train_data,
        validation_data=validation_data,
        params=trainer_params,
        validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"),
                 metrics,
                 log=True)

    return best_model
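
A usage sketch for this variant, assuming a hypothetical Jsonnet config;
passing recover=True reuses an existing serialization directory, as handled
by create_serialization_dir above.

# Hypothetical invocation (paths illustrative).
from allennlp.common import Params

params = Params.from_file("experiments/tagger.jsonnet")
best_model = train_model(params,
                         serialization_dir="out/tagger_run",
                         file_friendly_logging=True,
                         recover=True)  # resume an interrupted run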
Code Example #12
if 'vocabulary' in params:
    vocab_params = params['vocabulary']
    # Note: list.extend() returns None, so the instance lists are
    # concatenated with "+" rather than extended in place.
    vocab = Vocabulary.from_params(
        params=vocab_params, instances=train_instances + valid_instances)
else:
    vocab = Vocabulary.from_instances(train_instances + valid_instances)
dep_model = Model.from_params(vocab=vocab, params=params['model'])
print(dep_model)
iterator = DataIterator.from_params(params.pop("iterator"))
iterator.index_with(vocab)
train_dataset = Batch(train_instances)
train_dataset.index_instances(vocab)

valid_dataset = Batch(valid_instances)
valid_dataset.index_instances(vocab)

# dep_trainer = Trainer(dep_model, dep_file_path, dep_iterator, dep_train_data, dep_valid_data)
trainer_params = params.pop("trainer")

trainer = Trainer.from_params(model=dep_model,
                              serialization_dir='',
                              iterator=iterator,
                              train_data=train_dataset,
                              validation_data=valid_dataset,
                              params=trainer_params,
                              validation_iterator=iterator)

metrics = trainer.train()

archive_model('data/output')
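
One pitfall worth flagging in the snippet above: the trainer serializes into
an empty-string directory while archive_model is pointed at 'data/output', so
the archive may not contain the weights just trained. A sketch of a
consistent setup, with the directory name purely illustrative:

# Keep the trainer and the archiver pointed at the same directory.
serialization_dir = 'data/output'  # illustrative
trainer = Trainer.from_params(model=dep_model,
                              serialization_dir=serialization_dir,
                              iterator=iterator,
                              train_data=train_dataset,
                              validation_data=valid_dataset,
                              params=trainer_params,
                              validation_iterator=iterator)
metrics = trainer.train()
archive_model(serialization_dir)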
Code Example #13
def train_model(data_path,
                params,
                serialization_dir,
                cuda_device=-1,
                use_validation_data=True):

    os.makedirs(serialization_dir, exist_ok=True)

    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)

    serialization_params = deepcopy(params).as_dict(quiet=True)

    with open(os.path.join(serialization_dir, "model_params.json"),
              "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    ds_params = params.pop('dataset_reader', {})
    data_params = ds_params.pop('data', {})
    dataset_reader = CMVReader.from_params(ds_params)

    logger.info('Reading training data...')

    train_data = dataset_reader.read('train', **data_params)

    all_datasets = [train_data]
    datasets_in_vocab = ['train']

    if use_validation_data:
        logger.info('Reading validation data...')
        data_params['weakpoints_only'] = False
        validation_data = dataset_reader.read('val', **data_params)
        all_datasets.append(validation_data)
        datasets_in_vocab.append('val')
    else:
        validation_data = None

    logger.info('Creating a vocabulary using %s data.',
                ', '.join(datasets_in_vocab))
    vocab_params = params.pop('vocabulary', {})
    dataset = None
    if 'directory_path' not in vocab_params:
        dataset = Batch([
            instance for dataset in all_datasets
            for instance in dataset.instances
        ])

    vocab = Vocabulary.from_params(vocab_params, dataset)
    vocab.save_to_files(os.path.join(serialization_dir, 'vocabulary'))

    iterator = DataIterator.from_params(params.pop('iterator'))

    cmv_predictor_params = params.pop('cmv_predictor')
    predictor_pretrained_params = cmv_predictor_params.pop(
        'predictor_pretrained_params', None)
    cmv_predictor = Model.from_params(params=cmv_predictor_params, vocab=vocab)
    model_state = torch.load(predictor_pretrained_params['filename'],
                             map_location=util.device_mapping(
                                 predictor_pretrained_params['cuda_device']))
    cmv_predictor.load_state_dict(model_state)

    if params.pop('shared_embedder', False):
        print('using shared embedder')
        document_embedder = HierarchicalDocumentEmbedder(
            vocab, cmv_predictor._response_embedder,
            cmv_predictor._response_word_attention,
            cmv_predictor._response_encoder)
    else:
        document_embedder = Model.from_params(
            params=params.pop('document_embedder'), vocab=vocab)
    cmv_extractor = Model.from_params(params=params.pop('cmv_extractor'))
    cmv_discriminator = Model.from_params(
        params=params.pop('cmv_discriminator'))

    cmv_actor_critic_params = params.pop('cmv_actor_critic', None)
    cmv_actor_critic = None
    if cmv_actor_critic_params is not None:
        cmv_actor_critic = Model.from_params(params=cmv_actor_critic_params)

    train_data.index_instances(vocab)
    if validation_data:
        validation_data.index_instances(vocab)

    trainer_params = params.pop("trainer", None)
    if trainer_params is not None:
        if cuda_device is not None:
            trainer_params["cuda_device"] = cuda_device
        trainer = Trainer.from_params(cmv_predictor, serialization_dir,
                                      iterator, train_data, validation_data,
                                      trainer_params)

    compress_response = params.pop('compress_response', False)

    generator_iterator = DataIterator.from_params(
        params.pop('generator_iterator'))
    cmv_actor_critic_trainer_params = params.pop('actor_critic_trainer', None)
    if cmv_actor_critic_trainer_params is not None:
        cmv_actor_critic_pretrainer = CMVActorCriticTrainer(
            document_embedder, cmv_predictor, cmv_extractor, cmv_actor_critic,
            cmv_actor_critic_trainer_params.pop('train_predictor', False),
            cmv_actor_critic_trainer_params.pop('train_fake_predictor', False),
            compress_response)

        cmv_actor_critic_serialization_dir = os.path.join(
            serialization_dir, 'actor_critic')

        cmv_actor_critic_trainer = Trainer.from_params(
            cmv_actor_critic_pretrainer, cmv_actor_critic_serialization_dir,
            generator_iterator, train_data, validation_data,
            cmv_actor_critic_trainer_params)
    else:
        ac_pretrained_params = params.pop('pretrained_actor_critic', None)
        if ac_pretrained_params is not None:
            cmv_actor_critic_pretrainer = CMVActorCriticTrainer(
                document_embedder, cmv_predictor, cmv_extractor, None)
            model_state = torch.load(ac_pretrained_params['filename'],
                                     map_location=util.device_mapping(
                                         ac_pretrained_params['cuda_device']))
            cmv_actor_critic_pretrainer.load_state_dict(model_state)
            document_embedder = cmv_actor_critic_pretrainer._document_embedder
            cmv_predictor = cmv_actor_critic_pretrainer._cmv_predictor
            cmv_extractor = cmv_actor_critic_pretrainer._cmv_extractor

    generator = CMVGeneratorTrainer(
        document_embedder,
        cmv_predictor,
        cmv_extractor,
        cmv_discriminator,
        cmv_actor_critic,
        update_extractor=True,
        update_gold_extractor=False,
        compress_response=compress_response)

    discriminator = CMVDiscriminatorTrainer(document_embedder, cmv_predictor,
                                            cmv_extractor, cmv_discriminator,
                                            compress_response)

    generator_serialization_dir = os.path.join(serialization_dir, 'generator')
    os.makedirs(generator_serialization_dir, exist_ok=True)
    generator_trainer = GANTrainer.from_params(generator,
                                               generator_serialization_dir,
                                               generator_iterator, train_data,
                                               validation_data,
                                               params.pop('generator_trainer'))

    discriminator_serialization_dir = os.path.join(serialization_dir,
                                                   'discriminator')
    os.makedirs(discriminator_serialization_dir, exist_ok=True)
    discriminator_trainer = GANTrainer.from_params(
        discriminator, discriminator_serialization_dir, iterator, train_data,
        validation_data, params.pop('discriminator_trainer'))

    # First, train the predictor for N steps.
    if trainer_params is not None:
        trainer._num_epochs = 5  # hacky
        trainer.train()

    # TODO: then train the actor-critic for M steps.
    # If we are using separate predictors, use the full CMV data to train the
    # extractor by maximizing the persuasiveness prediction.
    if cmv_actor_critic_trainer_params is not None:
        cmv_actor_critic_trainer.train()

    # Then alternate training between the discriminator and the generator
    # for E epochs.
    generator_trainer._num_epochs = 1  # hacky
    discriminator_trainer._num_epochs = 1  # hacky
    gan_epochs = params.pop("gan_epochs")
    for i in range(gan_epochs):
        discriminator_trainer.train()
        generator_trainer.train()
        discriminator_trainer._num_epochs += 1  # very hacky
        generator_trainer._num_epochs += 1  # also hacky

    # Now tar up results
    archive_model(serialization_dir)
    archive_model(generator_serialization_dir)
    archive_model(discriminator_serialization_dir)

    return generator
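
The alternating loop above works by bumping each trainer's private
_num_epochs counter, since each train() call runs until that target epoch
count is reached. Against a hypothetical trainer API exposing a
train_one_epoch() method (which the trainers above do not actually provide),
the intended schedule reads simply:

# Purely illustrative: train_one_epoch() is a hypothetical method.
for _ in range(gan_epochs):
    discriminator_trainer.train_one_epoch()
    generator_trainer.train_one_epoch()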