Example #1
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load parameter file
    with open(args.config_file) as config_file:
        config = Params(replace_none(json.loads(config_file.read())))

    model = Model.load(config,
                       weights_file=args.weights_file,
                       cuda_device=args.cuda_device)
    model.eval()

    vocab = model._vocab  # pylint: disable=protected-access

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    dataset = dataset_reader.read(evaluation_data_path)
    dataset.index_instances(vocab)

    iterator = DataIterator.from_params(config.pop("iterator"))

    metrics = evaluate(model, dataset, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
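A minimal sketch of how this entry point might be wired to a command line, assuming the argument names used above (config_file, weights_file, evaluation_data_file, cuda_device); the real AllenNLP CLI sets these up slightly differently.

import argparse

parser = argparse.ArgumentParser(description="Evaluate a trained AllenNLP model")
parser.add_argument("config_file", help="path to the experiment configuration JSON")
parser.add_argument("weights_file", help="path to the trained weights (.th file)")
parser.add_argument("evaluation_data_file", help="path to the evaluation data")
parser.add_argument("--cuda_device", type=int, default=-1, help="GPU id, or -1 for CPU")

if __name__ == "__main__":
    print(evaluate_from_args(parser.parse_args()))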
Example #2
def load_model(archive_file: str, cuda_device: int = -1) -> Model:
    """
    Instantiates a model from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    """
    # Extract archive to temp dir
    tempdir = tempfile.mkdtemp()
    logger.info("extracting archive file %s to temp dir %s", archive_file,
                tempdir)
    with tarfile.open(archive_file, 'r:gz') as archive:
        archive.extractall(tempdir)

    # Load config
    config = Params.from_file(os.path.join(tempdir, _CONFIG_NAME))

    # Instantiate model
    model = Model.load(config,
                       weights_file=os.path.join(tempdir, _WEIGHTS_NAME),
                       serialization_prefix=tempdir,
                       cuda_device=cuda_device)

    # Clean up temp dir
    shutil.rmtree(tempdir)

    return model
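A short usage sketch, assuming an archive produced by allennlp train; the path is illustrative.

model = load_model("checkpoints/model.tar.gz", cuda_device=-1)  # hypothetical path
model.eval()  # switch dropout/batch-norm layers to inference mode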
Example #3
def load_archive(archive_file: str, cuda_device: int = -1) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    """
    # redirect to the cache, if necessary
    archive_file = cached_path(archive_file)

    # Extract archive to temp dir
    tempdir = tempfile.mkdtemp()
    logger.info("extracting archive file %s to temp dir %s", archive_file, tempdir)
    with tarfile.open(archive_file, 'r:gz') as archive:
        archive.extractall(tempdir)

    # Load config
    config = Params.from_file(os.path.join(tempdir, _CONFIG_NAME))

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=os.path.join(tempdir, _WEIGHTS_NAME),
                       serialization_dir=tempdir,
                       cuda_device=cuda_device)

    # Clean up temp dir
    shutil.rmtree(tempdir)

    return Archive(model=model, config=config)
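Params objects are consumed as keys are pop()-ed, which is why this variant duplicates the config before handing it to Model.load. A tiny illustration (the key names are made up):

from allennlp.common import Params

params = Params({"model": {"type": "demo"}, "trainer": {"num_epochs": 3}})
trainer_params = params.pop("trainer")  # "trainer" is now gone from params
# A second consumer would no longer find "trainer"; that is why load_archive
# hands Model.load a config.duplicate() and keeps the original config for the
# returned Archive.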
Example #4
def _load_model(config, weights_path, serialization_dir, cuda_device):
    return Model.load(
        config,
        weights_file=weights_path,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
    )
Example #5
def run(model_path, test_path, config_path, output_path, batch_size):
    params_path = config_path or os.path.join(model_path, "config.json")

    params = Params.from_file(params_path)
    is_subwords = "tokenizer" in params["reader"] and params["reader"][
        "tokenizer"]["type"] == "subword"
    reader = DatasetReader.from_params(params.pop("reader"))

    device = 0 if torch.cuda.is_available() else -1
    model = Model.load(params, model_path, cuda_device=device)
    model.eval()  # switch all submodules to inference mode

    predictor = Seq2SeqPredictor(model, reader)
    with open(output_path, "wt", encoding="utf-8") as w:
        for batch_number, batch in enumerate(get_batches(
                test_path, batch_size)):
            outputs = predictor.predict_batch_json(batch)
            assert len(outputs) == len(batch)
            for output in outputs:
                decoded_words = output["predicted_tokens"]
                if not decoded_words:
                    decoded_words = ["заявил"]
                if not is_subwords:
                    hyp = " ".join(decoded_words)
                else:
                    hyp = "".join(decoded_words).replace("▁", " ").replace(
                        "\n", "").strip()
                if len(hyp) <= 3:
                    hyp = "заявил"
                w.write(hyp + "\n")
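get_batches is assumed here but not shown (Example #25 below calls it with a different signature). A minimal sketch under the assumption that the test file is JSON lines with one example per line, ready for predictor.predict_batch_json:

import json

def get_batches(test_path, batch_size):
    # Assumed helper: read a JSON-lines file and yield lists of at most
    # batch_size dicts for the predictor.
    batch = []
    with open(test_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            batch.append(json.loads(line))
            if len(batch) == batch_size:
                yield batch
                batch = []
    if batch:
        yield batch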
Example #6
def load_archive(
    archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None
) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    # Parameters

    archive_file : `str`
        The archive file to load the model from.
    weights_file : `str`, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    cuda_device : `int`, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides : `str`, optional (default = "")
        JSON overrides to apply to the unarchived `Params` object.
    """
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(f"loading archive file {archive_file} from cache at {resolved_archive_file}")

    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info(f"extracting archive file {resolved_archive_file} to temp dir {tempdir}")
        with tarfile.open(resolved_archive_file, "r:gz") as archive:
            archive.extractall(tempdir)
        # Postpone cleanup until exit in case the unarchived contents are needed outside
        # this function.
        atexit.register(_cleanup_archive_dir, tempdir)

        serialization_dir = tempdir

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides)

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)
        # Fallback for serialization directories.
        if not os.path.exists(weights_path):
            weights_path = os.path.join(serialization_dir, _DEFAULT_WEIGHTS)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(
        config.duplicate(),
        weights_file=weights_path,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
    )

    return Archive(model=model, config=config)
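A usage sketch for this variant, assuming a local archive; the overrides string patches the stored config at load time, and the dotted key shown is purely illustrative.

archive = load_archive(
    "model.tar.gz",                      # hypothetical local archive
    cuda_device=0,
    overrides='{"model.dropout": 0.0}',  # illustrative override key
)
model, config = archive.model, archive.config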
Example #7
def load_model(weights_file, serialization_dir, cuda):
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME))
    config.loading_from_archive = True
    model = Model.load(config.duplicate(),
                       weights_file=weights_file,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda)

    return model
Example #8
def load_model(serialization_dir):
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME))
    config.loading_from_archive = True
    cuda_device = -1  # force loading onto the CPU
    model = Model.load(config.duplicate(),
                       weights_file=os.path.join(serialization_dir, _DEFAULT_WEIGHTS),
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    return model
Example #9
def load_archive(archive_file: str, cuda_device: int = -1, overrides: str = "") -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides: ``str``, optional (default = "")
        HOCON overrides to apply to the unarchived ``Params`` object.
    """
    # redirect to the cache, if necessary
    archive_file = cached_path(archive_file)

    # Extract archive to temp dir
    tempdir = tempfile.mkdtemp()
    logger.info("extracting archive file %s to temp dir %s", archive_file, tempdir)
    with tarfile.open(archive_file, 'r:gz') as archive:
        archive.extractall(tempdir)

    # Check for supplemental files in archive
    fta_filename = os.path.join(tempdir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacement_hocon = pyhocon.ConfigTree(root=True)
        for key, _ in files_to_archive.items():
            replacement_filename = os.path.join(tempdir, f"fta/{key}")
            replacement_hocon.put(key, replacement_filename)

        overrides_hocon = pyhocon.ConfigFactory.parse_string(overrides)
        combined_hocon = replacement_hocon.with_fallback(overrides_hocon)
        overrides = json.dumps(combined_hocon)

    # Load config
    config = Params.from_file(os.path.join(tempdir, _CONFIG_NAME), overrides)
    config.loading_from_archive = True

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=os.path.join(tempdir, _WEIGHTS_NAME),
                       serialization_dir=tempdir,
                       cuda_device=cuda_device)

    # Clean up temp dir
    shutil.rmtree(tempdir)

    return Archive(model=model, config=config)
Example #10
    def predict(self, words):
        global model
        if not model:
            model = Model.load(
                config=self.config,
                serialization_dir=self.flags['difference_model_path'])
            self.default_predictor = Predictor.from_path(
                self.flags['difference_model_path'])
            self.predictor = DifferenceTaggerPredictor(
                self.default_predictor._model,
                dataset_reader=self.default_predictor._dataset_reader)
        annotation = self.predictor.predict_json({"sentence": words})
        subj_annotation = [(t, w) for t, w in annotation if "SUBJ" in t]
        self.info(subj_annotation)
        return annotation
Example #11
def load_archive_from_folder(archive_file: str,
                             cuda_device: int = -1,
                             overrides: str = "",
                             weights_file: str = None) -> Archive:
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    logger.info(f"loading model from direactory {archive_file}")

    serialization_dir = resolved_archive_file

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacements_dict: Dict[str, Any] = {}
        for key, filename  in files_to_archive.items():
            if not filename.startswith("/"):
                filename = os.path.join(serialization_dir, f"fta/{key}")
            replacements_dict[key] = filename

        overrides_dict = parse_overrides(overrides)
        combined_dict = with_fallback(preferred=unflatten(replacements_dict), fallback=overrides_dict)
        overrides = json.dumps(combined_dict)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=weights_path,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    return Archive(model=model, config=config)
Example #12
def main(weights_file, device):
    serialization_dir = '/'.join(weights_file.split('/')[:-1])
    config = Params.from_file(join(serialization_dir, 'config.json'))
    reader = DatasetReader.from_params(config['validation_dataset_reader'])
    model = Model.load(config=config,
                       serialization_dir=serialization_dir,
                       weights_file=weights_file,
                       cuda_device=device).eval()

    predictions = get_predictions(model, serialization_dir, reader, device)

    # Write out predictions to file
    predictions_file = weights_file.split('/')[-1].split(
        '.')[0] + '_dev_pred.json'
    predictions_file = join(serialization_dir, predictions_file)

    with open(predictions_file, "w") as writer:
        writer.write(json.dumps(predictions, indent=4) + "\n")
Example #13
def load_model_from_file(
    serialization_dir: str,
    weights_file: str = _DEFAULT_WEIGHTS,
    include_package: str = "models",
    cuda_device: int = -1,
    overrides: str = None,
):
    logging.disable(logging.INFO)
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME),
                              overrides)

    import_module_and_submodules(include_package)

    model = Model.load(
        config,
        weights_file=os.path.join(serialization_dir, weights_file),
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
    )
    return model
Example #14
def get_model_runner(model_path, reader, model_config_path=None):
    config_path = model_config_path or os.path.join(model_path, "config.json")
    params = Params.from_file(config_path)
    device = 0 if torch.cuda.is_available() else -1
    model = Model.load(params, model_path, cuda_device=device)
    model.eval()
    predictor = Seq2SeqPredictor(model, reader)
    print(model)
    print("Trainable params count: ", sum(p.numel() for p in model.parameters() if p.requires_grad))

    def run_model(batch):
        outputs = predictor.predict_batch_json(batch)
        targets = [b.get('target') for b in batch]
        hyps = []
        for output in outputs:
            decoded_words = output["predicted_tokens"]
            hyp = " ".join(decoded_words).strip()
            hyps.append(hyp)
        return targets, hyps
    return run_model
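A short sketch of using the returned closure, assuming a reader built from the model's config (key names and paths follow Example #5 and are assumptions) and the get_batches helper sketched after that example:

from allennlp.common import Params
from allennlp.data import DatasetReader

params = Params.from_file("checkpoints/summarizer/config.json")  # hypothetical path
reader = DatasetReader.from_params(params.pop("reader"))          # "reader" key as in Example #5
run_model = get_model_runner("checkpoints/summarizer", reader)
for batch in get_batches("data/test.jsonl", batch_size=32):       # get_batches as sketched above
    targets, hyps = run_model(batch)
    for target, hyp in zip(targets, hyps):
        print("REF:", target)
        print("HYP:", hyp)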
Example #15
def load_testing_models_in_eval_mode_from_serialization_dir(
        s_dir,
        training_config_filename,
        name_of_attn_layer_to_replace="_sentence_attention",
        cuda_device=-1):
    loaded_params = Params.from_file(training_config_filename, "")
    model = Model.load(loaded_params, s_dir, cuda_device=cuda_device)

    original_attn_layer = getattr(model, name_of_attn_layer_to_replace)
    talkative_attn_layer = \
        attn_tests_lib.TalkativeSimpleHanAttention(original_attn_layer, "temp_weights", "temp_vects/", 1)
    setattr(model, name_of_attn_layer_to_replace, talkative_attn_layer)
    just_the_classifier = \
        attn_tests_lib.ClassifierFromAttnAndInputVects(model._output_logit)
    if cuda_device >= 0:
        model = model.cuda(device=cuda_device)
        just_the_classifier = just_the_classifier.cuda(device=cuda_device)
    model = model.eval()
    just_the_classifier = just_the_classifier.eval()

    return model, just_the_classifier
Example #16
def train(model_path,
          train_path,
          val_path,
          seed,
          vocabulary_path=None,
          config_path=None,
          pretrained_path=None):
    assert os.path.isdir(model_path), "Model directory does not exist"
    set_seed(seed)

    config_path = config_path or os.path.join(model_path, "config.json")
    assert os.path.isfile(config_path), "Config file does not exist"
    params = Params.from_file(config_path)

    vocabulary_path = vocabulary_path or os.path.join(model_path, "vocabulary")
    assert os.path.exists(
        vocabulary_path
    ), "Vocabulary is not ready, do not forget to run preprocess.py first"
    vocabulary = Vocabulary.from_files(vocabulary_path)

    reader_params = params.duplicate().pop("reader", default=Params({}))
    reader = DatasetReader.from_params(reader_params)
    train_dataset = reader.read(train_path)
    val_dataset = reader.read(val_path) if val_path else None

    if not pretrained_path:
        model_params = params.pop("model")
        model = Model.from_params(model_params, vocab=vocabulary)
    else:
        model = Model.load(params, pretrained_path)
    print(model)
    print("Trainable params count: ",
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    iterator = DataIterator.from_params(params.pop('iterator'))
    iterator.index_with(vocabulary)
    trainer = Trainer.from_params(model, model_path, iterator, train_dataset,
                                  val_dataset, params.pop('trainer'))
    trainer.train()
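set_seed is assumed by this training script but not shown; a common minimal version seeds Python, NumPy and PyTorch:

import random

import numpy as np
import torch

def set_seed(seed: int) -> None:
    # Seed every RNG the run touches so training is reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)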
Example #17
    def _load(cls,
              params: Params,
              vocab: Vocabulary,
              serialization_dir: str,
              weights_file: str,
              cuda_device: int = -1,
              **kwargs):
        if params.get('train', None):
            params.pop('train')

        inner_model = Model.load(params,
                                 serialization_dir,
                                 weights_file=weights_file,
                                 cuda_device=cuda_device)
        params.pop('model')
        if params.get('vocabulary', None):
            params.pop('vocabulary')
        model = NeuralNetLanguageModel.from_params(params,
                                                   model=inner_model,
                                                   vocab=vocab,
                                                   **kwargs)
        return model
Example #18
def train_model(
    params: Params,
    serialization_dir: Union[str, PathLike],
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    dry_run: bool = False,
    file_friendly_logging: bool = False,
) -> Optional[Model]:
    """
    Trains the model specified in the given [`Params`](../common/params.md#params) object, using the data
    and training parameters also specified in that object, and saves the results in `serialization_dir`.

    # Parameters

    params : `Params`
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : `str`
        The directory in which to save results and logs.
    recover : `bool`, optional (default=`False`)
        If `True`, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see `Model.from_archive`.
    force : `bool`, optional (default=`False`)
        If `True`, we will overwrite the serialization directory if it already exists.
    node_rank : `int`, optional
        Rank of the current node in distributed training
    include_package : `List[str]`, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    dry_run : `bool`, optional (default=`False`)
        Do not train a model, but create a vocabulary, show dataset statistics and other training
        information.
    file_friendly_logging : `bool`, optional (default=`False`)
        If `True`, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.

    # Returns

    best_model : `Optional[Model]`
        The model with the best epoch weights or `None` if in dry run.
    """
    common_logging.FILE_FRIENDLY_LOGGING = file_friendly_logging

    training_util.create_serialization_dir(params, serialization_dir, recover,
                                           force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            include_package=include_package,
            dry_run=dry_run,
            file_friendly_logging=file_friendly_logging,
        )

        if not dry_run:
            archive_model(serialization_dir)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        # Creating `Vocabulary` objects from workers could be problematic since
        # the data loaders in each worker will yield only `rank` specific
        # instances. Hence it is safe to construct the vocabulary and write it
        # to disk before initializing the distributed context. The workers will
        # load the vocabulary from the path specified.
        vocab_dir = os.path.join(serialization_dir, "vocabulary")
        if recover:
            vocab = Vocabulary.from_files(vocab_dir)
        else:
            vocab = training_util.make_vocab_from_params(
                params.duplicate(),
                serialization_dir,
                print_statistics=dry_run)
        params["vocabulary"] = {
            "type": "from_files",
            "directory": vocab_dir,
            "padding_token": vocab._padding_token,
            "oov_token": vocab._oov_token,
        }

        logging.info(
            "Switching to distributed training mode since multiple GPUs are configured | "
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}")

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                include_package,
                dry_run,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
                file_friendly_logging,
            ),
            nprocs=num_procs,
        )
        if dry_run:
            return None
        else:
            archive_model(serialization_dir)
            model = Model.load(params, serialization_dir)
            return model
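A minimal sketch of calling this entry point directly (the paths are illustrative); the allennlp train command normally does this wiring for you.

from allennlp.common import Params

params = Params.from_file("experiments/my_model.jsonnet")  # hypothetical config path
best_model = train_model(
    params,
    serialization_dir="checkpoints/my_model",              # hypothetical output directory
    force=True,
)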
Example #19
def load_archive(
    archive_file: str,
    cuda_device: int = -1,
    opt_level: str = None,
    overrides: str = "",
    weights_file: str = None,
) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    # Parameters

    archive_file : `str`
        The archive file to load the model from.
    cuda_device : `int`, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    opt_level : `str`, optional, (default = `None`)
        Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
        precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
        See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
        more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
        Amp is not used and this argument is ignored.
    overrides : `str`, optional (default = "")
        JSON overrides to apply to the unarchived `Params` object.
    weights_file : `str`, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    """
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(
            f"loading archive file {archive_file} from cache at {resolved_archive_file}"
        )

    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info(
            f"extracting archive file {resolved_archive_file} to temp dir {tempdir}"
        )
        with tarfile.open(resolved_archive_file, "r:gz") as archive:
            archive.extractall(tempdir)
        # Postpone cleanup until exit in case the unarchived contents are needed outside
        # this function.
        atexit.register(_cleanup_archive_dir, tempdir)

        serialization_dir = tempdir

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME),
                              overrides)

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)
        # Fallback for serialization directories.
        if not os.path.exists(weights_path):
            weights_path = os.path.join(serialization_dir, _DEFAULT_WEIGHTS)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(
        config.duplicate(),
        weights_file=weights_path,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        opt_level=opt_level,
    )

    return Archive(model=model, config=config)
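_cleanup_archive_dir is registered with atexit in several of these snippets but never shown; a plausible minimal version (the logging is an assumption):

import logging
import os
import shutil

logger = logging.getLogger(__name__)

def _cleanup_archive_dir(path: str) -> None:
    # Remove the temporary directory an archive was extracted into.
    if os.path.exists(path):
        logger.info("removing temporary unarchived model dir at %s", path)
        shutil.rmtree(path, ignore_errors=True)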
Example #20
def main(serialization_dir, evaluation_data_file, split, cuda_device, weights_file, overrides):

    archive_file = os.path.join(serialization_dir, "model.tar.gz")

    logging_dir = os.path.join(serialization_dir, "logging")

    if os.path.isfile(archive_file):
        weights_file = None
        archive = load_archive(archive_file, cuda_device, overrides, weights_file)
        config = archive.config
        prepare_environment(config)
        prepare_global_logging(logging_dir, file_friendly_logging=False, file_name=split)
        model = archive.model
    else:
        # Load config
        config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides)
        prepare_environment(config)
        prepare_global_logging(logging_dir, file_friendly_logging=False, file_name=split)

        if weights_file:
            weights_path = os.path.join(serialization_dir, weights_file)
        else:
            weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)

        logger.info("Using weights_file located at : %s", weights_path)
        # Instantiate model. Use a duplicate of the config, as it will get consumed.
        model = Model.load(config.duplicate(),
                           weights_file=weights_path,
                           serialization_dir=serialization_dir,
                           cuda_device=cuda_device)

    # Eval mode ON
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))

    if evaluation_data_file is None:
        logger.info("--evaluation_data_file not provided. So using --split=%s to read data", split)
        data_path_key = split + '_data_path'
        evaluation_data_path = config.pop(data_path_key)
    else:
        evaluation_data_path = evaluation_data_file

    logger.info("Reading evaluation data from %s", evaluation_data_path)

    instances = dataset_reader.read(evaluation_data_path)
    logger.info("No. of instances = %d", len(instances))

    iterator = BasicIterator(batch_size=128)
    iterator.index_with(model.vocab)

    metrics, model_predictions = get_model_predictions(model, instances, iterator, cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    write_predictions(serialization_dir=serialization_dir, instances=instances,
                 model_predictions=model_predictions, split=split)

    analyze_gold_data(serialization_dir=serialization_dir, instances=instances, split=split)

    analyze_model_predictions(serialization_dir=serialization_dir, instances=instances,
                              model_predictions=model_predictions, split=split)

    analyze_bio_violations(instances=instances, model_predictions=model_predictions)
Example #21
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
    node_rank: int = 0,
    include_package: List[str] = None,
) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    node_rank : ``int``, optional
        Rank of the current node in distributed training
    include_package : ``List[str]``, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    create_serialization_dir(params, serialization_dir, recover, force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            file_friendly_logging=file_friendly_logging,
            recover=recover,
            cache_directory=cache_directory,
            cache_prefix=cache_prefix,
            include_package=include_package,
        )
        archive_model(serialization_dir,
                      files_to_archive=params.files_to_archive)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        os.environ["MASTER_ADDR"] = master_addr
        os.environ["MASTER_PORT"] = str(master_port)
        os.environ["WORLD_SIZE"] = str(world_size)

        logging.info(
            f"Switching to distributed training mode since multiple GPUs are configured"
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}")

        # Creating `Vocabulary` objects from workers could be problematic since the data iterators
        # in each worker will yield only `rank` specific instances. Hence it is safe to construct
        # the vocabulary and write it to disk before initializing the distributed context. The workers
        # will load the vocabulary from the path specified.
        make_vocab_from_params(params.duplicate(), serialization_dir)
        params["vocabulary"] = {
            "directory_path": os.path.join(serialization_dir, "vocabulary"),
            "extend": False,  # vocab extension would have been done above
        }

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                file_friendly_logging,
                recover,
                cache_directory,
                cache_prefix,
                include_package,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
            ),
            nprocs=num_procs,
        )
        archive_model(serialization_dir,
                      files_to_archive=params.files_to_archive)
        model = Model.load(params, serialization_dir)
        return model
Example #22
def load_archive(archive_file: str,
                 cuda_device: int = -1,
                 overrides: str = "",
                 weights_file: str = None) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    weights_file: ``str``, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides: ``str``, optional (default = "")
        JSON overrides to apply to the unarchived ``Params`` object.
    """
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(
            f"loading archive file {archive_file} from cache at {resolved_archive_file}"
        )

    tempdir = None
    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info(
            f"extracting archive file {resolved_archive_file} to temp dir {tempdir}"
        )
        with tarfile.open(resolved_archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)

        serialization_dir = tempdir

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacements_dict: Dict[str, Any] = {}
        for key, _ in files_to_archive.items():
            replacement_filename = os.path.join(serialization_dir,
                                                f"fta/{key}")
            replacements_dict[key] = replacement_filename

        overrides_dict = parse_overrides(overrides)
        combined_dict = with_fallback(preferred=unflatten(replacements_dict),
                                      fallback=overrides_dict)
        overrides = json.dumps(combined_dict)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME),
                              overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=weights_path,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    if tempdir:
        # Clean up temp dir
        shutil.rmtree(tempdir)

    return Archive(model=model, config=config)
Example #23
def load_archive(archive_file: str,
                 cuda_device: int = -1,
                 overrides: str = "",
                 weights_file: str = None) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    weights_file: ``str``, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides: ``str``, optional (default = "")
        JSON overrides to apply to the unarchived ``Params`` object.
    """
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(f"loading archive file {archive_file} from cache at {resolved_archive_file}")

    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info(f"extracting archive file {resolved_archive_file} to temp dir {tempdir}")
        with tarfile.open(resolved_archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)
        # Postpone cleanup until exit in case the unarchived contents are needed outside
        # this function.
        atexit.register(_cleanup_archive_dir, tempdir)

        serialization_dir = tempdir

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacements_dict: Dict[str, Any] = {}
        for key, _ in files_to_archive.items():
            replacement_filename = os.path.join(serialization_dir, f"fta/{key}")
            replacements_dict[key] = replacement_filename

        overrides_dict = parse_overrides(overrides)
        combined_dict = with_fallback(preferred=unflatten(replacements_dict), fallback=overrides_dict)
        overrides = json.dumps(combined_dict)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=weights_path,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    return Archive(model=model, config=config)
Example #24
def load_archive(
    archive_file: str,
    cuda_device: int = -1,
    overrides: str = "",
    weights_file: str = None,
) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    # Parameters

    archive_file : `str`
        The archive file to load the model from.
    cuda_device : `int`, optional (default = `-1`)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides : `str`, optional (default = `""`)
        JSON overrides to apply to the unarchived `Params` object.
    weights_file : `str`, optional (default = `None`)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    """
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(
            f"loading archive file {archive_file} from cache at {resolved_archive_file}"
        )

    tempdir = None
    try:
        if os.path.isdir(resolved_archive_file):
            serialization_dir = resolved_archive_file
        else:
            # Extract archive to temp dir
            tempdir = tempfile.mkdtemp()
            logger.info(
                f"extracting archive file {resolved_archive_file} to temp dir {tempdir}"
            )
            with tarfile.open(resolved_archive_file, "r:gz") as archive:
                archive.extractall(tempdir)
            serialization_dir = tempdir

        # Load config
        config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME),
                                  overrides)

        if weights_file:
            weights_path = weights_file
        else:
            weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)
            # Fallback for serialization directories.
            if not os.path.exists(weights_path):
                weights_path = os.path.join(serialization_dir,
                                            _DEFAULT_WEIGHTS)

        # Instantiate model. Use a duplicate of the config, as it will get consumed.
        model = Model.load(
            config.duplicate(),
            weights_file=weights_path,
            serialization_dir=serialization_dir,
            cuda_device=cuda_device,
        )

    finally:
        if tempdir is not None:
            logger.info(
                f"removing temporary unarchived model dir at {tempdir}")
            shutil.rmtree(tempdir, ignore_errors=True)

    return Archive(model=model, config=config)
Example #25
def evaluate(model_path, test_path, config_path, metric, is_multiple_ref,
             max_count, report_every, batch_size):
    params_path = config_path or os.path.join(model_path, "config.json")

    params = Params.from_file(params_path)
    is_subwords = "tokenizer" in params["reader"] and params["reader"][
        "tokenizer"]["type"] == "subword"
    reader = DatasetReader.from_params(params.pop("reader"))

    device = 0 if torch.cuda.is_available() else -1
    model = Model.load(params, model_path, cuda_device=device)
    model.eval()  # switch all submodules to inference mode
    print(model)
    print("Trainable params count: ",
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    hyps = []
    refs = []
    predictor = Seq2SeqPredictor(model, reader)
    for batch in get_batches(reader, test_path, batch_size):
        outputs = predictor.predict_batch_json(batch)
        targets = [b.get('target') for b in batch]
        for output, target in zip(outputs, targets):
            decoded_words = output["predicted_tokens"]
            if not is_multiple_ref:
                hyp = detokenize(
                    " ".join(decoded_words)) if not is_subwords else "".join(
                        decoded_words).replace("▁", " ")
                if len(hyp.strip()) <= 1:
                    hyp = "empty"
                    print("Empty hyp")
                if len(target.strip()) <= 1:
                    target = "empty"
                    print("Empty target")
                ref = [target]
            else:
                if isinstance(target, list):
                    reference_sents = target
                elif isinstance(target, str):
                    reference_sents = target.split(" s_s ")
                else:
                    assert False
                decoded_sents = (" ".join(decoded_words)).split("s_s")
                hyp = [
                    w.replace("<", "&lt;").replace(">", "&gt;").strip()
                    for w in decoded_sents
                ]
                ref = [
                    w.replace("<", "&lt;").replace(">", "&gt;").strip()
                    for w in reference_sents
                ]
                hyp = " ".join(hyp)
                ref = [" ".join(ref)]

            hyps.append(hyp)
            refs.append(ref)

            if len(hyps) % report_every == 0:
                print("Count: ", len(hyps))
                print("Ref: ", ref)
                print("Hyp: ", hyp)

                if metric in ("bleu", "all"):
                    from nltk.translate.bleu_score import corpus_bleu
                    print("BLEU: ", corpus_bleu(refs, hyps))

                if metric in ("rouge", "all"):
                    rouge = Rouge()
                    scores = rouge.get_scores(hyps, [r[0] for r in refs],
                                              avg=True)
                    print("ROUGE: ", scores)

            if max_count and len(hyps) >= max_count:
                break
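detokenize is assumed above; a deliberately simple stand-in that just removes the space tokenizers insert before punctuation (real code would use a proper detokenizer):

import re

def detokenize(text: str) -> str:
    # Naive detokenization: drop the space before common punctuation marks.
    return re.sub(r"\s+([.,!?;:%])", r"\1", text).strip()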
Example #26
def load_archive(archive_file: str,
                 cuda_device: int = -1,
                 overrides: str = "",
                 weights_file: str = None) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    weights_file: ``str``, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides: ``str``, optional (default = "")
        JSON overrides to apply to the unarchived ``Params`` object.
    """
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(
            f"loading archive file {archive_file} from cache at {resolved_archive_file}"
        )

    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info(
            f"extracting archive file {resolved_archive_file} to temp dir {tempdir}"
        )
        with tarfile.open(resolved_archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)
        # Postpone cleanup until exit in case the unarchived contents are needed outside
        # this function.
        atexit.register(_cleanup_archive_dir, tempdir)

        serialization_dir = tempdir

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r', encoding='utf-8') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacements_dict: Dict[str, Any] = {}
        for key, original_filename in files_to_archive.items():
            replacement_filename = os.path.join(serialization_dir,
                                                f"fta/{key}")
            if os.path.exists(replacement_filename):
                replacements_dict[key] = replacement_filename
            else:
                logger.warning(
                    f"Archived file {replacement_filename} not found! At train time "
                    f"this file was located at {original_filename}. This may be "
                    "because you are loading a serialization directory. Attempting to "
                    "load the file from its train-time location.")

        overrides_dict = parse_overrides(overrides)
        combined_dict = with_fallback(preferred=overrides_dict,
                                      fallback=unflatten(replacements_dict))
        overrides = json.dumps(combined_dict)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME),
                              overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)
        # Fallback for serialization directories.
        if not os.path.exists(weights_path):
            weights_path = os.path.join(serialization_dir, _DEFAULT_WEIGHTS)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=weights_path,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    return Archive(model=model, config=config)
Example #27
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    batch_weight_key: str = "",
) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see ``Model.from_archive``.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    node_rank : ``int``, optional
        Rank of the current node in distributed training
    include_package : ``List[str]``, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    training_util.create_serialization_dir(params, serialization_dir, recover,
                                           force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            file_friendly_logging=file_friendly_logging,
            include_package=include_package,
            batch_weight_key=batch_weight_key,
        )
        archive_model(serialization_dir)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        logging.info(
            f"Switching to distributed training mode since multiple GPUs are configured"
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}")

        # Creating `Vocabulary` objects from workers could be problematic since
        # the data iterators in each worker will yield only `rank` specific
        # instances. Hence it is safe to construct the vocabulary and write it
        # to disk before initializing the distributed context. The workers will
        # load the vocabulary from the path specified.
        if params.get("vocabulary", Params({})).get("type",
                                                    "") != "from_files":
            vocab = training_util.make_vocab_from_params(
                params.duplicate(), serialization_dir)
            params["vocabulary"] = {
                "type": "from_files",
                "directory": os.path.join(serialization_dir, "vocabulary"),
                "padding_token": vocab._padding_token,
                "oov_token": vocab._oov_token,
            }

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                file_friendly_logging,
                include_package,
                batch_weight_key,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
            ),
            nprocs=num_procs,
        )
        archive_model(serialization_dir)
        model = Model.load(params, serialization_dir)
        return model
Example #28
def load_archive(archive_file: str,
                 cuda_device: int = -1,
                 overrides: str = "",
                 weights_file: str = None) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    weights_file: ``str``, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides: ``str``, optional (default = "")
        HOCON overrides to apply to the unarchived ``Params`` object.
    """
    # redirect to the cache, if necessary
    archive_file = cached_path(archive_file)

    tempdir = None
    if os.path.isdir(archive_file):
        serialization_dir = archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info("extracting archive file %s to temp dir %s", archive_file, tempdir)
        with tarfile.open(archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)

        serialization_dir = tempdir

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacement_hocon = pyhocon.ConfigTree(root=True)
        for key, _ in files_to_archive.items():
            replacement_filename = os.path.join(serialization_dir, f"fta/{key}")
            replacement_hocon.put(key, replacement_filename)

        overrides_hocon = pyhocon.ConfigFactory.parse_string(overrides)
        combined_hocon = replacement_hocon.with_fallback(overrides_hocon)
        overrides = json.dumps(combined_hocon)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=weights_path,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    if tempdir:
        # Clean up temp dir
        shutil.rmtree(tempdir)

    return Archive(model=model, config=config)