Example #1
0
def run(ModelClass, output_dir, pipeline_opts, model_opts):
    """
    Runs the prediction pipeline. Loads the model and necessary files
    and creates the model's predictions for all data received.

    Args:
        ModelClass (type): Python Type of the Model to predict with
        output_dir: Directory to save predictions
        pipeline_opts (Namespace): Generic predict Options
            load_model: Path of the saved model file to load
            gpu_id: Device passed to ``model.to`` when it is a non-negative
                number; otherwise ``None`` is passed
            batch_size: Max batch size for predicting

        model_opts (Namespace): Model Specific options. All of its
            attributes are forwarded as keyword arguments to
            ``build_test_dataset``; ``wmt18_format`` (if present) is also
            passed to ``ModelClass.fieldset``

    Returns:
        Predictions (dict): Dictionary with format {'target':predictions}

    Raises:
        NotImplementedError: If ``ModelClass`` is ``LinearWordQEClassifier``,
            for which this function has no predicter.
    """
    model_name = getattr(ModelClass, "title", ModelClass.__name__)
    logger.info("Predict with the {} model".format(model_name))

    if ModelClass == LinearWordQEClassifier:
        # No predicter is constructed for the linear model (its construction
        # was disabled), so the later `predicter.run(...)` call could only
        # fail with UnboundLocalError -- and only after the model and the
        # test dataset had already been loaded. Fail fast with a clear
        # message instead.
        # TODO: restore linear prediction (the disabled code referenced a
        # `LinearTester(model)` built from a `LinearWordQEClassifier` loaded
        # via `model.load(pipeline_opts.load_model)`, with load_vocab=None).
        raise NotImplementedError(
            "Prediction with the {} model is not supported: "
            "no predicter is available for it.".format(model_name)
        )

    # The same path is used both to restore the model and as the
    # `load_vocab` argument of the test dataset.
    load_vocab = pipeline_opts.load_model

    model = Model.create_from_file(pipeline_opts.load_model)

    # Set GPU or CPU: only a non-negative gpu_id is forwarded, otherwise
    # `None` is passed to `model.to`.
    device_id = None
    if pipeline_opts.gpu_id is not None and pipeline_opts.gpu_id >= 0:
        device_id = pipeline_opts.gpu_id
    model.to(device_id)

    predicter = Predicter(model)

    model_kwargs = vars(model_opts)
    test_dataset = build_test_dataset(
        fieldset=ModelClass.fieldset(
            wmt18_format=model_kwargs.get("wmt18_format")),
        load_vocab=load_vocab,
        **model_kwargs,
    )
    predictions = predicter.run(test_dataset,
                                batch_size=pipeline_opts.batch_size)

    save_predicted_probabilities(output_dir, predictions)
    return predictions
Example #2
0
def load_model(model_path):
    """Load a pretrained model into a `Predicter` object.

    Args:
      model_path (str): A path to the saved model file.

    Returns:
      A `Predicter` wrapping the loaded model, with fields deserialized
      from the model's vocabularies.

    Raises:
      Exception: If the path does not exist, or is not a valid model file.

    """
    path = Path(model_path)
    if not path.exists():
        raise Exception('Path "{}" does not exist!'.format(path))

    loaded_model = Model.create_from_file(path)
    if not loaded_model:
        raise Exception('No model found in "{}"'.format(path))

    # Rebuild the fields of the model's fieldset from the stored vocabs so
    # the predicter can use them.
    vocab_fields = deserialize_fields_from_vocabs(
        loaded_model.fieldset().fields, loaded_model.vocabs
    )
    return Predicter(loaded_model, fields=vocab_fields)
    def from_directory(cls, directory, device_id=None):
        """Rebuild a trainer from the state files stored in `directory`.

        Loads the model, the optimizer and the trainer state from the files
        named by `const.MODEL_FILE`, `const.OPTIMIZER` and `const.TRAINER`
        inside `directory`.

        NOTE(review): takes `cls` as first argument, so this is presumably
        decorated as a classmethod -- the decorator is not visible here.

        Args:
            directory (str or Path): Directory containing the saved state.
            device_id: If not None, the model is moved to this device
                before the optimizer is created.

        Returns:
            An instance of `cls` whose `__dict__` has been updated with the
            saved trainer state.
        """
        logger.info('Loading training state from {}'.format(directory))
        root_path = Path(directory)

        model = Model.create_from_file(root_path / const.MODEL_FILE)
        if device_id is not None:
            model.to(device_id)

        saved_optimizer = load_torch_file(str(root_path / const.OPTIMIZER))
        # The optimizer is created with a placeholder learning rate and then
        # restored from the saved state dict.
        optimizer_type = optimizer_class(saved_optimizer['name'])
        optimizer = optimizer_type(model.parameters(), lr=0.0)
        optimizer.load_state_dict(saved_optimizer['state_dict'])

        trainer = cls(model, optimizer, checkpointer=None)
        saved_trainer_state = load_torch_file(str(root_path / const.TRAINER))
        trainer.__dict__.update(saved_trainer_state)
        return trainer
def test_get_mask():
    """Check that `Model.get_mask` returns the expected mask for every field.

    A batch of four sentence pairs is built with random token and tag ids.
    Token sequences get `const.START_ID` in column 0 and `const.STOP_ID` in
    column `length + 1`; every position outside the expected mask is filled
    with the padding id. The masks returned by the model must match the
    hand-written ones: source/target masks are 1 only on real tokens, tag
    masks drop the first and last column, and the gap mask drops only the
    first column of the target mask.
    """
    max_len = 6
    target_lengths = torch.LongTensor([1, 2, 3, 4])
    source_lengths = torch.LongTensor([4, 3, 2, 1])

    # Expected masks, including the START (col 0) and STOP/pad columns.
    target_mask = torch.ByteTensor(
        [
            [0, 1, 0, 0, 0, 0],
            [0, 1, 1, 0, 0, 0],
            [0, 1, 1, 1, 0, 0],
            [0, 1, 1, 1, 1, 0],
        ]
    )
    source_mask = torch.ByteTensor(
        [
            [0, 1, 1, 1, 1, 0],
            [0, 1, 1, 1, 0, 0],
            [0, 1, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0],
        ]
    )
    gap_mask = target_mask[:, 1:]
    target_tags_mask = target_mask[:, 1:-1]
    source_tags_mask = source_mask[:, 1:-1]

    def random_ids(low, high, shape):
        # Random integer ids in [low, high) as a LongTensor.
        return torch.LongTensor(np.random.randint(low, high, size=shape))

    def pad_outside(values, keep_mask, pad_id):
        # Overwrite every position where keep_mask is 0 with pad_id.
        return values.masked_fill(1 - keep_mask, pad_id)

    def add_boundaries(tokens, lengths):
        # Put START in column 0 and STOP in column `length + 1` of each row.
        tokens[:, 0] = const.START_ID
        positions = torch.arange(max_len).unsqueeze(0).expand_as(tokens)
        stop_positions = positions == ((lengths + 1).unsqueeze(1))
        return tokens.masked_fill(stop_positions, const.STOP_ID)

    # Draw order is kept: source, target, source tags, target tags, gap tags.
    source = random_ids(4, 100, (4, 6))
    target = random_ids(4, 100, (4, 6))
    source_tags = random_ids(0, 2, (4, 4))
    target_tags = random_ids(0, 2, (4, 4))
    gap_tags = random_ids(0, 2, (4, 5))

    source = pad_outside(source, source_mask, const.PAD_ID)
    target = pad_outside(target, target_mask, const.PAD_ID)
    target_tags = pad_outside(target_tags, target_tags_mask, const.PAD_TAGS_ID)
    source_tags = pad_outside(source_tags, source_tags_mask, const.PAD_TAGS_ID)
    gap_tags = pad_outside(gap_tags, gap_mask, const.PAD_TAGS_ID)

    source = add_boundaries(source, source_lengths)
    target = add_boundaries(target, target_lengths)

    batch_fields = {
        const.TARGET: target,
        const.SOURCE: source,
        const.TARGET_TAGS: target_tags,
        const.SOURCE_TAGS: source_tags,
        const.GAP_TAGS: gap_tags,
    }
    batch = SimpleNamespace(**batch_fields)

    # Minimal vocabularies: only the special symbols are needed.
    vocab = Vocabulary(collections.Counter())
    vocab.stoi = {
        const.UNK: const.UNK_ID,
        const.PAD: const.PAD_ID,
        const.START: const.START_ID,
        const.STOP: const.STOP_ID,
    }
    tags_vocab = Vocabulary(collections.Counter())
    tags_vocab.stoi = {const.PAD: const.PAD_TAGS_ID}

    model = Model(
        vocabs={
            const.TARGET: vocab,
            const.SOURCE: vocab,
            const.TARGET_TAGS: tags_vocab,
            const.SOURCE_TAGS: tags_vocab,
            const.GAP_TAGS: tags_vocab,
        }
    )

    expectations = [
        (const.SOURCE, source_mask),
        (const.TARGET, target_mask),
        (const.TARGET_TAGS, target_tags_mask),
        (const.SOURCE_TAGS, source_tags_mask),
        (const.GAP_TAGS, gap_mask),
    ]
    # Compute every mask first, then compare, as separate phases.
    computed = [model.get_mask(batch, side) for side, _ in expectations]
    for actual, (_, expected) in zip(computed, expectations):
        assert (actual == expected).all()
Example #5
0
def retrieve_trainer(ModelClass, pipeline_options, model_options, vocabs,
                     output_dir, device_id):
    """
    Creates a Trainer object with an associated model.

    The returned object encapsulates the logic behind training the model
    and checkpointing. The received pipeline options decide whether a run is
    resumed, a saved model is loaded or a new model is created, and which
    hyperparameters the trainer is built with.

    Args:
        ModelClass: Model class used to build a new model when no saved
            model is loaded.
        pipeline_options (Namespace): Generic training options
            resume (bool): Set to true if resuming an existing run.
            load_model (str): Directory containing model.torch for loading
                pre-created model.
            checkpoint_save (bool): Boolean indicating if snapshots should be
                saved after validation runs. warning: if false, will never save
                the model.
            checkpoint_keep_only_best (int): Indicates kiwi to keep the best
                `n` models.
            checkpoint_early_stop_patience (int): Stops training if metrics
                don't improve after `n` validation runs.
            checkpoint_validation_steps (int): Perform validation every `n`
                training steps.
            optimizer (string): The optimizer to be used in training.
            learning_rate (float): Starting learning rate.
            learning_rate_decay (float): Factor of learning rate decay. A
                scheduler is only created when it lies strictly between
                0 and 1.
            learning_rate_decay_start (int): Start decay after epoch `x`.
                Passed as the scheduler's `patience`.
            log_interval (int): Log after `k` batches.
        model_options (Namespace): Model specific options.
        vocabs (dict): Vocab dictionary.
        output_dir (str or Path): Output directory for models and stats
            concerning training.
        device_id (int): The gpu id to be used in training. Set to negative
            to use cpu.
    Returns:
        Trainer

    """
    # Resuming short-circuits everything else: the trainer is restored from
    # the output directory.
    if pipeline_options.resume:
        return Trainer.resume(local_path=output_dir, device_id=device_id)

    model = (
        Model.create_from_file(pipeline_options.load_model)
        if pipeline_options.load_model
        else ModelClass.from_options(vocabs=vocabs, opts=model_options)
    )

    checkpointer = Checkpoint(
        output_dir,
        pipeline_options.checkpoint_save,
        pipeline_options.checkpoint_keep_only_best,
        pipeline_options.checkpoint_early_stop_patience,
        pipeline_options.checkpoint_validation_steps,
    )

    # The linear model has its own trainer and needs no optimizer or device.
    if isinstance(model, LinearWordQEClassifier):
        return LinearWordQETrainer(
            model,
            model_options.training_algorithm,
            model_options.regularization_constant,
            checkpointer,
        )

    # Set GPU or CPU; has to be before instantiating the optimizer
    model.to(device_id)

    optimizer = optimizer_class(pipeline_options.optimizer)(
        model.parameters(), lr=pipeline_options.learning_rate
    )

    decay_factor = pipeline_options.learning_rate_decay
    if 0.0 < decay_factor < 1.0:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=decay_factor,
            patience=pipeline_options.learning_rate_decay_start,
            verbose=True,
            mode="max",
        )
    else:
        scheduler = None

    return Trainer(
        model,
        optimizer,
        checkpointer,
        log_interval=pipeline_options.log_interval,
        scheduler=scheduler,
    )