Example #1
def test_sequence_categorical_crossentropy(guesses, labels):
    d_scores = SequenceCategoricalCrossentropy(normalize=False).get_grad(
        guesses, labels)
    d_scores1 = d_scores[0]
    d_scores2 = d_scores[1]
    assert d_scores1.shape == guesses[0].shape
    assert d_scores2.shape == guesses[1].shape
    assert d_scores1[1][0] == pytest.approx(0.4, eps)
    assert d_scores1[1][1] == pytest.approx(-0.4, eps)
    # The normalization divides the difference (e.g. 0.4) by the number of seqs
    d_scores = SequenceCategoricalCrossentropy(normalize=True).get_grad(
        guesses, labels)
    d_scores1 = d_scores[0]
    d_scores2 = d_scores[1]

    assert d_scores1[1][0] == pytest.approx(0.2, eps)
    assert d_scores1[1][1] == pytest.approx(-0.2, eps)

    # The third vector predicted all labels, but only the first one was correct
    assert d_scores1[2][0] == pytest.approx(0, eps)
    assert d_scores1[2][1] == pytest.approx(0.5, eps)
    assert d_scores1[2][2] == pytest.approx(0.5, eps)

    # The fourth vector predicted no labels but should have predicted the last one
    assert d_scores1[3][0] == pytest.approx(0, eps)
    assert d_scores1[3][1] == pytest.approx(0, eps)
    assert d_scores1[3][2] == pytest.approx(-0.5, eps)

    # Test the second sequence
    assert d_scores2[0][0] == pytest.approx(0.1, eps)
    assert d_scores2[0][1] == pytest.approx(-0.35, eps)

    loss = SequenceCategoricalCrossentropy(normalize=True).get_loss(
        guesses, labels)
    assert loss == pytest.approx(1.09, eps)
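
Note that `guesses`, `labels` and the tolerance `eps` are module-level pytest parameters that this listing omits. A minimal, self-contained sketch of the same API follows; the arrays here are invented stand-ins, not the fixture values from the test module:

import numpy
from thinc.api import SequenceCategoricalCrossentropy

guesses = [numpy.asarray([[0.9, 0.1], [0.4, 0.6]], dtype="float32"),
           numpy.asarray([[0.2, 0.8]], dtype="float32")]
labels = [numpy.asarray([0, 1], dtype="i"), numpy.asarray([1], dtype="i")]

# With normalize=False, the gradient for each sequence is
# guesses - one_hot(labels):
d_scores = SequenceCategoricalCrossentropy(normalize=False).get_grad(guesses, labels)
# d_scores[0] is approximately [[-0.1, 0.1], [0.4, -0.4]]

# With normalize=True, each gradient is additionally divided by the number
# of sequences (here 2), as the comment in the test above describes:
d_scores = SequenceCategoricalCrossentropy(normalize=True).get_grad(guesses, labels)
# d_scores[0] is approximately [[-0.05, 0.05], [0.2, -0.2]]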
Example #2
def test_loss():
    d_scores = CategoricalCrossentropy().get_grad(scores0, labels0)
    assert d_scores.dtype == "float32"
    assert d_scores.shape == scores0.shape
    d_scores = SequenceCategoricalCrossentropy().get_grad([scores0], [labels0])
    assert d_scores[0].dtype == "float32"
    assert d_scores[0].shape == scores0.shape
    assert SequenceCategoricalCrossentropy().get_grad([], []) == []
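
The `scores0` and `labels0` fixtures are likewise not shown. A hedged stand-in (values invented) that exercises the same dtype, shape, and empty-input guarantees:

import numpy
from thinc.api import CategoricalCrossentropy, SequenceCategoricalCrossentropy

scores0 = numpy.asarray([[0.9, 0.1], [0.3, 0.7]], dtype="float32")
labels0 = numpy.asarray([0, 1], dtype="i")

d_scores = CategoricalCrossentropy().get_grad(scores0, labels0)
assert d_scores.dtype == "float32"
assert d_scores.shape == scores0.shape
# An empty batch of sequences yields an empty list of gradients:
assert SequenceCategoricalCrossentropy().get_grad([], []) == []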
Example #3
from pathlib import Path
from typing import Optional

import ml_datasets
import thinc
import tqdm
from thinc.api import (Config, SequenceCategoricalCrossentropy, prefer_gpu,
                       use_pytorch_for_gpu_memory)

# CONFIG, minibatch_by_words and evaluate_sequences are defined elsewhere
# in the original script.


def main(path: Optional[Path] = None, out_dir: Optional[Path] = None):
    if prefer_gpu():
        print("Using gpu!")
        use_pytorch_for_gpu_memory()
    # You can edit the CONFIG string within the file, or copy it out to
    # a separate file and pass in the path.
    if path is None:
        config = Config().from_str(CONFIG)
    else:
        config = Config().from_disk(path)
    # make_from_config constructs objects whenever you have blocks with an @ key.
    # In the optimizer block we write @optimizers = "Adam.v1". This tells Thinc
    # to use registry.optimizers to fetch the "Adam.v1" function. You can
    # register your own functions as well and build up trees of objects.
    C = thinc.registry.make_from_config(config)

    words_per_subbatch = C["training"]["words_per_subbatch"]
    n_epoch = C["training"]["n_epoch"]
    batch_size = C["training"]["batch_size"]
    model = C["model"]
    optimizer = C["optimizer"]
    calculate_loss = SequenceCategoricalCrossentropy()

    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.ud_ancora_pos_tags()
    # Convert the outputs to cupy (if we're using that)
    train_Y = list(map(model.ops.asarray, train_Y))
    dev_Y = list(map(model.ops.asarray, dev_Y))
    # Pass in a small batch of data, to fill in missing shapes
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    for epoch in range(n_epoch):
        # Transformers often learn best with large batch sizes -- larger than
        # will fit in GPU memory. But you don't have to backprop the whole batch
        # at once. Here we consider the "logical" batch size (number of examples
        # per update) separately from the physical batch size.
        batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
        for outer_batch in tqdm.tqdm(batches, leave=False):
            # For the physical batch size, what we care about is the number
            # of words (considering padding too). We also want to sort by
            # length, for efficiency.
            for batch in minibatch_by_words(outer_batch, words_per_subbatch):
                inputs, truths = zip(*batch)
                guesses, backprop = model(inputs, is_train=True)
                backprop(calculate_loss.get_grad(guesses, truths))
            # At the end of the batch, we call the optimizer with the accumulated
            # gradients, and advance the learning rate schedules.
            model.finish_update(optimizer)
            optimizer.step_schedules()
        # You might want to evaluate more often than once per epoch; that's up
        # to you.
        score = evaluate_sequences(model, dev_X, dev_Y, 128)
        print(epoch, f"{score:.3f}")
        if out_dir:
            model.to_disk(out_dir / f"{epoch}.bin")
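
For reference, a sketch of what the CONFIG string could look like. The [optimizer] block follows the comment in main() (@optimizers = "Adam.v1"), and the [training] keys match what the script reads; the values and the [model] block are illustrative placeholders, not the original file's contents:

CONFIG = """
[training]
n_epoch = 10
batch_size = 128
words_per_subbatch = 2000

[optimizer]
@optimizers = "Adam.v1"
learn_rate = 0.00005

[model]
# ... an @layers block building the tagger model goes here ...
"""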
Example #4
    def get_loss(self, examples: Iterable[Example],
                 scores: List[Floats2d]) -> Tuple[float, List[Floats2d]]:
        validate_examples(examples, "EditTreeLemmatizer.get_loss")
        loss_func = SequenceCategoricalCrossentropy(normalize=False,
                                                    missing_value=-1)

        truths = []
        for eg in examples:
            eg_truths = []
            for predicted, gold_lemma in zip(
                eg.predicted, eg.get_aligned("LEMMA", as_string=True)
            ):
                if gold_lemma is None:
                    label = -1
                else:
                    tree_id = self.trees.add(predicted.text, gold_lemma)
                    label = self.tree2label.get(tree_id, 0)
                eg_truths.append(label)

            truths.append(eg_truths)

        d_scores, loss = loss_func(scores, truths)
        if self.model.ops.xp.isnan(loss):
            raise ValueError(Errors.E910.format(name=self.name))

        return float(loss), d_scores
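
Because the loss is created with missing_value=-1, tokens labelled -1 above (those with no aligned gold lemma) contribute neither loss nor gradient. A small illustration with invented values:

import numpy
from thinc.api import SequenceCategoricalCrossentropy

loss_func = SequenceCategoricalCrossentropy(normalize=False, missing_value=-1)
scores = [numpy.asarray([[0.8, 0.2], [0.3, 0.7]], dtype="float32")]
truths = [[0, -1]]  # the second token has no aligned gold lemma
d_scores, loss = loss_func(scores, truths)
# d_scores[0][1] is all zeros: missing labels are skipped entirely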
Example #5
def test_sequence_categorical_missing_negative(guesses, labels, names):
    d_scores = SequenceCategoricalCrossentropy(
        normalize=False, names=names, neg_prefix="!", missing_value=""
    ).get_grad(guesses, labels)
    d_scores0 = d_scores[0]

    # [0.1, 0.5, 0.6] should be A
    assert d_scores0[0][0] == pytest.approx(-0.9, eps)
    assert d_scores0[0][1] == pytest.approx(0.5, eps)
    assert d_scores0[0][2] == pytest.approx(0.6, eps)

    # [0.4, 0.6, 0.3] should NOT be A
    assert d_scores0[1][0] == pytest.approx(0.4, eps)
    assert d_scores0[1][1] == pytest.approx(0.0, eps)
    assert d_scores0[1][2] == pytest.approx(0.0, eps)

    # [1, 1, 1] has missing gold label
    assert d_scores0[2][0] == pytest.approx(0.0, eps)
    assert d_scores0[2][1] == pytest.approx(0.0, eps)
    assert d_scores0[2][2] == pytest.approx(0.0, eps)

    # [0.0, 0.0, 0.0] should NOT be C
    assert d_scores0[3][0] == pytest.approx(0.0, eps)
    assert d_scores0[3][1] == pytest.approx(0.0, eps)
    assert d_scores0[3][2] == pytest.approx(0.0, eps)
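
The fixtures are again not shown, but the comments pin down the first sequence's score rows and labels, so it can be reconstructed with reasonable confidence (the original test may use different values for any other sequences):

import numpy
from thinc.api import SequenceCategoricalCrossentropy

guesses = [numpy.asarray(
    [[0.1, 0.5, 0.6],
     [0.4, 0.6, 0.3],
     [1.0, 1.0, 1.0],
     [0.0, 0.0, 0.0]], dtype="float32")]
labels = [["A", "!A", "", "!C"]]  # "!" marks a negative label, "" is missing
names = ["A", "B", "C"]

d_scores = SequenceCategoricalCrossentropy(
    normalize=False, names=names, neg_prefix="!", missing_value=""
).get_grad(guesses, labels)
# d_scores[0][0] ~ [-0.9, 0.5, 0.6]: the row should have been A
# d_scores[0][1] ~ [0.4, 0.0, 0.0]: only the disallowed A column is penalized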