Example #1
import numpy as np

# Transformer, Generator, GenerationSpec, and GenerateConfig are the
# project's own classes, assumed to be imported in the test module.
def test_generator_predict_probs_output_shape():
    spec = GenerationSpec()
    spec.construct_model = lambda: Transformer(
        layers=2, pad_idx=0, words=80, seq_len=16, heads=2, dims=16, rate=4,
        dropout=0, bidirectional=False)

    # Create a generator from the simple specification.
    generator = Generator(
        spec=spec,
        config=GenerateConfig(seq_len=16, nucleus_prob=0.5, use_gpu=False))
    generator.initialize()

    # Prime the model with 8 random tokens, then feed one token sampled
    # from probs1 while reusing the cached keys and values.
    probs1, past1 = generator._predict_probs(
        [np.random.randint(80) for _ in range(8)])
    probs2, past2 = generator._predict_probs(
        [generator._sample_from_top_p(probs1)], past1)

    assert probs1.shape == (80,)
    assert probs2.shape == (80,)

    # Each layer caches one (key, value) row per processed token: 8 after
    # the first call, 9 after the second.
    for p in past1:
        assert p[0].shape == (8, 16)
        assert p[1].shape == (8, 16)
    for p in past2:
        assert p[0].shape == (9, 16)
        assert p[1].shape == (9, 16)
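
The token fed into the second call is drawn from probs1 with generator._sample_from_top_p. Below is a minimal standalone sketch of nucleus (top-p) sampling for reference; it is an illustrative reimplementation under the nucleus_prob=0.5 setting above, not the project's actual code.

import numpy as np

def sample_from_top_p(probs: np.ndarray, nucleus_prob: float = 0.5) -> int:
    # Sort token probabilities in descending order.
    order = np.argsort(probs)[::-1]
    sorted_probs = probs[order]

    # Keep the smallest prefix whose cumulative mass reaches nucleus_prob.
    cutoff = int(np.searchsorted(np.cumsum(sorted_probs), nucleus_prob)) + 1

    # Renormalize within the nucleus and sample a token id from it.
    top_probs = sorted_probs[:cutoff] / sorted_probs[:cutoff].sum()
    return int(order[np.random.choice(cutoff, p=top_probs)])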
Example #2
import torch

def test_transformer_output_shape():
    model = Transformer(layers=2,
                        pad_idx=0,
                        words=80,
                        seq_len=100,
                        heads=2,
                        dims=16,
                        rate=4,
                        bidirectional=False)

    # A 1-D input yields a next-word distribution over the 80-word
    # vocabulary at each of the 10 positions.
    x, past = model(torch.randint(80, (10, )))
    assert x.shape == (10, 80)
    for p in past:
        assert p[0].shape == (10, 16)
        assert p[1].shape == (10, 16)

    # Arbitrary leading dimensions are treated as batch dimensions.
    x, past = model(torch.randint(80, (2, 7, 5, 10)))
    assert x.shape == (2, 7, 5, 10, 80)
    for p in past:
        assert p[0].shape == (2, 7, 5, 10, 16)
        assert p[1].shape == (2, 7, 5, 10, 16)

    # Previously calculated attention keys and values are concatenated with
    # the current ones along the sequence axis (10 cached + 7 new = 17).
    x, past = model(torch.randint(80, (2, 7, 5, 7)), past=past)
    assert x.shape == (2, 7, 5, 7, 80)
    for p in past:
        assert p[0].shape == (2, 7, 5, 17, 16)
        assert p[1].shape == (2, 7, 5, 17, 16)
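
The last call above demonstrates the caching rule stated in the comment: keys and values from earlier calls are concatenated with the current ones along the sequence axis (10 cached + 7 new = 17). A minimal sketch of that rule, using hypothetical names rather than the project's internals:

import torch

def append_past(past_kv, k, v):
    # Concatenate cached keys/values with the current ones along the
    # sequence axis: (..., past_len + seq_len, dims).
    if past_kv is not None:
        k = torch.cat((past_kv[0], k), dim=-2)
        v = torch.cat((past_kv[1], v), dim=-2)
    return k, v

cached = (torch.zeros(2, 7, 5, 10, 16), torch.zeros(2, 7, 5, 10, 16))
k, v = append_past(cached,
                   torch.zeros(2, 7, 5, 7, 16),
                   torch.zeros(2, 7, 5, 7, 16))
assert k.shape == v.shape == (2, 7, 5, 17, 16)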
Example #3
from torch import nn

# Excerpt from a spec class (cf. GenerationSpec above): the hyperparameters
# and the vocabulary-dependent values (pad_idx, vocabulary size) come from
# the spec instance itself.
def construct_model(self) -> nn.Module:
    return Transformer(layers=self.layers,
                       pad_idx=self.vocab.pad_idx,
                       words=len(self.vocab),
                       seq_len=self.seq_len,
                       heads=self.heads,
                       dims=self.dims,
                       rate=self.rate,
                       dropout=0,
                       bidirectional=False)
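
construct_model reads pad_idx and the vocabulary size from self.vocab. A hypothetical minimal vocabulary stub showing where those two values come from (the project's real vocabulary class will differ):

class Vocab:
    def __init__(self, tokens):
        # Reserve index 0 for the padding token.
        self.tokens = ['<pad>'] + list(tokens)
        self.pad_idx = 0

    def __len__(self) -> int:
        return len(self.tokens)

vocab = Vocab(['a', 'b', 'c'])
assert vocab.pad_idx == 0 and len(vocab) == 4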
Example #4
def test_generator_generate():
    spec = GenerationSpec()
    spec.construct_model = lambda: Transformer(
        layers=2, pad_idx=0, words=80, seq_len=50, heads=2, dims=16, rate=4,
        dropout=0, bidirectional=False)
    spec.encode_context = lambda context: list(range(len(context.split())))
    spec.decode_tokens = lambda tokens: ' '.join(str(t) for t in tokens)

    # Create a generator from the simple specification.
    generator = Generator(
        spec=spec,
        config=GenerateConfig(seq_len=50, nucleus_prob=0.5, use_gpu=False))
    generator.initialize()

    # Every generated token must be a valid id in the 80-word vocabulary.
    for t in generator.generate('a b c d e').split():
        assert int(t) < 80
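
The spec above encodes a context into positional token ids and decodes generated ids back into a whitespace-joined string. That round trip can be checked without a Generator at all:

encode_context = lambda context: list(range(len(context.split())))
decode_tokens = lambda tokens: ' '.join(str(t) for t in tokens)

assert encode_context('a b c d e') == [0, 1, 2, 3, 4]
assert decode_tokens([0, 1, 2]) == '0 1 2'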
Example #5
import torch

def test_transformer_generating_sequence():
    model = Transformer(layers=2,
                        pad_idx=0,
                        words=80,
                        seq_len=100,
                        heads=2,
                        dims=16,
                        rate=4,
                        bidirectional=False)

    # Feed one token at a time, reusing the cached keys and values.
    past = None
    for _ in range(10):
        x, past = model(torch.randint(80, (1, )), past=past)

        # The output tensor is a next-word distribution over the vocabulary.
        assert x.shape == (1, 80)

    # Keys and values from all 10 steps should be stacked in the cache.
    for p in past:
        assert p[0].shape == (10, 16)
        assert p[1].shape == (10, 16)
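
What the cache buys: at each step the single new query attends over all cached keys, so attention still spans the whole generated prefix even though only one token is fed per call. A single-head toy attention computation with the final cached shapes from the test (illustrative only; the real model splits dims across heads=2):

import torch

q = torch.randn(1, 16)   # query for the newest token
k = torch.randn(10, 16)  # 10 cached keys, one per generated token
v = torch.randn(10, 16)  # 10 cached values

# Scaled dot-product attention over the whole cached prefix.
scores = torch.softmax(q @ k.t() / 16 ** 0.5, dim=-1)  # (1, 10)
context = scores @ v                                   # (1, 16)
assert context.shape == (1, 16)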