import numpy as np
import torch
import torch.nn as nn

# Note: `Transformer`, `Generator`, `GenerationSpec` and `GenerateConfig` are
# assumed to be imported from this repository's own modules; the exact import
# paths are omitted here.


def test_generator_predict_probs_output_shape():
    spec = GenerationSpec()
    spec.construct_model = lambda: Transformer(
        layers=2, pad_idx=0, words=80, seq_len=16, heads=2, dims=16, rate=4,
        dropout=0, bidirectional=False)

    # Create generator with simple specification.
    generator = Generator(
        spec=spec,
        config=GenerateConfig(seq_len=0, nucleus_prob=0.5, use_gpu=False))
    generator.initialize()

    probs1, past1 = generator._predict_probs(
        [np.random.randint(80) for _ in range(8)])
    probs2, past2 = generator._predict_probs(
        [generator._sample_from_top_p(probs1)], past1)

    assert probs1.shape == (80,)
    assert probs2.shape == (80,)
    for p in past1:
        assert p[0].shape == (8, 16)
        assert p[1].shape == (8, 16)
    for p in past2:
        assert p[0].shape == (9, 16)
        assert p[1].shape == (9, 16)
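# A minimal sketch of nucleus (top-p) sampling, illustrating what a method
# like `_sample_from_top_p` above is expected to do with `nucleus_prob=0.5`.
# This helper is an assumption for illustration, not the repository's actual
# implementation: keep the smallest set of words whose cumulative probability
# exceeds `prob`, then sample from the renormalized distribution.
def sample_from_top_p_sketch(probs, prob=0.5):
    order = np.argsort(probs)[::-1]        # words sorted by probability
    cumulative = np.cumsum(probs[order])   # cumulative mass in that order
    cutoff = int(np.searchsorted(cumulative, prob)) + 1
    nucleus = order[:cutoff]               # smallest set covering `prob`
    nucleus_probs = probs[nucleus] / probs[nucleus].sum()
    return int(np.random.choice(nucleus, p=nucleus_probs))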
def test_transformer_output_shape():
    model = Transformer(layers=2, pad_idx=0, words=80, seq_len=100, heads=2,
                        dims=16, rate=4, bidirectional=False)

    x, past = model(torch.randint(80, (10,)))
    assert x.shape == (10, 80)
    for p in past:
        assert p[0].shape == (10, 16)
        assert p[1].shape == (10, 16)

    x, past = model(torch.randint(80, (2, 7, 5, 10)))
    assert x.shape == (2, 7, 5, 10, 80)
    for p in past:
        assert p[0].shape == (2, 7, 5, 10, 16)
        assert p[1].shape == (2, 7, 5, 10, 16)

    # Previously calculated attention keys and values are concatenated to the
    # current ones, so the cached sequence length grows from 10 to 10 + 7 = 17.
    x, past = model(torch.randint(80, (2, 7, 5, 7)), past=past)
    assert x.shape == (2, 7, 5, 7, 80)
    for p in past:
        assert p[0].shape == (2, 7, 5, 17, 16)
        assert p[1].shape == (2, 7, 5, 17, 16)
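# A minimal sketch of the key/value-cache bookkeeping that the assertions
# above rely on (an assumption for illustration, not the repository's
# attention code): each layer concatenates its cached keys and values with
# the ones computed for the current input along the sequence axis.
def append_to_cache_sketch(past, k, v):
    if past is None:
        return k, v
    past_k, past_v = past
    # dim=-2 is the sequence axis; the feature dimension stays untouched.
    return torch.cat((past_k, k), dim=-2), torch.cat((past_v, v), dim=-2)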
def construct_model(self) -> nn.Module:
    return Transformer(layers=self.layers,
                       pad_idx=self.vocab.pad_idx,
                       words=len(self.vocab),
                       seq_len=self.seq_len,
                       heads=self.heads,
                       dims=self.dims,
                       rate=self.rate,
                       dropout=0,
                       bidirectional=False)
def test_generator_generate():
    spec = GenerationSpec()
    spec.construct_model = lambda: Transformer(
        layers=2, pad_idx=0, words=80, seq_len=50, heads=2, dims=16, rate=4,
        dropout=0, bidirectional=False)
    spec.encode_context = lambda context: list(range(len(context.split())))
    spec.decode_tokens = lambda tokens: ' '.join(str(t) for t in tokens)

    # Create generator with simple specification.
    generator = Generator(
        spec=spec,
        config=GenerateConfig(seq_len=50, nucleus_prob=0.5, use_gpu=False))
    generator.initialize()

    for t in generator.generate('a b c d e').split():
        assert int(t) < 80
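# A minimal sketch of the loop that `Generator.generate` above is expected to
# run (an assumption for illustration; `generator.spec` as an attribute name
# is also assumed): encode the context into token ids, repeatedly predict
# next-word probabilities while reusing the cached attention state, sample
# with nucleus sampling, and decode the finished sequence back to text.
def generate_sketch(generator, context, seq_len=50):
    tokens = generator.spec.encode_context(context)
    current, past = tokens, None
    while len(tokens) < seq_len:
        probs, past = generator._predict_probs(current, past)
        next_token = generator._sample_from_top_p(probs)
        tokens.append(next_token)
        current = [next_token]  # only the new token is fed on the next step
    return generator.spec.decode_tokens(tokens)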
def test_transformer_generating_sequence():
    model = Transformer(layers=2, pad_idx=0, words=80, seq_len=100, heads=2,
                        dims=16, rate=4, bidirectional=False)

    past = None
    for _ in range(10):
        x, past = model(torch.randint(80, (1,)), past=past)

        # The output tensor is a distribution over the next word.
        assert x.shape == (1, 80)

    # All keys and values should be stacked after the ten steps.
    for p in past:
        assert p[0].shape == (10, 16)
        assert p[1].shape == (10, 16)
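# A hedged sanity check in the same spirit as the test above (an assumption,
# not part of the original suite): feeding ten tokens one at a time with the
# cache should predict the same next-word distribution as feeding all ten at
# once, provided the model offsets positions by the cached length.
def check_incremental_matches_full():
    torch.manual_seed(0)
    model = Transformer(layers=2, pad_idx=0, words=80, seq_len=100, heads=2,
                        dims=16, rate=4, bidirectional=False)
    model.eval()  # disable dropout so both passes are deterministic

    tokens = torch.randint(80, (10,))
    with torch.no_grad():
        full, _ = model(tokens)              # all ten tokens at once
        past, step = None, None
        for i in range(10):                  # one token at a time
            step, past = model(tokens[i:i + 1], past=past)
    assert torch.allclose(full[-1], step[0], atol=1e-4)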