Example 1
    def test_generate(self):
        lm = ecco.from_pretrained('sshleifer/tiny-gpt2',
                                  activations=True,
                                  verbose=False)
        output = lm.generate('test', generate=1)
        assert output.token_ids.shape == (1, 2), "Generated one token successfully"
        assert output.attribution['grad_x_input'][0] == 1, "Successfully got an attribution value"
        # Confirm activations has dimensions:
        # (batch 1, layer 2, h_dimension 8, position 1)
        assert output.activations.shape == (1, 2, 8, 1)
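
These snippets reference self and ecco (and, in later examples, np and PreTrainedModel), so they presumably sit inside a test class. A minimal scaffold along the following lines would make them runnable as-is; the class name and the choice of unittest are assumptions, since the surrounding module is not shown.

import unittest

import numpy as np

import ecco
from transformers import PreTrainedModel


class TestEcco(unittest.TestCase):  # hypothetical class name
    # the test_* methods from these examples would be pasted here
    pass


if __name__ == '__main__':
    unittest.main()
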
Example 2
    def test_nmf_explore_on_dummy_gpt(self):
        lm = ecco.from_pretrained('sshleifer/tiny-gpt2',
                                  activations=True,
                                  verbose=False)
        output = lm.generate('test', generate=1)
        nmf = output.run_nmf()
        exp = nmf.explore(printJson=True)

        assert len(exp['tokens']) == 2  # input & output tokens
        # 1 redundant dimension, 1 factor (one generation step), 2 tokens.
        assert np.array(exp['factors']).shape == (1, 1, 2)
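
As a complement, the sketch below runs the same NMF flow with an explicit component count. It reuses only the calls shown above plus run_nmf(n_components=...), following ecco's public notebook examples; the longer generation and the value of n_components are arbitrary illustrative choices, not part of the original test.

import numpy as np
import ecco


def nmf_explore_sketch():
    lm = ecco.from_pretrained('sshleifer/tiny-gpt2',
                              activations=True,
                              verbose=False)
    output = lm.generate('test', generate=3)
    nmf = output.run_nmf(n_components=2)  # component count chosen arbitrarily for illustration
    exp = nmf.explore(printJson=True)
    # exp['tokens'] lists the input plus generated tokens; exp['factors'] carries
    # the per-token factor weights behind a leading redundant dimension
    print(len(exp['tokens']), np.array(exp['factors']).shape)
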
Example 3
    def test_call_dummy_bert(self):
        lm = ecco.from_pretrained('julien-c/bert-xsmall-dummy',
                                  activations=True,
                                  verbose=False)
        inputs = lm.to(lm.tokenizer(['test', 'hi'],
                                    padding=True,
                                    truncation=True,
                                    return_tensors="pt",
                                    max_length=512))
        output = lm(inputs)
        # Confirm it's (batch 2, layer 1, h_dimension 40, position 3)
        # position is 3 because of [CLS] and [SEP]
        # If we require padding, CUDA complains with this model for some reason.
        assert output.activations.shape == (2, 1, 40, 3)
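
To see where the position dimension of 3 comes from, the sketch below tokenizes the same inputs directly. It uses only lm.tokenizer as called above plus the standard Hugging Face convert_ids_to_tokens helper; the expected [CLS]/[UNK]/[SEP] output is an assumption based on the comment in Example 4.

import ecco


def inspect_bert_positions_sketch():
    lm = ecco.from_pretrained('julien-c/bert-xsmall-dummy',
                              activations=True,
                              verbose=False)
    encoded = lm.tokenizer(['test', 'hi'],
                           padding=True,
                           truncation=True,
                           return_tensors="pt",
                           max_length=512)
    for ids in encoded['input_ids']:
        # likely something like ['[CLS]', '[UNK]', '[SEP]'] -> 3 positions per example
        print(lm.tokenizer.convert_ids_to_tokens(ids.tolist()))
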
Example 4
    def test_nmf_explore_on_dummy_bert(self):
        lm = ecco.from_pretrained('julien-c/bert-xsmall-dummy',
                                  activations=True,
                                  verbose=False)
        inputs = lm.to(lm.tokenizer(['test', 'hi'],
                                    padding=True,
                                    truncation=True,
                                    return_tensors="pt",
                                    max_length=512))
        output = lm(inputs)
        nmf = output.run_nmf()
        exp = nmf.explore(printJson=True)

        assert len(exp['tokens']) == 3  # CLS UNK SEP
        # 1 redundant dimension, 6 factors, 6 tokens (a batch of two examples, 3 tokens each)
        assert np.array(exp['factors']).shape == (1, 6, 6)
Example 5
    def test_init(self):
        lm = ecco.from_pretrained('sshleifer/tiny-gpt2', activations=True)

        assert isinstance(lm.model, PreTrainedModel), "Model downloaded and LM was initialized successfully."
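
The same from_pretrained call also wires up the Hugging Face tokenizer that the BERT examples invoke via lm.tokenizer(...). The sketch below adds an illustrative type check; the PreTrainedTokenizerBase assertion is an assumption, not part of the original test.

import ecco
from transformers import PreTrainedModel, PreTrainedTokenizerBase


def init_check_sketch():
    lm = ecco.from_pretrained('sshleifer/tiny-gpt2', activations=True)
    assert isinstance(lm.model, PreTrainedModel)
    # assumption: ecco exposes the underlying Hugging Face tokenizer directly
    assert isinstance(lm.tokenizer, PreTrainedTokenizerBase)
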