def create_and_check_reformer_model_fp16_generate(self, config, input_ids, input_mask):
    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.half()
    model.eval()
    output = model.generate(input_ids, attention_mask=input_mask, do_sample=False)
    self.parent.assertFalse(torch.isnan(output).any().item())
Example No. 2
def create_and_check_reformer_model_fp16_generate(self, config, input_ids, input_mask, choice_labels):
    config.is_decoder = True
    config.lsh_num_chunks_after = 0
    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.half()
    model.eval()
    # only use last 10 inputs for generation
    output = model.generate(input_ids[:, -10:], attention_mask=input_mask, do_sample=False)
    self.parent.assertFalse(torch.isnan(output).any().item())
import pickle

import torch

from transformers import ReformerConfig, ReformerModelWithLMHead


def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = ReformerConfig.from_json_file(config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = ReformerModelWithLMHead(config)

    with open(trax_model_pkl_path, "rb") as f:
        model_weights = pickle.load(f)["weights"]

    set_model_weights_in_torch(model_weights, model, config.hidden_size)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
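
The set_model_weights_in_torch helper used above is defined elsewhere in the original conversion script. Purely as an illustration, a hypothetical command-line wrapper (the argument names below are illustrative, not taken from the original) could invoke the function like this:

import argparse

# Hypothetical CLI wrapper around convert_trax_checkpoint_to_pytorch; not part
# of the original snippet, argument names are illustrative only.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--trax_model_pkl_path", type=str, required=True,
                        help="Path to the pickled Trax Reformer checkpoint.")
    parser.add_argument("--config_file", type=str, required=True,
                        help="Path to the ReformerConfig JSON file.")
    parser.add_argument("--pytorch_dump_path", type=str, required=True,
                        help="Where to write the converted PyTorch state dict.")
    args = parser.parse_args()
    convert_trax_checkpoint_to_pytorch(args.trax_model_pkl_path, args.config_file, args.pytorch_dump_path)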
def test_lm_model_forward(self):
    config = self._get_basic_config_and_input()
    config["attn_layers"] = ["local", "lsh", "local", "lsh", "local", "lsh"]
    config["num_buckets"] = [2, 4]
    config["is_decoder"] = False
    torch.manual_seed(0)
    model = ReformerModelWithLMHead(ReformerConfig(**config)).to(torch_device)
    model.eval()
    input_ids, attn_mask = self._get_input_ids_and_mask()
    hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
    output_slice = hidden_states[1, -1, :5]
    expected_output_slice = torch.tensor(
        [0.0324, -0.0121, 0.0615, 0.0031, -0.0297], dtype=torch.float, device=torch_device,
    )
    self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
from transformers import ReformerConfig, ReformerModelWithLMHead


def get_reformer(vocab_size=77, n_layer=12, n_embd=768, n_head=12, n_positions=512, local_window_size=50,
                 num_buckets=None, num_hashes=1):
    attn_layers = ["local", "local", "lsh", "local", "local", "local", "lsh", "local",
                   "local", "local", "lsh", "local"]
    # attn_layers = ["local", "lsh", "local", "lsh", "local", "lsh", "local", "lsh", "local", "lsh", "local", "lsh"]
    config = ReformerConfig(
        hash_seed=None,
        attn_layers=attn_layers[:n_layer],
        # attention_head_size=128,
        hidden_size=n_embd,
        max_position_embeddings=350,
        feed_forward_size=3072,
        vocab_size=vocab_size,
        is_decoder=True,
        axial_pos_embds_dim=[256, 512],
        axial_pos_shape=[14, 25],
        num_hashes=num_hashes,
        num_buckets=num_buckets,
        local_attn_chunk_length=local_window_size,
        # num_buckets=num_buckets,
        lsh_attn_chunk_length=local_window_size,
        num_attention_heads=n_head,
        # lsh_attention_probs_dropout_prob=0.1,
        # local_attention_probs_dropout_prob=0.1,
        # hidden_dropout_prob=0.1,
        chunk_size_feed_forward=0,
        chunk_size_lm_head=0,
        eos_token_id=2,
        hidden_act='relu',
    )
    return ReformerModelWithLMHead(config=config)
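
A quick smoke test, shown purely as an illustration and not taken from the original source: with the config above, a full-length input must have 14 * 25 = 350 tokens so that it matches axial_pos_shape, and 350 is a multiple of the chunk length of 50.

import torch

# Hypothetical usage sketch: build the model and run one forward pass on a
# dummy batch whose length matches axial_pos_shape (14 * 25 = 350).
model = get_reformer(vocab_size=77, n_layer=12)
model.eval()
dummy_ids = torch.randint(0, 77, (1, 350))
with torch.no_grad():
    # num_buckets is left unset above, so the model picks a suitable value
    # itself and logs a warning the first time LSH attention runs
    logits = model(input_ids=dummy_ids)[0]
print(logits.shape)  # expected: torch.Size([1, 350, 77])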
def main():
    # let's use > 0.5M tokens per sample
    padded_sequence_length = 2**19

    # reduce dataset to one example
    dataset = prepare_dataset(padded_sequence_length)

    # the non_padded_sequence_length defines the max shift for our data collator
    non_padded_sequence_length = padded_sequence_length - sum(
        dataset["attention_mask"][0])

    # use a special data collator that randomly shifts the input_ids
    data_collator = ReformerCollator(non_padded_sequence_length)

    # create reformer config and init model
    config = create_reformer_config()
    model = ReformerModelWithLMHead(config)

    # create training params
    training_args = get_training_args()

    # create the trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        data_collator=data_collator,
        train_dataset=dataset,
        eval_dataset=dataset,
    )

    # train
    trainer.train()
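
The ReformerCollator used above is defined elsewhere in the original script. Purely to illustrate the "random shift" idea mentioned in the comment, a minimal stand-in (assuming the dataset yields fixed-length, padded input_ids and attention_mask lists) might look like this; the class and attribute names are hypothetical:

import random
import torch


class RandomShiftCollator:
    """Illustrative stand-in for the ReformerCollator referenced above: rolls
    each padded sequence by a random offset so the non-padded text starts at a
    different position in every batch."""

    def __init__(self, max_shift_length):
        self.max_shift_length = max_shift_length

    def __call__(self, features):
        # roll input_ids and attention_mask by the same random shift
        shift = random.randint(0, self.max_shift_length)
        input_ids = torch.tensor([f["input_ids"] for f in features]).roll(shift, dims=-1)
        attention_mask = torch.tensor([f["attention_mask"] for f in features]).roll(shift, dims=-1)
        # standard causal-LM setup: labels are the inputs, with padding ignored
        labels = input_ids.masked_fill(attention_mask == 0, -100)
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}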
Example No. 7
    def test_pretrained_generate_use_cache_equality(self):
        model = ReformerModelWithLMHead.from_pretrained("google/reformer-crime-and-punishment").to(torch_device)
        tokenizer = ReformerTokenizer.from_pretrained("google/reformer-crime-and-punishment")
        model.eval()
        input_ids = tokenizer.encode("A few months later", return_tensors="pt").to(torch_device)
        output_ids_with_cache = model.generate(input_ids, max_length=130, num_hashes=8, use_cache=True)
        output_ids_without_cache = model.generate(input_ids, max_length=130, num_hashes=8, use_cache=False)

        output_with_cache = tokenizer.decode(output_ids_with_cache[0])
        output_without_cache = tokenizer.decode(output_ids_without_cache[0])

        self.assertEqual(output_with_cache, output_without_cache)
    def test_lsh_lm_model_grad(self):
        config = self._get_basic_config_and_input()
        config["attn_layers"] = ["lsh", "lsh", "lsh", "lsh"]
        config["hidden_dropout_prob"] = 0.0
        config["lsh_attention_probs_dropout_prob"] = 0.0
        config["num_buckets"] = [2, 4]
        config["num_hashes"] = 6
        torch.manual_seed(0)
        model = ReformerModelWithLMHead(ReformerConfig(**config)).to(torch_device)
        model.train()
        model.zero_grad()
        input_ids, _ = self._get_input_ids_and_mask()
        loss = model(input_ids=input_ids, labels=input_ids)[0]

        self.assertTrue(torch.allclose(loss, torch.tensor(5.7819, dtype=torch.float, device=torch_device), atol=1e-3))
        loss.backward()
        # check the last grads to cover all probable errors
        grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
        expected_grad_slice_word = torch.tensor(
            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04], dtype=torch.float, device=torch_device,
        )
        grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
        expected_grad_slice_pos_fac_1 = torch.tensor(
            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897], dtype=torch.float, device=torch_device,
        )
        grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
        expected_grad_slice_pos_fac_2 = torch.tensor(
            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805], dtype=torch.float, device=torch_device,
        )
        self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
        self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))
        self.assertTrue(torch.allclose(grad_slice_position_factor_2, expected_grad_slice_pos_fac_2, atol=1e-3))
Example No. 9
    def create_and_check_past_buckets_states(self, config, input_ids, input_mask, choice_labels):
        config.is_decoder = True
        config.lsh_num_chunks_before = 1
        config.lsh_num_chunks_after = 0
        model = ReformerModelWithLMHead(config=config)
        model.to(torch_device)
        model.eval()
        input_ids_first = input_ids[:, :-1]
        input_ids_second = input_ids[:, -1:]

        # return saved cache
        past_buckets_states = model(input_ids_first, use_cache=True)["past_buckets_states"]

        # calculate last output with and without cache
        outputs_with_cache = model(input_ids_second, past_buckets_states=past_buckets_states, use_cache=True)["logits"]
        outputs_without_cache = model(input_ids)["logits"][:, -1]

        # select random slice idx
        random_slice_idx = torch.randint(outputs_without_cache.shape[-1], (1, 1), device=torch_device).item()

        # outputs should be similar within range
        self.parent.assertTrue(
            torch.allclose(
                outputs_with_cache[:, 0, random_slice_idx], outputs_without_cache[:, random_slice_idx], atol=1e-2
            )
        )
Example No. 10
def create_and_check_reformer_model_with_lm_backward(self, config, input_ids, input_mask, choice_labels):
    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.eval()
    loss = model(input_ids, attention_mask=input_mask, labels=input_ids)[0]
    loss.backward()
    def test_local_lm_model_grad(self):
        config = self._get_basic_config_and_input()
        config["attn_layers"] = ["local", "local", "local", "local"]
        config["hidden_dropout_prob"] = 0.0
        config["local_attention_probs_dropout_prob"] = 0.0
        torch.manual_seed(0)
        model = ReformerModelWithLMHead(ReformerConfig(**config)).to(torch_device)
        model.train()
        model.zero_grad()
        input_ids, _ = self._get_input_ids_and_mask()
        loss = model(input_ids=input_ids, labels=input_ids)[0]

        self.assertTrue(torch.allclose(loss, torch.tensor(5.7786, dtype=torch.float, device=torch_device), atol=1e-3))
        loss.backward()

        # check the last grads to cover all probable errors
        grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
        expected_grad_slice_word = torch.tensor(
            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006], dtype=torch.float, device=torch_device,
        )
        grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
        expected_grad_slice_pos_fac_1 = torch.tensor(
            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306], dtype=torch.float, device=torch_device,
        )
        grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
        expected_grad_slice_pos_fac_2 = torch.tensor(
            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830], dtype=torch.float, device=torch_device,
        )
        self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
        self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))
        self.assertTrue(torch.allclose(grad_slice_position_factor_2, expected_grad_slice_pos_fac_2, atol=1e-3))
Example No. 12
def create_and_check_reformer_with_lm(self, config, input_ids, input_mask, choice_labels):
    config.lsh_num_chunks_after = 0
    config.is_decoder = True
    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids, attention_mask=input_mask, labels=input_ids)
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
Example No. 13
    def create_and_check_reformer_model_generate(self, config, input_ids, input_mask, choice_labels):
        config.is_decoder = True
        config.lsh_num_chunks_after = 0
        config.bos_token_id = 0
        config.eos_token_id = None
        config.max_length = 20

        model = ReformerModelWithLMHead(config=config)
        model.to(torch_device)
        model.eval()
        output = model.generate()
        self.parent.assertIsNotNone(output)
    def test_pretrained_generate_crime_and_punish(self):
        model = ReformerModelWithLMHead.from_pretrained("google/reformer-crime-and-punishment").to(torch_device)
        tokenizer = ReformerTokenizer.from_pretrained("google/reformer-crime-and-punishment")
        model.eval()

        input_ids = tokenizer.encode("A few months later", return_tensors="pt").to(torch_device)
        output_ids = model.generate(
            input_ids, max_length=50, num_beams=4, early_stopping=True, do_sample=False, num_hashes=8
        )
        output_text = tokenizer.decode(output_ids[0])
        self.assertEqual(
            output_text,
            "A few months later state expression in his ideas, at the first entrance. He was positively for an inst",
        )
Example No. 15
def create_and_check_reformer_no_chunking(self, config, input_ids, input_mask, choice_labels):
    # force the chunk length to be larger than the input sequence length
    config.lsh_attn_chunk_length = 2 * input_ids.shape[-1]
    config.local_attn_chunk_length = 2 * input_ids.shape[-1]
    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.eval()
    output_logits = model(input_ids, attention_mask=input_mask)[0]
    self.parent.assertTrue(output_logits.shape[1] == input_ids.shape[-1])
Example No. 16
def create_and_check_reformer_with_lm(self, config, input_ids, input_mask, choice_labels):
    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.eval()
    loss, prediction_scores = model(input_ids, attention_mask=input_mask, labels=input_ids)
    result = {
        "loss": loss,
        "prediction_scores": prediction_scores,
    }
    self.parent.assertListEqual(
        list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size],
    )
    self.check_loss_output(result)
Example No. 17
    def __init__(self,
                 text_processor: TextProcessor,
                 config: ReformerConfig = None,
                 size: int = 1):
        """
        :param size: config size: 1 small, 2 medium, 3 base.
        """
        super(ReformerLM, self).__init__()
        self.text_processor: TextProcessor = text_processor

        if config is not None:
            self.config = config
        else:
            config_func = _small_config if size == 1 else (
                _base_config if size == 3 else _medium_config)
            self.config = config_func(
                vocab_size=text_processor.tokenizer.get_vocab_size(),
                pad_token_id=text_processor.pad_token_id(),
                eos_token_id=text_processor.sep_token_id())
            self.config = ReformerConfig(**self.config)

        reformer = ReformerModelWithLMHead(self.config)
        self.lm_head: ReformerOnlyLMHead = reformer.lm_head
        self.encoder: ReformerModel = reformer.reformer
Example No. 18
def test_model_from_pretrained(self):
    for model_name in REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        model = ReformerModelWithLMHead.from_pretrained(model_name)
        self.assertIsNotNone(model)
Example No. 19
    def create_and_check_reformer_feed_backward_chunking(
            self, config, input_ids, input_mask, choice_labels):
        if not self.is_training:
            return

        # disable dropout
        config.hidden_dropout_prob = 0
        config.local_attention_probs_dropout_prob = 0
        config.lsh_attention_probs_dropout_prob = 0

        torch.manual_seed(0)
        model = ReformerModelWithLMHead(config=config)
        model.to(torch_device)
        model.train()
        model.zero_grad()
        loss_no_chunk, output_no_chunk = model(input_ids,
                                               labels=input_ids,
                                               attention_mask=input_mask)[:2]
        loss_no_chunk.backward()
        grad_slice_word_no_chunk = model.reformer.embeddings.word_embeddings.weight.grad[
            0, :5]
        grad_slice_position_factor_1_no_chunk = model.reformer.embeddings.position_embeddings.weights[
            0][1, 0, -5:]
        grad_slice_position_factor_2_no_chunk = model.reformer.embeddings.position_embeddings.weights[
            1][0, 1, :5]

        config.chunk_size_lm_head = 1
        config.chunk_size_feed_forward = 1

        torch.manual_seed(0)
        model = ReformerModelWithLMHead(config=config)
        model.to(torch_device)
        model.train()
        model.zero_grad()
        loss_chunk, output_chunk = model(input_ids,
                                         labels=input_ids,
                                         attention_mask=input_mask)[:2]
        loss_chunk.backward()
        grad_slice_word_chunk = model.reformer.embeddings.word_embeddings.weight.grad[
            0, :5]
        grad_slice_position_factor_1_chunk = model.reformer.embeddings.position_embeddings.weights[
            0][1, 0, -5:]
        grad_slice_position_factor_2_chunk = model.reformer.embeddings.position_embeddings.weights[
            1][0, 1, :5]
        self.parent.assertTrue(
            torch.allclose(loss_chunk, loss_no_chunk, atol=1e-3))
        self.parent.assertTrue(
            torch.allclose(grad_slice_word_no_chunk,
                           grad_slice_word_chunk,
                           atol=1e-3))
        self.parent.assertTrue(
            torch.allclose(grad_slice_position_factor_1_chunk,
                           grad_slice_position_factor_1_no_chunk,
                           atol=1e-3))
        self.parent.assertTrue(
            torch.allclose(grad_slice_position_factor_2_chunk,
                           grad_slice_position_factor_2_no_chunk,
                           atol=1e-3))
Example No. 20
    # Decoding
    def decode(outputs_ids):
        decoded_outputs = []
        o = outputs_ids.tolist() if torch.is_tensor(
            outputs_ids) else outputs_ids
        for output_ids in o:
            # transform each id back to a char; ids < 2 are simply mapped to ""
            decoded_outputs.append("".join(
                [chr(x - 2) if x > 1 else "" for x in output_ids]))
        return decoded_outputs
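
    # The encode() helper called below is not shown in this excerpt. A minimal
    # sketch consistent with decode() above -- google/reformer-enwik8 operates
    # character by character with ids offset by 2 -- could look like this
    # (illustrative only, not the original helper):
    def encode(list_of_strings, pad_token_id=0):
        max_length = max(len(s) for s in list_of_strings)
        input_ids = torch.full((len(list_of_strings), max_length), pad_token_id, dtype=torch.long)
        attention_masks = torch.zeros((len(list_of_strings), max_length), dtype=torch.long)
        for idx, s in enumerate(list_of_strings):
            ids = torch.tensor([ord(c) + 2 for c in s], dtype=torch.long)
            input_ids[idx, : len(ids)] = ids
            attention_masks[idx, : len(ids)] = 1
        return input_ids, attention_masks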

    from transformers import ReformerModelWithLMHead, ReformerForMaskedLM
    # transformers.ReformerModel - raw hidden states
    # ReformerForMaskedLM - masked-LM head (the variant actually wanted here)
    # ReformerModelWithLMHead - causal / next-token prediction only
    model = ReformerModelWithLMHead.from_pretrained("google/reformer-enwik8")
    encoded, attention_masks = encode(
        ["In 1965, Brooks left IBM to found the Department of"])
    x = model.generate(encoded, do_sample=True, max_length=150)
    d = decode(x)

    input_ids, attention_masks = encode(
        ["In 1965, Brooks left IBM to found the Department of"])
    #i,a = input_ids.to("cuda"), attention_masks.to("cuda")

    sentence = "The quick brown fox jumps over the lazy dog."
    input_ids, attention_masks = encode([sentence])
    attention_masks[0, 37] = attention_masks[0, 19] = attention_masks[0, 27] = 0
    i, a = input_ids, attention_masks
    f = model.forward(input_ids=i, attention_mask=a)  # attention_mask=a is assumed; the excerpt ends mid-call here
Example No. 21
from pydantic import BaseModel, Field
from transformers import ReformerModelWithLMHead, ReformerTokenizer

tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment')
model = ReformerModelWithLMHead.from_pretrained('google/reformer-crime-and-punishment')


##
# Reformer text generator (google/reformer-crime-and-punishment):
# produces `how_many` continuations of the given prompt.
def mk_crime_punish(text, length, how_many, top_p, top_k, do_sample):
    try:
        input_ids = tokenizer.encode(text, return_tensors='pt')

        min_length = len(input_ids.tolist()[0])
        length += min_length

        length = length if length > 0 else 1
        top_k = top_k if top_k > 0 else 10
        top_p = top_p if top_p > 0 else 0.5

        # model generating
        sample_outputs = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id,  # pad with the tokenizer's eos id
                                        do_sample=do_sample,
                                        max_length=length,
                                        top_p=top_p,
                                        top_k=top_k,
                                        num_return_sequences=how_many)

        result = dict()
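
        # Hypothetical continuation -- the original excerpt is cut off at this
        # point; decoding each returned sequence into the result dict is the
        # natural next step, but this is an assumption, not the original code.
        for idx, sample_output in enumerate(sample_outputs):
            result[idx] = tokenizer.decode(sample_output, skip_special_tokens=True)
        return result
    except Exception as error:
        return {"error": str(error)}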
Example No. 22

train_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_train.txt",
                                                  tokenizer)
val_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_val.txt",
                                                tokenizer)
train_loader = cycle(DataLoader(train_dataset, batch_size=BATCH_SIZE))
val_loader = cycle(DataLoader(val_dataset, batch_size=BATCH_SIZE))

# configuration = ReformerConfig.from_pretrained("google/reformer-crime-and-punishment")
# configuration.axial_pos_shape = (64, 72)
# configuration.max_position_embeddings=SEQ_LEN
# configuration.vocab_size=tokenizer.vocab_size
# configuration.save_pretrained('model/config/')
configuration = ReformerConfig.from_pretrained('model/config/')
model = ReformerModelWithLMHead(configuration)
model.cuda()

NUM_BATCHES = len(train_dataset) // BATCH_SIZE

from transformers import AdamW
optimizer = AdamW(params=model.parameters(), lr=LEARNING_RATE)

from collections import OrderedDict
import json

all_training_loss = OrderedDict()
all_val_loss = OrderedDict()

for x in range(1):
    print(f"epoch {x}")