Example #1
    def masked_mlm():
        import torch
        from transformers import ReformerConfig, ReformerForMaskedLM

        # Load the char-level enwik8 checkpoint as an encoder so it can do masked LM.
        config = ReformerConfig.from_pretrained('google/reformer-enwik8')
        config.is_decoder = False
        model = ReformerForMaskedLM.from_pretrained('google/reformer-enwik8',
                                                    config=config)

        sentence = sentence2 = "The quick brown fox jumps over the lazy dog."
        # `encode`/`decode` are helper functions defined elsewhere in this example.
        input_ids, attention_masks = encode([sentence])

        # Mask a few character positions by zeroing their attention mask and
        # marking them with '%' in the reference sentence.
        _input_ids, a = input_ids.clone(), attention_masks.clone()
        for i in [19, 27, 37]:
            a[0, i] = 0
            sentence2 = sentence2[:i] + "%" + sentence2[i + 1:]

        f = model(input_ids=_input_ids,
                  position_ids=None,
                  attention_mask=a,
                  head_mask=None,
                  inputs_embeds=None,
                  num_hashes=None,
                  labels=_input_ids)
        prediction = decode(torch.argmax(f.logits, 2))[0]
        print(sentence2)
        print(prediction)
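
The snippet above relies on module-level encode/decode helpers that are not shown. A minimal sketch, assuming the character-level vocabulary of google/reformer-enwik8 (token id = byte value + 2, with id 0 reserved for padding), could look like this:

import torch

def encode(list_of_strings, pad_token_id=0):
    # Map raw UTF-8 bytes to ids (byte value + 2) and pad to the longest string.
    max_length = max(len(s) for s in list_of_strings)
    input_ids = torch.full((len(list_of_strings), max_length), pad_token_id, dtype=torch.long)
    attention_masks = torch.zeros((len(list_of_strings), max_length), dtype=torch.long)
    for idx, string in enumerate(list_of_strings):
        if not isinstance(string, bytes):
            string = string.encode("utf-8")
        input_ids[idx, :len(string)] = torch.tensor([b + 2 for b in string])
        attention_masks[idx, :len(string)] = 1
    return input_ids, attention_masks

def decode(output_ids):
    # Map ids back to characters; ids below 2 (padding/special) are dropped.
    return ["".join(chr(x - 2) if x > 1 else "" for x in ids)
            for ids in output_ids.tolist()]
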
Example #2
    def __init__(self,
                 n_labels,
                 hidden_size,
                 dropout=0.2,
                 label_ignore_idx=0,
                 max_seq_length=128,
                 batch_size=32,
                 head_init_range=0.04,
                 device='cuda',
                 vocab_size=320):
        super().__init__()
        self.n_labels = n_labels

        self.linear_1 = nn.Linear(hidden_size, hidden_size)
        self.classification_head = nn.Linear(hidden_size, n_labels)
        self.label_ignore_idx = label_ignore_idx
        self.tokenizer = ReformerTokenizer.from_pretrained(
            'google/reformer-crime-and-punishment')
        # axial_pos_shape must multiply out to the (padded) sequence length
        # that will be fed to the model: batch_size * (max_seq_length / batch_size) == max_seq_length.
        config = ReformerConfig(
            axial_pos_shape=[batch_size,
                             int(max_seq_length / batch_size)])
        self.model = ReformerModel(config)
        self.dropout = nn.Dropout(dropout)

        self.device = device

        # initializing classification head
        self.classification_head.weight.data.normal_(mean=0.0,
                                                     std=head_init_range)
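
The forward pass is not part of this snippet. Below is a hypothetical sketch of how the attributes created above (self.model, self.linear_1, self.dropout, self.classification_head) are typically wired together for token classification; the signature and the torch.relu activation are assumptions, not taken from the original module. Note that ReformerModel returns hidden states of width 2 * config.hidden_size (the two reversible streams are concatenated), so the hidden_size passed to __init__ must match that width.

    def forward(self, input_ids, attention_mask=None, labels=None):
        # Hypothetical composition; only the attributes created in __init__ are real.
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = self.dropout(outputs[0])
        sequence_output = torch.relu(self.linear_1(sequence_output))
        logits = self.classification_head(sequence_output)
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=self.label_ignore_idx)
            loss = loss_fct(logits.view(-1, self.n_labels), labels.view(-1))
            return loss, logits
        return logits
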
def get_reformer(vocab_size=77, n_layer=12, n_embd=768, n_head=12, n_positions=512, local_window_size=50,
                 num_buckets=None, num_hashes=1):
    attn_layers = ["local", "local", "lsh", "local", "local", "local", "lsh", "local",
                   "local", "local", "lsh", "local"]
    # attn_layers = ["local", "lsh", "local", "lsh", "local", "lsh", "local", "lsh", "local", "lsh", "local", "lsh"]
    config = ReformerConfig(
        hash_seed=None,
        attn_layers=attn_layers[:n_layer],
        # attention_head_size=128,
        hidden_size=n_embd,
        max_position_embeddings=350,
        feed_forward_size=3072,
        vocab_size=vocab_size,
        is_decoder=True,
        axial_pos_embds_dim=[256, 512],  # must sum to hidden_size (n_embd = 768 by default)
        axial_pos_shape=[14, 25],        # 14 * 25 == 350 == max_position_embeddings
        num_hashes=num_hashes,
        num_buckets=num_buckets,
        local_attn_chunk_length=local_window_size,
        lsh_attn_chunk_length=local_window_size,
        num_attention_heads=n_head,
        # lsh_attention_probs_dropout_prob=0.1,
        # local_attention_probs_dropout_prob=0.1,
        # hidden_dropout_prob=0.1,
        chunk_size_feed_forward=0,
        chunk_size_lm_head=0,
        eos_token_id=2,
        hidden_act='relu',
    )
    return ReformerModelWithLMHead(config=config)
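
A short usage sketch for the factory above; the toy input is made up, and it assumes a transformers version whose forward accepts return_dict and returns dataclass outputs:

import torch

model = get_reformer(vocab_size=77, n_layer=4)
model.eval()

# 350 equals max_position_embeddings above and is a multiple of the 50-token chunks.
input_ids = torch.randint(0, 77, (1, 350))
with torch.no_grad():
    outputs = model(input_ids=input_ids, labels=input_ids, return_dict=True)
print(float(outputs.loss), tuple(outputs.logits.shape))  # loss, (1, 350, 77)
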
def create_reformer_config():
    # define config of reformer model
    return ReformerConfig(
        **{
            "attention_head_size": 64,
            "attn_layers": ["local", "lsh", "local", "lsh", "local", "lsh"],
            "axial_pos_embds": True,
            "sinusoidal_pos_embds": False,
            "axial_pos_embds_dim": [64, 192],
            "axial_pos_shape": [512, 1024],
            "lsh_attn_chunk_length": 64,
            "local_attn_chunk_length": 64,
            "feed_forward_size": 512,
            "hidden_act": "relu",
            "hidden_size": 256,
            "is_decoder": True,
            "max_position_embeddings": 524288,
            "num_attention_heads": 2,
            "num_buckets": [64, 128],
            "num_hashes": 1,
            "vocab_size": 320,
            "lsh_attention_probs_dropout_prob": 0.0,
            "lsh_num_chunks_before": 1,
            "lsh_num_chunks_after": 0,
            "local_num_chunks_before": 1,
            "local_num_chunks_after": 0,
            "local_attention_probs_dropout_prob": 0.05,
            "hidden_dropout_prob": 0.05,
            "seed":
            None  # that parameter is only needed for testing and will be removed soon
        })
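
Two invariants are worth noting in the config above: the axial_pos_shape factors must multiply out to max_position_embeddings (512 * 1024 = 524288), and the axial embedding dims must sum to hidden_size (64 + 192 = 256). A quick sanity-check and instantiation sketch:

from transformers import ReformerModelWithLMHead

config = create_reformer_config()
assert config.axial_pos_shape[0] * config.axial_pos_shape[1] == config.max_position_embeddings
assert sum(config.axial_pos_embds_dim) == config.hidden_size

model = ReformerModelWithLMHead(config)
print(f"{model.num_parameters():,} parameters")
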
 def load(out_dir: str):
     text_processor = TextProcessor(tok_model_path=out_dir)
     with open(os.path.join(out_dir, "config"), "rb") as fp:
         config = pickle.load(fp)
         if isinstance(config, dict):
             # For older configs
             config = ReformerConfig(**config)
         lm = ReformerLM(text_processor=text_processor, config=config)
         lm.load_state_dict(
             torch.load(os.path.join(out_dir, "model.state_dict")))
         return lm
 def test_local_layer_forward_complex(self):
     config = self._get_basic_config_and_input()
     config["attn_layers"] = ["local"]
     attn_mask = self._get_attn_mask()
     hidden_states = self._get_hidden_states()
     torch.manual_seed(0)
     layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
     layer.eval()
     reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states, attention_mask=attn_mask,)
     output_slice = reformer_output.hidden_states[0, 0, :5]
     expected_output_slice = torch.tensor(
         [1.5476, -1.9020, -0.9902, 1.5013, -0.1950], dtype=torch.float, device=torch_device,
     )
     self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
 def test_local_layer_forward(self):
     config = self._get_basic_config_and_input()
     config["attn_layers"] = ["local"]
     config["is_decoder"] = False
     hidden_states = self._get_hidden_states()
     torch.manual_seed(0)
     layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
     layer.eval()
     reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states)
     output_slice = reformer_output.hidden_states[0, 0, :5]
     expected_output_slice = torch.tensor(
         [1.4212, -2.0576, -0.9688, 1.4599, -0.1344], dtype=torch.float, device=torch_device,
     )
     self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
 def test_lsh_layer_forward(self):
     config = self._get_basic_config_and_input()
     config["attn_layers"] = ["lsh"]
     config["is_decoder"] = False
     hidden_states = self._get_hidden_states()
     torch.manual_seed(0)
     layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
     layer.eval()
     reformer_output = layer(prev_attn_output=hidden_states.clone(), hidden_states=hidden_states)
     output_slice = reformer_output.hidden_states[0, 0, :5]
     expected_output_slice = torch.tensor(
         [1.6879, -1.3083, -0.4708, 1.3555, -0.6292], dtype=torch.float, device=torch_device,
     )
     self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = ReformerConfig.from_json_file(config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = ReformerModelWithLMHead(config)

    with open(trax_model_pkl_path, "rb") as f:
        model_weights = pickle.load(f)["weights"]

    set_model_weights_in_torch(model_weights, model, config.hidden_size)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
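
A hypothetical invocation of the conversion helper above; the file names are placeholders, not part of the original script:

convert_trax_checkpoint_to_pytorch(
    trax_model_pkl_path="reformer_weights.pkl",  # pickled dict with a "weights" entry from Trax
    config_file="reformer_config.json",          # ReformerConfig serialized as JSON
    pytorch_dump_path="pytorch_model.bin",       # where the PyTorch state dict is written
)
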
 def test_lsh_layer_forward_complex(self):
     config = self._get_basic_config_and_input()
     config["attn_layers"] = ["lsh"]
     config["num_buckets"] = [2, 4]
     attn_mask = self._get_attn_mask()
     hidden_states = self._get_hidden_states()
     torch.manual_seed(0)
     layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
     layer.eval()
     reformer_output = layer(
         prev_attn_output=hidden_states.clone(), hidden_states=hidden_states, attention_mask=attn_mask,
     )
     output_slice = reformer_output.hidden_states[0, 0, :5]
     expected_output_slice = torch.tensor(
         [1.6439, -1.2306, -0.5108, 1.3006, -0.6537], dtype=torch.float, device=torch_device,
     )
     self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
    def __init__(self,
                 text_processor: TextProcessor,
                 config: ReformerConfig = None,
                 size: int = 1):
        """
        :param size: config size: 1 small, 2 medium, 3 base.
        """
        super(ReformerLM, self).__init__()
        self.text_processor: TextProcessor = text_processor

        if config is not None:
            self.config = config
        else:
            config_func = _small_config if size == 1 else (
                _base_config if size == 3 else _medium_config)
            self.config = config_func(
                vocab_size=text_processor.tokenizer.get_vocab_size(),
                pad_token_id=text_processor.pad_token_id(),
                eos_token_id=text_processor.sep_token_id())
            self.config = ReformerConfig(**self.config)

        reformer = ReformerModelWithLMHead(self.config)
        self.lm_head: ReformerOnlyLMHead = reformer.lm_head
        self.encoder: ReformerModel = reformer.reformer
def _compute_pytorch(
    model_names,
    batch_sizes,
    slice_sizes,
    dictionary,
    average_over,
    device,
    torchscript,
    fp16,
    no_speed,
    no_memory,
    verbose,
    num_hashes
):

    hidden_size = 64
    num_attention_heads = 2
    intermediate_size = 128

    chunk_length = 64

    hidden_states = floats_tensor((1, 2 ** 16, hidden_size))

    for c, model_name in enumerate(model_names):
        print(f"{c + 1} / {len(model_names)}")

        dictionary[model_name] = {
            "bs": batch_sizes,
            "ss": slice_sizes,
            "results": {},
            "memory": {},
        }
        dictionary[model_name]["results"] = {i: {} for i in batch_sizes}
        dictionary[model_name]["memory"] = {i: {} for i in batch_sizes}

        for batch_size in batch_sizes:

            for slice_size in slice_sizes:

                num_buckets = int(2 * slice_size / chunk_length)
                if num_buckets > chunk_length:
                    factorized_num_buckets = num_buckets // 32
                    num_buckets = [32, factorized_num_buckets]

                bert_config = BertConfig(
                    hidden_size=hidden_size,
                    num_attention_heads=num_attention_heads,
                    intermediate_size=intermediate_size,
                    hidden_dropout_prob=0.0,
                    attention_probs_dropout_prob=0.0,
                )

                # ReformerConfig names these settings feed_forward_size and
                # lsh/local_attn_chunk_length.
                reformer_config = ReformerConfig(
                    hidden_size=hidden_size,
                    num_attention_heads=num_attention_heads,
                    feed_forward_size=intermediate_size,
                    lsh_attn_chunk_length=chunk_length,
                    local_attn_chunk_length=chunk_length,
                    num_hashes=num_hashes,
                    num_buckets=num_buckets,
                )

                layers = {
                    'ReformerLayer': ReformerLayer(reformer_config), 
                    'BertLayer': BertLayer(bert_config)
                }
                model = layers[model_name]

                if fp16:
                    model.half()
                model.to(device)
                model.eval()

                sequence = (
                    hidden_states[0, :slice_size, :]
                    .to(device=device)
                    .repeat(batch_size, 1, 1)
                )
                try:
                    if torchscript:
                        print("Tracing model with sequence size", sequence.shape)
                        inference = torch.jit.trace(model, sequence)
                        inference(sequence)
                    else:
                        inference = model
                        if model_name == "ReformerLayer":
                            inference(sequence, sequence)
                        else:
                            inference(sequence)

                    if not no_memory:
                        # model.add_memory_hooks()  # Forward method tracing (only for PyTorch models)

                        trace = start_memory_tracing("transformers")
                        if model_name == "ReformerLayer":
                            inference(sequence, sequence)
                        else:
                            inference(sequence)
                        summary = stop_memory_tracing(trace)

                        if verbose:
                            print_summary_statistics(summary)

                        dictionary[model_name]["memory"][batch_size][
                            slice_size
                        ] = str(summary.total)
                    else:
                        dictionary[model_name]["memory"][batch_size][
                            slice_size
                        ] = "N/A"

                    if not no_speed:
                        print(
                            "Going through model with sequence of shape",
                            sequence.shape,
                        )
                        if model_name == "ReformerLayer":
                            runtimes = timeit.repeat(
                                lambda: inference(sequence, sequence),
                                repeat=average_over,
                                number=3,
                            )
                        else:
                            runtimes = timeit.repeat(
                                lambda: inference(sequence),
                                repeat=average_over,
                                number=3,
                            )
                        average_time = sum(runtimes) / float(len(runtimes)) / 3.0
                        dictionary[model_name]["results"][batch_size][
                            slice_size
                        ] = average_time
                    else:
                        dictionary[model_name]["results"][batch_size][
                            slice_size
                        ] = "N/A"

                except RuntimeError as e:
                    print("Doesn't fit on GPU.", e)
                    torch.cuda.empty_cache()
                    dictionary[model_name]["results"][batch_size][
                        slice_size
                    ] = "N/A"
                    dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"
    return dictionary
Example #13
def cycle(loader):
    # Re-yield batches from the DataLoader indefinitely.
    while True:
        for data in loader:
            yield data


train_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_train.txt",
                                                  tokenizer)
val_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_val.txt",
                                                tokenizer)
train_loader = cycle(DataLoader(train_dataset, batch_size=BATCH_SIZE))
val_loader = cycle(DataLoader(val_dataset, batch_size=BATCH_SIZE))

# configuration = ReformerConfig.from_pretrained("google/reformer-crime-and-punishment")
# configuration.axial_pos_shape = (64, 72)
# configuration.max_position_embeddings=SEQ_LEN
# configuration.vocab_size=tokenizer.vocab_size
# configuration.save_pretrained('model/config/')
configuration = ReformerConfig.from_pretrained('model/config/')
model = ReformerModelWithLMHead(configuration)
model.cuda()

NUM_BATCHES = len(train_dataset) // BATCH_SIZE

from transformers import AdamW
optimizer = AdamW(params=model.parameters(), lr=LEARNING_RATE)

from collections import OrderedDict
import json

all_training_loss = OrderedDict()
all_val_loss = OrderedDict()

for x in range(1):
Example #14
from transformers import ReformerConfig, ReformerTokenizer, ReformerModel
import sentencepiece as spm
import os


assert os.path.exists('protein_reformer/training_vocab.txt'), \
    f'build a lower-case amino acid txt file to train the tokenizer; its content should be: {"ARNDCQEGHILKMFPSTWYVOUBZX".lower()}'
MODEL_MAX_LENGTH = 4608
spm.SentencePieceTrainer.Train(
    "--input=protein_reformer/training_vocab.txt --model_prefix=spiece --vocab_size=30 --pad_id=29 --character_coverage=1.0"
)
os.system("mv spiece.model spiece.vocab protein_reformer")
tokenizer = ReformerTokenizer(vocab_file="protein_reformer/spiece.model",
                              do_lower_case=True,
                              model_max_length=MODEL_MAX_LENGTH)
tokenizer.save_pretrained("protein_reformer")

configuration = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment")
configuration.axial_pos_shape = (64, 72)
configuration.max_position_embeddings = MODEL_MAX_LENGTH
configuration.vocab_size = tokenizer.vocab_size
configuration.pad_token_id = tokenizer.pad_token_id
# configuration.attn_layers = ["local","lsh","local","lsh"]
configuration.output_hidden_states = True
configuration.save_pretrained('protein_reformer/')
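
The saved tokenizer and config can then be loaded back to build a randomly initialised model for pretraining. A minimal sketch; only the protein_reformer directory comes from the example above:

from transformers import ReformerConfig, ReformerModelWithLMHead, ReformerTokenizer

tokenizer = ReformerTokenizer.from_pretrained("protein_reformer")
config = ReformerConfig.from_pretrained("protein_reformer")
model = ReformerModelWithLMHead(config)

# Axial position embeddings require the factorisation to match the sequence length:
# 64 * 72 == 4608 == MODEL_MAX_LENGTH.
assert config.axial_pos_shape[0] * config.axial_pos_shape[1] == config.max_position_embeddings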