import torch
from transformers import ReformerTokenizer, ReformerModel

# Maximum number of tokens the Reformer accepts per sequence.
MODEL_MAX_LENGTH = 4608

# SentencePiece vocabulary for the protein tokenizer.
tokenizer_config_path = "protein_reformer/spiece.model"
tokenizer = ReformerTokenizer(vocab_file=tokenizer_config_path,
                              do_lower_case=True,
                              model_max_length=MODEL_MAX_LENGTH)

# Load the pretrained protein Reformer from a local checkpoint.
model_checkpoint = 'output/checkpoint-6500/'
model = ReformerModel.from_pretrained(model_checkpoint)

# Read one protein sequence per line.
sequence_file_path = "data/yeast/yeast.txt"
with open(sequence_file_path, "r") as f:
    sequence_txt = f.readlines()

# Tokenize each sequence (truncated to the model's maximum length) and
# move the input IDs to the GPU.
input_sequence_list = [
    tokenizer(sequence.strip(), truncation=True,
              return_tensors='pt')['input_ids'].cuda()
    for sequence in sequence_txt
]
model.cuda()
# model(inp)[1][-1] is taken as the final hidden state here (this relies on
# the checkpoint's config exposing hidden states at index 1); mean-pooling
# over the token dimension yields one fixed-size vector per protein.
protein_vectors_list = [
    torch.mean(model(inp)[1][-1], dim=1) for inp in input_sequence_list
]
protein_vectors = torch.cat(protein_vectors_list, dim=0)

# Project the protein embeddings to 2-D for visualization; sklearn expects
# a NumPy array, so move the tensor off the GPU first.
from sklearn.manifold import TSNE
protein_vectors_tsne = TSNE(n_components=2).fit_transform(
    protein_vectors.detach().cpu().numpy())
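To eyeball the resulting 2-D layout, a minimal plotting sketch, assuming matplotlib is installed (the output filename is hypothetical):

import matplotlib.pyplot as plt

# Scatter the t-SNE projection of the yeast protein embeddings.
plt.figure(figsize=(8, 8))
plt.scatter(protein_vectors_tsne[:, 0], protein_vectors_tsne[:, 1], s=5)
plt.title("t-SNE of yeast protein embeddings")
plt.savefig("protein_tsne.png")  # hypothetical output path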
Example #2
# from transformers import pipeline
# nlp = pipeline("sentiment-analysis")
# result = nlp("I hate you")[0]
# print(f"label: {result['label']}, with score: {round(result['score'], 4)}")
# result = nlp("I love you")[0]
# print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

from transformers import ReformerTokenizer, ReformerModel
import torch
tokenizer = ReformerTokenizer.from_pretrained(
    'google/reformer-crime-and-punishment')
model = ReformerModel.from_pretrained('google/reformer-crime-and-punishment',
                                      return_dict=True)
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state
print(last_hidden_states)
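As in the protein example above, last_hidden_states can be mean-pooled over the token dimension to obtain one fixed-size vector per input; a minimal sketch:

# Mean-pool over tokens: (batch_size, seq_len, hidden_size) -> (batch_size, hidden_size).
sentence_embedding = last_hidden_states.mean(dim=1)
print(sentence_embedding.shape)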