from gpt2_client import GPT2Client

gpt2 = GPT2Client('117M')
gpt2.load_model()

# inputText = 'Trump is building a wall. A big wall. To stop CHINA from taking our jobs. Make America Great again!'
genText = gpt2.generate(interactive=True, n_samples=1, return_text=True)
# genText = gpt2.generate(return_text=True)

# Append the generated text to a file, one line per sample, followed by a blank separator
with open('test.txt', 'a') as filePtr:
    # filePtr.write(inputText + '\n')
    for line in genText:
        filePtr.write(line + '\n')
    filePtr.write('\n')
from gpt2_client import GPT2Client

gpt2 = GPT2Client('345M')
gpt2.load_model()

# Fine-tune the 345M model on a custom corpus and return the generated text
my_corpus = './_data/_data_en_es.txt'
genText = gpt2.finetune(my_corpus, return_text=True)
# Subclass stub: forwards constructor arguments to GPT2Client
# (the class name is a placeholder; the original fragment only contained this __init__)
class CustomGPT2Client(GPT2Client):
    def __init__(self, **kwargs):
        GPT2Client.__init__(self, **kwargs)
# note also you don't have to take the argmax from bert but can sample from it
# another primitive would be back-translating from an embedding
# if you can't balance the beam score, you can perhaps alternate faithfulness
# and maybe you can keep the "best of all time"
# would be good to do an autoencoder type of embed and decode primitive

# BERT-based scoring, kept for reference:
# def get_score(sentence):
#     tokenize_input = bert_tokeniser.tokenize(sentence)
#     tensor_input = torch.tensor([bert_tokeniser.convert_tokens_to_ids(tokenize_input)])
#     predictions = bert_model(tensor_input)
#     loss_fct = torch.nn.CrossEntropyLoss()
#     loss = loss_fct(predictions.squeeze(), tensor_input.squeeze()).data
#     return math.exp(loss)

import numpy as np
import torch

from gpt2_client import GPT2Client

gpt2 = GPT2Client('774M')
gpt2.load_model(force_download=False)

from transformers import GPT2Tokenizer, GPT2LMHeadModel

with torch.no_grad():
    gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    gpt2_model.eval()
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Score a sentence by its GPT-2 perplexity (lower means more fluent)
def get_score(sentence):
    tokenize_input = gpt2_tokenizer.encode(sentence)
    tensor_input = torch.tensor([tokenize_input])
    loss = gpt2_model(tensor_input, labels=tensor_input)[0]
    return np.exp(loss.detach().numpy())

# syn_then_do("please cancel my restaurant reservations")
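# The notes above mention keeping the "best of all time": a minimal sketch of how
# get_score could rank generated samples and keep the most fluent one. This is an
# assumption layered on top of the snippet above, not part of the original code;
# it reuses the `gpt2` client and `get_score` defined there, and the sample count
# is arbitrary.
candidates = gpt2.generate(n_samples=5, return_text=True)
best = min(candidates, key=get_score)  # lowest perplexity under GPT-2
print(best)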
""" The following snippet covers how to use GPT2Client to generate text randomly. It will return an array containing the `n_samples` pieces of generated text. """ from gpt2_client import GPT2Client gpt2 = GPT2Client('117M', save_dir="models") # optional -> if you already have the assets, you can comment this out gpt2.load_model(force_download=False) # interative mode is False by default # This will return 4 pieces of generated text text = gpt2.generate(n_samples=4, return_text=True) print(text)