import logging
import unicodedata
from typing import List, Sequence, Tuple

import streamlit as st
import torch
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2Tokenizer

LOG = logging.getLogger(__name__)


def get_answer(text: str, model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer):
    """Generate an answer for `text` with a tokenizer that defines two
    additional special tokens: a context marker and an answer marker."""
    cntx_token_id, answer_token_id = tokenizer.additional_special_tokens_ids
    context = tokenizer.encode(text)
    # Wrap the encoded text as: <bos> <context marker> ...text... <answer marker>
    context = [tokenizer.bos_token_id] + [cntx_token_id] + context + [answer_token_id]
    context = torch.LongTensor([context])
    # Drop the first (BOS) and last tokens of the generated sequence before decoding.
    ans = model.generate(input_ids=context, max_length=100, temperature=0.7)[0][1:-1]
    return tokenizer.decode(ans)
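

# A minimal usage sketch for `get_answer`. It assumes the tokenizer was built
# with exactly two additional special tokens acting as context/answer markers;
# the "<context>"/"<answer>" names below are illustrative, not taken from the
# original project.
def _demo_get_answer() -> None:
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    tokenizer.add_special_tokens(
        {"additional_special_tokens": ["<context>", "<answer>"]}
    )
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.resize_token_embeddings(len(tokenizer))  # make room for the two new tokens
    model.eval()
    print(get_answer("What is the capital of France?", model, tokenizer))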


def make_predictions(
    text: str,
    tokenizer: GPT2Tokenizer,
    gpt2: GPT2LMHeadModel,
    device: torch.device,
    max_output_length: int = 100,
) -> Sequence[str]:
    """Make predictions for text using GPT-2.

    Args:
        text: Input text.
        tokenizer: GPT-2 tokenizer.
        gpt2: GPT-2 model.
        device: GPT-2 device.
        max_output_length: Maximum length of generated sequence.

    Returns:
        List of predicted strings after the provided text, or an empty list
        if the input is over 300 tokens long.
    """
    text = unicodedata.normalize("NFKC", text)
    input_ids = tokenizer.encode(text)
    input_ids = torch.tensor([input_ids]).to(device)  # pylint: disable=not-callable
    input_id_length = len(input_ids[0])

    # Long inputs usually result in useless outputs, so returning no predictions
    # is acceptable here
    if input_id_length > 300:
        return []

    # Enforce a maximum generated length to prevent memory issues
    max_length = min(input_id_length + max_output_length, 350)

    with torch.cuda.amp.autocast():  # Run with FP16
        sample_outputs = gpt2.generate(
            input_ids,
            do_sample=True,
            max_length=max_length,
            min_length=2,  # We want output that is at least two words
            temperature=0.8,
            top_k=50,
            top_p=0.8,
            num_return_sequences=40,
        )

    suggestions = []
    for output in sample_outputs:
        decoded_output = result_replace(tokenizer.decode(output[input_id_length:]))
        suggestions.append(decoded_output)
    return suggestions
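

# `make_predictions` relies on a `result_replace` post-processing helper that is
# not part of this excerpt, and the autocast context assumes a CUDA device. The
# stub and demo below are a sketch under those assumptions, not the project's
# actual implementation.
def result_replace(text: str) -> str:
    # Stand-in clean-up: keep only the first line of the sample and trim whitespace.
    return text.split("\n")[0].strip()


def _demo_make_predictions() -> None:
    device = torch.device("cuda")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    gpt2 = GPT2LMHeadModel.from_pretrained("gpt2").to(device).eval()
    suggestions = make_predictions(
        "The weather tomorrow will be", tokenizer, gpt2, device
    )
    for suggestion in suggestions[:5]:
        print(repr(suggestion))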


def predict_next_token(
    words: str, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: GPT2Tokenizer, top: int = 3
) -> Tuple[Tuple[str, float], ...]:
    """
    Predict the next token, given some starting words.

    :param words: a string of a few words (max tokens: 1023)
    :param gpt2_model: GPT2LMHeadModel preferably
    :param gpt2_tokenizer: GPT2Tokenizer
    :param top: the number of probable tokens to return
    :return: a tuple of tuples (token, probability)

    ## OOME on circleci :-(
    # >>> gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    # >>> gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    # >>> _ = gpt2_model.eval()
    # >>> predict_next_token('I am looking', gpt2_model, gpt2_tokenizer)
    # (('forward', 0.3665640652179718), ('for', 0.35346919298171997), ('to', 0.08423731476068497))
    """
    tokens_tensor = torch.tensor(  # pylint: disable=not-callable
        gpt2_tokenizer.encode(words, add_special_tokens=True)
    ).unsqueeze(0)  # Batch size 1
    if tokens_tensor.shape[1] > 1023:
        LOG.warning(
            "Too many tokens, should be 1023 or less, found %s", tokens_tensor.shape[1]
        )
    soft = torch.nn.Softmax(dim=1)
    gpt2_model.eval()
    with torch.no_grad():
        predictions = gpt2_model(tokens_tensor)[0].squeeze(0)
    predictions = soft(predictions)
    values, indices = torch.topk(  # pylint: disable=no-member
        predictions[-1, :], top
    )
    id_prob = list(zip(indices, values))
    return tuple(
        [  # type: ignore
            (gpt2_tokenizer.decode(int(tmp[0])).strip(), float(tmp[1]))
            for tmp in id_prob
        ]
    )
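

# A small illustration of how the (token, probability) pairs returned by
# `predict_next_token` can be chained into a greedy continuation. This is a
# sketch only; it assumes the model and tokenizer are loaded as in the
# commented doctest above.
def _demo_greedy_continuation(prompt: str = "I am looking") -> str:
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").eval()
    for _ in range(5):
        best_token, _prob = predict_next_token(
            prompt, gpt2_model, gpt2_tokenizer, top=1
        )[0]
        prompt += " " + best_token
    return prompt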


def generate(
    input_text: str,
    model: GPT2LMHeadModel,
    tokenizer: GPT2Tokenizer,
    max_generation_len: int = 200,
    max_context_len: int = 256,
):
    """Generate a continuation of `input_text` one token at a time, streaming
    the growing text to a Streamlit placeholder."""
    generated_sentence = input_text
    prompt_tokens = torch.tensor(
        tokenizer.encode(generated_sentence)
    ).to("cuda").unsqueeze(0)
    text_form = st.empty()
    for _ in tqdm(range(max_generation_len)):
        # NOTE: uncomment this and remove `model.half()` if it fits into the GPU
        # with torch.cuda.amp.autocast():
        #     outputs = model(prompt_tokens)

        # Keep only the most recent `max_context_len` tokens as context.
        context_len = prompt_tokens.shape[1]
        if context_len > max_context_len:
            prompt_tokens = prompt_tokens[:, (context_len - max_context_len):]
        outputs = model(prompt_tokens)
        last_scores = outputs[0][:, -1, :]
        probs = torch.softmax(last_scores, dim=-1)
        predicted_token = predict_token(probs)
        predicted_token = predicted_token.to("cuda")
        prompt_tokens = torch.cat([prompt_tokens, predicted_token], dim=1)
        predicted_word = tokenizer.decode(
            predicted_token,
            skip_special_tokens=True,
        )
        generated_sentence += predicted_word
        text_form.empty()
        text_form.text(generated_sentence)
    return generated_sentence
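

# `generate` delegates sampling to a `predict_token` helper that is not shown in
# this excerpt. Below is a minimal sketch of such a helper, assuming it draws
# one token id from the probability distribution and keeps the batch dimension
# so that `torch.cat(..., dim=1)` works. Depending on the `transformers`
# version, `tokenizer.decode` in the loop above may need the flattened ids
# (i.e. `predicted_token[0]`).
def predict_token(probs: torch.Tensor) -> torch.Tensor:
    # probs has shape (batch_size, vocab_size); the result has shape (batch_size, 1).
    return torch.multinomial(probs, num_samples=1)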


def process_lines(
    self,
    text_lines: List[str],
    tokenizer: GPT2Tokenizer,
) -> Tuple[List[str], List[List[int]]]:
    """
    Process a list of lines with the following rules:

    - If the line is blank: skip it.
    - If the line is dialog: keep it as its own entry.
    - Otherwise: stack up to ``max_stacked_sentences`` of the following lines
      onto the current one, stopping at the first blank line; dialog lines
      encountered while stacking are inlined on their own line.

    Args:
        text_lines (List[str]): list containing the lines to be processed.
        tokenizer (GPT2Tokenizer): tokenizer used to encode the processed lines.

    Returns:
        Tuple[List[str], List[List[int]]]: tuple containing a list of the
        processed lines and a list with the token ids computed from these
        lines.
    """
    processed_lines = []
    tokenized_lines = []
    max_stacked_sentences = 12
    text_lines_gen = (l for l in text_lines)
    for line in text_lines_gen:
        processed_line = self.process_line(line)
        if processed_line == "":
            continue
        if is_dialog(processed_line):
            processed_line += " \n"
        else:
            for _ in range(max_stacked_sentences):
                try:
                    extra_line = next(text_lines_gen)
                except StopIteration:
                    break
                processed_extra_line = self.process_line(extra_line)
                if is_dialog(processed_extra_line):
                    processed_extra_line = f"\n{processed_extra_line} \n"
                if processed_extra_line != "":
                    processed_line += f" {processed_extra_line}"
                else:
                    break
            processed_line += " \n\n"
        processed_lines.append(processed_line)
        tokenized_line = tokenizer.encode(processed_line)
        tokenized_lines.append(tokenized_line)
    return processed_lines, tokenized_lines
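

# `process_lines` also depends on a `process_line` method and an `is_dialog`
# helper that are not shown here. As a rough illustration of the expected
# contract, `is_dialog` could look like the sketch below; the quote/dash
# heuristic is an assumption, not the project's actual rule.
def is_dialog(line: str) -> bool:
    # Treat lines opening with a quotation mark or a dialogue dash as dialog.
    return line.lstrip().startswith(('"', '\u201c', '\u00ab', '-', '\u2014'))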