# Shared imports assumed by the snippets in this section; `top_k_top_p_filtering`
# is the filtering helper from `transformers`.
import torch
import torch.nn.functional as F
from transformers import top_k_top_p_filtering


@torch.no_grad()  # pure sampling; no gradients needed
def sample_with_past(x, model, steps, temperature=1., sample_logits=True,
                     top_k=None, top_p=None, callback=None):
    # x is conditioning
    sample = x
    cond_len = x.shape[1]
    past = None
    for n in range(steps):
        if callback is not None:
            callback(n)
        # `forward_with_past` returns (logits, loss, present) and reuses the
        # cached keys/values from previous steps
        logits, _, present = model.forward_with_past(x, past=past, past_length=(n + cond_len - 1))
        if past is None:
            past = [present]
        else:
            past.append(present)
        logits = logits[:, -1, :] / temperature
        if top_k is not None:
            logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)

        probs = F.softmax(logits, dim=-1)
        if not sample_logits:
            _, x = torch.topk(probs, k=1, dim=-1)  # greedy: take the most likely token
        else:
            x = torch.multinomial(probs, num_samples=1)
        # append to the sequence and continue
        sample = torch.cat((sample, x), dim=1)
    del past
    sample = sample[:, cond_len:]  # cut conditioning off
    return sample
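# A minimal, self-contained sketch of the calling convention `sample_with_past`
# expects. `ToyModel` is a stand-in assumption: it ignores the cache and returns
# uniform logits, where a real GPT-style model would implement `forward_with_past`
# with an actual key/value cache. Note `top_p` is passed explicitly as a float,
# since the filtering helper compares it against 1.0.
class ToyModel:
    def __init__(self, vocab_size=16):
        self.vocab_size = vocab_size

    def forward_with_past(self, x, past=None, past_length=None):
        # (logits, loss, present) -- uniform logits over the toy vocabulary
        logits = torch.zeros(x.shape[0], x.shape[1], self.vocab_size)
        return logits, None, None  # a real model would return its kv-cache as `present`

cond = torch.randint(0, 16, (2, 4))  # batch of 2, 4 conditioning tokens each
out = sample_with_past(cond, ToyModel(), steps=8, top_k=5, top_p=1.0)
assert out.shape == (2, 8)  # the conditioning prefix is stripped off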
# assumes module-level `tokenizer`, `model`, and `device` (illustrative setup below)
def run_short(prompt, num):
    try:
        prompt = prompt.strip()
        input_ids = tokenizer.encode(prompt, return_tensors='pt')
        # input_ids also needs to be moved to the GPU device!
        input_ids = input_ids.to(device)
        # get logits of last hidden state
        next_token_logits = model(input_ids).logits[:, -1, :]
        # filter
        filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
        # sample `num` candidate next tokens
        probs = F.softmax(filtered_next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=num)
        result = {}
        for idx, token in enumerate(next_token.tolist()[0]):
            result[idx] = tokenizer.decode(token)
        return result
    except Exception as e:
        print(e)
        return 500  # HTTP-style error code on failure
def respond_to_batch(model, queries, txt_len=20, top_k=0, top_p=1.0):
    """Sample text from language model."""
    input_ids = queries
    for i in range(txt_len):
        # Get Logits
        outputs = model(input_ids)
        next_token_logits = outputs[0][:, -1, :]
        next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
        # Sample
        probs = F.softmax(next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1).squeeze(1)
        input_ids = torch.cat([input_ids, next_token.unsqueeze(-1)], dim=-1)
    return input_ids[:, -txt_len:]
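# Usage sketch (an assumption, not part of the snippet above): driving
# `respond_to_batch` with GPT-2 from `transformers`.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")

query = gpt2_tokenizer.encode("My favorite food is", return_tensors="pt")
response = respond_to_batch(gpt2_model, query, txt_len=20)
print(gpt2_tokenizer.decode(response[0].tolist()))  # 20 sampled continuation tokens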
def run_word(sequence, num_samples):
    try:
        input_ids = tokenizer.encode(sequence, return_tensors="pt")
        tokens_tensor = input_ids.to(device)
        next_token_logits = model(tokens_tensor).logits[:, -1, :]
        filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
        probs = F.softmax(filtered_next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=num_samples)
        result = dict()
        for idx, token in enumerate(next_token.tolist()[0]):
            result[idx] = tokenizer.decode(token)
        return result
    except Exception as e:
        print(e)
        return 500
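# Illustrative setup for the `run_short` / `run_word` handlers above, which read
# `tokenizer`, `model`, and `device` from module scope; "gpt2" is an assumed
# stand-in for whatever checkpoint the original service loaded.
from transformers import AutoTokenizer, AutoModelForCausalLM

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

print(run_word("The weather today is", num_samples=5))
# -> a dict of five sampled next tokens, e.g. {0: ' sunny', 1: ' cold', ...}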
# requires: from torch.distributions import Categorical
def select_facts(self, row):
    logits = self.logits[row.q_idx, row.beam_idx].clone().detach()
    assert logits.shape == (self.nb_facts,), logits.shape
    # discourage stopping; forbid it entirely below the minimum explanation length
    logits[self.stop_explanation_id] -= self.config.stop_delta
    if len(row.partial_expl) < self.config.min_expl_length:
        logits[self.stop_explanation_id] = -float('Inf')
    logits[torch.isnan(logits)] = -float('Inf')
    # top_k_top_p_filtering expects a batch dimension
    logits = logits.unsqueeze(0)
    logits = top_k_top_p_filtering(logits, top_p=self.config.beam_decode_top_p)
    logits = logits.squeeze(0)
    distrib = Categorical(logits=logits)
    idxs = distrib.sample((self.config.beam_size,))  # beam_size samples
    scores_logprobs = distrib.log_prob(idxs).reshape(idxs.shape)
    result = tuple(zip(idxs.tolist(), scores_logprobs.tolist()))
    return result
def respond_to_batch(model, queries, mask=None, seq_ids=None, txt_len=100,
                     top_k=0, top_p=1.0, bos_token=-1, pad_token=-1):
    """Sample text from language model."""
    input_seq = queries
    batch_size, start_len = queries.shape
    generation_finished = torch.zeros((batch_size, 1)).cuda()
    ones = torch.ones_like(generation_finished).cuda()
    for i in range(txt_len):
        with torch.no_grad():
            outputs = model(input_ids=input_seq, attention_mask=mask, position_ids=seq_ids)
        next_token_logits = outputs[0][:, -1]
        next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
        probs = torch.softmax(next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        # finished rows get a zero attention-mask entry and a frozen position id
        mask = torch.cat([mask, (1 - generation_finished).long()], dim=-1)
        input_seq = torch.cat([input_seq, next_token], dim=-1)
        new_ids = seq_ids[:, -1:] + (1 - generation_finished).long()
        seq_ids = torch.cat([seq_ids, new_ids], dim=-1)
        # bos_token doubles as the end-of-generation marker here
        generation_finished = torch.where(next_token == bos_token, ones, generation_finished)
        if torch.all(generation_finished == 1):
            # pad out the remaining positions once every sequence has finished
            input_seq, mask, seq_ids = pad_seqs(input_seq, mask, seq_ids, txt_len - i - 1, pad_token)
            break
    return input_seq[:, -txt_len:], mask.long(), seq_ids.long()
def test_top_k_top_p_filtering(self):
    logits = torch.tensor(
        [
            [
                8.2220991,  # 3rd highest value; idx. 0
                -0.5620044,
                5.23229752,
                4.0386393,
                -6.8798378,
                -0.54785802,
                -3.2012153,
                2.92777176,
                1.88171953,
                7.35341276,
                8.43207833,  # 2nd highest value; idx. 10
                -9.85711836,
                -5.96209236,
                -1.13039161,
                -7.1115294,
                -0.8369633,
                -5.3186408,
                7.06427407,
                0.81369344,
                -0.82023817,
                -5.9179796,
                0.58813443,
                -6.99778438,
                4.71551189,
                -0.18771637,
                7.44020759,  # 4th highest value; idx. 25
                9.38450987,  # 1st highest value; idx. 26
                2.12662941,
                -9.32562038,
                2.35652522,
            ],  # cumulative prob of 4 highest values <= 0.6
            [
                0.58425518,
                4.53139238,
                -5.57510464,
                -6.28030699,
                -7.19529503,
                -4.02122551,
                1.39337037,
                -6.06707057,
                1.59480517,
                -9.643119,
                0.03907799,
                0.67231762,
                -8.88206726,
                6.27115922,  # 4th highest value; idx. 13
                2.28520723,
                4.82767506,
                4.30421368,
                8.8275313,  # 2nd highest value; idx. 17
                5.44029958,
                -4.4735794,
                7.38579536,  # 3rd highest value; idx. 20
                -2.91051663,
                2.61946077,
                -2.5674762,
                -9.48959302,
                -4.02922645,
                -1.35416918,
                9.67702323,  # 1st highest value; idx. 27
                -5.89478553,
                1.85370467,
            ],  # cumulative prob of 4 highest values <= 0.6
        ],
        dtype=torch.float,
        device=torch_device,
    )

    non_inf_expected_idx = torch.tensor(
        [[0, 0], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 20], [1, 27]],
        dtype=torch.long,
        device=torch_device,
    )  # expected non filtered idx as noted above

    non_inf_expected_output = torch.tensor(
        [8.2221, 8.4321, 7.4402, 9.3845, 6.2712, 8.8275, 7.3858, 9.6770],
        dtype=torch.float,
        device=torch_device,
    )  # expected non filtered values as noted above

    output = top_k_top_p_filtering(logits, top_k=10, top_p=0.6, min_tokens_to_keep=4)
    non_inf_output = output[output != -float("inf")].to(device=torch_device)
    non_inf_idx = (output != -float("inf")).nonzero().to(device=torch_device)

    self.assertTrue(torch.allclose(non_inf_expected_output, non_inf_output, atol=1e-12))
    self.assertTrue(torch.all(torch.eq(non_inf_expected_idx, non_inf_idx)))
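# For context, a minimal sketch of the filtering the test above exercises,
# modeled on the widely-circulated top-k/top-p implementation. The real
# `transformers` helper also handles `min_tokens_to_keep`; this simplified
# version (for 2-D logits) omits it.
def simple_top_k_top_p(logits, top_k=0, top_p=1.0, filter_value=-float("inf")):
    if top_k > 0:
        # drop everything below the k-th largest logit
        kth_largest = torch.topk(logits, top_k)[0][..., -1, None]
        logits = logits.masked_fill(logits < kth_largest, filter_value)
    if top_p < 1.0:
        sorted_logits, sorted_idx = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        # mask tokens once the cumulative probability passes top_p,
        # shifted right so the first token above the threshold survives
        sorted_remove = cum_probs > top_p
        sorted_remove[..., 1:] = sorted_remove[..., :-1].clone()
        sorted_remove[..., 0] = False
        remove = sorted_remove.scatter(1, sorted_idx, sorted_remove)
        logits = logits.masked_fill(remove, filter_value)
    return logits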
# Distilled models are smaller than the models they mimic. Using them instead
# of the large versions would help reduce our carbon footprint.

### 2) Causal Language Modeling
from transformers import AutoModelWithLMHead, AutoTokenizer, top_k_top_p_filtering
import torch
from torch.nn import functional as F

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelWithLMHead.from_pretrained("gpt2")

sequence = "Hugging Face is based in DUMBO, New York City, and "
input_ids = tokenizer.encode(sequence, return_tensors="pt")

# get logits of last hidden state
next_token_logits = model(input_ids).logits[:, -1, :]
# filter
filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
# sample
probs = F.softmax(filtered_next_token_logits, dim=-1)
next_token = torch.multinomial(probs, num_samples=1)

generated = torch.cat([input_ids, next_token], dim=-1)
resulting_string = tokenizer.decode(generated.tolist()[0])
print(resulting_string)
# Hugging Face is based in DUMBO, New York City, and has
import spacy
from nltk.corpus import wordnet, words
from transformers import AutoTokenizer, AutoModelForCausalLM


def run_me(secret='wilberscheid'):
    nlp = spacy.load("en_core_web_md")

    def get_syn(word):
        synonyms = []
        for syn in wordnet.synsets(str(word)):
            for l in syn.lemmas():
                synonyms.append(l.name())
        return list(set(synonyms))

    # to improve the model's ability to hide secrets within text, try to avoid
    # letters that rarely appear as the first letter of a word.
    # frequencies of each letter as the first letter of a word,
    # found at https://en.wikipedia.org/wiki/Letter_frequency
    # frequencies add up to .9026, but this was the only source I could find for
    # these values, and for our use case it will be good enough
    letter_frequ = {
        'a': .017, 'b': .044, 'c': .052, 'd': .032, 'e': .028, 'f': .04,
        'g': .016, 'h': .042, 'i': .073, 'j': .0051, 'k': .0086, 'l': .024,
        'm': .038, 'n': .023, 'o': .076, 'p': .043, 'q': .0022, 'r': .028,
        's': .067, 't': .16, 'u': .012, 'v': .0082, 'w': .055, 'x': .00045,
        'y': .0076, 'z': .00045,
    }

    secret_word = str(secret)
    secret_word = secret_word.replace(' ', '')
    secret_score = 1
    for letter in secret_word:
        secret_score *= letter_frequ.get(letter)
    print('secret score (where higher score is better): ', secret_score * 10000000000000)

    tokenizer = AutoTokenizer.from_pretrained("gpt2-large")
    model = AutoModelForCausalLM.from_pretrained("gpt2-large", return_dict=True)

    sequence = "Once upon a time"
    not_found = 0
    possible = 1
    for letter in secret_word:
        input_ids = tokenizer.encode(sequence, return_tensors="pt")
        # get logits of last hidden state
        next_token_logits = model(input_ids).logits[:, -1, :]
        # filter
        filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=300)
        # sample
        probs = F.softmax(filtered_next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=300)
        list_of_words = tokenizer.decode(next_token.tolist()[0]).split()
        # drop one-letter fragments that are not real words; filter with a
        # comprehension, since removing items while iterating skips elements
        list_of_words = [x for x in list_of_words if len(x) >= 2 or x.lower() in ['a', 'i']]
        # check which words have the correct first letter and appear in a vocabulary
        list_of_words_with_letter = [
            w.lower() for w in list_of_words if w[0] == letter and w in words.words()
        ]
        # take every word, find its synonyms, and add those that start with the right letter
        for word in list_of_words:
            syns = get_syn(word)
            for syn in syns:
                if syn[0] == letter:
                    list_of_words_with_letter.append(syn)
        print()
        print(list_of_words_with_letter)
        possible *= len(list_of_words_with_letter)

        found = 0
        for word in list_of_words:
            # if the word generated by the model fits our secret, add it
            if word[0].lower() == letter:
                print('word: ', word)
                sequence += (' ' + word)
                found = 1
                print('sequence: ', sequence)
                break
        # use WordNet to look for synonyms for each word in the word list
        if found == 0:
            for word in list_of_words:
                syn_words = get_syn(word)
                for syn in syn_words:
                    if syn[0].lower() == letter:
                        print('syn: ', syn, ' word: ', word)
                        sequence += (' ' + syn)
                        found = 1
                        print('sequence: ', sequence)
                        break
                if found == 1:
                    break
        if found == 0:
            print('did not find word that fit secret')
            print('#' * 90)
            not_found += 1

    print(sequence)
    print('#' * 90)
    print('not found: ', not_found)
    return sequence
def _generate_no_beam_search(
    self,
    input_ids,
    cur_len,
    max_length,
    min_length,
    do_sample,
    temperature,
    top_k,
    top_p,
    repetition_penalty,
    no_repeat_ngram_size,
    bad_words_ids,
    pad_token_id,
    eos_token_id,
    batch_size,
    attention_mask,
    use_cache,
    model_kwargs,
):
    """Generate sequences for each example without beam search (num_beams == 1).
    All returned sequences are generated independently.
    """
    # length of generated sentences / unfinished sentences
    unfinished_sents = input_ids.new(batch_size).fill_(1)
    sent_lengths = input_ids.new(batch_size).fill_(max_length)

    past = None
    # --- begin change ---
    p_eos_prev = None
    # --- end change ---
    while cur_len < max_length:
        model_inputs = self.prepare_inputs_for_generation(
            input_ids, past=past, attention_mask=attention_mask, use_cache=use_cache, **model_kwargs
        )

        outputs = self(**model_inputs, return_dict=True)
        # --- begin change ---
        logits = outputs[0]
        log_ps, p_eos_prev = self.st_softmax(
            logits, eos_token_id, p_eos_prev=p_eos_prev, return_p_eos=True
        )
        next_token_log_ps = log_ps[:, -1, :]
        p_eos_prev = p_eos_prev[:, -1:, :]
        next_token_logits = next_token_log_ps  # use log_ps as 'logits'
        # --- end change ---
        # next_token_logits = outputs.logits[:, -1, :]

        scores = self.postprocess_next_token_scores(
            scores=next_token_logits,
            input_ids=input_ids,
            no_repeat_ngram_size=no_repeat_ngram_size,
            bad_words_ids=bad_words_ids,
            cur_len=cur_len,
            min_length=min_length,
            max_length=max_length,
            eos_token_id=eos_token_id,
            repetition_penalty=repetition_penalty,
            batch_size=batch_size,
            num_beams=1,
        )

        # if model has past, then set the past variable to speed up decoding
        if "past_key_values" in outputs:
            past = outputs.past_key_values
        elif "mems" in outputs:
            past = outputs.mems

        if do_sample:
            # Temperature (higher temperature => more likely to sample low probability tokens)
            if temperature != 1.0:
                scores = scores / temperature
            # Top-p/top-k filtering
            next_token_logscores = top_k_top_p_filtering(scores, top_k=top_k, top_p=top_p)
            # Sample
            probs = F.softmax(next_token_logscores, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1).squeeze(1)
        else:
            # Greedy decoding
            next_token = torch.argmax(next_token_logits, dim=-1)

        # update generations and finished sentences
        if eos_token_id is not None:
            # pad finished sentences if eos_token_id exists
            tokens_to_add = next_token * unfinished_sents + pad_token_id * (1 - unfinished_sents)
        else:
            tokens_to_add = next_token

        # add token and increase length by one
        input_ids = torch.cat([input_ids, tokens_to_add.unsqueeze(-1)], dim=-1)
        cur_len = cur_len + 1

        if eos_token_id is not None:
            eos_in_sents = tokens_to_add == eos_token_id
            # if sentence is unfinished and the token to add is eos, sent_lengths is filled with current length
            is_sents_unfinished_and_token_to_add_is_eos = unfinished_sents.mul(eos_in_sents.long()).bool()
            sent_lengths.masked_fill_(is_sents_unfinished_and_token_to_add_is_eos, cur_len)
            # unfinished_sents is set to zero if eos in sentence
            unfinished_sents.mul_((~eos_in_sents).long())

        # stop when there is a </s> in each sentence, or if we exceed the maximum length
        if unfinished_sents.max() == 0:
            break

        # extend attention_mask for new generated input if only decoder
        if self.config.is_encoder_decoder is False:
            attention_mask = torch.cat(
                [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
            )

    return input_ids
def generate(self, input_batch, max_len=30, num_samples=1, mode='sample'):
    self.eval()
    device = next(self.parameters()).device
    context, context_utts_attn_mask, context_attn_mask = [
        t.to(device) for t in input_batch[:3]
    ]
    ground_truth = input_batch[6].numpy()

    context_hiddens, context_encoding = self.context_encoding(
        context, context_utts_attn_mask, context_attn_mask)

    generated = torch.zeros(
        (num_samples, 1), dtype=torch.long,
        device=device).fill_(self.tokenizer.cls_token_id)  # [batch_sz x 1] (1=seq_len)
    sample_lens = torch.ones((num_samples, 1), dtype=torch.long, device=device)
    len_inc = torch.ones((num_samples, 1), dtype=torch.long, device=device)

    for _ in range(max_len):
        outputs, *_ = self.decoder(
            generated,
            generated.ne(self.tokenizer.pad_token_id).long(),
            None, None, None, None,
            encoder_hidden_states=context_hiddens,
            encoder_attention_mask=context_attn_mask,
        )  # [batch_size x seq_len x vocab_size]
        next_token_logits = outputs[:, -1, :] / self.decoder_config.temperature

        # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858)
        for i in range(num_samples):
            for token_id in set(generated[i].tolist()):
                next_token_logits[i, token_id] /= self.decoder_config.repetition_penalty

        filtered_logits = top_k_top_p_filtering(
            next_token_logits,
            top_k=self.decoder_config.top_k,
            top_p=self.decoder_config.top_p)
        if mode == 'greedy':
            # greedy decoding
            next_token = torch.argmax(filtered_logits, dim=-1).unsqueeze(-1)
        else:
            # one token per sequence; sampling more than one token per row here
            # would break the concatenation and masking below
            next_token = torch.multinomial(
                torch.softmax(filtered_logits, dim=-1), num_samples=1)

        next_token[len_inc == 0] = self.tokenizer.pad_token_id
        generated = torch.cat((generated, next_token), dim=1)
        # stop increasing length (set 0 bit) when EOS is encountered
        len_inc = len_inc * (next_token != self.tokenizer.sep_token_id).long()
        if len_inc.sum() < 1:
            break
        sample_lens = sample_lens + len_inc

    # to numpy
    sample_words = generated.data.cpu().numpy()  # nparray: [repeat x seq_len]
    sample_lens = sample_lens.data.cpu().numpy()
    context = context.data.cpu().numpy()
    return sample_words, sample_lens, context, ground_truth
def modify(self, logits: torch.Tensor):
    """Delegates the call to the transformers `top_k_top_p_filtering` func."""
    # discards the return value, so this assumes the helper filters `logits` in place
    top_k_top_p_filtering(logits=logits,
                          top_k=self._top_k,
                          top_p=self._top_p,
                          filter_value=_MINUS_INF)
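# Quick illustrative check of the delegation above. Because `modify` drops the
# helper's return value, it relies on in-place filtering; depending on the
# transformers version the helper may instead return a new tensor (built with
# masked_fill), in which case the return value must be kept, as done here.
_MINUS_INF = -float("inf")  # assumed value of the module constant

demo_logits = torch.randn(2, 50)
filtered = top_k_top_p_filtering(logits=demo_logits, top_k=5, top_p=1.0,
                                 filter_value=_MINUS_INF)
print((filtered > _MINUS_INF).sum(dim=-1))  # tensor([5, 5]): only top-5 kept per row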