def beam_search_pick(prime, width):
    """Pick the best sample sequence via beam search seeded with `prime`.

    Falls back to a random vocabulary word when `prime` is empty or a
    single space, then decodes `width` beams and returns the one with
    the lowest (best) score.
    """
    if not prime or prime == ' ':
        # No usable prime text: seed with a random word from the vocab.
        prime = random.choice(list(vocab.keys()))
    # Unknown words map to label 0.
    seed_labels = [vocab.get(word, 0) for word in prime.split()]
    # NOTE(review): `self` here presumably comes from an enclosing
    # method's closure — confirm this function is nested in a class method.
    zero_state = sess.run(self.cell.zero_state(1, tf.float32))
    searcher = BeamSearch(beam_search_predict, zero_state, seed_labels)
    samples, scores = searcher.search(None, None, k=width, maxsample=num)
    # Lowest score marks the best beam.
    return samples[np.argmin(scores)]
def test_multiple_beams(self):
    """With k=4, every permutation of the tail must appear and the all-4s beam must score best."""
    searcher = BeamSearch(naive_predict, self.initial_state, self.prime_labels)
    samples, scores = searcher.search(None, None, k=4, maxsample=5)
    # All arrangements of a single 3 among trailing 4s must be present.
    expected_beams = [
        [0, 1, 4, 4, 4],
        [0, 1, 4, 4, 3],
        [0, 1, 4, 3, 4],
        [0, 1, 3, 4, 4],
    ]
    for beam in expected_beams:
        self.assertIn(beam, samples)
    # The minimum-score beam must be the all-4s sequence.
    self.assertEqual(samples[np.argmin(scores)], [0, 1, 4, 4, 4])
def beam_search_pick(prime_labels, width, initial_state, tokens=False, attention_key_words=None, keywords_count=None):
    """Returns the beam search pick.

    Runs a beam search of `width` beams from `initial_state`, seeded with
    `prime_labels`, and returns the lowest-score (best) sample.
    """
    searcher = BeamSearch(beam_search_predict, initial_state, prime_labels,
                          attention_key_words, keywords_count)
    # Stop on end-of-sentence only when decoding token sequences.
    end_label = vocab.get('</s>', 0) if tokens else None
    unk_label = vocab.get('<unk>', None)
    samples, scores = searcher.search(unk_label, end_label, k=width, maxsample=num)
    # The lowest score marks the best-ranked sequence.
    return samples[np.argmin(scores)]
class MetricEvaluator(object):
    """Evaluates a seq2seq model on a data split: BLEU and (optionally) loss.

    Decoding is greedy by default; pass beam_search=True to decode with a
    BeamSearch of the given width instead.
    """

    def __init__(self, loader, beam_search=False, beam_width=4, batch_size=64):
        """Cache vocab lookups from the loader and optionally build a beam searcher.

        Args:
            loader: data loader exposing `corpus.trg_params` with
                'word2idx' / 'idx2word' maps and a '<s>' entry.
            beam_search: when True, decode with beam search instead of greedy.
            beam_width: number of beams kept when beam_search is enabled.
            batch_size: batch size used when iterating the split.
        """
        self.batch_size = batch_size
        self.loader = loader
        # Cache target-side vocabulary maps once.
        self.word2idx = loader.corpus.trg_params['word2idx']
        self.idx2word = loader.corpus.trg_params['idx2word']
        # Reuse the cached map instead of re-deriving it from the loader.
        self.sos = self.word2idx['<s>']  # start-of-sentence token id
        self.beam_search = None
        if beam_search:
            self.beam_search = BeamSearch(self.word2idx, beam_width=beam_width)

    def compute_scores(self, model, split, compute_ppl=False):
        """Decode `split` with `model` and return (bleu_score, per_batch_losses).

        Args:
            model: model exposing `eval`, `inference`, `encoder`, `decoder`
                and (when compute_ppl) `score`.
            split: name of the data split to iterate.
            compute_ppl: when True, also collect per-batch loss values.

        Returns:
            Tuple of (BLEU score over the whole split, list of batch losses —
            empty unless compute_ppl was True).
        """
        itr = self.loader.create_epoch_iterator(split, self.batch_size)
        model.eval()
        refs = []
        hyps = []
        costs = []
        # The batch index was never used, so iterate the batches directly.
        for src, src_lengths, trg in tqdm(itr):
            if compute_ppl:
                loss = model.score(src, src_lengths, trg)
                # NOTE(review): `.data[0]` is pre-0.4 PyTorch style; on newer
                # versions `.item()` is the equivalent — confirm torch version.
                costs.append(loss.data[0])
            if self.beam_search is None:
                # Greedy decoding path.
                out = model.inference(src, src_lengths, sos=self.sos)
                out = out.cpu().data.tolist()
            else:
                # Beam-search decoding over encoder outputs.
                src = model.encoder(src, src_lengths)
                out = self.beam_search.search(model.decoder, src)
            trg = trg.cpu().data.tolist()
            for ref, hyp in zip(trg, out):
                refs.append(self.loader.corpus.idx2sent(self.idx2word, ref))
                hyps.append(self.loader.corpus.idx2sent(self.idx2word, hyp))
        score = compute_bleu(refs, hyps)
        return score, costs
def test_single_beam(self):
    """A width-1 beam search must return exactly the single greedy sequence."""
    searcher = BeamSearch(naive_predict, self.initial_state, self.prime_labels)
    samples, _scores = searcher.search(None, None, k=1, maxsample=5)
    self.assertEqual(samples, [[0, 1, 4, 4, 4]])