class Scorer:
    """Rank candidate sentences by combining external scores with a language-model score.

    The wrapped model is put in eval mode and its criterion is configured with
    ``keep_order=True`` so that the returned losses stay aligned with the input
    batch order.
    """

    def __init__(self, model, path_2_vocab, score_fn=score_fun_linear):
        """Wrap *model*, load the vocabulary and run a short smoke test.

        Args:
            model: callable language model exposing ``eval()`` and a ``crit``
                attribute; it is called as ``model(data, target, *mems)``.
            path_2_vocab: path of the vocabulary file handed to ``Vocab``.
            score_fn: callable ``(s1, s2, s3, lm_score) -> comparable`` used by
                ``chose_best_candidate`` to rank candidates.

        Note:
            Construction runs ``test_model`` on a few fixed sentences and
            prints their scores.
        """
        self._model = model
        self._model.eval()
        self._model.crit.keep_order = True
        self._vocab = Vocab(vocab_file=path_2_vocab)
        self._vocab.build_vocab()
        self._score_fn = score_fn

        print('---->>> Testing Model.')
        self.test_model(candidates=['they had one night in which to prepare for deach',
                                    'they had one night in which to prepare for death',
                                    'i hate school',
                                    'i love school',
                                    'the fox jumps on a grass',
                                    'the crox jump a la glass'])
        print('---->>> Done testing model')

    @staticmethod
    def chunks(l, n):
        """Yield consecutive slices of *l* holding at most *n* items each."""
        for start in range(0, len(l), n):
            yield l[start:start + n]

    def _model_device(self):
        """Return the device the model lives on.

        Falls back to CUDA when available (CPU otherwise) if the model exposes
        no parameters to inspect.
        """
        try:
            return next(self._model.parameters()).device
        except (StopIteration, AttributeError):
            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def nlm_compute(self, candidates_full, batch_size=256):
        """Score every sentence in *candidates_full* with the language model.

        Each sentence is stripped, lower-cased, whitespace-tokenised and
        wrapped in ``<S>`` markers, then scored in batches of *batch_size*.

        Args:
            candidates_full: sequence of sentence strings.
            batch_size: maximum number of sentences per forward pass.

        Returns:
            A 1-D CPU float tensor with one score per input sentence, in input
            order. Each score is the negated sum of ``ret[0]`` over the
            sentence's non-padding positions (presumably the per-token loss,
            i.e. higher means more likely -- confirm against the model).
        """
        device = self._model_device()
        results = torch.zeros(len(candidates_full))
        with torch.no_grad():
            for j, candidates in enumerate(self.chunks(candidates_full, batch_size)):
                sents = self._vocab.encode_sents(
                    [['<S>'] + string.strip().lower().split() + ['<S>'] for string in candidates])
                seq_lens = torch.tensor([x.shape[0] for x in sents], dtype=torch.long)
                longest = int(seq_lens.max())
                n_sents = len(sents)

                # Time-major padded batch (time, batch); unused slots stay 0
                # and are masked out below.
                sents_th = torch.zeros(longest, n_sents, dtype=torch.long, device=device)
                for i, sent in enumerate(sents):
                    sents_th[:sent.shape[0], i] = sent

                mems = tuple()
                # Inputs are tokens [0, T-1), targets are tokens [1, T).
                ret = self._model(sents_th[:-1], sents_th[1:], *mems)

                # A sentence of length L has L-1 valid target positions;
                # everything at or beyond that index is padding.
                max_len = longest - 1
                mask = torch.arange(max_len).expand(n_sents, max_len) >= seq_lens.unsqueeze(1) - 1
                result = -1 * ret[0].masked_fill(mask.transpose(0, 1).to(device), 0).sum(dim=0)

                start = j * batch_size
                results[start:start + len(result)] = result
        return results

    def test_model(self, candidates):
        """Print the language-model score next to each sentence in *candidates*."""
        scores = self.nlm_compute(candidates).cpu().detach().numpy()
        for value, sentence in zip(scores, candidates):
            print("{0} ---- {1}".format(value, sentence))

    def chose_best_candidate(self, candidates: List) -> tuple:
        """Pick the candidate with the highest combined score.

        Args:
            candidates: non-empty list of indexable records where items 0-2
                are scores passed to ``score_fn`` and item 3 is the sentence.

        Returns:
            ``(best_sentence, lm_scores)`` where ``lm_scores`` is the tensor
            returned by ``nlm_compute`` for all candidates. Ties keep the
            earliest candidate.

        Raises:
            IndexError: if *candidates* is empty.
        """
        sentences = [c[3] for c in candidates]
        nln_scores = self.nlm_compute(sentences)

        best_sentence = candidates[0][3]
        # True lower bound so that arbitrarily low scores still compete.
        best_score = float('-inf')
        for record, lm_score in zip(candidates, nln_scores):
            new_score = self._score_fn(record[0], record[1], record[2], lm_score.item())
            if new_score > best_score:
                best_sentence = record[3]
                best_score = new_score
        return (best_sentence, nln_scores)
args.tgt_len, args.ext_len, args.mem_len, args.clamp_len)) model.reset_length(args.tgt_len, args.ext_len, args.mem_len) if args.clamp_len > 0: model.clamp_len = args.clamp_len if args.same_length: model.same_length = True # Load dataset # strings = ["a barrel's the jolliest bed going on the tramp i mean", "a bit late to secure accommodations isn't it"] strings = ["they had one night in which to prepare for deach", "they had one night in which to prepare for death", "i hate school", "i love school", "the fox jumps on a grass", "the crox jump a la glass", "she's an engineer", "he's an engineer", "she's a nurse", "he's a nurse", "she's a manager", "he's a manager"] vocab = Vocab(vocab_file=args.vocab_file) vocab.build_vocab() sents = vocab.encode_sents([['<S>'] + string.strip().lower().split() + ['<S>'] for string in strings]) device = torch.device('cuda' if args.cuda else 'cpu') ############################################################################### # Scoring code ############################################################################### def score(sents, device): # Turn on evaluation mode which disables dropout. model.eval() log_losses = [] start_time = time.time() with torch.no_grad(): for sent in sents: sent = sent[:, None].to(device) mems = tuple()