Python Vocab.encode_sents Examples

Programming Language: Python

Namespace/Package Name: utils.vocabulary

Class/Type: Vocab

Method/Function: encode_sents

Examples at hotexamples.com: 2

Python Vocab.encode_sents - 2 examples found. These are the top-rated real-world Python examples of utils.vocabulary.Vocab.encode_sents, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Vocab(20)

build_vocab(19)

count_file(15)

encode_file(13)

encode_sents(2)

build_distribution(1)

create_tokens(1)

encode_file_only_for_lables(1)

from_symbols(1)

write_to_file(1)

Example #1

Show file

File: process_beam_dump.py Project: zloop1982/OpenSeq2Seq

class Scorer:
  """Rescore candidate sentences with a neural language model.

  Wraps a language model and a vocabulary so that candidate sentences can be
  scored by the model and the best one picked by combining that score with
  the candidates' other scores through ``score_fn``.
  """

  def __init__(self, model, path_2_vocab, score_fn=score_fun_linear):
    """Prepare the model and vocabulary, then run a small smoke test.

    Args:
      model: language model. It must provide ``eval()``, a ``crit``
        attribute, and be callable as ``model(data, target, *mems)``.
      path_2_vocab: vocabulary file path passed to ``Vocab``.
      score_fn: callable taking ``(s1, s2, s3, lm_score)`` and returning a
        value comparable with ``>``; used by ``chose_best_candidate``.
    """
    self._model = model
    self._model.eval()
    # NOTE(review): presumably makes the criterion return losses in input
    # order so they line up with the batch columns -- confirm against model.
    self._model.crit.keep_order = True
    self._vocab = Vocab(vocab_file=path_2_vocab)
    self._vocab.build_vocab()
    self._score_fn = score_fn

    # Smoke test: prints the score of a few hand-picked sentence pairs.
    print('---->>> Testing Model.')
    self.test_model(candidates=['they had one night in which to prepare for deach',
                                'they had one night in which to prepare for death',
                                'i hate school', 'i love school',
                                'the fox jumps on a grass',
                                'the crox jump a la glass'])
    print('---->>> Done testing model')

  @staticmethod
  def chunks(l, n):
    """Yield consecutive slices of ``l`` holding at most ``n`` items each."""
    for i in range(0, len(l), n):
      yield l[i:i + n]

  def nlm_compute(self, candidates_full, batch_size=256):
    """Score every candidate sentence with the language model.

    Each sentence is stripped, lower-cased, split on whitespace, wrapped in
    ``'<S>'`` markers and encoded with the vocabulary. Sentences are
    processed in batches, zero-padded to the longest sentence of the batch.

    Args:
      candidates_full: sequence of sentence strings (needs ``len()`` and
        slicing).
      batch_size: maximum number of sentences per forward pass.

    Returns:
      1-D float tensor with one entry per candidate: the negated sum of the
      model's first output over the non-padding target positions.
    """
    # Use CUDA when present (the original behaviour) and fall back to the CPU
    # otherwise, so scoring also works on hosts without a GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    results = torch.zeros(len(candidates_full))
    with torch.no_grad():
      for j, candidates in enumerate(self.chunks(candidates_full, batch_size)):
        sents = self._vocab.encode_sents(
          [['<S>'] + string.strip().lower().split() + ['<S>'] for string in candidates])
        seq_lens = torch.tensor([x.shape[0] for x in sents], dtype=torch.long)
        # One column per sentence; rows past a sentence's length stay 0.
        sents_th = torch.zeros(seq_lens.max(), seq_lens.shape[0], dtype=torch.long).to(device)
        for i, sent in enumerate(sents):
          sents_th[:seq_lens[i], i] = sent

        mems = tuple()
        # Inputs are all tokens but the last, targets all tokens but the first.
        ret = self._model(sents_th[:-1], sents_th[1:], *mems)
        max_len = seq_lens.max() - 1
        # True where a target position lies beyond the sentence's own
        # (length - 1) targets, i.e. where the column holds only padding.
        mask = torch.arange(max_len).expand(seq_lens.shape[0], max_len) >= seq_lens.unsqueeze(1) - 1
        # Zero the padded positions, sum per sentence and flip the sign.
        # NOTE(review): assumes ret[0] is shaped (target_len, batch) -- confirm.
        result = -1 * ret[0].masked_fill(mask.transpose(0, 1).to(device), 0).sum(dim=0)
        start = j * batch_size
        results[start:start + len(result)] = result
    return results

  def test_model(self, candidates):
    """Print ``<score> ---- <sentence>`` for each sentence in ``candidates``."""
    scores = self.nlm_compute(candidates).cpu().detach().numpy()
    for score, sentence in zip(scores, candidates):
      print("{0} ---- {1}".format(score, sentence))

  def chose_best_candidate(self, candidates: List) -> tuple:
    """Pick the candidate with the highest combined score.

    Args:
      candidates: non-empty list of indexable records. Items 0, 1 and 2 are
        passed to ``score_fn`` as-is; item 3 is the sentence text that is
        scored by the language model.

    Returns:
      A ``(best_sentence, lm_scores)`` tuple, where ``lm_scores`` is the
      tensor returned by ``nlm_compute`` for all candidates. The first
      candidate's sentence is returned if no combined score compares greater
      than negative infinity.

    Raises:
      IndexError: if ``candidates`` is empty.
    """
    candidates_t = [c[3] for c in candidates]
    nln_scores = self.nlm_compute(candidates_t)
    candidate = candidates[0][3]
    # Start from -inf rather than a large magic constant so that arbitrarily
    # low combined scores can still be selected.
    score = float("-inf")
    for cand, lm_score in zip(candidates, nln_scores):
      new_score = self._score_fn(cand[0], cand[1], cand[2], lm_score.item())
      if new_score > score:
        candidate = cand[3]
        score = new_score
    return (candidate, nln_scores)

Example #2

Show file

       args.tgt_len, args.ext_len, args.mem_len, args.clamp_len))

# Apply the evaluation-time length settings from the command line to the model.
model.reset_length(args.tgt_len, args.ext_len, args.mem_len)
if args.clamp_len > 0:
    model.clamp_len = args.clamp_len
if args.same_length:
    model.same_length = True


# Load dataset
# Alternative sample sentences, kept for reference:
# strings = ["a barrel's the jolliest bed going on the tramp i mean", "a bit late to secure accommodations isn't it"]
strings = [
    "they had one night in which to prepare for deach",
    "they had one night in which to prepare for death",
    "i hate school",
    "i love school",
    "the fox jumps on a grass",
    "the crox jump a la glass",
    "she's an engineer",
    "he's an engineer",
    "she's a nurse",
    "he's a nurse",
    "she's a manager",
    "he's a manager",
]

# Build the vocabulary, then encode every sentence after stripping,
# lower-casing, whitespace-splitting and wrapping it in '<S>' markers.
vocab = Vocab(vocab_file=args.vocab_file)
vocab.build_vocab()
sents = vocab.encode_sents(
    [['<S>'] + line.strip().lower().split() + ['<S>'] for line in strings]
)

# Run on the GPU only when requested through args.cuda.
device = torch.device('cuda') if args.cuda else torch.device('cpu')

###############################################################################
# Scoring code
###############################################################################

def score(sents, device):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    log_losses = []
    start_time = time.time()
    with torch.no_grad():
        for sent in sents:
            sent = sent[:, None].to(device)
            mems = tuple()