Python Vocab.encode Examples

Programming Language: Python

Namespace/Package Name: nmtlab.utils

Class/Type: Vocab

Method/Function: encode

Examples at hotexamples.com: 4

Python Vocab.encode - 4 examples found. These are the top-rated real-world Python examples of nmtlab.utils.Vocab.encode extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Vocab(8)

encode(4)

decode(2)

build(1)

encode_token(1)

save(1)

size(1)

Example #1

Show file

File: save_gradfield2.py Project: zomux/lanmt-ebm

# Vocabulary used to map target-side tokens to integer ids.
tgt_vocab = Vocab(tgt_vocab_path)

# Load both sides of the test corpus. The context managers close the file
# handles as soon as the lines are in memory instead of leaking them.
with open(test_src_corpus) as src_file:
    src_lines = src_file.readlines()
with open(test_tgt_corpus) as trg_file:
    trg_lines = trg_file.readlines()

# Grid configuration: grid_size = in_grid + 2 * out_grid + 1 (25 here).
in_grid, out_grid = 16, 4
grid_size = in_grid + 2 * out_grid + 1
all_dict = {}
latent_dim = 2

for idx, (src_line, trg_line) in enumerate(zip(src_lines, trg_lines)):
    ylen = len(trg_line.strip().split())
    # Only pairs whose target has fewer than 8 tokens are processed.
    # (An earlier variant of this filter kept the 8..12 range instead.)
    if ylen >= 8:
        continue

    # Wrap each sentence in <s> ... </s> before converting tokens to ids.
    src_tokens = src_vocab.encode(
        "<s> {} </s>".format(src_line.strip()).split())
    trg_tokens = tgt_vocab.encode(
        "<s> {} </s>".format(trg_line.strip()).split())

    # Batch of size one for each side.
    x = torch.tensor([src_tokens])
    y = torch.tensor([trg_tokens])
    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()

    # Masks mark positions whose token id is non-zero. They are placed on the
    # same device as their inputs rather than forced onto CUDA, so the script
    # no longer crashes on CPU-only machines.
    x_mask = nmt.to_float(torch.ne(x, 0)).to(x.device)
    y_mask = nmt.to_float(torch.ne(y, 0)).to(y.device)
    y_length = y_mask.size(1)

    x_states = nmt.embed_layer(x)
    x_states = nmt.x_encoder(x_states, x_mask)
    # Gradients are disabled only for the "fakegrad" model type; otherwise
    # suppress() serves as a do-nothing context manager.
    with torch.no_grad() if OPTS.modeltype == "fakegrad" else suppress():
        y_states = nmt.embed_layer(y)
        q_states = nmt.q_encoder_xy(y_states, y_mask, x_states, x_mask)

Example #2

Show file

File: run2.py Project: WN1695173791/lanmt-ebm

 # Read the source side of the test corpus; the context manager closes the
 # file handle once the lines are loaded.
 with open(test_src_corpus) as corpus_file:
     lines = corpus_file.readlines()
 latent_candidate_num = OPTS.Tcandidate_num if OPTS.Tlatent_search else None
 decode_times = []
 if OPTS.profile:
     # Repeat the corpus ten times when profiling.
     lines = lines * 10
 if OPTS.test_fix_length > 0:
     # Keep only sentences with exactly the requested number of tokens, then
     # run the first such sentence 300 times.
     lines = [
         line for line in lines
         if len(line.split()) == OPTS.test_fix_length
     ]
     if not lines:
         raise SystemError(
             "no test sentence has exactly {} tokens".format(
                 OPTS.test_fix_length))
     lines = [lines[0]] * 300
 trains_stop_stdout_monitor()
 with open(OPTS.result_path, "w") as outf:
     for i, line in enumerate(lines):
         # Make a batch of size one: wrap the sentence in <s> ... </s> and
         # convert the tokens to ids.
         tokens = src_vocab.encode(
             "<s> {} </s>".format(line.strip()).split())
         x = torch.tensor([tokens])
         if torch.cuda.is_available():
             x = x.cuda()
         start_time = time.time()
         # with torch.no_grad() if not OPTS.scorenet else nullcontext():
         # Predict latent and target words from prior
         if OPTS.scorenet:
             targets = scorenet.translate(x,
                                          n_iter=OPTS.Trefine_steps,
                                          step_size=1.0)
         else:
             targets = nmt.translate(x, refine_steps=OPTS.Trefine_steps)
         # The None check must come before indexing: previously targets[0]
         # was evaluated first, so a None result raised TypeError and the
         # fallback below could never run.
         if targets is None:
             target_tokens = [2, 2, 2]
         else:
             target_tokens = targets[0].cpu()[0].numpy().tolist()

Example #3

Show file

File: lm.py Project: WN1695173791/lanmt-ebm

        print("Cannot find model in {}".format(model_path))
        sys.exit()
    # Load the trained weights, move the model to the GPU when one is
    # available, and switch it out of training mode.
    nmt.load(model_path)
    if torch.cuda.is_available():
        nmt.cuda()
    nmt.train(False)
    src_vocab = Vocab(src_vocab_path)
    tgt_vocab = Vocab(tgt_vocab_path)

    # Testing for language model
    # Read the target corpus; the context manager closes the file handle
    # instead of leaking it.
    with open(test_tgt_corpus) as corpus_file:
        lines = corpus_file.readlines()
    first_line = lines[0]
    # The corpus line is immediately overridden by a fixed probe sentence.
    first_line = "Gut@@ ach : Noch ach Sicherheit ach Fußgän@@ ger ."
    # first_line = "ach ach ."
    print(first_line)
    # Wrap the sentence in <s> ... </s> and convert the tokens to ids.
    first_line_tokens = tgt_vocab.encode(
        "<s> {} </s>".format(first_line.strip()).split())
    # NOTE(review): `input` shadows the builtin; the name is kept because code
    # beyond this excerpt may still refer to it.
    input = torch.tensor([first_line_tokens])
    if torch.cuda.is_available():
        input = input.cuda()
    # z = vae.compute_codes(input)
    z = nmt.compute_prior_states(input)
    # z = torch.zeros((1, 6, OPTS.latentdim))
    # All-ones mask with one entry per position along z's second dimension.
    mask = torch.ones((1, z.shape[1]))
    if torch.cuda.is_available():
        mask = mask.cuda()
        z = z.cuda()
    # Keep a copy of the starting latent before it is refined.
    init_z = z.clone()
    for _ in range(10):
        z, tokens = nmt.refine(z,
                               mask,
                               n_steps=1,

Example #4

Show file

File: run.py Project: zomux/tree2code

     DATA_ROOT,
     os.path.basename(OPTS.model_path).split(".")[0])
 # Switch the autoencoder out of training mode and use the GPU only when one
 # is available.
 autoencoder.train(False)
 use_cuda = torch.cuda.is_available()
 if use_cuda:
     autoencoder.cuda()
 with open(out_path, "w") as outf:
     print("code path", out_path)
     # Samples are processed in fixed-size chunks.
     batch_size = 512
     for i in range(0, len(samples), batch_size):
         sub_samples = samples[i:i + batch_size]
         processed_samples = []
         for src, cfg in sub_samples:
             src = src.strip()
             cfg = cfg.strip()
             src_ids = src_vocab.encode(src.split())
             enc_tree, dec_tree = treegen.build_trees(cfg)
             processed_samples.append((src_ids, enc_tree, dec_tree))
         src_batch, enc_batch, dec_batch = dataset.batch(
             processed_samples)
         # Move the source batch to the GPU only when CUDA is available, in
         # line with the guarded autoencoder.cuda() above; the previous
         # unconditional .cuda() call failed on CPU-only machines.
         if use_cuda:
             src_batch = src_batch.cuda()
         out = autoencoder(src_batch,
                           enc_batch,
                           dec_batch,
                           return_code=True)
         codes = out["codes"]
         # Write one line per sample in this chunk, then flush so that
         # partial results reach the file after every chunk.
         for j in range(len(sub_samples)):
             code = codes[j].int()
             outf.write("{}\n".format(code))
         outf.flush()