args.beam, 1, 1).transpose(2, 1).transpose(
                1, 0).contiguous().view(
                args.beam**2, XMB.size(1), XMB.size(2))[top_beam_idxs]
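            # XMB was tiled to beam**2 candidate rows and filtered down to the
            # rows indexed by top_beam_idxs, so it again holds one context per
            # surviving beam.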

            # Append the selected beam tokens to the inputs and extend the
            # attention mask by one position.
            XMB, MMB = append_batch(XMB, beam_toks, MMB)

            # Stop once every beam has emitted the end token, or when the
            # maximum generation length is reached.
            if (beam_toks == end_token).sum().item() == args.beam or _ == context_size_e2 - 1:
                break

        # Decode the top-scoring beam, stripping the BPE end-of-word markers.
        for tok in beam_seqs[0]:
            tokens.append(text_encoder.decoder[tok.item()].replace('</w>', ' ').replace('\n', ''))

        # Decode every beam to plain text: drop end tokens, strip the BPE
        # end-of-word markers, and collapse runs of whitespace.
        beams = []

        for beam in beam_seqs:
            beams.append(" ".join("".join(
                [text_encoder.decoder[tok.item()].replace(
                    '</w>', ' ').replace('\n', '')
                 for tok in beam if tok != end_token]).split()))

        sequence_all['beams'] = beams
        final_sequences.append(sequence_all)
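# final_sequences now holds one dict per generated example, with the decoded
# candidate strings stored under the "beams" key.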

import pickle

# Persist every generated sequence, creating the output directory first.
utils.mkpath("/".join(eval_file_name.split("/")[:-1]))

with open(eval_file_name, "wb") as f:
    pickle.dump(final_sequences, f)
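# Illustrative check (not part of the original script): the dump can be read
# back with a plain pickle load; each entry is a dict whose "beams" list
# holds the decoded candidate strings.
with open(eval_file_name, "rb") as f:
    reloaded = pickle.load(f)
assert reloaded[0]["beams"] == final_sequences[0]["beams"]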

Example #2
0
]

# Special vocabulary entries: the sequence delimiters plus one "<relation>"
# token per ConceptNet relation.
special = [data.start_token, data.end_token]
special += ["<{}>".format(relation) for relation in relations]

encoder_path = "model/encoder_bpe_40000.json"
bpe_path = "model/vocab_40000.bpe"

text_encoder = TextEncoder(encoder_path, bpe_path)

# Register each special token under the next free vocabulary id, keeping the
# encoder and decoder in sync (the decoder entry must be written before the
# encoder grows by one).
for special_token in special:
    text_encoder.decoder[len(text_encoder.encoder)] = special_token
    text_encoder.encoder[special_token] = len(text_encoder.encoder)
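# Illustrative sanity check (not in the original script): every special token
# should now round-trip through the extended encoder and decoder.
for special_token in special:
    assert text_encoder.decoder[text_encoder.encoder[special_token]] == special_token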

# Load the raw ConceptNet data and convert it to training tensors using the
# extended vocabulary.
data_loader = cdata.GenerationDataLoader(opt)
data_loader.load_data("data/conceptnet/")

data_loader.make_tensors(text_encoder, special, test=False)

# Record the maximum relation length observed in the data.
opt.data.maxr = data_loader.max_r

save_path = "data/conceptnet/processed/generation"
# The file name encodes the data configuration via make_name_string.
save_name = os.path.join(save_path,
                         "{}.pickle".format(utils.make_name_string(opt.data)))

utils.mkpath(save_path)

print("Data Loader will be saved to {}".format(save_name))

torch.save(data_loader, save_name)
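# Illustrative round-trip (not in the original script): torch.save pickles the
# whole loader object, so torch.load can restore it as long as the same class
# definitions are importable (newer torch versions may require
# weights_only=False for non-tensor objects).
reloaded_loader = torch.load(save_name)
assert reloaded_loader.max_r == data_loader.max_r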