Example #1
0
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary for ``fields`` and save the result to disk.

    The work is delegated to ``inputters.build_vocab``, driven by the
    source/target vocabulary options carried on ``opt``. Whatever it returns
    is written with ``torch.save`` to ``opt.save_data + '.vocab.pt'``.
    """
    positional_args = (
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = inputters.build_vocab(
        *positional_args,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    torch.save(built_fields, opt.save_data + '.vocab.pt')
Example #2
0
def build_save_vocab(train_dataset, fields, opt, logger=None):
    """Build the vocab, then persist a saveable form of it.

    ``inputters.build_vocab`` receives the dataset, the fields, the
    source/target vocabulary options from ``opt`` and, last, ``logger``.
    The built fields are converted with ``inputters.save_fields_to_vocab``
    and written to ``opt.save_data + '.vocab.pt'`` via ``torch.save``.
    """
    built_fields = inputters.build_vocab(
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
        logger,
    )

    # The fields themselves are not saved; only the vocab extracted from
    # them is, and the fields get reconstructed at training time.
    output_path = opt.save_data + '.vocab.pt'
    saveable_vocab = inputters.save_fields_to_vocab(built_fields)
    torch.save(saveable_vocab, output_path)
Example #3
0
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary fields and save them as ``<save_data>.vocab.pt``.

    Calls ``inputters.build_vocab`` with the vocabulary options found on
    ``opt`` and stores the returned object with ``torch.save``.
    """
    built_fields = inputters.build_vocab(
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    # NOTE: a nested layout (<save_data>/<prefix>.vocab.pt, with the prefix
    # taken from the last path component) was sketched here earlier; the
    # flat "<save_data>.vocab.pt" location is the one in use.
    destination = opt.save_data + '.vocab.pt'
    torch.save(built_fields, destination)
def build_save_vocab(train_dataset, fields, opt):
    """Build source, target and agenda vocabularies and save the fields.

    Positional arguments to ``inputters.build_vocab`` cover the dataset,
    the fields, and the source/target/agenda vocabulary options from
    ``opt``. The ``fixed_vocab``, ``free_src``, ``free_tgt`` and
    ``vocab_size_multiple`` options are forwarded by keyword. The result is
    written to ``opt.save_data + '.vocab.pt'`` with ``torch.save``.
    """
    positional_args = (
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
        opt.agenda_vocab,
        opt.agenda_vocab_size,
        opt.agenda_words_min_frequency,
    )
    keyword_args = {
        'fixed_vocab': opt.fixed_vocab,
        'free_src': opt.free_src,
        'free_tgt': opt.free_tgt,
        'vocab_size_multiple': opt.vocab_size_multiple,
    }
    built_fields = inputters.build_vocab(*positional_args, **keyword_args)

    torch.save(built_fields, opt.save_data + '.vocab.pt')
Example #5
0
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocab and pickle a saveable form of it.

    The fields returned by ``inputters.build_vocab`` are converted with
    ``inputters.save_fields_to_vocab`` and dumped with ``pickle`` (rather
    than ``torch.save``) to ``opt.save_data + '.vocab.pt'``.
    """
    build_args = (
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = inputters.build_vocab(*build_args)

    # The fields themselves are not saved; only the vocab extracted from
    # them is, and the fields get reconstructed at training time.
    output_path = opt.save_data + '.vocab.pt'
    with open(output_path, 'wb') as sink:
        saveable_vocab = inputters.save_fields_to_vocab(built_fields)
        pickle.dump(saveable_vocab, sink)
Example #6
0
def build_save_vocab(train_dataset, fields, opt):
    """Announce the step, build the vocabulary fields, and save them.

    Prints a fixed banner, calls ``inputters.build_vocab`` with the
    vocabulary options from ``opt``, and writes the result to
    ``opt.save_data + '.vocab.pt'`` using ``torch.save``.
    """
    print("build_save_vocab")

    build_args = (
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = inputters.build_vocab(
        *build_args,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    torch.save(built_fields, opt.save_data + '.vocab.pt')
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary fields, show the target vocab head, and save.

    After ``inputters.build_vocab`` returns, the first 100 entries of the
    target base field's ``vocab.itos`` list are printed. The fields are
    then written to ``opt.save_data + '.vocab.pt'`` with ``torch.save``.
    """
    built_fields = inputters.build_vocab(
        train_dataset, fields, opt.data_type, opt.share_vocab,
        opt.src_vocab, opt.src_vocab_size, opt.src_words_min_frequency,
        opt.tgt_vocab, opt.tgt_vocab_size, opt.tgt_words_min_frequency,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    # Debug aid: peek at the beginning of the target vocabulary.
    target_tokens = built_fields['tgt'].base_field.vocab.itos
    print(target_tokens[:100])

    destination = opt.save_data + '.vocab.pt'
    torch.save(built_fields, destination)
Example #8
0
def build_save_vocab(train_dataset, fields, savepath, opt):
    """Build a shared text vocab with fixed settings and pickle it.

    Unlike option-driven variants, every vocabulary setting here is
    hard-coded (text data, shared vocab, size 100, minimum frequency 1, no
    vocab files); ``opt`` is accepted but not read. The saveable vocab is
    pickled to ``savepath + '/vocab.pt'``.
    """
    fixed_settings = {
        'data_type': 'text',
        'share_vocab': True,
        'src_vocab_path': '',
        'src_vocab_size': 100,
        'src_words_min_frequency': 1,
        'tgt_vocab_path': '',
        'tgt_vocab_size': 100,
        'tgt_words_min_frequency': 1,
    }
    built_fields = inputters.build_vocab(train_dataset, fields, **fixed_settings)

    # The fields themselves are not saved; only the vocab extracted from
    # them is, and the fields get reconstructed at training time.
    output_path = savepath + '/vocab.pt'
    with open(output_path, 'wb') as sink:
        saveable_vocab = inputters.save_fields_to_vocab(built_fields)
        pickle.dump(saveable_vocab, sink)
Example #9
0
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary fields and save them as ``<save_data>.vocab.pt``.

    Calls ``inputters.build_vocab`` with the dataset, the fields and the
    source/target vocabulary options from ``opt``, followed by
    ``opt.lower``, ``opt.only_words`` and ``opt.save_data``. The returned
    fields object is written with ``torch.save``.

    Args:
        train_dataset: dataset handed to ``inputters.build_vocab``.
        fields: fields whose vocabularies are to be built.
        opt: options object providing the attributes read below.
    """
    # Positional arguments must precede keyword arguments; placing
    # ``vocab_size_multiple=...`` before the last three positionals is a
    # SyntaxError. The relative order of the positional arguments is kept.
    # NOTE(review): assumes ``build_vocab`` accepts lower/only_words/save_data
    # as the positional parameters right after tgt_words_min_frequency --
    # confirm against its signature.
    fields = inputters.build_vocab(train_dataset,
                                   fields,
                                   opt.data_type,
                                   opt.share_vocab,
                                   opt.src_vocab,
                                   opt.src_vocab_size,
                                   opt.src_words_min_frequency,
                                   opt.tgt_vocab,
                                   opt.tgt_vocab_size,
                                   opt.tgt_words_min_frequency,
                                   opt.lower,
                                   opt.only_words,
                                   opt.save_data,
                                   vocab_size_multiple=opt.vocab_size_multiple)

    # The fields object is saved directly.
    vocab_path = opt.save_data + '.vocab.pt'
    torch.save(fields, vocab_path)
Example #10
0
def build_save_vocab(train_dataset, fields, opt, index):
    """Build and save the vocabulary fields for the dataset at ``index``.

    When ``opt.src_vocab`` is non-empty, its ``index``-th entry is passed
    as the source vocab; otherwise ``None`` is passed. The fields returned
    by ``inputters.build_vocab`` are written with ``torch.save`` to
    ``opt.save_data + "." + str(index) + '.vocab.pt'``.
    """
    selected_src_vocab = (
        opt.src_vocab[index] if len(opt.src_vocab) > 0 else None
    )

    built_fields = inputters.build_vocab(
        train_dataset, fields, opt.data_type, opt.share_vocab,
        selected_src_vocab, opt.src_vocab_size, opt.src_words_min_frequency,
        opt.tgt_vocab, opt.tgt_vocab_size, opt.tgt_words_min_frequency,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    destination = opt.save_data + "." + str(index) + '.vocab.pt'
    torch.save(built_fields, destination)