def __init__(self):
    """Build the dataset vocab, construct the Seq2seq model, and load weights.

    Side effects: reads the training pairs and stop-word file from disk,
    loads saved model weights, and moves the model to the chosen device.
    """
    self.DEVICE = torch.device("cuda" if config.is_cuda else "cpu")
    dataset = PairDataset(config.data_path,
                          max_src_len=config.max_src_len,
                          max_tgt_len=config.max_tgt_len,
                          truncate_src=config.truncate_src,
                          truncate_tgt=config.truncate_tgt)
    self.vocab = dataset.build_vocab(embed_file=config.embed_file)
    self.model = Seq2seq(self.vocab)
    # Map each stop word to its vocab id, deduplicated via a set.
    # Fix: the original left the file handle open and relied on the platform
    # default encoding; use a context manager and utf-8 (matching the
    # sibling PGN initializer in this file).
    with open(config.stop_word_file, encoding='utf-8') as f:
        self.stop_word = list({self.vocab[line.strip()] for line in f})
    self.model.load_model()
    self.model.to(self.DEVICE)
def __init__(self):
    """Build the dataset vocab, construct the PGN model, and load weights.

    Side effects: reads the training pairs and stop-word file from disk,
    loads saved model weights, and moves the model to `config.DEVICE`.
    """
    self.DEVICE = config.DEVICE
    dataset = PairDataset(config.data_path,
                          max_src_len=config.max_src_len,
                          max_tgt_len=config.max_tgt_len,
                          truncate_src=config.truncate_src,
                          truncate_tgt=config.truncate_tgt)
    self.vocab = dataset.build_vocab(embed_file=config.embed_file)
    self.model = PGN(self.vocab)
    # Map each stop word to its vocab id, deduplicated via a set.
    # Fix: the original never closed the file handle; a context manager
    # guarantees it is released even if a lookup raises.
    with open(config.stop_word_file, encoding='utf-8') as f:
        self.stop_word = list({self.vocab[line.strip()] for line in f})
    self.model.load_model()
    self.model.to(self.DEVICE)
# Update minimum evaluating loss. if (avg_val_loss < val_losses): torch.save(model.encoder, config.encoder_save_name) torch.save(model.decoder, config.decoder_save_name) torch.save(model.attention, config.attention_save_name) torch.save(model.reduce_state, config.reduce_state_save_name) val_losses = avg_val_loss with open(config.losses_path, 'wb') as f: pickle.dump(val_losses, f) writer.close() if __name__ == "__main__": # Prepare dataset for training. DEVICE = torch.device('cuda') if config.is_cuda else torch.device('cpu') dataset = PairDataset(config.data_path, max_src_len=config.max_src_len, max_tgt_len=config.max_tgt_len, truncate_src=config.truncate_src, truncate_tgt=config.truncate_tgt) val_dataset = PairDataset(config.val_data_path, max_src_len=config.max_src_len, max_tgt_len=config.max_tgt_len, truncate_src=config.truncate_src, truncate_tgt=config.truncate_tgt) vocab = dataset.build_vocab(embed_file=config.embed_file) train(dataset, val_dataset, vocab, start_epoch=0)