Example #1
 def test_dropout_WITH_PROB_ZERO(self):
     rnn = EncoderRNN(self.vocab_size, 50, 16, dropout_p=0)
     for param in rnn.parameters():
         param.data.uniform_(-1, 1)
     output1, _ = rnn(self.input_var, self.lengths)
     output2, _ = rnn(self.input_var, self.lengths)
     self.assertTrue(torch.equal(output1.data, output2.data))
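A minimal stand-alone sketch of what this test asserts, using a stock torch.nn.GRU rather than this repository's EncoderRNN (all names below are illustrative): with the dropout probability at zero, two forward passes over the same input are bit-for-bit identical even in train() mode.

import torch
import torch.nn as nn

torch.manual_seed(0)
# two stacked GRU layers, inter-layer dropout disabled
gru = nn.GRU(input_size=16, hidden_size=16, num_layers=2, dropout=0.0, batch_first=True)
gru.train()

x = torch.randn(4, 10, 16)        # (batch, seq_len, features)
out1, _ = gru(x)
out2, _ = gru(x)
assert torch.equal(out1, out2)    # deterministic when dropout is 0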
Example #2
def chat_with_latest(savepath=SAVE_PATH):
    model = load_latest_state_dict(savepath)

    attn_model = 'dot'
    #attn_model = 'general'
    #attn_model = 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    voc = Voc(model['voc_dict']['name'])
    voc.__dict__ = model['voc_dict']

    embedding = nn.Embedding(voc.num_words, hidden_size)

    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    encoder.load_state_dict(model['en'])
    decoder.load_state_dict(model['de'])

    searcher = GreedySearchDecoder(encoder, decoder)
    evaluateInput(encoder, decoder, searcher, voc)
Example #3
 def test_dropout_WITH_PROB_ZERO(self):
     rnn = EncoderRNN(self.dataset.input_vocab, 50, 16, dropout_p=0)
     for param in rnn.parameters():
         param.data.uniform_(-1, 1)
     batch = [[1, 2, 3], [1, 2], [1]]
     output1, _ = rnn(batch)
     output2, _ = rnn(batch)
     self.assertTrue(torch.equal(output1, output2))
Example #4
def train():
    N_EPOCHS = 5
    output_size = 1
    save_dir = 'data/save/Adversarial_Discriminator/'

    attn_model = 'dot'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1

    seq2seqModel = load_latest_state_dict(savepath=SAVE_PATH_SEQ2SEQ)
    voc = Voc('name')
    voc.__dict__ = seq2seqModel['voc_dict']

    embedding = nn.Embedding(voc.num_words, hidden_size)
    model = Adversarial_Discriminator(hidden_size, output_size, embedding)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()

    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    encoder.load_state_dict(seq2seqModel['en'])
    decoder.load_state_dict(seq2seqModel['de'])
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    searcher = RLGreedySearchDecoder(encoder, decoder, voc)

    train_data = AlexaDataset('train.json',
                              rare_word_threshold=3)  # sorry cornell
    train_data.trimPairsToVocab(voc)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_data = AlexaDataset('test_freq.json', rare_word_threshold=3)
    test_data.trimPairsToVocab(voc)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    for epoch in range(1, N_EPOCHS + 1):
        test_AdversarialDiscriminatorOnLatestSeq2Seq(model, searcher,
                                                     test_loader, voc)
        loss = trainAdversarialDiscriminatorOnLatestSeq2Seq(
            model, searcher, voc, train_loader, criterion, optimizer,
            embedding, save_dir, epoch)

        if epoch % 1 == 0:
            torch.save(
                {
                    'iteration': epoch,
                    'model': model.state_dict(),
                    'opt': optimizer.state_dict(),
                    'loss': loss,
                    'voc_dict': voc.__dict__,
                    'embedding': embedding.state_dict()
                }, os.path.join(save_dir, '{}_{}.tar'.format(epoch, 'epochs')))
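The checkpoint dictionary written at the end of train() can be restored with torch.load; a hedged sketch of the round-trip (the function and argument names here are illustrative, not taken from the repository):

import os
import torch

def save_checkpoint(path, epoch, model, optimizer, loss, voc_dict, embedding):
    # bundle everything needed to resume or evaluate into one .tar file
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    torch.save({
        'iteration': epoch,
        'model': model.state_dict(),
        'opt': optimizer.state_dict(),
        'loss': loss,
        'voc_dict': voc_dict,
        'embedding': embedding.state_dict(),
    }, path)

def load_checkpoint(path, model, optimizer, embedding, device='cpu'):
    ckpt = torch.load(path, map_location=device)
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['opt'])
    embedding.load_state_dict(ckpt['embedding'])
    return ckpt['iteration'], ckpt['loss'], ckpt['voc_dict']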
Example #5
    def test_input_dropout_WITH_NON_ZERO_PROB(self):
        rnn = EncoderRNN(self.vocab_size, 50, 16, input_dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)

        equal = True
        for _ in range(50):
            output1, _ = rnn(self.input_var, self.lengths)
            output2, _ = rnn(self.input_var, self.lengths)
            if not torch.equal(output1.data, output2.data):
                equal = False
                break
        self.assertFalse(equal)
Example #6
    def test_dropout_WITH_NON_ZERO_PROB(self):
        rnn = EncoderRNN(self.dataset.input_vocab, 50, 16, dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)
        batch = [[1, 2, 3], [1, 2], [1]]

        equal = True
        for _ in range(50):
            output1, _ = rnn(batch)
            output2, _ = rnn(batch)
            if not torch.equal(output1, output2):
                equal = False
                break
        self.assertFalse(equal)
Example #7
    def test_dropout_WITH_NON_ZERO_PROB(self):
        # It's critical to set n_layer=2 here since dropout won't work
        # when the RNN only has one layer according to pytorch's doc
        rnn = EncoderRNN(self.vocab_size, 50, 16, n_layers=2, dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)

        equal = True
        for _ in range(50):
            output1, _ = rnn(self.input_var, self.lengths)
            output2, _ = rnn(self.input_var, self.lengths)
            if not torch.equal(output1.data, output2.data):
                equal = False
                break
        self.assertFalse(equal)
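The comment in this test reflects standard PyTorch behaviour: the dropout argument of nn.GRU/nn.LSTM is applied only between stacked layers, so with a single layer it has no effect and PyTorch emits a warning. A small sketch with a plain nn.GRU, independent of this repository's EncoderRNN:

import torch
import torch.nn as nn

torch.manual_seed(0)
gru = nn.GRU(input_size=16, hidden_size=16, num_layers=2, dropout=0.5, batch_first=True)
gru.train()

x = torch.randn(4, 10, 16)
out1, _ = gru(x)
out2, _ = gru(x)
print(torch.equal(out1, out2))   # almost surely False: a fresh dropout mask per call

gru.eval()                       # eval() disables dropout
out3, _ = gru(x)
out4, _ = gru(x)
print(torch.equal(out3, out4))   # True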
Example #8
def main():
    vocabulary = pickle.load(open(f'{EMBEDDING_DIR}/vocab.pkl', 'rb'))
    print("Number of words in data set: %d" % len(vocabulary))
    embedding_matrix, vocab_to_index = map_vocab_to_embedding(vocabulary)

    hidden_size = 600
    encoder = EncoderRNN(embedding_matrix, hidden_size)
    decoder = DecoderRNN(embedding_matrix, hidden_size)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
    train_file = open(os.path.join(EMBEDDING_DIR, "train.pkl"), 'rb')
    train_data = pickle.load(train_file)
    train_file.close()
    n_iters = 2000
    train(train_data, vocab_to_index, vocabulary, encoder, decoder, n_iters)
Example #9
def initialize_model(opt, src, tgt, train):
    # build vocabulary
    src.build_vocab(train, max_size=opt.src_vocab)
    tgt.build_vocab(train, max_size=opt.tgt_vocab)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # Initialize model
    hidden_size = opt.hidden_size
    decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size
    encoder = EncoderRNN(len(src.vocab), opt.max_len, hidden_size, opt.embedding_size,
                         dropout_p=opt.dropout_p_encoder,
                         n_layers=opt.n_layers,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab), opt.max_len, decoder_hidden_size,
                         dropout_p=opt.dropout_p_decoder,
                         n_layers=opt.n_layers,
                         attention_method=opt.attention_method,
                         full_focus=opt.full_focus,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    return seq2seq, input_vocab, output_vocab
Example #10
 def test_pretrained_embedding(self):
     hidden_size = 16
     pretrained_embedding = torch.randn(self.vocab_size, hidden_size)
     rnn = EncoderRNN(self.vocab_size, 50, hidden_size,
                      embedding=pretrained_embedding,
                      update_embedding=False)
     self.assertTrue(torch.equal(pretrained_embedding, rnn.embedding.weight.data))
     self.assertFalse(rnn.embedding.weight.requires_grad)
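In stock PyTorch, the "pretrained and frozen" embedding behaviour that this test checks (embedding=..., update_embedding=False) can be obtained with nn.Embedding.from_pretrained; a brief sketch with made-up sizes:

import torch
import torch.nn as nn

vocab_size, hidden_size = 100, 16
pretrained = torch.randn(vocab_size, hidden_size)

emb = nn.Embedding.from_pretrained(pretrained, freeze=True)
assert torch.equal(emb.weight.data, pretrained)   # weights taken over verbatim
assert not emb.weight.requires_grad               # frozen: no gradient updates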
Example #11
def load(file_path, dataset):
    checkpoint = torch.load(file_path, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    embedding_sd = checkpoint['embedding']
    voc = Voc(checkpoint['voc_dict']['name'])
    voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')
    encoder.eval()
    decoder.eval()

    policy = RLGreedySearchDecoder(encoder, decoder, voc)
    env = Env(voc, dataset)
    return policy, env
Example #12
def init_model():
    if args.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(args.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                         args.load_checkpoint)))
        checkpoint_path = os.path.join(args.expt_dir,
                                       Checkpoint.CHECKPOINT_DIR_NAME,
                                       args.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        model = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:
        # build the vocabulary index and embedding
        spk.build_vocab(train, vectors="glove.6B.100d")
        src.build_vocab(train,
                        max_size=args.vocab_size,
                        vectors="glove.6B.100d")
        tgt.build_vocab(train,
                        max_size=args.vocab_size,
                        vectors="glove.6B.100d")
        input_vocab, output_vocab = src.vocab, tgt.vocab

        # Initialize model
        encoder = EncoderRNN(
            vocab_size=len(input_vocab),
            max_len=args.max_len,
            vectors=input_vocab.vectors if args.embedding else None,
            input_dropout_p=args.input_dropout_p,
            dropout_p=args.dropout_p,
            hidden_size=args.hidden_size,
            bidirectional=args.bidirectional,
            variable_lengths=True)

        decoder = SpkDecoderRNN(
            num_spk=args.num_spk,
            spk_embed_size=args.spk_embed_size,
            vocab_size=len(output_vocab),
            max_len=args.max_len,
            hidden_size=args.hidden_size *
            2 if args.bidirectional else args.hidden_size,
            dropout_p=args.dropout_p,
            input_dropout_p=args.input_dropout_p,
            vectors=input_vocab.vectors if args.embedding else None,
            use_attention=True,
            bidirectional=args.bidirectional,
            eos_id=tgt.eos_id,
            sos_id=tgt.sos_id)
        model = SpkSeq2seq(encoder, decoder)
        if torch.cuda.is_available():
            model.cuda()

        for param in model.parameters():
            param.data.uniform_(-0.08, 0.08)

    return model, input_vocab, output_vocab
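A hedged sketch of the vectors=input_vocab.vectors idea used above: copying pretrained rows into an embedding layer while leaving it trainable for fine-tuning (the tensor here is random stand-in data, not real GloVe vectors):

import torch
import torch.nn as nn

vocab_size, dim = 5000, 100
vectors = torch.randn(vocab_size, dim)     # stand-in for input_vocab.vectors

embedding = nn.Embedding(vocab_size, dim)
with torch.no_grad():
    embedding.weight.copy_(vectors)        # requires_grad stays True, so it can be fine-tuned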
Example #13
    def __init__(self,
                 data_path,
                 model_save_path,
                 model_load_path,
                 hidden_size=32,
                 max_vocab=4000,
                 device='cuda'):
        self.src = SourceField()
        self.tgt = TargetField()
        self.max_length = 90
        self.data_path = data_path
        self.model_save_path = model_save_path
        self.model_load_path = model_load_path

        def len_filter(example):
            return len(example.src) <= self.max_length and len(
                example.tgt) <= self.max_length

        self.trainset = torchtext.data.TabularDataset(
            path=os.path.join(self.data_path, 'train'),
            format='tsv',
            fields=[('src', self.src), ('tgt', self.tgt)],
            filter_pred=len_filter)
        self.devset = torchtext.data.TabularDataset(path=os.path.join(
            self.data_path, 'eval'),
                                                    format='tsv',
                                                    fields=[('src', self.src),
                                                            ('tgt', self.tgt)],
                                                    filter_pred=len_filter)
        self.src.build_vocab(self.trainset, max_size=max_vocab)
        self.tgt.build_vocab(self.trainset, max_size=max_vocab)
        weight = torch.ones(len(self.tgt.vocab))
        pad = self.tgt.vocab.stoi[self.tgt.pad_token]
        self.loss = Perplexity(weight, pad)
        self.loss.cuda()
        self.optimizer = None
        self.hidden_size = hidden_size
        self.bidirectional = True
        encoder = EncoderRNN(len(self.src.vocab),
                             self.max_length,
                             self.hidden_size,
                             bidirectional=self.bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(self.tgt.vocab),
                             self.max_length,
                             self.hidden_size *
                             2 if self.bidirectional else self.hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=self.bidirectional,
                             eos_id=self.tgt.eos_id,
                             sos_id=self.tgt.sos_id)
        self.device = device
        self.seq2seq = Seq2seq(encoder, decoder).cuda()
        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
Example #14
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional, dropout,
                attention, init_value):
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    encoder = EncoderRNN(len(src.vocab),
                         MAX_LEN,
                         hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    decoder = DecoderRNN(
        len(tgt.vocab),
        MAX_LEN,
        hidden_size,  # * 2 if bidirectional else hidden_size,
        rnn_cell="lstm",
        use_attention=attention,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)
    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
Example #15
    def __init__(self, args):
        super(IPComm_listener, self).__init__()
        self.vocab_size = 10
        self.max_len = 5
        self.hidden_size = args.comm_embed_dim
        self.listener = EncoderRNN(self.vocab_size,
                                   self.max_len,
                                   self.hidden_size,
                                   rnn_cell='gru')
        self.reconstruct_fc = nn.Linear(self.hidden_size, args.comm_embed_dim)

        self.fc = nn.Linear(args.comm_embed_dim, args.comm_embed_dim)
Example #16
    def setUpClass(self):
        self.test_wd = os.getcwd()
        self.dataset = Dataset(path=os.path.join(self.test_wd,'tests/data/eng-fra.txt'),
                               src_max_len=50, tgt_max_len=50, src_max_vocab=50000, tgt_max_vocab=50000)
        self.encoder = EncoderRNN(self.dataset.input_vocab,max_len=10, hidden_size=10, rnn_cell='lstm')
        self.decoder = DecoderRNN(self.dataset.output_vocab, max_len=10, hidden_size=10, rnn_cell='lstm')
        self.seq2seq = Seq2seq(self.encoder,self.decoder)
        if torch.cuda.is_available():
            self.seq2seq.cuda()
        self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
Example #17
def loadModel(hidden_size=hidden_size,
              encoder_n_layers=encoder_n_layers,
              decoder_n_layers=decoder_n_layers,
              dropout=dropout,
              attn_model=attn_model,
              learning_rate=learning_rate,
              decoder_learning_ratio=decoder_learning_ratio,
              directory=SAVE_PATH):
    state_dict = load_latest_state_dict(directory)
    episode = state_dict['iteration']
    encoder_sd = state_dict['en']
    decoder_sd = state_dict['de']
    encoder_optimizer_sd = state_dict['en_opt']
    decoder_optimizer_sd = state_dict['de_opt']
    embedding_sd = state_dict['embedding']

    voc = Voc('placeholder_name')
    voc.__dict__ = state_dict['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)
    embedding.to(device)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    if device == 'cuda':
        # If you have cuda, configure cuda to call
        for state in encoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()

        for state in decoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
    print('Optimizers built and ready to go!')

    return episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc
Example #18
    def setUpClass(self):
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        trg = TargetField()
        dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'), format='tsv',
            fields=[('src', src), ('trg', trg)],
        )
        src.build_vocab(dataset)
        trg.build_vocab(dataset)

        encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(trg.vocab), 10, 10, trg.sos_id, trg.eos_id, rnn_cell='lstm')
        seq2seq = Seq2seq(encoder, decoder)
        self.predictor = Predictor(seq2seq, src.vocab, trg.vocab)
Example #19
 def __init__(
     self,
     vocabulary_size,
     embedding_size,
     hidden_state_size,
     start_label,
     end_label,
     pad_label,
     slk_parser,
     MAX_LENGTH=500,
     dropout_p=0.1,
     n_layer=3,
 ):
     super().__init__()
     self.embedding = nn.Embedding(vocabulary_size, embedding_size)
     self.sample = False
     self.dropout_p = dropout_p
     self.encoder = EncoderRNN(vocab_size=vocabulary_size,
                               max_len=MAX_LENGTH,
                               input_size=embedding_size,
                               hidden_size=hidden_state_size // 2,
                               n_layers=n_layer,
                               bidirectional=True,
                               rnn_cell='lstm',
                               input_dropout_p=self.dropout_p,
                               dropout_p=self.dropout_p,
                               variable_lengths=False,
                               embedding=None,
                               update_embedding=True)
     self.decoder = DecoderRNN(vocab_size=vocabulary_size,
                               max_len=MAX_LENGTH,
                               hidden_size=hidden_state_size,
                               sos_id=start_label,
                               eos_id=end_label,
                               n_layers=n_layer,
                               rnn_cell='lstm',
                               bidirectional=False,
                               input_dropout_p=self.dropout_p,
                               dropout_p=self.dropout_p,
                               use_attention=True)
     self.is_copy_output = nn.Linear(hidden_state_size, 1)
     self.grammar_mask_output = MaskOutput(hidden_state_size,
                                           vocabulary_size)
     self.decoder_start = torch.ones(1, 1) * start_label
     self.pad_label = pad_label
     self.MAX_LENGTH = MAX_LENGTH
     self.num_layers = n_layer
Example #20
    def setUp(self):
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        tgt = TargetField()
        self.dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'), format='tsv',
            fields=[('src', src), ('tgt', tgt)],
        )
        src.build_vocab(self.dataset)
        tgt.build_vocab(self.dataset)

        encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(tgt.vocab), 10, 10, tgt.sos_id, tgt.eos_id, rnn_cell='lstm')
        self.seq2seq = Seq2seq(encoder, decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
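The parameter loop above is the flat uniform initialisation used throughout these examples; an equivalent helper written with nn.init (the function name and bound are illustrative):

import torch.nn as nn

def init_uniform(model, bound=0.08):
    # same effect as param.data.uniform_(-0.08, 0.08) over every parameter
    for p in model.parameters():
        nn.init.uniform_(p, -bound, bound)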
Example #21
    def setUpClass(self):
        self.test_wd = os.getcwd()
        self.dataset = Dataset(path=os.path.join(self.test_wd,'tests/data/eng-fra.txt'),
                               src_max_len=50, tgt_max_len=50, src_max_vocab=50000, tgt_max_vocab=50000)
        self.encoder = EncoderRNN(self.dataset.input_vocab,max_len=10, hidden_size=10, rnn_cell='lstm')
        self.decoder = DecoderRNN(self.dataset.output_vocab, max_len=10, hidden_size=10, rnn_cell='lstm')
        self.seq2seq = Seq2seq(self.encoder,self.decoder)
        self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        if not os.path.exists(os.path.join(self.test_wd,'checkpoints')):
            os.mkdir(os.path.join(self.test_wd,'checkpoints'))

        self.seq2seq.save(os.path.join(self.test_wd,'checkpoints'))
        self.mock_seq2seq.load(os.path.join(self.test_wd, 'checkpoints'))
Example #22
def initialize_model(
    train,
    input_vocab,
    output_vocab,
    max_len=10,
    hidden_size=256,
    dropout_p=0.5,
    bidirectional=True,
    n_beam=5,
):
    # Initialize model
    encoder = EncoderRNN(
        len(input_vocab),
        max_len,
        hidden_size,
        bidirectional=bidirectional,
        variable_lengths=True,
    )

    decoder = DecoderRNN(
        len(output_vocab),
        max_len,
        hidden_size * (2 if bidirectional else 1),
        dropout_p=dropout_p,
        use_attention=True,
        bidirectional=bidirectional,
        eos_id=train.tgt_field.eos_id,
        sos_id=train.tgt_field.sos_id,
    )
    #     decoder = TopKDecoder(decoder ,n_beam)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq = seq2seq.cuda()

    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and pass to the trainer
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    return seq2seq, optimizer, scheduler
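The Optimizer class used here appears to be pytorch-seq2seq's optimizer wrapper; a plain-PyTorch sketch of the same pairing of Adam, gradient-norm clipping, and a StepLR schedule (the Linear module below is only a stand-in for the seq2seq model):

import torch
from torch.optim.lr_scheduler import StepLR

model = torch.nn.Linear(8, 8)                    # stand-in model
optimizer = torch.optim.Adam(model.parameters())
scheduler = StepLR(optimizer, step_size=1)       # decay the LR every epoch

for epoch in range(3):
    for _ in range(10):                          # stand-in training steps
        optimizer.zero_grad()
        loss = model(torch.randn(4, 8)).pow(2).mean()
        loss.backward()
        # counterpart of max_grad_norm=5 in the wrapper above
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
        optimizer.step()
    scheduler.step()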
Example #23
    def get_seq2seq():
        decoder = DecoderRNN(len(field.vocab.stoi), args.max_len,
                             args.hidden_size * 2 if bidirectional else args.hidden_size,
                             n_layers=args.n_layers, rnn_cell=rnn_cell,
                             input_dropout_p=0.0, dropout_p=0.0, use_attention=use_attention,
                             bidirectional=bidirectional,
                             eos_id=field.vocab.stoi['<eos>'], sos_id=field.vocab.stoi['<sos>']).to(device)

        if tied:
            # compatibility with the older code
            nn.init.normal_(decoder.out.weight)

        encoder = EncoderRNN(len(field.vocab.stoi), args.max_len, args.hidden_size,
                             input_dropout_p=0.0, dropout_p=0.0,
                             n_layers=args.n_layers, bidirectional=bidirectional,
                             rnn_cell=rnn_cell, variable_lengths=True,
                             embedding=(decoder.out.weight if tied else None)).to(device)

        return Seq2seq(encoder, decoder)
Example #24
    # loss = Perplexity(weight, pad)
    loss = NLLLoss(weight=weight, mask=pad, size_average=True)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = opt.word_lstm_dim
        bidirectional = opt.word_bidirect
        encoder = EncoderRNN(vocab_size=len(src.vocab),
                             max_len=max_len,
                             word_dim=opt.word_dim,
                             hidden_size=hidden_size,
                             input_dropout_p=opt.input_dropout,
                             bidirectional=bidirectional,
                             n_layers=1,
                             rnn_cell='gru',
                             variable_lengths=True)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=max_len,
                             hidden_size=hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=opt.dropout,
                             use_attention=True,
                             bidirectional=bidirectional,
                             n_layers=1,
                             rnn_cell='gru',
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
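The NLLLoss here is given the padding index as a mask so padded positions contribute nothing to the loss; in stock PyTorch the same idea is usually expressed with ignore_index. A small sketch with made-up ids and shapes:

import torch
import torch.nn as nn

pad_id = 0
criterion = nn.NLLLoss(ignore_index=pad_id)      # padded targets contribute no loss

log_probs = torch.log_softmax(torch.randn(6, 10), dim=-1)   # (tokens, vocab)
targets = torch.tensor([3, 5, 0, 0, 2, 7])                  # the 0s are padding
loss = criterion(log_probs, targets)
print(loss.item())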
Example #25
    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        # hidden_size=128
        hidden_size = 300
        bidirectional = True

        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional, variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
            print(param.data)
        encoder.vectors_stats()
        # encoder.init_vectors(src.vocab.vectors)
        # for param in seq2seq.parameters():
        #     print(param.data)

        if torch.cuda.is_available():
            seq2seq.cuda()
Example #26
def run_training(opt, default_data_dir, num_epochs=100):
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:

        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')

        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                   and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path, 'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train

        num_epochs = num_epochs
        batch_size = 32
        checkpoint_every = num_epochs / 10
        print_every = num_epochs / 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every, expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        logging.info("Starting training with the following Properties %s", json.dumps(properties, indent=2))
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every, expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", dev_loss)

    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))

    predictor = Predictor(beam_search, input_vocab, output_vocab)
    while True:
        try:
            seq_str = raw_input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
Example #27
    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab),
                             max_len,
                             hidden_size,
                             bidirectional=bidirectional,
                             variable_lengths=True,
                             rnn_cell="lstm")
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id,
                             rnn_cell="lstm")
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()
Example #28
 seq2seq = None
 optimizer = None
 if not opt.resume:
     # Initialize model
     hidden_size = config['encoder embed']
     # TODO is this ideal?
     feat_hidden_size = len(feats.vocab) // 2
     bidirectional = True
     encoder = EncoderRNN(
         len(src.vocab),
         feats.vocab,
         max_len,
         # TODO can we make these be different sizes?
         hidden_size,
         feat_hidden_size,
         # hidden_size, hidden_size,
         bidirectional=bidirectional,
         rnn_cell='LSTM',
         variable_lengths=True,
         n_layers=config['num layers']
         #,
         # features=feats
     )
     # pdb.set_trace()
     # if config['use_vecs']:
     #     decoder = VecDecoderRNN(len(tgt.vocab),
     #                             max_len,
     #                             hidden_size * 2 if bidirectional else hidden_size,
     #                             dropout_p=float(config['dropout']),
     #                             use_attention=True,
     #                             bidirectional=bidirectional,
Example #29
    tgt_vocab = VocabField(tgt_vocab_list,
                           vocab_size=opt.tgt_vocab_size,
                           sos_token="<SOS>",
                           eos_token="<EOS>")
    pad_id = tgt_vocab.word2idx[tgt_vocab.pad_token]

    # Prepare loss
    weight = torch.ones(len(tgt_vocab.vocab))
    loss = Perplexity(weight, pad_id)
    loss.to(device)

    # Initialize model
    encoder = EncoderRNN(len(src_vocab.vocab),
                         opt.max_src_length,
                         embedding_size=opt.embedding_size,
                         rnn_cell=opt.rnn_cell,
                         n_layers=opt.n_hidden_layer,
                         hidden_size=opt.hidden_size,
                         bidirectional=opt.bidirectional,
                         variable_lengths=False)

    decoder = DecoderRNN(len(tgt_vocab.vocab),
                         opt.max_tgt_length,
                         embedding_size=opt.embedding_size,
                         rnn_cell=opt.rnn_cell,
                         n_layers=opt.n_hidden_layer,
                         hidden_size=opt.hidden_size *
                         2 if opt.bidirectional else opt.hidden_size,
                         bidirectional=opt.bidirectional,
                         dropout_p=0.2,
                         use_attention=opt.use_attn,
                         eos_id=tgt_vocab.word2idx[tgt_vocab.eos_token],
Example #30
# Check that the Constants are set correctly
assert EN.vocab.stoi[Constants.BOS_WORD] == Constants.BOS
assert EN.vocab.stoi[Constants.EOS_WORD] == Constants.EOS
assert EN.vocab.stoi[Constants.PAD_WORD] == Constants.PAD
assert EN.vocab.stoi[Constants.UNK_WORD] == Constants.UNK

# ---------- init model ----------

try:
    G = load_model(opt.load_G_from)
except AttributeError:
    hidden_size = 512
    bidirectional = True
    encoder = EncoderRNN(len(EN.vocab), opt.max_len, hidden_size,
                         input_dropout_p=0, dropout_p=0, n_layers=1,
                         bidirectional=bidirectional, variable_lengths=True, rnn_cell='gru')
    decoder = DecoderRNN(len(EN.vocab), opt.max_len, hidden_size * 2 if bidirectional else hidden_size, n_layers=1,
                         dropout_p=0.2, use_attention=True, bidirectional=bidirectional, rnn_cell='gru',
                         eos_id=Constants.EOS, sos_id=Constants.BOS)
    G = Seq2seq(encoder, decoder)
    for param in G.parameters():
        param.data.uniform_(-0.08, 0.08)

try:
    D = load_model(opt.load_D_from)
except AttributeError:
    D = BinaryClassifierCNN(len(EN.vocab),
                            embed_dim=opt.embed_dim,
                            num_kernel=opt.num_kernel,
                            kernel_sizes=opt.kernel_sizes,