def __init__(self,
             vocab_size: int,
             embedding_size: int,
             n_hidden: int,
             sos_token: int = 0,
             eos_token: int = 1,
             mask_token: int = 2,
             max_output_length: int = 100,
             rnn_cell: str = 'lstm') -> None:
        """Configure a plain (no-attention) RNN decoder and a masked NLL loss."""
        # Bookkeeping: keep plain copies of the configuration.
        self.rnn_cell = rnn_cell
        self.n_hidden = n_hidden
        self.embedding_size = embedding_size
        self.SOS_token = sos_token
        self.EOS_token = eos_token
        self.mask_token = mask_token
        self.max_output_length = max_output_length
        self.optimizer = None

        # Unidirectional decoder; n_hidden is passed as the layer count here.
        self.decoder = DecoderRNN(vocab_size,
                                  max_output_length,
                                  embedding_size,
                                  n_layers=n_hidden,
                                  rnn_cell=rnn_cell,
                                  use_attention=False,
                                  bidirectional=False,
                                  eos_id=eos_token,
                                  sos_id=sos_token)
        if torch.cuda.is_available():
            self.decoder.cuda()

        # Uniform class weights; the mask token is excluded from the loss.
        token_weights = torch.ones(vocab_size)
        if torch.cuda.is_available():
            token_weights = token_weights.cuda()
        self.loss = NLLLoss(weight=token_weights, mask=mask_token)
Esempio n. 2
0
 def test_dropout_WITH_PROB_ZERO(self):
     """With dropout_p=0, two forward passes must produce identical output."""
     decoder = DecoderRNN(self.dataset.output_vocab, 50, 16, dropout_p=0)
     for weight in decoder.parameters():
         weight.data.uniform_(-1, 1)
     inputs = [[1, 2, 3], [1, 2], [1]]
     first, _, _ = decoder(inputs)
     second, _, _ = decoder(inputs)
     self.assertEqual(first, second)
Esempio n. 3
0
 def test_dropout_WITH_PROB_ZERO(self):
     """Zero dropout: repeated unrolled decodes must match step for step."""
     decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, dropout_p=0)
     for weight in decoder.parameters():
         weight.data.uniform_(-1, 1)
     first, _, _ = decoder()
     second, _, _ = decoder()
     for step_a, step_b in zip(first, second):
         self.assertTrue(torch.equal(step_a.data, step_b.data))
Esempio n. 4
0
    def test_input_dropout_WITH_NON_ZERO_PROB(self):
        """Non-zero input dropout should make repeated decodes diverge."""
        decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, input_dropout_p=0.5)
        for weight in decoder.parameters():
            weight.data.uniform_(-1, 1)

        # 50 trials: at least one pair of passes is expected to differ.
        diverged = False
        for _trial in range(50):
            first, _, _ = decoder()
            second, _, _ = decoder()
            if not torch.equal(first[0].data, second[0].data):
                diverged = True
                break
        self.assertTrue(diverged)
Esempio n. 5
0
    def test_dropout_WITH_NON_ZERO_PROB(self):
        """Non-zero dropout should make repeated decodes of a batch diverge."""
        decoder = DecoderRNN(self.dataset.output_vocab, 50, 16, dropout_p=0.5)
        for weight in decoder.parameters():
            weight.data.uniform_(-1, 1)
        inputs = [[1, 2, 3], [1, 2], [1]]

        # 50 trials: at least one pair of passes is expected to differ.
        diverged = False
        for _trial in range(50):
            first, _, _ = decoder(inputs)
            second, _, _ = decoder(inputs)
            if first[0] != second[0]:
                diverged = True
                break
        self.assertTrue(diverged)
def main():
    """Load the vocabulary, embeddings and training data, then train the model.

    Fix: both pickle files were opened without being closed on the error path
    (the vocab handle was never closed at all); use context managers so the
    handles are always released.
    """
    with open(f'{EMBEDDING_DIR}/vocab.pkl', 'rb') as vocab_file:
        vocabulary = pickle.load(vocab_file)
    print("Number of words in data set: %d" % len(vocabulary))
    embedding_matrix, vocab_to_index = map_vocab_to_embedding(vocabulary)

    hidden_size = 600
    encoder = EncoderRNN(embedding_matrix, hidden_size)
    decoder = DecoderRNN(embedding_matrix, hidden_size)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
    with open(os.path.join(EMBEDDING_DIR, "train.pkl"), 'rb') as train_file:
        train_data = pickle.load(train_file)
    n_iters = 2000
    train(train_data, vocab_to_index, vocabulary, encoder, decoder, n_iters)
Esempio n. 7
0
def build_model(tgt_field, max_len=50, hidden_size=100, bidirectional=False):
    """Assemble a CNN-encoder / attention-RNN-decoder Seq2seq for *tgt_field*."""
    print("building model...")
    vocab: torchtext.vocab.Vocab = tgt_field.vocab
    print("vocab: ", vocab.stoi)

    # A bidirectional encoder doubles the context width fed to the decoder.
    decoder_hidden = hidden_size * 2 if bidirectional else hidden_size
    encoder = EncoderCNN2D()
    decoder = DecoderRNN(vocab_size=len(vocab),
                         max_len=max_len,
                         hidden_size=decoder_hidden,
                         dropout_p=0.2,
                         use_attention=True,
                         bidirectional=bidirectional,
                         eos_id=tgt_field.eos_id,
                         sos_id=tgt_field.sos_id,
                         rnn_cell='lstm')
    model_obj = Seq2seq(encoder, decoder)

    # Small uniform initialisation keeps early training stable.
    for weight in model_obj.parameters():
        weight.data.uniform_(-0.08, 0.08)

    return model_obj
Esempio n. 8
0
def initialize_model(opt, src, tgt, train):
    """Build vocabularies from *train* and construct an attention seq2seq model.

    Returns the model (moved to the global ``device``) plus both vocabularies.
    """
    # Vocabulary construction, capped by the command-line limits.
    src.build_vocab(train, max_size=opt.src_vocab)
    tgt.build_vocab(train, max_size=opt.tgt_vocab)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # A bidirectional encoder produces hidden states twice as wide.
    hidden_size = opt.hidden_size
    decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size

    encoder = EncoderRNN(len(src.vocab),
                         opt.max_len,
                         hidden_size,
                         opt.embedding_size,
                         dropout_p=opt.dropout_p_encoder,
                         n_layers=opt.n_layers,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab),
                         opt.max_len,
                         decoder_hidden_size,
                         dropout_p=opt.dropout_p_decoder,
                         n_layers=opt.n_layers,
                         attention_method=opt.attention_method,
                         full_focus=opt.full_focus,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)

    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    return seq2seq, input_vocab, output_vocab
Esempio n. 9
0
    def __init__(self,
                 data_path,
                 model_save_path,
                 model_load_path,
                 hidden_size=32,
                 max_vocab=4000,
                 device='cuda'):
        """Load train/eval TSV data, build vocabularies and a seq2seq model.

        Fix: the original stored ``device`` but ignored it, calling
        ``.cuda()`` unconditionally — crashing on CPU-only machines. The
        device argument is now honoured for both the model and the loss.
        """
        self.src = SourceField()
        self.tgt = TargetField()
        self.max_length = 90
        self.data_path = data_path
        self.model_save_path = model_save_path
        self.model_load_path = model_load_path
        self.device = device

        def len_filter(example):
            # Drop examples longer than the model's maximum sequence length.
            return len(example.src) <= self.max_length and len(
                example.tgt) <= self.max_length

        self.trainset = torchtext.data.TabularDataset(
            path=os.path.join(self.data_path, 'train'),
            format='tsv',
            fields=[('src', self.src), ('tgt', self.tgt)],
            filter_pred=len_filter)
        self.devset = torchtext.data.TabularDataset(
            path=os.path.join(self.data_path, 'eval'),
            format='tsv',
            fields=[('src', self.src), ('tgt', self.tgt)],
            filter_pred=len_filter)
        self.src.build_vocab(self.trainset, max_size=max_vocab)
        self.tgt.build_vocab(self.trainset, max_size=max_vocab)

        # Perplexity loss over all tokens, with padding masked out.
        weight = torch.ones(len(self.tgt.vocab))
        pad = self.tgt.vocab.stoi[self.tgt.pad_token]
        self.loss = Perplexity(weight, pad)
        if self.device == 'cuda' and torch.cuda.is_available():
            self.loss.cuda()
        self.optimizer = None
        self.hidden_size = hidden_size
        self.bidirectional = True
        encoder = EncoderRNN(len(self.src.vocab),
                             self.max_length,
                             self.hidden_size,
                             bidirectional=self.bidirectional,
                             variable_lengths=True)
        # The decoder consumes the 2x-wide states of the bidirectional encoder.
        decoder = DecoderRNN(len(self.tgt.vocab),
                             self.max_length,
                             self.hidden_size *
                             2 if self.bidirectional else self.hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=self.bidirectional,
                             eos_id=self.tgt.eos_id,
                             sos_id=self.tgt.sos_id)
        self.seq2seq = Seq2seq(encoder, decoder).to(self.device)
        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
Esempio n. 10
0
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional, dropout,
                attention, init_value):
    """Build an LSTM seq2seq model plus a SupervisedTrainer, logging the
    hyper-parameters to EXPERIMENT.

    Returns ``(seq2seq, trainer)``.
    """
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)
    # Perplexity loss with padding tokens masked out.
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    encoder = EncoderRNN(len(src.vocab),
                         MAX_LEN,
                         hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    # NOTE(review): the decoder hidden size is deliberately NOT doubled for a
    # bidirectional encoder (see the commented-out expression below) — confirm
    # the sizes match when bidirectional=True.
    decoder = DecoderRNN(
        len(tgt.vocab),
        MAX_LEN,
        hidden_size,  # * 2 if bidirectional else hidden_size,
        rnn_cell="lstm",
        use_attention=attention,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)
    # Small uniform initialisation of every parameter.
    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
Esempio n. 11
0
    def test_k_1(self):
        """ When k=1, the output of topk decoder should be the same as a normal decoder. """
        batch_size = 1
        eos = 1

        for _ in range(10):
            # Repeat the randomized test multiple times
            decoder = DecoderRNN(self.vocab_size, 50, 16, 0, eos)
            for param in decoder.parameters():
                param.data.uniform_(-1, 1)
            topk_decoder = TopKDecoder(decoder, 1)

            # Unconditioned decode (no encoder context); both decoders share
            # the same randomly initialised weights.
            output, _, other = decoder(None)
            output_topk, _, other_topk = topk_decoder(None)

            self.assertEqual(len(output), len(output_topk))

            finished = [False] * batch_size
            seq_scores = [0] * batch_size

            for t_step in range(len(output)):
                # Greedy (top-1) log-prob and symbol of the plain decoder.
                score, _ = output[t_step].topk(1)
                symbols = other['sequence'][t_step]
                for b in range(batch_size):
                    seq_scores[b] += score[b].data[0]
                    symbol = symbols[b].data[0]
                    if not finished[b] and symbol == eos:
                        finished[b] = True
                        # At EOS the beam length and accumulated greedy score
                        # must agree with the k=1 beam's bookkeeping.
                        self.assertEqual(other_topk['length'][b], t_step + 1)
                        self.assertTrue(
                            np.isclose(seq_scores[b],
                                       other_topk['score'][b][0]))
                    if not finished[b]:
                        # Before EOS, the k=1 beam must follow the greedy path
                        # and emit identical distributions.
                        symbol_topk = other_topk['topk_sequence'][t_step][
                            b].data[0][0]
                        self.assertEqual(symbol, symbol_topk)
                        self.assertTrue(
                            torch.equal(output[t_step].data,
                                        output_topk[t_step].data))
                if sum(finished) == batch_size:
                    break
Esempio n. 12
0
 def __init__(self, args):
     """Speaker that emits short token messages over a fixed 10-symbol vocab."""
     super(IPComm_speaker, self).__init__()
     # Fixed message alphabet and length; hidden width comes from the config.
     self.vocab_size = 10
     self.max_len = 5
     self.hidden_size = args.comm_embed_dim
     self.sos_id = 0
     self.eos_id = 1
     # GRU decoder used as the message generator.
     self.speaker = DecoderRNN(self.vocab_size,
                               self.max_len,
                               self.hidden_size,
                               sos_id=self.sos_id,
                               eos_id=self.eos_id,
                               rnn_cell='gru')
    def setUpClass(self):
        """Build a small eng-fra dataset and seq2seq fixtures for the tests."""
        self.test_wd = os.getcwd()
        data_path = os.path.join(self.test_wd, 'tests/data/eng-fra.txt')
        self.dataset = Dataset(path=data_path,
                               src_max_len=50,
                               tgt_max_len=50,
                               src_max_vocab=50000,
                               tgt_max_vocab=50000)
        self.encoder = EncoderRNN(self.dataset.input_vocab,
                                  max_len=10, hidden_size=10, rnn_cell='lstm')
        self.decoder = DecoderRNN(self.dataset.output_vocab,
                                  max_len=10, hidden_size=10, rnn_cell='lstm')
        self.seq2seq = Seq2seq(self.encoder, self.decoder)
        if torch.cuda.is_available():
            self.seq2seq.cuda()
        self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)

        # Small uniform initialisation of the model under test.
        for weight in self.seq2seq.parameters():
            weight.data.uniform_(-0.08, 0.08)
Esempio n. 14
0
    def setUpClass(self):
        """Build a Predictor over a small eng-fra dataset for the tests."""
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        trg = TargetField()
        data_file = os.path.join(test_path, 'data/eng-fra.txt')
        dataset = torchtext.data.TabularDataset(
            path=data_file,
            format='tsv',
            fields=[('src', src), ('trg', trg)],
        )
        src.build_vocab(dataset)
        trg.build_vocab(dataset)

        # Tiny LSTM encoder/decoder pair wired into a Predictor.
        encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(trg.vocab), 10, 10, trg.sos_id, trg.eos_id, rnn_cell='lstm')
        seq2seq = Seq2seq(encoder, decoder)
        self.predictor = Predictor(seq2seq, src.vocab, trg.vocab)
Esempio n. 15
0
 def __init__(
     self,
     vocabulary_size,
     embedding_size,
     hidden_state_size,
     start_label,
     end_label,
     pad_label,
     slk_parser,
     MAX_LENGTH=500,
     dropout_p=0.1,
     n_layer=3,
 ):
     """Build a bidirectional-LSTM encoder and an attention LSTM decoder with
     copy and grammar-mask output heads.

     The encoder uses hidden_state_size // 2 per direction so that its
     concatenated output matches the decoder's hidden_state_size.
     """
     super().__init__()
     self.embedding = nn.Embedding(vocabulary_size, embedding_size)
     self.sample = False
     self.dropout_p = dropout_p
     self.encoder = EncoderRNN(vocab_size=vocabulary_size,
                               max_len=MAX_LENGTH,
                               input_size=embedding_size,
                               hidden_size=hidden_state_size // 2,
                               n_layers=n_layer,
                               bidirectional=True,
                               rnn_cell='lstm',
                               input_dropout_p=self.dropout_p,
                               dropout_p=self.dropout_p,
                               variable_lengths=False,
                               embedding=None,
                               update_embedding=True)
     self.decoder = DecoderRNN(vocab_size=vocabulary_size,
                               max_len=MAX_LENGTH,
                               hidden_size=hidden_state_size,
                               sos_id=start_label,
                               eos_id=end_label,
                               n_layers=n_layer,
                               rnn_cell='lstm',
                               bidirectional=False,
                               input_dropout_p=self.dropout_p,
                               dropout_p=self.dropout_p,
                               use_attention=True)
     # Scalar head deciding copy-vs-generate per step.
     self.is_copy_output = nn.Linear(hidden_state_size, 1)
     self.grammar_mask_output = MaskOutput(hidden_state_size,
                                           vocabulary_size)
     # NOTE(review): torch.ones(1, 1) * start_label yields a *float* tensor;
     # confirm downstream code casts it before using it as token ids.
     self.decoder_start = torch.ones(1, 1) * start_label
     self.pad_label = pad_label
     self.MAX_LENGTH = MAX_LENGTH
     self.num_layers = n_layer
Esempio n. 16
0
    def setUp(self):
        """Construct a tiny eng-fra dataset and a randomly initialised model."""
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        tgt = TargetField()
        data_file = os.path.join(test_path, 'data/eng-fra.txt')
        self.dataset = torchtext.data.TabularDataset(
            path=data_file,
            format='tsv',
            fields=[('src', src), ('tgt', tgt)],
        )
        src.build_vocab(self.dataset)
        tgt.build_vocab(self.dataset)

        # Small LSTM encoder/decoder pair.
        encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(tgt.vocab), 10, 10, tgt.sos_id, tgt.eos_id, rnn_cell='lstm')
        self.seq2seq = Seq2seq(encoder, decoder)

        for weight in self.seq2seq.parameters():
            weight.data.uniform_(-0.08, 0.08)
Esempio n. 17
0
    def setUpClass(self):
        """Round-trip a seq2seq model through save/load for later comparison."""
        self.test_wd = os.getcwd()
        data_path = os.path.join(self.test_wd, 'tests/data/eng-fra.txt')
        self.dataset = Dataset(path=data_path,
                               src_max_len=50,
                               tgt_max_len=50,
                               src_max_vocab=50000,
                               tgt_max_vocab=50000)
        self.encoder = EncoderRNN(self.dataset.input_vocab,
                                  max_len=10, hidden_size=10, rnn_cell='lstm')
        self.decoder = DecoderRNN(self.dataset.output_vocab,
                                  max_len=10, hidden_size=10, rnn_cell='lstm')
        self.seq2seq = Seq2seq(self.encoder, self.decoder)
        self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)

        for weight in self.seq2seq.parameters():
            weight.data.uniform_(-0.08, 0.08)

        checkpoint_dir = os.path.join(self.test_wd, 'checkpoints')
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)

        # Persist the initialised model and load it back into the mock.
        self.seq2seq.save(checkpoint_dir)
        self.mock_seq2seq.load(checkpoint_dir)
Esempio n. 18
0
def initialize_model(
    train,
    input_vocab,
    output_vocab,
    max_len=10,
    hidden_size=256,
    dropout_p=0.5,
    bidirectional=True,
    n_beam=5,
):
    """Build an attention seq2seq model plus Adam optimizer and StepLR scheduler.

    ``n_beam`` is kept for the (currently disabled) beam-search decoder.
    Returns ``(seq2seq, optimizer, scheduler)``.
    """
    # A bidirectional encoder doubles the hidden width seen by the decoder.
    decoder_hidden = hidden_size * (2 if bidirectional else 1)

    encoder = EncoderRNN(
        len(input_vocab),
        max_len,
        hidden_size,
        bidirectional=bidirectional,
        variable_lengths=True,
    )
    decoder = DecoderRNN(
        len(output_vocab),
        max_len,
        decoder_hidden,
        dropout_p=dropout_p,
        use_attention=True,
        bidirectional=bidirectional,
        eos_id=train.tgt_field.eos_id,
        sos_id=train.tgt_field.sos_id,
    )
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq = seq2seq.cuda()

    # Small uniform initialisation of every parameter.
    for weight in seq2seq.parameters():
        weight.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and pass to the trainer
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    return seq2seq, optimizer, scheduler
Esempio n. 19
0
 def __init__(self, zh_max_len, zh_hidden, dec_layers, input_dropout_p, dropout_p, beam_size, zh_embedding_size):
     """Chinese-side decoder paired with a top-K (beam search) wrapper."""
     super(Dec, self).__init__()

     # Attention-equipped bidirectional LSTM decoder over the zh vocabulary.
     self.dec_rnn = DecoderRNN(vocab_size=len(transform.zh_voc),
                               max_len=zh_max_len,
                               embedding_size=zh_embedding_size,
                               hidden_size=zh_hidden,
                               sos_id=transform.zh_go_id,
                               eos_id=transform.zh_eos_id,
                               n_layers=dec_layers,
                               rnn_cell='lstm',
                               bidirectional=True,
                               input_dropout_p=input_dropout_p,
                               dropout_p=dropout_p,
                               use_attention=True)

     # Beam-search wrapper around the step decoder.
     self.beam_dec = TopKDecoder(self.dec_rnn, beam_size)
Esempio n. 20
0
 # Build a GRU encoder/decoder seq2seq from pretrained word vectors.
 bidirectional = opt.word_bidirect
 encoder = EncoderRNN(vocab_size=len(src.vocab),
                      max_len=max_len,
                      word_dim=opt.word_dim,
                      hidden_size=hidden_size,
                      input_dropout_p=opt.input_dropout,
                      bidirectional=bidirectional,
                      n_layers=1,
                      rnn_cell='gru',
                      variable_lengths=True)
 # NOTE(review): the conditional below parses as
 # (hidden_size * 2) if bidirectional else 1 — so a unidirectional model
 # gets hidden_size=1. Likely meant hidden_size * (2 if bidirectional else 1);
 # confirm before relying on the unidirectional path.
 decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                      max_len=max_len,
                      hidden_size=hidden_size *
                      2 if bidirectional else 1,
                      dropout_p=opt.dropout,
                      use_attention=True,
                      bidirectional=bidirectional,
                      n_layers=1,
                      rnn_cell='gru',
                      eos_id=tgt.eos_id,
                      sos_id=tgt.sos_id)
 seq2seq = Seq2seq(encoder, decoder)
 # Uniform init; the print is a debugging aid left in by the author.
 for param in seq2seq.parameters():
     param.data.uniform_(-0.08, 0.08)
     print(param.data[0:3])
 # Load pretrained embeddings into the encoder, rescaled to the random-init norm.
 _, _, norm_val = encoder.vectors_stats()
 encoder.init_vectors(src.vocab.vectors)
 # encoder.scale_vectors(0.08)
 encoder.normalize_vectors(norm_val)
 encoder.vectors_stats()
 # NOTE(review): snippet is truncated here — the loop body is missing.
 for param in seq2seq.parameters():
Esempio n. 21
0
    # Fragment of a training setup (weight/pad/opt defined above this view).
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        # hidden_size=128
        hidden_size = 300
        bidirectional = True

        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional, variable_lengths=True)
        # NOTE(review): `hidden_size * 2 if bidirectional else 1` parses as
        # (hidden_size * 2) if bidirectional else 1; harmless here because
        # bidirectional is hard-coded True, but a latent bug if that changes.
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else 1,
                             dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        # Uniform init; the print is a debugging aid left in by the author.
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
            print(param.data)
        encoder.vectors_stats()
        # encoder.init_vectors(src.vocab.vectors)
        # for param in seq2seq.parameters():
        #     print(param.data)

        if torch.cuda.is_available():
            seq2seq.cuda()

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
Esempio n. 22
0
def run_training(opt, default_data_dir, num_epochs=100):
    """Train (or load) a seq2seq model, then run an interactive beam-search loop.

    Fixes over the original:
    * ``SupervisedTrainer`` was constructed with ``batch_size=num_epochs``
      instead of the configured mini-batch size.
    * the accuracy log line printed ``dev_loss`` instead of ``accuracy``.
    * Python-2-only ``raw_input`` replaced with ``input``.
    * the per-option ``print`` passed printf placeholders without formatting.
    """
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:

        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')

        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            # Keep only non-empty examples that fit within max_len.
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                   and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path, 'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss (padding is masked out of the perplexity).
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            # Bidirectional encoder -> decoder sees 2x-wide hidden states.
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train
        batch_size = 32
        # Integer step intervals (step counters are integers).
        checkpoint_every = num_epochs // 10
        print_every = num_epochs // 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every, expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        logging.info("Starting training with the following Properties %s", json.dumps(properties, indent=2))
        # Bug fix: the trainer must use the mini-batch size, not num_epochs.
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every, expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            # Bug fix: log the accuracy, not the dev loss a second time.
            logging.info("Accuracy: %s", accuracy)

    # Wrap the trained decoder in a width-4 beam search for prediction.
    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))

    predictor = Predictor(beam_search, input_vocab, output_vocab)
    while True:
        try:
            seq_str = input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                # Bug fix: actually interpolate the placeholders.
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
Esempio n. 23
0
 # Fragment: decoder construction (encoder/config defined above this view;
 # the trailing `if` body is truncated below).
 if bidirectional:
     hidden_size = hidden_size * 2
 if config['use_vecs']:
     # aug_size = len(train_vecs[0][0])
     aug_size = vectors.vector_size
 else:
     # aug_size = 0
     aug_size = feat_hidden_size
 # pdb.set_trace()
 decoder = DecoderRNN(len(tgt.vocab),
                      max_len,
                      feat_hidden_size,
                      hidden_size=hidden_size,
                      aug_size=aug_size,
                      dropout_p=float(config['dropout']),
                      input_dropout_p=float(config['dropout']),
                      use_attention=True,
                      bidirectional=bidirectional,
                      rnn_cell='LSTM',
                      eos_id=tgt.eos_id,
                      sos_id=tgt.sos_id,
                      n_layers=config['num layers'])
 # if torch.cuda.is_available():
 #     encoder.cuda()
 #     decoder.cuda()
 # topk_decoder = TopKDecoder(decoder, 3)
 seq2seq = Seq2seq(encoder, decoder)
 # seq2seq = Seq2seq(encoder, topk_decoder)
 if torch.cuda.is_available():
     # pdb.set_trace()
     # seq2seq.to(DEVICE)
Esempio n. 24
0
    # Initialize model (fragment: `opt`, vocabs and `device` are defined
    # above this view; the trailing `if` body is truncated below).
    encoder = EncoderRNN(len(src_vocab.vocab),
                         opt.max_src_length,
                         embedding_size=opt.embedding_size,
                         rnn_cell=opt.rnn_cell,
                         n_layers=opt.n_hidden_layer,
                         hidden_size=opt.hidden_size,
                         bidirectional=opt.bidirectional,
                         variable_lengths=False)

    # Decoder hidden width doubles when the encoder is bidirectional.
    decoder = DecoderRNN(len(tgt_vocab.vocab),
                         opt.max_tgt_length,
                         embedding_size=opt.embedding_size,
                         rnn_cell=opt.rnn_cell,
                         n_layers=opt.n_hidden_layer,
                         hidden_size=opt.hidden_size *
                         2 if opt.bidirectional else opt.hidden_size,
                         bidirectional=opt.bidirectional,
                         dropout_p=0.2,
                         use_attention=opt.use_attn,
                         eos_id=tgt_vocab.word2idx[tgt_vocab.eos_token],
                         sos_id=tgt_vocab.word2idx[tgt_vocab.sos_token])
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    if opt.resume and not opt.load_checkpoint:
        last_checkpoint = get_last_checkpoint(opt.model_dir)
        if last_checkpoint:
            opt.load_checkpoint = os.path.join(opt.model_dir, last_checkpoint)
            # NOTE(review): str.strip('.pt') strips the *character set*
            # {'.', 'p', 't'} from both ends, not the '.pt' suffix — works for
            # purely numeric step names, but removesuffix('.pt') is safer.
            opt.skip_steps = int(last_checkpoint.strip('.pt').split('/')[-1])

    if opt.load_checkpoint:
Esempio n. 25
0
        # Fragment: model init inside a training routine (`src`, `tgt`,
        # `max_len`, `loss` are defined above this view; the SupervisedTrainer
        # call at the bottom is truncated mid-argument-list).
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(
            len(src.vocab),
            max_len,
            hidden_size,
            bidirectional=bidirectional,
            rnn_cell="lstm",
            variable_lengths=True,
        )
        # hidden_size * 2 matches the bidirectional encoder output width.
        decoder = DecoderRNN(
            len(tgt.vocab),
            max_len,
            hidden_size * 2,
            dropout_p=0.2,
            use_attention=True,
            bidirectional=bidirectional,
            rnn_cell="lstm",
            eos_id=tgt.eos_id,
            sos_id=tgt.sos_id,
        )
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(
        loss=loss,
    # Fragment: loss + model init (`pad_id`, `opt`, vocabs, `device` are
    # defined above this view).
    weight = torch.ones(len(tgt_vocab.vocab))
    loss = Perplexity(weight, pad_id)
    loss.to(device)

    # Initialize model
    encoder = EncoderRNN(len(src_vocab.vocab),
                         opt.max_src_length,
                         hidden_size=opt.hidden_size,
                         bidirectional=opt.bidirectional,
                         variable_lengths=False)

    # Decoder hidden width doubles when the encoder is bidirectional.
    decoder = DecoderRNN(len(tgt_vocab.vocab),
                         opt.max_tgt_length,
                         hidden_size=opt.hidden_size *
                         2 if opt.bidirectional else opt.hidden_size,
                         dropout_p=0.2,
                         use_attention=opt.use_attn,
                         bidirectional=opt.bidirectional,
                         eos_id=tgt_vocab.word2idx[tgt_vocab.eos_token],
                         sos_id=tgt_vocab.word2idx[tgt_vocab.sos_token])
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    if opt.resume and not opt.load_checkpoint:
        last_checkpoint = get_last_checkpoint(opt.model_dir)
        if last_checkpoint:
            opt.load_checkpoint = os.path.join(opt.model_dir, last_checkpoint)
            # NOTE(review): strip('.pt') strips a character set, not the
            # suffix — removesuffix('.pt') would be the robust form.
            opt.skip_steps = int(last_checkpoint.strip('.pt').split('/')[-1])

    if opt.load_checkpoint:
        seq2seq.load_state_dict(torch.load(opt.load_checkpoint))
Esempio n. 27
0
    # Fragment: dataset + model preparation (`opt` defined above this view;
    # the loss setup at the bottom is truncated).
    dataset = Dataset(opt.train_path, src_max_len=50, tgt_max_len=50)
    input_vocab = dataset.input_vocab
    output_vocab = dataset.output_vocab

    # Dev set reuses the training vocabularies.
    dev_set = Dataset(opt.dev_path,
                      src_max_len=50,
                      tgt_max_len=50,
                      src_vocab=input_vocab,
                      tgt_vocab=output_vocab)

    # Prepare model
    hidden_size = 128
    encoder = EncoderRNN(input_vocab, dataset.src_max_len, hidden_size)
    decoder = DecoderRNN(output_vocab,
                         dataset.tgt_max_len,
                         hidden_size,
                         dropout_p=0.2,
                         use_attention=True)
    seq2seq = Seq2seq(encoder, decoder)

    # Resume from the latest checkpoint, otherwise initialise from scratch.
    if opt.resume:
        print("resuming training")
        latest_checkpoint = Checkpoint.get_latest_checkpoint(opt.expt_dir)
        seq2seq.load(latest_checkpoint)
    else:
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # Prepare loss
    weight = torch.ones(output_vocab.get_vocab_size())
    mask = output_vocab.MASK_token_id
Esempio n. 28
0
def train():
    """Load CSV (src, tgt) pairs, optionally build a fresh seq2seq model,
    run supervised training, and wrap the result in a Predictor.

    All configuration (data paths, resume flag, experiment directory) is
    read from the module-level ``opt`` namespace.
    """
    max_len = 50

    # jieba.lcut already yields a token list; list() just takes a copy.
    src = SourceField(sequential=True,
                      tokenize=lambda text: list(jieba.lcut(text)))
    tgt = TargetField(sequential=True,
                      tokenize=lambda text: list(jieba.lcut(text)))

    def len_filter(example):
        # Drop pairs where either side exceeds the length budget.
        return len(example.src) <= max_len and len(example.tgt) <= max_len

    field_spec = [('src', src), ('tgt', tgt)]
    train_data = torchtext.data.TabularDataset(path=opt.train_path,
                                               format='csv',
                                               fields=field_spec,
                                               filter_pred=len_filter)
    dev_data = torchtext.data.TabularDataset(path=opt.dev_path,
                                             format='csv',
                                             fields=field_spec,
                                             filter_pred=len_filter)

    # Vocabularies are built from the training split only.
    src.build_vocab(train_data, max_size=50000)
    tgt.build_vocab(train_data, max_size=50000)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss: perplexity over the target vocabulary, padding masked out.
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    # Both stay None when resuming; the trainer then restores model and
    # optimizer state from the latest checkpoint.
    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Fresh model: bidirectional encoder, attention decoder whose hidden
        # size matches the concatenated forward/backward encoder states.
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab),
                             max_len,
                             hidden_size,
                             bidirectional=bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        # Uniform weight init, as in the original seq2seq recipe.
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    trainer = SupervisedTrainer(loss=loss,
                                batch_size=32,
                                checkpoint_every=50,
                                print_every=10,
                                expt_dir=opt.expt_dir)

    seq2seq = trainer.train(seq2seq,
                            train_data,
                            num_epochs=6,
                            dev_data=dev_data,
                            optimizer=optimizer,
                            teacher_forcing_ratio=0.5,
                            resume=opt.resume)
    predictor = Predictor(seq2seq, input_vocab, output_vocab)
# NOTE(review): top-level variant of the model-initialization code above;
# relies on `opt`, `params`, `src`, `tgt` and `max_len` existing at module
# scope, with layer count and cell type drawn from a `params` dict.
if not opt.resume:
    # Initialize model
    hidden_size = params['hidden_size']
    bidirectional = True
    encoder = EncoderRNN(len(src.vocab),
                         max_len,
                         hidden_size,
                         bidirectional=bidirectional,
                         variable_lengths=True,
                         n_layers=params['n_layers'],
                         rnn_cell=params['rnn_cell'])
    # Decoder hidden size is doubled to match the bidirectional encoder's
    # concatenated forward/backward states.
    decoder = DecoderRNN(len(tgt.vocab),
                         max_len,
                         hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=0.2,
                         use_attention=True,
                         bidirectional=bidirectional,
                         rnn_cell=params['rnn_cell'],
                         n_layers=params['n_layers'],
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq.cuda()

    # Uniform weight init, as in the original seq2seq recipe.
    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and pass to the trainer.
    #
    # NOTE(review): the excerpt is cut off mid-statement below — the
    # Optimizer(...) call's remaining arguments are not visible here.
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
Esempio n. 30
0
    def test_k_greater_than_1(self):
        """ Implement beam search manually and compare results from topk decoder. """
        # Reference-vs-implementation test: run TopKDecoder once, then replay
        # an explicit beam search with the same underlying DecoderRNN and
        # check that lengths, scores and emitted symbols agree beam-by-beam.
        max_len = 50
        beam_size = 3
        batch_size = 1
        hidden_size = 8
        sos = 0
        eos = 1

        # Repeat with fresh random weights to cover many score orderings.
        for _ in range(10):
            decoder = DecoderRNN(self.vocab_size, max_len, hidden_size, sos,
                                 eos)
            for param in decoder.parameters():
                param.data.uniform_(-1, 1)
            topk_decoder = TopKDecoder(decoder, beam_size)

            encoder_hidden = torch.autograd.Variable(
                torch.randn(1, batch_size, hidden_size))
            _, hidden_topk, other_topk = topk_decoder(
                None, encoder_hidden=encoder_hidden)

            # Queue state:
            #   1. time step
            #   2. symbol
            #   3. hidden state
            #   4. accumulated log likelihood
            #   5. beam number
            # The root entry uses time step -1 and beam number None so the
            # back-tracking loop below knows where to stop.
            batch_queue = [[(-1, sos, encoder_hidden[:, b, :].unsqueeze(1), 0,
                             None)] for b in range(batch_size)]
            time_batch_queue = [batch_queue]
            batch_finished_seqs = [list() for _ in range(batch_size)]
            for t in range(max_len):
                new_batch_queue = []
                for b in range(batch_size):
                    new_queue = []
                    for k in range(min(len(time_batch_queue[t][b]),
                                       beam_size)):
                        _, inputs, hidden, seq_score, _ = time_batch_queue[t][
                            b][k]
                        # A beam that already emitted EOS is finished: park it
                        # and stop expanding it.
                        if inputs == eos:
                            batch_finished_seqs[b].append(
                                time_batch_queue[t][b][k])
                            continue
                        inputs = torch.autograd.Variable(
                            torch.LongTensor([[inputs]]))
                        context, hidden, attn = decoder.forward_step(
                            inputs, hidden, None)
                        decoder_outputs, symbols = decoder.decoder(
                            context, attn, None, None)
                        # Scores are accumulated in log space.
                        decoder_outputs = decoder_outputs.log()
                        topk_score, topk = decoder_outputs[0].data.topk(
                            beam_size)
                        for score, sym in zip(topk_score.tolist()[0],
                                              topk.tolist()[0]):
                            new_queue.append(
                                (t, sym, hidden, score + seq_score, k))
                    # Keep only the beam_size best expansions per batch item.
                    new_queue = sorted(new_queue,
                                       key=lambda x: x[3],
                                       reverse=True)[:beam_size]
                    new_batch_queue.append(new_queue)
                time_batch_queue.append(new_batch_queue)

            # finished beams
            finalist = [l[:beam_size] for l in batch_finished_seqs]
            # unfinished beams
            # Pad the finalist with the best still-running beams when fewer
            # than beam_size sequences reached EOS within max_len steps.
            for b in range(batch_size):
                if len(finalist[b]) < beam_size:
                    last_step = sorted(time_batch_queue[-1][b],
                                       key=lambda x: x[3],
                                       reverse=True)
                    finalist[b] += last_step[:beam_size - len(finalist[b])]

            # back track
            # Follow the stored (time step, beam number) parent pointers from
            # each finalist back to the SOS root, then reverse into
            # chronological order.
            topk = []
            for b in range(batch_size):
                batch_topk = []
                for k in range(beam_size):
                    seq = [finalist[b][k]]
                    prev_k = seq[-1][4]
                    prev_t = seq[-1][0]
                    while prev_k is not None:
                        seq.append(time_batch_queue[prev_t][b][prev_k])
                        prev_k = seq[-1][4]
                        prev_t = seq[-1][0]
                    batch_topk.append([s for s in reversed(seq)])
                topk.append(batch_topk)

            # Rank the reconstructed sequences by final accumulated score.
            for b in range(batch_size):
                topk[b] = sorted(topk[b], key=lambda s: s[-1][3], reverse=True)

            topk_scores = other_topk['score']
            topk_lengths = other_topk['topk_length']
            topk_pred_symbols = other_topk['topk_sequence']
            for b in range(batch_size):
                # Skip the comparison when two beams effectively tie: their
                # relative order is then numerically unspecified.
                precision_error = False
                for k in range(beam_size - 1):
                    if np.isclose(topk_scores[b][k], topk_scores[b][k + 1]):
                        precision_error = True
                        break
                if precision_error:
                    break
                for k in range(beam_size):
                    self.assertEqual(topk_lengths[b][k], len(topk[b][k]) - 1)
                    self.assertTrue(
                        np.isclose(topk_scores[b][k], topk[b][k][-1][3]))
                    total_steps = topk_lengths[b][k]
                    for t in range(total_steps):
                        self.assertEqual(topk_pred_symbols[t][b, k].data[0],
                                         topk[b][k][t +
                                                    1][1])  # topk includes SOS