Ejemplo n.º 1
0
def __test_seq2seq_model__():
    """Smoke-test for the RNN seq2seq model.

    Builds a small RNNMT on a dummy experiment, trains it, and at every
    checkpoint greedy-decodes two fixed probe sequences so progress can
    be inspected in the log.
    """
    from rtg.dummy import DummyExperiment
    from rtg.module.decoder import Decoder

    vocab_size = 50
    batch_size = 30
    emb_size = 100
    model_dim = 100
    steps = 3000
    check_pt = 100
    exp = DummyExperiment("tmp.work",
                          config={'model_type': 'seq2seq'},
                          read_only=True,
                          vocab_size=vocab_size)

    assert Batch.bos_val == 2
    src = tensor([[4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                  [13, 12, 11, 10, 9, 8, 7, 6, 5, 4]])
    src_lens = tensor([src.size(1)] * src.size(0))

    for reverse in (False,):
        # train two models;
        #  first, just copy the numbers, i.e. y = x
        #  second, reverse the numbers y=(V + reserved - x)
        log.info(f"====== REVERSE={reverse}; VOCAB={vocab_size}======")
        model, args = RNNMT.make_model('DummyA', 'DummyB',
                                       vocab_size, vocab_size,
                                       attention='dot',
                                       emb_size=emb_size,
                                       hid_size=model_dim,
                                       n_layers=1)
        trainer = SteppedRNNMTTrainer(exp=exp, model=model, lr=0.01,
                                      warmup_steps=100)
        decoder = Decoder.new(exp, model)

        def on_check_pt(**kwargs):
            # log every greedy hypothesis for the probe batch with its score
            for score, seq in decoder.greedy_decode(src, src_lens, max_len=17):
                log.info(f'{score:.4f} :: {seq}')

        trainer.train(steps=steps,
                      check_point=check_pt,
                      batch_size=batch_size,
                      check_pt_callback=on_check_pt)
Ejemplo n.º 2
0
def __test_model__():
    """Smoke-test for the transformer NMT model ('tfmnmt').

    Builds a tiny encoder-less (enc_layers=0) transformer on a dummy
    experiment, trains it for a few steps, and greedy-decodes two fixed
    probe sequences at every checkpoint so progress shows in the log.
    """
    from rtg.dummy import DummyExperiment
    from rtg.module.decoder import Decoder

    vocab_size = 30
    args = {
        'src_vocab': vocab_size,
        'tgt_vocab': vocab_size,
        'enc_layers': 0,
        'dec_layers': 4,
        'hid_size': 64,
        'ff_size': 64,
        'n_heads': 4,
        'activation': 'gelu'
    }
    # NOTE: a dead `if False:` block that printed model parameters was
    # removed here -- it referenced the undefined name `model` and would
    # have raised NameError had it ever been enabled.

    config = {
        'model_type': 'tfmnmt',
        'trainer': {
            'init_args': {
                'chunk_size': 2
            }
        }
    }
    exp = DummyExperiment("work.tmp.t2t",
                          config=config,
                          read_only=True,
                          vocab_size=vocab_size)
    exp.model_args = args
    trainer = TransformerTrainer(exp=exp, warmup_steps=200)
    decr = Decoder.new(exp, trainer.model)

    assert 2 == Batch.bos_val
    # two probe sentences, EOS-terminated and right-padded to equal length
    src = tensor(
        [[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, Batch.eos_val, Batch.pad_value],
         [
             13, 12, 11, 10, 9, 8, 7, 6, Batch.eos_val, Batch.pad_value,
             Batch.pad_value, Batch.pad_value
         ]])
    src_lens = tensor([src.size(1)] * src.size(0))

    def check_pt_callback(**args):
        # decode the probe batch and log each hypothesis with its score
        res = decr.greedy_decode(src, src_lens, max_len=12)
        for score, seq in res:
            log.info(f'{score:.4f} :: {seq}')

    batch_size = 50
    steps = 1000
    check_point = 50
    trainer.train(steps=steps,
                  check_point=check_point,
                  batch_size=batch_size,
                  check_pt_callback=check_pt_callback)
Ejemplo n.º 3
0
def __test_model__():
    """Smoke-test for the 'wvskptfmnmt' transformer variant.

    Trains a tiny model on a dummy experiment and greedy-decodes two
    fixed probe sequences at every checkpoint.
    """
    from rtg.data.dummy import DummyExperiment
    from rtg import Batch, my_tensor as tensor
    from rtg.module.decoder import Decoder

    vocab_size = 24
    model_args = dict(src_vocab=vocab_size,
                      tgt_vocab=vocab_size,
                      enc_layers=0,
                      dec_layers=4,
                      hid_size=32,
                      eff_dims=[],
                      dff_dims=[64, 128, 128, 64],
                      enc_depth_probs=[],
                      dec_depth_probs=[1.0, 0.75, 0.5, 0.75],
                      n_heads=4,
                      activation='relu')

    # criterion may be one of: cross_entropy, smooth_kld,
    # binary_cross_entropy, triplet_loss
    optim_args = {'criterion': "smooth_kld", 'lr': 0.01, 'inv_sqrt': True}
    config = {
        'model_type': 'wvskptfmnmt',
        'trainer': {'init_args': {'chunk_size': 2, 'grad_accum': 5}},
        'optim': {'args': optim_args}
    }

    exp = DummyExperiment("work.tmp.wvskptfmnmt", config=config,
                          read_only=True, vocab_size=vocab_size)
    exp.model_args = model_args
    trainer = WVSKPTransformerTrainer(exp=exp, warmup_steps=200,
                                      **config['optim']['args'])
    decoder = Decoder.new(exp, trainer.model)

    assert Batch.bos_val == 2
    # two probe sentences, EOS-terminated and right-padded to equal length
    src = tensor([[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, Batch.eos_val, Batch.pad_value],
                  [13, 12, 11, 10, 9, 8, 7, 6, Batch.eos_val, Batch.pad_value,
                   Batch.pad_value, Batch.pad_value]])
    src_lens = tensor([src.size(1)] * src.size(0))

    def on_check_pt(**kwargs):
        # decode the probe batch and log each hypothesis with its score
        for score, seq in decoder.greedy_decode(src, src_lens, max_len=12):
            log.info(f'{score:.4f} :: {seq}')

    trainer.train(steps=200, check_point=10, batch_size=50,
                  check_pt_callback=on_check_pt)
Ejemplo n.º 4
0
    def __init__(self,
                 batch: List[Example],
                 sort_dec=False,
                 batch_first=True,
                 add_eos_x=True,
                 add_eos_y=True,
                 add_bos_x=False,
                 add_bos_y=False):
        """Build padded source (and optionally target) tensors from examples.

        :param batch: list of Examples
        :param sort_dec: True if the examples should be sorted in descending
            order of their source sequence lengths
        :param batch_first: True if the first tensor dimension is the batch
            (otherwise x_seqs/y_seqs are transposed to time-major)
        :param add_eos_x: whether source sequences should carry an EOS marker
            (delegated to bos_eos_check)
        :param add_eos_y: whether target sequences should carry an EOS marker
        :param add_bos_x: whether source sequences should carry a BOS marker
        :param add_bos_y: whether target sequences should carry a BOS marker
        """
        self.eos_x = add_eos_x
        self.eos_y = add_eos_y
        self.bos_x = add_bos_x
        self.bos_y = add_bos_y
        self.batch_first = batch_first

        # BOS/EOS handling for the source side; exact semantics (check vs
        # add) live in bos_eos_check
        self.bos_eos_check(batch, 'x', add_bos_x, add_eos_x)
        if sort_dec:
            batch = sorted(batch, key=lambda _: len(_.x), reverse=True)
        self._len = len(batch)
        self.x_len = tensor([len(e.x) for e in batch])
        self.x_toks = self.x_len.sum().float().item()
        self.max_x_len = self.x_len.max()

        # create x_seqs on CPU RAM and move to GPU at once
        self.x_seqs = torch.full(size=(self._len, self.max_x_len),
                                 fill_value=self.pad_value,
                                 dtype=torch.long)
        for i, ex in enumerate(batch):
            # left-aligned copy; the tail stays pad_value
            self.x_seqs[i, :len(ex.x)] = torch.tensor(ex.x, dtype=torch.long)
        self.x_seqs = self.x_seqs.to(device)
        if not batch_first:  # transpose
            self.x_seqs = self.x_seqs.t()

        # targets are optional; the first example decides for the batch
        first_y = batch[0].y
        self.has_y = first_y is not None
        if self.has_y:
            self.bos_eos_check(batch, 'y', add_bos_y, add_eos_y)
            self.y_len = tensor([len(e.y) for e in batch])
            self.y_toks = self.y_len.sum().float().item()
            self.max_y_len = self.y_len.max().item()
            # same build-on-CPU-then-move strategy as x_seqs
            y_seqs = torch.full(size=(self._len, self.max_y_len),
                                fill_value=self.pad_value,
                                dtype=torch.long)
            for i, ex in enumerate(batch):
                y_seqs[i, :len(ex.y)] = torch.tensor(ex.y, dtype=torch.long)
            self.y_seqs = y_seqs.to(device)
            if not batch_first:  # transpose
                self.y_seqs = self.y_seqs.t()
Ejemplo n.º 5
0
Archivo: rnnmt.py Proyecto: isi-nlp/rtg
    def forward(self, batch: Batch):
        """Score the gold target tokens with a scheduled-sampling decode.

        Encodes the source batch, then steps the decoder over time steps
        1..max_y_len-1, collecting the log-probability assigned to each
        gold target token. At each step the next decoder input is picked
        at random between the gold token and the model's argmax prediction.

        :param batch: batch with x_seqs/x_len sources and y_seqs targets;
            must be batch-first
        :return: tensor of shape (batch_size, max_y_len - 1) holding the
            gold tokens' log-probabilities
        """
        assert batch.batch_first
        batch_size = len(batch)
        enc_outs, enc_hids = self.encode(batch.x_seqs, batch.x_len, hids=None,
                                         max_y_len=batch.max_y_len)

        # every sequence starts decoding from BOS; decoder state is seeded
        # with the encoder's final hidden state
        dec_inps = tensor([[batch.bos_val]] * batch_size, dtype=torch.long)
        dec_hids = enc_hids
        outp_probs = torch.zeros((batch.max_y_len - 1, batch_size), device=device)

        for t in range(1, batch.max_y_len):
            word_probs, dec_hids, _ = self.dec(enc_outs, dec_inps, dec_hids)

            # expected output;; log probability for these indices should be high
            expct_word_idx = batch.y_seqs[:, t].view(batch_size, 1)
            expct_word_log_probs = word_probs.gather(dim=1, index=expct_word_idx)
            outp_probs[t - 1] = expct_word_log_probs.squeeze()

            # Randomly switch between gold and the prediction next word
            if random.choice((False, True)):
                dec_inps = expct_word_idx  # Next input is current target
            else:
                pred_word_idx = word_probs.argmax(dim=1)
                dec_inps = pred_word_idx.view(batch_size, 1)
        # transpose (time, batch) -> (batch, time)
        return outp_probs.t()
Ejemplo n.º 6
0
    def batch_forward(self, batch):
        """Score a batch of sequences against themselves (language-model style).

        Steps the decoder over positions 1..max_x_len-1 of the source
        sequences with no encoder outputs, gathering the log-probability of
        each gold next token; the next input alternates at random between
        the gold token and the model's own argmax prediction.

        :param batch: batch-first batch carrying only x_seqs (must not have y)
        :return: tensor of shape (batch_size, max_x_len - 1) holding the
            gold tokens' log-probabilities
        """
        assert batch.batch_first
        batch_size = len(batch)

        # this path models x itself, so target sequences must be absent
        assert not batch.has_y
        seqs = batch.x_seqs
        max_seq_len = batch.max_x_len

        # every sequence starts from BOS with an empty hidden state
        prev_out = tensor([[BOS_TOK_IDX]] * batch_size, dtype=torch.long)
        last_hidden = None
        outp_probs = torch.zeros((max_seq_len - 1, batch_size), device=device)

        for t in range(1, max_seq_len):
            word_probs, last_hidden, _ = self(enc_outs=None, prev_out=prev_out,
                                              last_hidden=last_hidden)

            # expected output;; log probability for these indices should be high
            expct_word_idx = seqs[:, t].view(batch_size, 1)
            expct_word_log_probs = word_probs.gather(dim=1, index=expct_word_idx)
            outp_probs[t - 1] = expct_word_log_probs.squeeze()

            # Randomly switch between gold and the prediction next word
            if random.choice((False, True)):
                prev_out = expct_word_idx  # Next input is current target
            else:
                pred_word_idx = word_probs.argmax(dim=1)
                prev_out = pred_word_idx.view(batch_size, 1)
        # transpose (time, batch) -> (batch, time)
        return outp_probs.t()
Ejemplo n.º 7
0
def __test_model__():
    """Smoke-test for the hybrid transformer/RNN model ('hybridmt').

    Trains a tiny model on a dummy experiment and greedy-decodes two
    fixed probe sequences at every checkpoint.
    """
    from rtg.dummy import DummyExperiment
    from rtg import Batch, my_tensor as tensor
    from rtg.module.decoder import Decoder

    vocab_size = 24
    model_args = dict(src_vocab=vocab_size,
                      tgt_vocab=vocab_size,
                      enc_layers=4,
                      dec_layers=3,
                      hid_size=128,
                      ff_size=256,
                      dec_rnn_type='GRU',
                      enc_heads=4)

    exp = DummyExperiment("work.tmp.hybridmt",
                          config={'model_type': 'hybridmt'},
                          read_only=True, vocab_size=vocab_size)
    exp.model_args = model_args
    trainer = HybridMTTrainer(exp=exp, warmup_steps=200)
    decoder = Decoder.new(exp, trainer.model)

    assert Batch.bos_val == 2
    src = tensor([[4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                  [13, 12, 11, 10, 9, 8, 7, 6, 5, 4]])
    src_lens = tensor([src.size(1)] * src.size(0))

    def on_check_pt(**kwargs):
        # decode the probe batch and log each hypothesis with its score
        for score, seq in decoder.greedy_decode(src, src_lens, max_len=12):
            log.info(f'{score:.4f} :: {seq}')

    trainer.train(steps=2000, check_point=50, batch_size=50,
                  check_pt_callback=on_check_pt)
Ejemplo n.º 8
0
Archivo: rnnmt.py Proyecto: isi-nlp/rtg
    def forward(self, enc_outs, enc_hids, max_len, bos_idx):
        """Roll the decoder for ``max_len`` steps and re-encode its outputs.

        :param enc_outs: encoder outputs, one row per batch item
        :param enc_hids: pair of hidden-state tensors used to seed the decoder
        :param max_len: number of decoder steps to run
        :param bos_idx: token id fed to the decoder as input
        :return: (enc_outs, enc_hids) produced by re-encoding the stacked
            decoder states (pre_embedded=True skips the embedding lookup)
        """
        batch_size = len(enc_outs)
        # dim 1 of each hidden tensor is the batch dimension; all must agree
        assert batch_size == enc_hids[0].shape[1] == enc_hids[1].shape[1]

        dec_inps = tensor([[bos_idx]] * batch_size, dtype=torch.long)
        dec_hids = enc_hids
        # collect raw decoder hidden outputs (gen_probs=False) per time step
        result = torch.zeros((batch_size, max_len, self.dec.hid_size), device=device)
        for t in range(max_len):
            # NOTE(review): dec_inps is never updated inside this loop, so
            # every step feeds bos_idx -- confirm this is intentional and not
            # a missing feed-back of the previous step's output
            dec_outs, dec_hids, _ = self.dec(enc_outs, dec_inps, dec_hids, gen_probs=False)
            result[:, t, :] = dec_outs

        # TODO: check how hidden state flows
        enc_outs, enc_hids = self.enc(result, [max_len] * batch_size, pre_embedded=True)
        return enc_outs, enc_hids
Ejemplo n.º 9
0
    def evaluate(self, data) -> float:
        """Run the model over ``data`` and return the total loss.

        :param data: iterable of batches; each batch must expose ``y_len``
        :return: sum of per-batch losses as a plain Python float
        """
        tot_loss = 0.0
        for i, batch in tqdm(enumerate(data)):
            # Step clear gradients
            self.model.zero_grad()
            # Step Run forward pass.
            pred_len = self.model(batch)
            # Compute the loss against the gold target lengths
            loss = self.loss_func(pred_len,
                                  tensor(batch.y_len.data, dtype=torch.float))
            # .item() detaches the scalar: accumulating the loss tensor
            # itself would keep every batch's autograd graph alive and make
            # this function return a Tensor despite the declared ``-> float``
            tot_loss += loss.item()
        return tot_loss
Ejemplo n.º 10
0
    def decode_sentence(self,
                        line: str,
                        max_len=20,
                        prepared=False,
                        **args) -> List[StrHypothesis]:
        """Decode a single input sentence with beam search.

        :param line: input sentence -- raw text, or space-separated token ids
            when ``prepared`` is True
        :param max_len: maximum output length
        :param prepared: True if ``line`` already holds token ids; BOS/EOS
            are appended when missing. Otherwise the input vocabulary encodes
            the text (EOS only, no BOS).
        :param args: extra keyword args forwarded to the decode routines
        :return: list of (score, text) hypotheses for the sentence
        """

        line = line.strip()
        if prepared:
            in_seq = [int(t) for t in line.split()]
            # ensure the id sequence is wrapped as BOS ... EOS
            if in_seq[0] != self.bos_val:
                in_seq.insert(0, self.bos_val)
            if in_seq[-1] != self.eos_val:
                in_seq.append(self.eos_val)
        else:
            in_seq = self.inp_vocab.encode_as_ids(line,
                                                  add_eos=True,
                                                  add_bos=False)
        # shape as a batch of one sentence
        in_seqs = tensor(in_seq, dtype=torch.long).view(1, -1)
        in_lens = tensor([len(in_seq)], dtype=torch.long)
        if self.debug:
            # also log the greedy hypothesis for comparison with the beams
            greedy_score, greedy_out = self.greedy_decode(
                in_seqs, in_lens, max_len, **args)[0]
            greedy_out = self.out_vocab.decode_ids(greedy_out, trunc_eos=True)
            log.debug(f'Greedy : score: {greedy_score:.4f} :: {greedy_out}')

        beams: List[List[Hypothesis]] = self.beam_decode(
            in_seqs, in_lens, max_len, **args)
        beams = beams[
            0]  # first sentence, the only one we passed to it as input
        result = []
        for i, (score, beam_toks) in enumerate(beams):
            # detokenize each beam, truncating at EOS
            out = self.out_vocab.decode_ids(beam_toks, trunc_eos=True)
            if self.debug:
                log.debug(f"Beam {i}: score:{score:.4f} :: {out}")
            result.append((score, out))
        return result
Ejemplo n.º 11
0
    def train(self, num_epochs: int, batch_size: int, **args):
        """Train the model epoch by epoch, validating and checkpointing each.

        :param num_epochs: total epoch count; must exceed the epochs already
            trained (``self.start_epoch``)
        :param batch_size: batch size for both training and validation data
        :param args: optional extras; ``keep_models`` (default 4) caps how
            many stored checkpoints are kept
        :raises Exception: when the model was already trained to num_epochs
        """
        log.info(
            f'Going to train for {num_epochs} epochs; batch_size={batch_size}')

        # NOTE(review): both iterables read the target-side vocab
        # (exp.tgt_vocab) -- presumably this trainer consumes target text
        # only; confirm
        train_data = BatchIterable(self.exp.train_file,
                                   batch_size=batch_size,
                                   in_mem=True,
                                   field=self.exp.tgt_vocab)
        val_data = BatchIterable(self.exp.valid_file,
                                 batch_size=batch_size,
                                 in_mem=True,
                                 field=self.exp.tgt_vocab)
        keep_models = args.get('keep_models', 4)
        if num_epochs <= self.start_epoch:
            raise Exception(
                f'The model was already trained to {self.start_epoch} epochs. '
                f'Please increase epoch or clear the existing models')
        for ep in range(self.start_epoch, num_epochs):
            for i, batch in tqdm(enumerate(train_data)):
                # Step clear gradients
                self.model.zero_grad()
                # Step Run forward pass.

                pred_len = self.model(batch)
                # Step. Compute the loss, gradients, and update the parameters by

                #  calling optimizer.step()
                loss = self.loss_func(
                    pred_len, tensor(batch.y_len.data, dtype=torch.float))
                loss.backward()
                self.optimizer.step()

            # end of epoch: score on validation data and store a checkpoint
            log.info(f'Epoch {ep+1} complete.. validating...')
            score = self.evaluate(val_data)
            self.exp.store_model(epoch=ep,
                                 model=self.model,
                                 score=score,
                                 keep=keep_models)