def __test_seq2seq_model__():
    """Smoke test: train a small RNN seq2seq model on toy number sequences
    and greedy-decode a fixed source batch at every checkpoint.
    """
    from rtg.dummy import DummyExperiment
    from rtg.module.decoder import Decoder

    vocab_size = 50
    batch_size = 30
    exp = DummyExperiment("tmp.work", config={'model_type': 'seq2seq'},
                          read_only=True, vocab_size=vocab_size)
    emb_size = 100
    model_dim = 100
    steps = 3000
    check_pt = 100
    assert Batch.bos_val == 2
    src = tensor([[4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                  [13, 12, 11, 10, 9, 8, 7, 6, 5, 4]])
    src_lens = tensor([src.size(1)] * src.size(0))

    for reverse in (False,):
        # train two models;
        # first, just copy the numbers, i.e. y = x
        # second, reverse the numbers y=(V + reserved - x)
        log.info(f"====== REVERSE={reverse}; VOCAB={vocab_size}======")
        model, args = RNNMT.make_model('DummyA', 'DummyB', vocab_size, vocab_size,
                                       attention='dot', emb_size=emb_size,
                                       hid_size=model_dim, n_layers=1)
        trainer = SteppedRNNMTTrainer(exp=exp, model=model, lr=0.01,
                                      warmup_steps=100)
        decr = Decoder.new(exp, model)

        def check_pt_callback(**args):
            # log a greedy decode of the fixed source batch to eyeball progress
            for score, seq in decr.greedy_decode(src, src_lens, max_len=17):
                log.info(f'{score:.4f} :: {seq}')

        trainer.train(steps=steps, check_point=check_pt, batch_size=batch_size,
                      check_pt_callback=check_pt_callback)
def __test_model__():
    """Smoke test: train a small transformer NMT model (enc_layers=0) on toy
    sequences and greedy-decode a fixed source batch at each checkpoint.

    Fix: removed the unreachable `if False:` debug block — it referenced a
    name `model` that is never defined in this scope (the model lives at
    `trainer.model`), so it would raise NameError if ever enabled.
    """
    from rtg.dummy import DummyExperiment
    from rtg.module.decoder import Decoder

    vocab_size = 30
    args = {
        'src_vocab': vocab_size,
        'tgt_vocab': vocab_size,
        'enc_layers': 0,
        'dec_layers': 4,
        'hid_size': 64,
        'ff_size': 64,
        'n_heads': 4,
        'activation': 'gelu'
    }
    config = {'model_type': 'tfmnmt', 'trainer': {'init_args': {'chunk_size': 2}}}
    exp = DummyExperiment("work.tmp.t2t", config=config, read_only=True,
                          vocab_size=vocab_size)
    exp.model_args = args
    trainer = TransformerTrainer(exp=exp, warmup_steps=200)
    decr = Decoder.new(exp, trainer.model)
    assert 2 == Batch.bos_val
    src = tensor(
        [[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, Batch.eos_val, Batch.pad_value],
         [13, 12, 11, 10, 9, 8, 7, 6, Batch.eos_val, Batch.pad_value,
          Batch.pad_value, Batch.pad_value]])
    src_lens = tensor([src.size(1)] * src.size(0))

    def check_pt_callback(**args):
        # greedy-decode the fixed batch so progress is visible in the logs
        res = decr.greedy_decode(src, src_lens, max_len=12)
        for score, seq in res:
            log.info(f'{score:.4f} :: {seq}')

    batch_size = 50
    steps = 1000
    check_point = 50
    trainer.train(steps=steps, check_point=check_point, batch_size=batch_size,
                  check_pt_callback=check_pt_callback)
def __test_model__():
    """Smoke test for the wvskptfmnmt model: trains briefly on toy data and
    greedy-decodes a fixed source batch at every checkpoint.
    """
    from rtg.data.dummy import DummyExperiment
    from rtg import Batch, my_tensor as tensor
    from rtg.module.decoder import Decoder

    vocab_size = 24
    args = {
        'src_vocab': vocab_size,
        'tgt_vocab': vocab_size,
        'enc_layers': 0,
        'dec_layers': 4,
        'hid_size': 32,
        'eff_dims': [],
        'dff_dims': [64, 128, 128, 64],
        'enc_depth_probs': [],
        'dec_depth_probs': [1.0, 0.75, 0.5, 0.75],
        'n_heads': 4,
        'activation': 'relu'
    }
    config = {
        'model_type': 'wvskptfmnmt',
        'trainer': {'init_args': {'chunk_size': 2, 'grad_accum': 5}},
        'optim': {
            'args': {
                # "cross_entropy", "smooth_kld", "binary_cross_entropy", "triplet_loss"
                'criterion': "smooth_kld",
                'lr': 0.01,
                'inv_sqrt': True
            }
        }
    }
    exp = DummyExperiment("work.tmp.wvskptfmnmt", config=config, read_only=True,
                          vocab_size=vocab_size)
    exp.model_args = args
    trainer = WVSKPTransformerTrainer(exp=exp, warmup_steps=200,
                                      **config['optim']['args'])
    decr = Decoder.new(exp, trainer.model)
    assert Batch.bos_val == 2
    src = tensor(
        [[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, Batch.eos_val, Batch.pad_value],
         [13, 12, 11, 10, 9, 8, 7, 6, Batch.eos_val, Batch.pad_value,
          Batch.pad_value, Batch.pad_value]])
    src_lens = tensor([src.size(1)] * src.size(0))

    def check_pt_callback(**args):
        # log greedy decodes of the fixed batch at each checkpoint
        for score, seq in decr.greedy_decode(src, src_lens, max_len=12):
            log.info(f'{score:.4f} :: {seq}')

    trainer.train(steps=200, check_point=10, batch_size=50,
                  check_pt_callback=check_pt_callback)
def __init__(self, batch: List[Example], sort_dec=False, batch_first=True,
             add_eos_x=True, add_eos_y=True, add_bos_x=False, add_bos_y=False):
    """Pack a list of Examples into padded batch tensors.

    :param batch: List of Examples
    :param sort_dec: True if the examples should be sorted in descending
        order of their source sequence lengths
    :param batch_first: True if the first dimension is batch (otherwise
        the tensors are transposed to time-major)
    :param add_eos_x: append EOS to source sequences
    :param add_eos_y: append EOS to target sequences
    :param add_bos_x: prepend BOS to source sequences
    :param add_bos_y: prepend BOS to target sequences
    """
    self.eos_x = add_eos_x
    self.eos_y = add_eos_y
    self.bos_x = add_bos_x
    self.bos_y = add_bos_y
    self.batch_first = batch_first
    self.bos_eos_check(batch, 'x', add_bos_x, add_eos_x)
    if sort_dec:
        batch = sorted(batch, key=lambda _: len(_.x), reverse=True)
    self._len = len(batch)
    self.x_len = tensor([len(e.x) for e in batch])
    self.x_toks = self.x_len.sum().float().item()
    # FIX: was `self.x_len.max()` (a 0-d tensor); take .item() so it is a
    # plain int, consistent with max_y_len below and safe to pass as a
    # dimension to torch.full()
    self.max_x_len = self.x_len.max().item()
    # create x_seqs on CPU RAM and move to GPU at once
    self.x_seqs = torch.full(size=(self._len, self.max_x_len),
                             fill_value=self.pad_value, dtype=torch.long)
    for i, ex in enumerate(batch):
        self.x_seqs[i, :len(ex.x)] = torch.tensor(ex.x, dtype=torch.long)
    self.x_seqs = self.x_seqs.to(device)
    if not batch_first:  # transpose to time-major
        self.x_seqs = self.x_seqs.t()
    first_y = batch[0].y
    self.has_y = first_y is not None
    if self.has_y:
        self.bos_eos_check(batch, 'y', add_bos_y, add_eos_y)
        self.y_len = tensor([len(e.y) for e in batch])
        self.y_toks = self.y_len.sum().float().item()
        self.max_y_len = self.y_len.max().item()
        y_seqs = torch.full(size=(self._len, self.max_y_len),
                            fill_value=self.pad_value, dtype=torch.long)
        for i, ex in enumerate(batch):
            y_seqs[i, :len(ex.y)] = torch.tensor(ex.y, dtype=torch.long)
        self.y_seqs = y_seqs.to(device)
        if not batch_first:  # transpose to time-major
            self.y_seqs = self.y_seqs.t()
def forward(self, batch: Batch):
    """Step-by-step decode of a batch, scoring the gold target token at each
    step; the next decoder input randomly alternates between the gold token
    and the model's own argmax prediction (scheduled-sampling style).

    :param batch: batch with x and y sequences; must be batch-first
    :return: (batch_size, max_y_len - 1) tensor of per-step scores of the
        gold next token (assumes self.dec emits log-probabilities — TODO
        confirm against decoder implementation)
    """
    assert batch.batch_first
    batch_size = len(batch)
    enc_outs, enc_hids = self.encode(batch.x_seqs, batch.x_len, hids=None,
                                     max_y_len=batch.max_y_len)
    # start every sequence with BOS; decoder hidden state seeded from encoder
    dec_inps = tensor([[batch.bos_val]] * batch_size, dtype=torch.long)
    dec_hids = enc_hids
    outp_probs = torch.zeros((batch.max_y_len - 1, batch_size), device=device)

    for t in range(1, batch.max_y_len):
        word_probs, dec_hids, _ = self.dec(enc_outs, dec_inps, dec_hids)

        # expected output;; log probability for these indices should be high
        expct_word_idx = batch.y_seqs[:, t].view(batch_size, 1)
        expct_word_log_probs = word_probs.gather(dim=1, index=expct_word_idx)
        outp_probs[t - 1] = expct_word_log_probs.squeeze()

        # Randomly switch between gold and the prediction next word
        if random.choice((False, True)):
            dec_inps = expct_word_idx  # Next input is current target
        else:
            # feed the model's own best guess back in
            pred_word_idx = word_probs.argmax(dim=1)
            dec_inps = pred_word_idx.view(batch_size, 1)
    # transpose (time, batch) -> (batch, time)
    return outp_probs.t()
def batch_forward(self, batch):
    """Language-model style scoring of a batch's source sequences: predict
    each token from its left context and collect the gold-token scores,
    with scheduled-sampling of the next input.

    :param batch: batch with only x (source) sequences; must be batch-first
    :return: (batch_size, max_x_len - 1) tensor of per-step scores of the
        gold next token (assumes self(...) emits log-probabilities — TODO
        confirm)
    """
    assert batch.batch_first
    batch_size = len(batch)
    # this model scores the x side itself; no separate target sequences
    assert not batch.has_y
    seqs = batch.x_seqs
    max_seq_len = batch.max_x_len
    prev_out = tensor([[BOS_TOK_IDX]] * batch_size, dtype=torch.long)
    last_hidden = None
    outp_probs = torch.zeros((max_seq_len - 1, batch_size), device=device)

    for t in range(1, max_seq_len):
        # no encoder context: pure left-to-right LM step
        word_probs, last_hidden, _ = self(enc_outs=None, prev_out=prev_out,
                                          last_hidden=last_hidden)

        # expected output;; log probability for these indices should be high
        expct_word_idx = seqs[:, t].view(batch_size, 1)
        expct_word_log_probs = word_probs.gather(dim=1, index=expct_word_idx)
        outp_probs[t - 1] = expct_word_log_probs.squeeze()

        # Randomly switch between gold and the prediction next word
        if random.choice((False, True)):
            prev_out = expct_word_idx  # Next input is current target
        else:
            pred_word_idx = word_probs.argmax(dim=1)
            prev_out = pred_word_idx.view(batch_size, 1)
    # transpose (time, batch) -> (batch, time)
    return outp_probs.t()
def __test_model__():
    """Smoke test for the hybrid (transformer encoder + RNN decoder) model:
    trains briefly on toy number sequences and greedy-decodes a fixed source
    batch at every checkpoint.
    """
    from rtg.dummy import DummyExperiment
    from rtg import Batch, my_tensor as tensor
    from rtg.module.decoder import Decoder

    vocab_size = 24
    args = {
        'src_vocab': vocab_size,
        'tgt_vocab': vocab_size,
        'enc_layers': 4,
        'dec_layers': 3,
        'hid_size': 128,
        'ff_size': 256,
        'dec_rnn_type': 'GRU',
        'enc_heads': 4
    }
    exp = DummyExperiment("work.tmp.hybridmt",
                          config={'model_type': 'hybridmt'},
                          read_only=True, vocab_size=vocab_size)
    exp.model_args = args
    trainer = HybridMTTrainer(exp=exp, warmup_steps=200)
    decr = Decoder.new(exp, trainer.model)
    assert Batch.bos_val == 2
    src = tensor([[4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                  [13, 12, 11, 10, 9, 8, 7, 6, 5, 4]])
    src_lens = tensor([src.size(1)] * src.size(0))

    def check_pt_callback(**args):
        # log greedy decodes of the fixed batch at each checkpoint
        for score, seq in decr.greedy_decode(src, src_lens, max_len=12):
            log.info(f'{score:.4f} :: {seq}')

    trainer.train(steps=2000, check_point=50, batch_size=50,
                  check_pt_callback=check_pt_callback)
def forward(self, enc_outs, enc_hids, max_len, bos_idx):
    """Decode max_len steps from the given encoder state, then feed the
    collected decoder output states back through the encoder.

    :param enc_outs: encoder outputs, batch-first (len() gives batch size)
    :param enc_hids: pair of encoder hidden states; both must have
        batch_size on dim 1 (asserted below)
    :param max_len: number of decoding steps to run
    :param bos_idx: token index used as decoder input
    :return: (enc_outs, enc_hids) from re-encoding the decoder states
    """
    batch_size = len(enc_outs)
    assert batch_size == enc_hids[0].shape[1] == enc_hids[1].shape[1]
    dec_inps = tensor([[bos_idx]] * batch_size, dtype=torch.long)
    dec_hids = enc_hids
    # buffer for decoder hidden outputs (gen_probs=False => raw states,
    # not vocabulary distributions)
    result = torch.zeros((batch_size, max_len, self.dec.hid_size),
                         device=device)
    for t in range(max_len):
        dec_outs, dec_hids, _ = self.dec(enc_outs, dec_inps, dec_hids,
                                         gen_probs=False)
        result[:, t, :] = dec_outs
        # NOTE(review): dec_inps is never updated inside this loop — every
        # step feeds bos_idx; confirm whether that is intended.
        # TODO: check how hidden state flows
    # re-encode the decoder states; pre_embedded=True skips the embedding
    # lookup since `result` is already dense vectors
    enc_outs, enc_hids = self.enc(result, [max_len] * batch_size,
                                  pre_embedded=True)
    return enc_outs, enc_hids
def evaluate(self, data) -> float:
    """Compute the total loss of the length-prediction model over `data`.

    Fixes relative to the original:
    - runs under ``torch.no_grad()`` (evaluation needs no autograd graph);
    - drops the per-batch ``zero_grad()`` call, which is pointless when no
      gradients are computed;
    - accumulates ``loss.item()`` so the return value is a plain float (as
      the annotation promises) instead of a tensor that keeps every
      batch's graph alive.

    :param data: iterable of batches carrying ``y_len`` gold lengths
    :return: sum of per-batch losses as a Python float
    """
    tot_loss = 0.0
    with torch.no_grad():
        for i, batch in tqdm(enumerate(data)):
            # forward pass: predicted lengths for this batch
            pred_len = self.model(batch)
            # loss against the gold target lengths
            loss = self.loss_func(pred_len,
                                  tensor(batch.y_len.data, dtype=torch.float))
            tot_loss += loss.item()
    return tot_loss
def decode_sentence(self, line: str, max_len=20, prepared=False,
                    **args) -> List[StrHypothesis]:
    """Decode a single sentence with beam search (plus a greedy decode for
    logging when debug is enabled).

    :param line: input sentence — raw text, or space-separated token ids
        when `prepared` is True
    :param max_len: maximum output length
    :param prepared: True if `line` is already tokenized into ids
    :return: list of (score, text) hypotheses from the beam
    """
    line = line.strip()
    if prepared:
        in_seq = [int(t) for t in line.split()]
        # ensure BOS ... EOS framing on pre-tokenized input
        if in_seq[0] != self.bos_val:
            in_seq.insert(0, self.bos_val)
        if in_seq[-1] != self.eos_val:
            in_seq.append(self.eos_val)
    else:
        in_seq = self.inp_vocab.encode_as_ids(line, add_eos=True, add_bos=False)
    in_seqs = tensor(in_seq, dtype=torch.long).view(1, -1)
    in_lens = tensor([len(in_seq)], dtype=torch.long)

    if self.debug:
        greedy_score, greedy_out = self.greedy_decode(in_seqs, in_lens,
                                                      max_len, **args)[0]
        greedy_out = self.out_vocab.decode_ids(greedy_out, trunc_eos=True)
        log.debug(f'Greedy : score: {greedy_score:.4f} :: {greedy_out}')

    beams: List[List[Hypothesis]] = self.beam_decode(in_seqs, in_lens,
                                                     max_len, **args)
    # only one sentence was passed in, so keep only its beam
    first_beam = beams[0]
    result = []
    for i, (score, beam_toks) in enumerate(first_beam):
        out = self.out_vocab.decode_ids(beam_toks, trunc_eos=True)
        if self.debug:
            log.debug(f"Beam {i}: score:{score:.4f} :: {out}")
        result.append((score, out))
    return result
def train(self, num_epochs: int, batch_size: int, **args):
    """Train the length-prediction model up to `num_epochs` epochs,
    validating and storing a checkpoint after each epoch.

    :param num_epochs: total epoch count to reach; must exceed
        self.start_epoch (training resumes from there)
    :param batch_size: number of examples per batch
    :param args: extra options; `keep_models` (default 4) controls how many
        checkpoints are retained
    :raises ValueError: if the model was already trained to num_epochs
        (fix: was a bare `Exception`; ValueError is more specific and still
        caught by any caller handling Exception)
    """
    log.info(
        f'Going to train for {num_epochs} epochs; batch_size={batch_size}')
    train_data = BatchIterable(self.exp.train_file, batch_size=batch_size,
                               in_mem=True, field=self.exp.tgt_vocab)
    val_data = BatchIterable(self.exp.valid_file, batch_size=batch_size,
                             in_mem=True, field=self.exp.tgt_vocab)
    keep_models = args.get('keep_models', 4)
    if num_epochs <= self.start_epoch:
        raise ValueError(
            f'The model was already trained to {self.start_epoch} epochs. '
            f'Please increase epoch or clear the existing models')
    for ep in range(self.start_epoch, num_epochs):
        for i, batch in tqdm(enumerate(train_data)):
            # clear stale gradients before this batch's update
            self.model.zero_grad()
            # forward pass: predicted lengths
            pred_len = self.model(batch)
            # loss against gold lengths, then backprop and parameter update
            loss = self.loss_func(
                pred_len, tensor(batch.y_len.data, dtype=torch.float))
            loss.backward()
            self.optimizer.step()
        log.info(f'Epoch {ep+1} complete.. validating...')
        score = self.evaluate(val_data)
        self.exp.store_model(epoch=ep, model=self.model, score=score,
                             keep=keep_models)