def main(argv):
    """Entry point: seed RNGs, build the dataset, and train a StagedModel.

    Checkpoints are written to FLAGS.model_dir every FLAGS.n_checkpoint
    epochs; the model is evaluated after every epoch.
    """
    torch.manual_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    hlog.flags()
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # os.path.exists + os.mkdir (and creates missing parents too).
    os.makedirs(FLAGS.model_dir, exist_ok=True)
    dataset = get_dataset()
    model = StagedModel(
        dataset.vocab, copy=True, self_attention=False
    ).to(_flags.device())
    model.prepare(dataset)

    def callback(i_epoch):
        # Per-epoch hook: evaluate, then checkpoint on schedule.
        model.eval()
        evaluate(dataset, model)
        if (i_epoch + 1) % FLAGS.n_checkpoint == 0:
            torch.save(
                model.state_dict(),
                os.path.join(FLAGS.model_dir, "model.%05d.chk" % i_epoch))

    train(dataset, model, dataset.sample_comp_train, callback, staged=True)
def rnn_main(dataset):
    """Train a LanguageModel on *dataset* and evaluate it by utterance scoring.

    Validation runs every epoch; the test split is scored on the last epoch
    (or every epoch when FLAGS.test_curve is set). Checkpoints go to
    FLAGS.model_dir every FLAGS.n_checkpoint epochs.
    """
    model = LanguageModel(dataset.vocab).to(_flags.device())

    def sample():
        return dataset.sample_train(aug_ratio=FLAGS.aug_ratio)

    def score_utts(utts):
        # Wrap each utterance as an (empty-context, utterance) pair so the
        # non-staged batcher accepts it, then rescale the mean token loss
        # back to a total over all predicted tokens.
        wrapped = [((), u) for u in utts]
        batch = make_batch(wrapped, model.vocab, staged=False)
        mean_loss = model(None, batch.out_data, None, None).item()
        n_predicted = sum(len(u) - 1 for u in utts)
        return mean_loss * n_predicted

    def callback(i_epoch):
        model.eval()
        is_final = i_epoch == FLAGS.n_epochs - 1
        with hlog.task("eval_val", timer=False):
            val_acc = evaluate(score_utts, dataset.get_val(), dataset)
        if FLAGS.TEST and (is_final or FLAGS.test_curve):
            with hlog.task("eval_test", timer=False):
                evaluate(score_utts, dataset.get_test(), dataset)
        if (i_epoch + 1) % FLAGS.n_checkpoint == 0:
            torch.save(
                model.state_dict(),
                os.path.join(FLAGS.model_dir, "model.%05d.chk" % i_epoch))
        return val_acc

    train(dataset, model, sample, callback, staged=False)
def make_batch(samples, vocab, staged):
    """Collate raw samples into a Datum of padded tensor batches.

    When *staged* is true, each sample is ((ref_inp, ref_out), (inp, out),
    *extra) and the four sequence collections are batched separately.
    Otherwise each sample is (inp, out, *extra) and two extra views of the
    target are built for the copy mechanism.
    """
    device = _flags.device()
    columns = list(zip(*samples))

    if staged:
        ref, tgt, *extra = columns
        ref_inp, ref_out = zip(*ref)
        inp, out = zip(*tgt)
        ref_inp_data, ref_out_data, inp_data, out_data = (
            batch_seqs(seq).to(device)
            for seq in (ref_inp, ref_out, inp, out))
        return Datum(
            (ref_inp, ref_out), (inp, out),
            (ref_inp_data, ref_out_data), (inp_data, out_data),
            None, None, extra)

    inp, out, *extra = columns
    inp_data = batch_seqs(inp).to(device)
    out_data = batch_seqs(out).to(device)
    # Two aligned views of each target for the copy mechanism:
    #  - copy_out keeps tokens that also occur in the source (copyable)
    #    and pads the rest;
    #  - direct_out replaces copyable tokens with the special COPY symbol
    #    and keeps the rest for direct generation.
    # The first and last tokens (presumably sequence delimiters — TODO
    # confirm) are always kept verbatim.
    direct_out = []
    copy_out = []
    for src_seq, tgt_seq in zip(inp, out):
        src_interior = src_seq[1:-1]
        copyable = [t if t in src_interior else vocab.pad()
                    for t in tgt_seq[1:-1]]
        copy_out.append([tgt_seq[0]] + copyable + [tgt_seq[-1]])
        generated = [vocab.copy() if t in src_interior else t
                     for t in tgt_seq[1:-1]]
        direct_out.append([tgt_seq[0]] + generated + [tgt_seq[-1]])
    direct_out_data = batch_seqs(direct_out).to(device)
    copy_out_data = batch_seqs(copy_out).to(device)
    return Datum(inp, out, inp_data, out_data, direct_out_data,
                 copy_out_data, extra)
def main(argv):
    """Entry point: seed RNGs, build the dataset, and train a ContextModel.

    Unlike the other training entry points, no per-epoch evaluation or
    checkpointing is performed (the callback is a no-op).
    """
    torch.manual_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    hlog.flags()
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # os.path.exists + os.mkdir (and creates missing parents too).
    os.makedirs(FLAGS.model_dir, exist_ok=True)
    dataset = get_dataset()
    model = ContextModel(dataset.vocab).to(_flags.device())

    def callback(i_epoch):
        # Intentionally empty: no evaluation or checkpoints for this model.
        pass

    train(dataset, model, dataset.sample_ctx_train, callback, staged=False)
def main(argv):
    """Entry point: train a GeneratorModel, optionally with augmentation data.

    If FLAGS.augment names a JSON file, its contents are mixed into the
    dataset. Validation runs every epoch; train/test evaluation and
    checkpointing follow the usual FLAGS schedule.
    """
    torch.manual_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    hlog.flags()
    if FLAGS.augment is not None:
        with open(FLAGS.augment) as fh:
            aug_data = json.load(fh)
    else:
        aug_data = []
    dataset = get_dataset(aug_data=aug_data, invert=FLAGS.invert)
    model = GeneratorModel(
        dataset.vocab, copy=True, self_attention=False
    ).to(_flags.device())

    # One-element list so the nested closures can rebind the flag.
    # NOTE(review): this starts True, so the FINE_TUNE switch in callback()
    # below can never fire — the pre-fine-tune phase appears to be disabled.
    fine_tune = [True]

    def sample():
        # Both branches of the former `if fine_tune[0]` check returned this
        # same expression, so the check was dead code and has been collapsed.
        # TODO confirm whether the pre-fine-tune phase was meant to sample
        # with a different aug_ratio.
        return dataset.sample_train(aug_ratio=FLAGS.aug_ratio)

    def callback(i_epoch):
        if not fine_tune[0] and i_epoch >= 20:
            hlog.log("FINE_TUNE")
            fine_tune[0] = True
        model.eval()
        final = i_epoch == FLAGS.n_epochs - 1
        with hlog.task("eval_train", timer=False):
            # Score a fixed-size sample of training data each epoch.
            train_data = [dataset.sample_train() for _ in range(1000)]
            evaluate(model, train_data, dataset)
        with hlog.task("eval_val", timer=False):
            val_data = dataset.get_val()
            val_acc = evaluate(model, val_data, dataset, vis=final, beam=final)
        if FLAGS.TEST and (final or FLAGS.test_curve):
            with hlog.task("eval_test", timer=False):
                test_data = dataset.get_test()
                evaluate(model, test_data, dataset, beam=final)
        if (i_epoch + 1) % FLAGS.n_checkpoint == 0:
            torch.save(
                model.state_dict(),
                os.path.join(FLAGS.model_dir, "model.%05d.chk" % i_epoch))
        return val_acc

    train(dataset, model, sample, callback, staged=False)
def forward(self, inp, out, _dout, _cout, idx):
    """Compute the decoding loss for *out* conditioned on encoder states.

    One encoder state per batch element is selected by *idx* (assumes enc
    is time-major, i.e. (seq, batch, dim) — TODO confirm), projected, and
    used as the decoder's initial state. _dout and _cout are unused here.
    """
    enc, _state = self.encoder(inp)
    # Build a (1, batch, dim) index buffer whose entries are idx[b] at every
    # feature position, so gather along dim 0 picks enc[idx[b], b, :].
    sel = np.empty((1, enc.shape[1], enc.shape[2]), dtype=np.int64)
    sel[...] = np.asarray(idx)[np.newaxis, :, np.newaxis]
    rep = enc.gather(0, torch.LongTensor(sel).to(_flags.device()))
    rep = self.proj(rep)
    hidden = (rep, torch.zeros_like(rep))

    prev = out[:-1, :]      # decoder input: all but the last token
    target = out[1:, :]     # prediction target: all but the first token
    pred, *_ = self.decoder(hidden, prev.shape[0], prev)
    # Flatten to (tokens, vocab) vs (tokens,) for the token-level loss.
    n_tokens = target.shape[0] * target.shape[1]
    pred = pred.view(n_tokens, -1)
    return self.loss(pred, target.contiguous().view(-1))