def main():
    config = setup_config()
    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, dev_data, opt_data = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt)

    dl = DataLoader(train_data, batch_size=config["batch_size_train"],
                    shuffle=True, num_workers=4)
    bucketing_dl = BucketingParallelDataLoader(dl)

    # Optional back-translated data: wrap it in cycle() so it can be drawn
    # from indefinitely while the parallel loader drives the epoch.
    cycle_iterate_dl_back = None
    if config["back_prefix"] is not None:
        dl_back = DataLoader(dataset=opt_data['back'], batch_size=config["batch_size_train"],
                             shuffle=True, num_workers=2)
        bucketing_dl_back = BucketingParallelDataLoader(dl_back)
        cycle_iterate_dl_back = cycle(bucketing_dl_back)

    model, train_fn, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    train(model, train_fn, validate_fn, bucketing_dl, dev_data, vocab_src,
          vocab_tgt, config, cycle_iterate_dl_back=cycle_iterate_dl_back)
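# Minimal sketch (not part of the training script) of why the auxiliary loader
# is wrapped in cycle(): the parallel loader defines the epoch length, while
# next() on the cycled loader restarts it transparently whenever it is
# exhausted. Assumes cycle is itertools.cycle, as used above; the batch names
# are illustrative stand-ins.
def _cycle_sketch():
    from itertools import cycle
    parallel_batches = ["p1", "p2", "p3", "p4", "p5"]  # stands in for bucketing_dl
    back_batches = cycle(["b1", "b2"])                 # stands in for bucketing_dl_back
    for pb in parallel_batches:
        bb = next(back_batches)  # never raises StopIteration
        print(pb, bb)            # p1 b1, p2 b2, p3 b1, p4 b2, p5 b1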
def main():
    config = setup_config()
    config["train_prefix"] = 'sample'
    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, _, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt)

    val_dl = DataLoader(train_data, batch_size=config["batch_size_eval"],
                        shuffle=False, num_workers=4)
    val_dl = BucketingParallelDataLoader(val_dl)
    sentences_x, sentences_y = next(val_dl)

    model, _, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    # checkpoint_path = "output/aevnmt_z_loss_en-de_run_0/checkpoints/aevnmt_z_loss_en-de_run_0"
    checkpoint_path = "output/aevnmt_z_loss_de-en_run_0/checkpoints/aevnmt_z_loss_de-en_run_0"
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])

    sample_from_latent(model, vocab_src, vocab_tgt, config)
    sample_from_posterior(model, sentences_x, vocab_src, vocab_tgt, config)
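# Sketch contrasting the two sampling modes invoked above: sample_from_latent
# draws z from the prior p(z), whereas sample_from_posterior draws z from the
# approximate posterior q(z|x) for given source sentences. The distribution
# parameters below are illustrative assumptions (in the repo they come from
# model.prior_loc/model.prior_scale and model.inference); decoding is omitted.
def _sampling_sketch(latent_dim=32):
    import torch
    # prior: a standard Normal, as in the model's p(z)
    pz = torch.distributions.Normal(torch.zeros(latent_dim), torch.ones(latent_dim))
    z_prior = pz.sample()                      # unconditional sample
    # posterior: mu and sigma would come from the inference network
    mu, sigma = torch.randn(latent_dim), torch.rand(latent_dim) + 0.1
    qz = torch.distributions.Normal(mu, sigma)
    z_posterior = qz.sample()                  # sentence-conditioned sample
    return z_prior, z_posterior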
def main():
    # Legacy loading path via joeynmt, kept for reference:
    # config = setup_config()
    # config["train_prefix"] = 'sample'
    # train_data, dev_data, vocab_src, vocab_tgt = load_dataset_joey(config)
    # dataloader = data.make_data_iter(train_data, 1, train=True)
    # sample = next(iter(dataloader))
    # batch = Batch(sample, vocab_src.stoi[config["pad"]],
    #               use_cuda=False if config["device"] == "cpu" else True)
    #
    # model_xy, model_yx, _, _, validate_fn = create_models(vocab_src, vocab_tgt, config)
    # model_xy.to(torch.device(config["device"]))
    # model_yx.to(torch.device(config["device"]))
    #
    # checkpoint_path = "output/coaevnmt_greedy_lm_off_run_5/checkpoints/coaevnmt_greedy_lm_off_run_5"
    # state = torch.load(checkpoint_path)
    # model_xy.load_state_dict(state['state_dict_xy'])
    # model_yx.load_state_dict(state['state_dict_yx'])

    config = setup_config()
    config["train_prefix"] = 'sample'
    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, _, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt)

    val_dl = DataLoader(train_data, batch_size=config["batch_size_eval"],
                        shuffle=False, num_workers=4)
    val_dl = BucketingParallelDataLoader(val_dl)
    sentences_x, sentences_y = next(val_dl)

    model_xy, model_yx, bi_train_fn, mono_train_fn, validate_fn = create_models(
        vocab_src, vocab_tgt, config)
    model_xy.to(torch.device(config["device"]))
    model_yx.to(torch.device(config["device"]))

    checkpoint_path = "output/coaevnmt_curc_diff_greedy_lr2_en-de_run_1/checkpoints/coaevnmt_curc_diff_greedy_lr2_en-de_run_1"
    state = torch.load(checkpoint_path)
    model_xy.load_state_dict(state['state_dict_xy'])
    model_yx.load_state_dict(state['state_dict_yx'])

    print("validation: {}-{}".format(config["src"], config["tgt"]))
    sample_from_latent(model_xy, vocab_src, vocab_tgt, config)
    sample_from_posterior(model_xy, sentences_x, vocab_src, vocab_tgt, config)
    print("")

    print("validation: {}-{}".format(config["tgt"], config["src"]))
    sample_from_latent(model_yx, vocab_tgt, vocab_src, config)
    sample_from_posterior(model_yx, sentences_y, vocab_tgt, vocab_src, config)
def validate(model, dev_data, vocab_src, vocab_tgt, epoch, config, direction=None):
    model.eval()
    device = torch.device("cpu") if config["device"] == "cpu" else torch.device("cuda:0")
    with torch.no_grad():
        model_hypotheses = []
        references = []

        val_dl = DataLoader(dev_data, batch_size=config["batch_size_eval"],
                            shuffle=False, num_workers=4)
        val_dl = BucketingParallelDataLoader(val_dl)
        for sentences_x, sentences_y in val_dl:
            # Translate x -> y by default; swap the sides for the reverse direction.
            if direction is None or direction == "xy":
                x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src, device)
            else:
                x_in, _, x_mask, x_len = create_batch(sentences_y, vocab_src, device)
            x_mask = x_mask.unsqueeze(1)

            enc_output, enc_hidden = model.encode(x_in, x_len)
            dec_hidden = model.init_decoder(enc_output, enc_hidden)

            raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                         model.generate_tm, enc_output, dec_hidden,
                                         x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN],
                                         vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN],
                                         config)

            hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt)
            model_hypotheses += hypothesis.tolist()
            if direction is None or direction == "xy":
                references += sentences_y.tolist()
            else:
                references += sentences_x.tolist()

    save_hypotheses(model_hypotheses, epoch, config)
    model_hypotheses, references = clean_sentences(model_hypotheses, references, config)
    bleu = compute_bleu(model_hypotheses, references, epoch, config, direction)
    return bleu
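# Sketch of what a corpus-level BLEU computation such as compute_bleu above
# typically boils down to, here via sacrebleu. This is an assumption for
# illustration: the repo's own compute_bleu may differ in tokenization,
# cleaning, and logging.
def _bleu_sketch():
    import sacrebleu
    hypotheses = ["the cat sits on the mat", "hello world"]
    references = ["the cat sat on the mat", "hello world"]
    bleu = sacrebleu.corpus_bleu(hypotheses, [references])
    return bleu.score  # a float in [0, 100]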
def main():
    config = setup_config()
    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, dev_data, opt_data = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt)

    dl_xy = DataLoader(train_data, batch_size=config["batch_size_train"],
                       shuffle=True, num_workers=2)
    bucketing_dl_xy = BucketingParallelDataLoader(dl_xy)

    dl_x = DataLoader(dataset=opt_data['mono_src'], batch_size=config["batch_size_train"],
                      shuffle=True, num_workers=2)
    bucketing_dl_x = BucketingTextDataLoader(dl_x)
    cycle_iterate_dl_x = cycle(bucketing_dl_x)

    dl_y = DataLoader(dataset=opt_data['mono_tgt'], batch_size=config["batch_size_train"],
                      shuffle=True, num_workers=2)
    bucketing_dl_y = BucketingTextDataLoader(dl_y)
    cycle_iterate_dl_y = cycle(bucketing_dl_y)

    model, bi_train_fn, mono_train_fn, validate_fn = create_model(
        vocab_src, vocab_tgt, config)

    # Sanity check: the joint model must share its embedding tables with the
    # xy-direction submodel, i.e. they hold the very same nn.Embedding objects.
    assert model.emb_src is model.model_xy.emb_src
    assert model.emb_tgt is model.model_xy.emb_tgt

    model.to(torch.device(config["device"]))

    train(model, bi_train_fn, mono_train_fn, validate_fn, bucketing_dl_xy,
          dev_data, cycle_iterate_dl_x, cycle_iterate_dl_y, vocab_src,
          vocab_tgt, config)
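# Sketch of the embedding-sharing invariant asserted above: when two modules
# are handed the same nn.Embedding instance, assignment does not copy it, so
# an `is` identity check is the cheapest way to verify the tie (and identity
# implies shared parameters and shared gradient updates). The class and names
# below are illustrative, not the repo's model classes.
def _sharing_sketch():
    import torch.nn as nn

    class Direction(nn.Module):
        def __init__(self, emb):
            super().__init__()
            self.emb_src = emb  # stored by reference, not copied

    shared = nn.Embedding(10, 4)
    model_xy = Direction(shared)
    model_yx = Direction(shared)
    assert model_xy.emb_src is model_yx.emb_src  # same object => shared weights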
def validate(model, dev_data, vocab_src, vocab_tgt, epoch, config, direction=None):
    model.eval()
    device = torch.device("cpu") if config["device"] == "cpu" else torch.device("cuda:0")
    with torch.no_grad():
        model_hypotheses = []
        references = []

        val_dl = DataLoader(dev_data, batch_size=config["batch_size_eval"],
                            shuffle=False, num_workers=2)
        val_dl = BucketingParallelDataLoader(val_dl)

        val_kl = 0
        for sentences_x, sentences_y in val_dl:
            if direction is None or direction == "xy":
                x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src, device)
            else:
                x_in, _, x_mask, x_len = create_batch(sentences_y, vocab_src, device)
            x_mask = x_mask.unsqueeze(1)

            # Deterministic decoding: condition on the posterior mean of z
            # rather than on a sample.
            qz = model.inference(x_in, x_mask, x_len)
            z = qz.mean

            # Accumulate KL(q(z|x) || p(z)), summed over latent dimensions,
            # as a validation diagnostic reported alongside BLEU.
            pz = torch.distributions.Normal(loc=model.prior_loc,
                                            scale=model.prior_scale).expand(qz.mean.size())
            kl_loss = torch.distributions.kl.kl_divergence(qz, pz)
            kl_loss = kl_loss.sum(dim=1)
            val_kl += kl_loss.sum(dim=0)

            enc_output, enc_hidden = model.encode(x_in, x_len, z)
            dec_hidden = model.init_decoder(enc_output, enc_hidden, z)

            raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                         model.generate_tm, enc_output, dec_hidden,
                                         x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN],
                                         vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN],
                                         config, z)

            hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt)
            model_hypotheses += hypothesis.tolist()
            if direction is None or direction == "xy":
                references += sentences_y.tolist()
            else:
                references += sentences_x.tolist()

        val_kl /= len(dev_data)

    save_hypotheses(model_hypotheses, epoch, config, direction)
    model_hypotheses, references = clean_sentences(model_hypotheses, references, config)
    bleu = compute_bleu(model_hypotheses, references, epoch, config, direction, kl=val_kl)
    return bleu
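# Self-contained sketch of the KL term accumulated above: for a diagonal
# Gaussian posterior q(z|x) = N(mu, sigma^2) and a standard Normal prior p(z),
# torch's kl_divergence returns the per-dimension KL, which is then summed
# over the latent axis. Shapes and values are illustrative, not from the repo.
def _kl_sketch():
    import torch
    batch, latent_dim = 4, 8
    mu = torch.randn(batch, latent_dim)
    sigma = torch.rand(batch, latent_dim) + 0.1  # keep scales strictly positive
    qz = torch.distributions.Normal(loc=mu, scale=sigma)
    pz = torch.distributions.Normal(loc=torch.zeros_like(mu),
                                    scale=torch.ones_like(sigma))
    kl = torch.distributions.kl.kl_divergence(qz, pz)  # [batch, latent_dim]
    kl_per_sentence = kl.sum(dim=1)                    # sum over latent dims
    # Matches the closed form 0.5 * (sigma^2 + mu^2 - 1 - log sigma^2) per dim.
    closed_form = 0.5 * (sigma ** 2 + mu ** 2 - 1 - (sigma ** 2).log())
    assert torch.allclose(kl, closed_form, atol=1e-5)
    return kl_per_sentence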
def main():
    config = setup_config()
    config["dev_prefix"] = "comparable"
    vocab_src, vocab_tgt = load_vocabularies(config)
    _, dev_data, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt)
    # _, dev_data, vocab_src, vocab_tgt = load_dataset_joey(config)

    model, _, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    checkpoint_path = "{}/cond_nmt_new_de-en_run_2/checkpoints/cond_nmt_new_de-en_run_2".format(
        config["out_dir"])
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])
    model.eval()

    device = torch.device("cpu") if config["device"] == "cpu" else torch.device("cuda:0")
    with torch.no_grad():
        model_hypotheses = []
        references = []

        val_dl = DataLoader(dev_data, batch_size=config["batch_size_eval"],
                            shuffle=False, num_workers=4)
        val_dl = BucketingParallelDataLoader(val_dl)
        for sentences_x, sentences_y in tqdm(val_dl):
            x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src, device)
            x_mask = x_mask.unsqueeze(1)

            if config["model_type"] == "aevnmt":
                # AEVNMT: condition encoder and decoder on the posterior mean of z.
                qz = model.inference(x_in, x_mask)
                z = qz.mean
                enc_output, enc_hidden = model.encode(x_in, z)
                dec_hidden = model.init_decoder(enc_output, enc_hidden, z)
                raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                             model.generate_tm, enc_output, dec_hidden,
                                             x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN],
                                             vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN],
                                             config)
            else:
                # Conditional NMT baseline: no latent variable.
                enc_output, enc_hidden = model.encode(x_in)
                dec_hidden = model.decoder.initialize(enc_output, enc_hidden)
                raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                             model.generate, enc_output, dec_hidden,
                                             x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN],
                                             vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN],
                                             config)

            hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt)
            model_hypotheses += hypothesis.tolist()
            references += sentences_y.tolist()

    save_hypotheses(model_hypotheses, 0, config, None)
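# Sketch of the checkpoint convention assumed throughout these scripts: a dict
# with a 'state_dict' entry (or 'state_dict_xy'/'state_dict_yx' for the
# co-training models) saved with torch.save and restored with load_state_dict.
# The model and path below are illustrative stand-ins, not the repo's NMT model.
def _checkpoint_sketch(tmp_path="checkpoint_example.pt"):
    import torch
    model = torch.nn.Linear(4, 2)  # stand-in for the NMT model
    torch.save({'state_dict': model.state_dict()}, tmp_path)
    restored = torch.nn.Linear(4, 2)
    state = torch.load(tmp_path)
    restored.load_state_dict(state['state_dict'])
    restored.eval()  # switch to eval mode before decoding, as above
    return restored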