def main():
    config = setup_config()
    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, dev_data, opt_data = load_data(config, vocab_src=vocab_src,
                                               vocab_tgt=vocab_tgt)

    dl = DataLoader(train_data, batch_size=config["batch_size_train"],
                    shuffle=True, num_workers=4)
    bucketing_dl = BucketingParallelDataLoader(dl)

    # Optional back-translation data: wrap it in cycle() so it can be drawn
    # from indefinitely, since it is consumed at a different rate than the
    # bilingual data.
    cycle_iterate_dl_back = None
    if config["back_prefix"] is not None:
        dl_back = DataLoader(dataset=opt_data['back'],
                             batch_size=config["batch_size_train"],
                             shuffle=True, num_workers=2)
        bucketing_dl_back = BucketingParallelDataLoader(dl_back)
        cycle_iterate_dl_back = cycle(bucketing_dl_back)

    model, train_fn, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    train(model, train_fn, validate_fn, bucketing_dl, dev_data,
          vocab_src, vocab_tgt, config,
          cycle_iterate_dl_back=cycle_iterate_dl_back)
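# Why cycle()? The back-translation set is typically exhausted before the
# bilingual set, so the loader is wrapped so that next() never raises
# StopIteration. A minimal sketch, assuming cycle behaves like
# itertools.cycle (the project may use a re-shuffling variant):
from itertools import cycle

batches = cycle(["batch_0", "batch_1"])  # toy stand-in for a data loader
assert [next(batches) for _ in range(5)] == \
    ["batch_0", "batch_1", "batch_0", "batch_1", "batch_0"]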
def main(): config = setup_config() config["train_prefix"] = 'sample' vocab_src, vocab_tgt = load_vocabularies(config) train_data, _, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt) val_dl = DataLoader(train_data, batch_size=config["batch_size_eval"], shuffle=False, num_workers=4) val_dl = BucketingParallelDataLoader(val_dl) sentences_x, sentences_y = next(val_dl) model, _, validate_fn = create_model(vocab_src, vocab_tgt, config) model.to(torch.device(config["device"])) # checkpoint_path = "output/aevnmt_z_loss_en-de_run_0/checkpoints/aevnmt_z_loss_en-de_run_0" checkpoint_path = "output/aevnmt_z_loss_de-en_run_0/checkpoints/aevnmt_z_loss_de-en_run_0" state = torch.load(checkpoint_path) model.load_state_dict(state['state_dict']) sample_from_latent(model, vocab_src, vocab_tgt, config) sample_from_posterior(model, sentences_x, vocab_src, vocab_tgt, config)
def main(): # config = setup_config() # config["train_prefix"] = 'sample' # train_data, dev_data, vocab_src, vocab_tgt = load_dataset_joey(config) # dataloader = data.make_data_iter(train_data, 1, train=True) # sample = next(iter(dataloader)) # batch = Batch(sample, vocab_src.stoi[config["pad"]], use_cuda=False if config["device"] == "cpu" else True) # # model_xy, model_yx, _, _, validate_fn = create_models(vocab_src, vocab_tgt, config) # model_xy.to(torch.device(config["device"])) # model_yx.to(torch.device(config["device"])) # # checkpoint_path = "output/coaevnmt_greedy_lm_off_run_5/checkpoints/coaevnmt_greedy_lm_off_run_5" # state = torch.load(checkpoint_path) # model_xy.load_state_dict(state['state_dict_xy']) # model_yx.load_state_dict(state['state_dict_yx']) config = setup_config() config["train_prefix"] = 'sample' vocab_src, vocab_tgt = load_vocabularies(config) train_data, _, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt) val_dl = DataLoader(train_data, batch_size=config["batch_size_eval"], shuffle=False, num_workers=4) val_dl = BucketingParallelDataLoader(val_dl) sentences_x, sentences_y = next(val_dl) # model, _, validate_fn = create_model(vocab_src, vocab_tgt, config) # model.to(torch.device(config["device"])) # model_xy, model_yx, _, _, validate_fn = create_models(vocab_src, vocab_tgt, config) # model_xy.to(torch.device(config["device"])) # model_yx.to(torch.device(config["device"])) model_xy, model_yx, bi_train_fn, mono_train_fn, validate_fn = create_models( vocab_src, vocab_tgt, config) model_xy.to(torch.device(config["device"])) model_yx.to(torch.device(config["device"])) checkpoint_path = "output/coaevnmt_curc_diff_greedy_lr2_en-de_run_1/checkpoints/coaevnmt_curc_diff_greedy_lr2_en-de_run_1" state = torch.load(checkpoint_path) model_xy.load_state_dict(state['state_dict_xy']) model_yx.load_state_dict(state['state_dict_yx']) print("validation: {}-{}".format(config["src"], config["tgt"])) sample_from_latent(model_xy, vocab_src, vocab_tgt, config) sample_from_posterior(model_xy, sentences_x, vocab_src, vocab_tgt, config) print("") print("validation: {}-{}".format(config["tgt"], config["src"])) sample_from_latent(model_yx, vocab_tgt, vocab_src, config) sample_from_posterior(model_yx, sentences_y, vocab_tgt, vocab_src, config)
def main(): # config = setup_config() config = setup_config() config["dev_prefix"] = "dev" # config["dev_prefix"] = "test_2016_flickr.lc.norm.tok" # config["dev_prefix"] = "test_2017_flickr.lc.norm.tok" vocab_src, vocab_tgt = load_vocabularies(config) _, dev_data, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt) checkpoint_path = "output/aevnmt_z_loss_en-de_run_1/checkpoints/aevnmt_z_loss_en-de_run_1" if config["model_type"] == "coaevnmt": model_xy, model_yx, _, _, validate_fn = create_models( vocab_src, vocab_tgt, config) model_xy.to(torch.device(config["device"])) model_yx.to(torch.device(config["device"])) state = torch.load(checkpoint_path) model_xy.load_state_dict(state['state_dict_xy']) model_yx.load_state_dict(state['state_dict_yx']) printKL(model_xy, dev_data, vocab_src, vocab_tgt, config, direction="xy") printKL(model_yx, dev_data, vocab_tgt, vocab_src, config, direction="yx") elif config["model_type"] == "aevnmt": model, _, _ = create_model(vocab_src, vocab_tgt, config) model.to(torch.device(config["device"])) state = torch.load(checkpoint_path) model.load_state_dict(state['state_dict']) printKL(model, dev_data, vocab_src, vocab_tgt, config, direction="None")
def main(): # config = setup_config() config = setup_config() # config["dev_prefix"] = "dev" # config["dev_prefix"] = "test_2016_flickr.lc.norm.tok" config["dev_prefix"] = "test_2017_flickr.lc.norm.tok" vocab_src, vocab_tgt = load_vocabularies(config) _, dev_data, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt) model_xy, model_yx, _, _, validate_fn = create_models( vocab_src, vocab_tgt, config) model_xy.to(torch.device(config["device"])) model_yx.to(torch.device(config["device"])) # checkpoint_path = "output/coaevnmt_greedy_lm_off_run_5/checkpoints/coaevnmt_greedy_lm_off_run_5" # checkpoint_path = "output/coaevnmt_lr3_curriculum_en-de_run_4/checkpoints/coaevnmt_lr3_curriculum_en-de_run_4" # checkpoint_path = "output/coaevnmt_lr3_no_curriculum_no_warmup_en-de_run_4/checkpoints/coaevnmt_lr3_no_curriculum_no_warmup_en-de_run_4" # checkpoint_path = "output/coaevnmt_lr3_beam_dec_3_en-de_run_3/checkpoints/coaevnmt_lr3_beam_dec_3_en-de_run_3" # checkpoint_path = "output/conmt_anc_en-de_run_3/checkpoints/conmt_anc_en-de_run_3" # checkpoint_path = "output/conmt_greedy_2en-de_run_3/checkpoints/conmt_greedy_2en-de_run_3" # checkpoint_path = "output/conmt_greedy_no_warmup_en-de_run_3/checkpoints/conmt_greedy_no_warmup_en-de_run_3" # checkpoint_path = "output/conmt_beam_dec_3_2en-de_run_1/checkpoints/conmt_beam_dec_3_2en-de_run_1" # checkpoint_path = "output/conmt_beam_dec_5_2en-de_run_3/checkpoints/conmt_beam_dec_5_2en-de_run_3" # checkpoint_path = "output/conmt_beam_dec_10_2en-de_run_3/checkpoints/conmt_beam_dec_10_2en-de_run_3" # checkpoint_path = "output/conmt_beam_dec_10_en-de_run_3/checkpoints/conmt_beam_dec_10_en-de_run_3" # checkpoint_path = "output/conmt_curc_diff_greedy_conv_yx_en-de_run_7/checkpoints/conmt_curc_diff_greedy_conv_yx_en-de_run_7" # checkpoint_path = "output/conmt_final_full_en-de_run_3/checkpoints/conmt_final_full_en-de_run_3" # checkpoint_path = "output/conmt_final_half_en-de_run_3/checkpoints/conmt_final_half_en-de_run_3" # checkpoint_path = "output/conmt_final_fourth_en-de_run_3/checkpoints/conmt_final_fourth_en-de_run_3" # checkpoint_path = "output/coaevnmt_final_full_en-de_run_3/checkpoints/coaevnmt_final_full_en-de_run_3" # checkpoint_path = "output/coaevnmt_final_half_en-de_run_3/checkpoints/coaevnmt_final_half_en-de_run_3" checkpoint_path = "output/coaevnmt_final_fourth_en-de_run_3/checkpoints/coaevnmt_final_fourth_en-de_run_3" state = torch.load(checkpoint_path) model_xy.load_state_dict(state['state_dict_xy']) model_yx.load_state_dict(state['state_dict_yx']) print("validation: {}-{}".format(config["src"], config["tgt"])) evaluate(model_xy, dev_data, vocab_src, vocab_tgt, config, direction="xy") print("validation: {}-{}".format(config["tgt"], config["src"])) evaluate(model_yx, dev_data, vocab_tgt, vocab_src, config, direction="yx")
def main():
    config = setup_config()
    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, dev_data, opt_data = load_data(config, vocab_src=vocab_src,
                                               vocab_tgt=vocab_tgt)

    dl_xy = DataLoader(train_data, batch_size=config["batch_size_train"],
                       shuffle=True, num_workers=2)
    bucketing_dl_xy = BucketingParallelDataLoader(dl_xy)

    dl_x = DataLoader(dataset=opt_data['mono_src'],
                      batch_size=config["batch_size_train"],
                      shuffle=True, num_workers=2)
    bucketing_dl_x = BucketingTextDataLoader(dl_x)
    cycle_iterate_dl_x = cycle(bucketing_dl_x)

    dl_y = DataLoader(dataset=opt_data['mono_tgt'],
                      batch_size=config["batch_size_train"],
                      shuffle=True, num_workers=2)
    bucketing_dl_y = BucketingTextDataLoader(dl_y)
    cycle_iterate_dl_y = cycle(bucketing_dl_y)

    model, bi_train_fn, mono_train_fn, validate_fn = create_model(
        vocab_src, vocab_tgt, config)

    # Sanity check (was leftover debug prints followed by a deliberate crash):
    # the joint model shares its embeddings with the xy-direction model.
    assert model.emb_src is model.model_xy.emb_src
    assert model.emb_tgt is model.model_xy.emb_tgt

    model.to(torch.device(config["device"]))

    train(model, bi_train_fn, mono_train_fn, validate_fn, bucketing_dl_xy,
          dev_data, cycle_iterate_dl_x, cycle_iterate_dl_y,
          vocab_src, vocab_tgt, config)
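# Hedged sketch of how train() presumably consumes the cycled loaders
# (an assumption; the real loop lives in the training module): one bilingual
# batch per step, plus one monolingual batch per direction:
#
#     for sentences_x, sentences_y in bucketing_dl_xy:
#         mono_y = next(cycle_iterate_dl_y)   # synthesize x with model_yx
#         mono_x = next(cycle_iterate_dl_x)   # synthesize y with model_xy
#         bi_train_fn(...)                    # supervised step
#         mono_train_fn(...)                  # back-translation step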
def main(): config = setup_config() config["dev_prefix"] = "comparable" vocab_src, vocab_tgt = load_vocabularies(config) _, dev_data, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt) model, _, validate_fn = create_model(vocab_src, vocab_tgt, config) model.to(torch.device(config["device"])) checkpoint_path = "{}/cond_nmt_de-en_run_7/checkpoints/cond_nmt_de-en_run_7".format( config["out_dir"]) state = torch.load(checkpoint_path) model.load_state_dict(state['state_dict']) model.eval() device = torch.device( "cpu") if config["device"] == "cpu" else torch.device("cuda:0") with torch.no_grad(): model_hypotheses = [] references = [] val_dl = DataLoader(dev_data, batch_size=config["batch_size_eval"], shuffle=False, num_workers=4) # val_dl = BucketingParallelDataLoader(val_dl) for sentences_x, sentences_y in tqdm(val_dl): sentences_x = np.array(sentences_x) seq_len = np.array([len(s.split()) for s in sentences_x]) sort_keys = np.argsort(-seq_len) sentences_x = sentences_x[sort_keys] # # sentences_y = np.array(sentences_y) x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src, device) x_mask = x_mask.unsqueeze(1) if config["model_type"] == "aevnmt": qz = model.inference(x_in, x_mask, x_len) z = qz.mean enc_output, enc_hidden = model.encode(x_in, x_len, z) dec_hidden = model.init_decoder(enc_output, enc_hidden, z) raw_hypothesis = beam_search(model.decoder, model.emb_tgt, model.generate_tm, enc_output, dec_hidden, x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN], vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN], config) else: enc_output, enc_hidden = model.encode(x_in, x_len) dec_hidden = model.decoder.initialize(enc_output, enc_hidden) raw_hypothesis = beam_search(model.decoder, model.emb_tgt, model.generate_tm, enc_output, dec_hidden, x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN], vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN], config) hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt) inverse_sort_keys = np.argsort(sort_keys) model_hypotheses += hypothesis[inverse_sort_keys].tolist() references += sentences_y.tolist() save_hypotheses(model_hypotheses, 0, config, None) model_hypotheses, references = clean_sentences(model_hypotheses, references, config) bleu = sacrebleu.raw_corpus_bleu(model_hypotheses, [references]).score print(bleu)
def main(): config = setup_config() config["dev_prefix"] = "comparable" vocab_src, vocab_tgt = load_vocabularies(config) _, dev_data, _ = load_data(config, vocab_src=vocab_src, vocab_tgt=vocab_tgt) # _, dev_data, vocab_src, vocab_tgt = load_dataset_joey(config) model, _, validate_fn = create_model(vocab_src, vocab_tgt, config) model.to(torch.device(config["device"])) checkpoint_path = "{}/cond_nmt_new_de-en_run_2/checkpoints/cond_nmt_new_de-en_run_2".format( config["out_dir"]) state = torch.load(checkpoint_path) model.load_state_dict(state['state_dict']) model.eval() device = torch.device( "cpu") if config["device"] == "cpu" else torch.device("cuda:0") with torch.no_grad(): model_hypotheses = [] references = [] val_dl = DataLoader(dev_data, batch_size=config["batch_size_eval"], shuffle=False, num_workers=4) val_dl = BucketingParallelDataLoader(val_dl) for sentences_x, sentences_y in tqdm(val_dl): x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src, device) x_mask = x_mask.unsqueeze(1) if config["model_type"] == "aevnmt": qz = model.inference(x_in, x_mask) z = qz.mean enc_output, enc_hidden = model.encode(x_in, z) dec_hidden = model.init_decoder(enc_output, enc_hidden, z) raw_hypothesis = beam_search(model.decoder, model.emb_tgt, model.generate_tm, enc_output, dec_hidden, x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN], vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN], config) else: enc_output, enc_hidden = model.encode(x_in) dec_hidden = model.decoder.initialize(enc_output, enc_hidden) raw_hypothesis = beam_search(model.decoder, model.emb_tgt, model.generate, enc_output, dec_hidden, x_mask, vocab_tgt.size(), vocab_tgt[SOS_TOKEN], vocab_tgt[EOS_TOKEN], vocab_tgt[PAD_TOKEN], config) hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt) model_hypotheses += hypothesis.tolist() references += sentences_y.tolist() save_hypotheses(model_hypotheses, 0, config, None)
def __init__(self, config, load=False, shared_params=None):
    self.config = config

    # vocabulary
    self.w2i, self.i2w = load_vocabularies(config)

    theano.config.compute_test_value = config['compute_test_values']  # 'warn' to enable

    assert config['eos_symbol'] in self.w2i, \
        'word vocabulary needs to include eos'
    assert config['unk_symbol'] in self.w2i, \
        'word vocabulary needs to include UNK'

    # save vocabularies
    output_dir = config['output_dir']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger.warn('Vocabularies: {}'.format(len(self.w2i)))

    # hyper-parameters
    self.dim = config['dim']
    self.dim_emb = config['dim_emb']
    # self.dim_emb_image = config['dim_emb_image']
    self.dim_per_factor = config['dim_per_factor']
    self.dim_v = config['dim_v']
    self.dropout = config['dropout']
    self.dropout_word = config['dropout_word']
    self.dropout_emb = config['dropout_emb']
    self.dropout_rec = config['dropout_rec']
    self.verbose = config['verbose']

    gain = 'relu' if config['activation_mlp'] == 'relu' else 1.0

    if self.config['max_words'] == -1:
        self.voc_size = len(self.w2i)
    else:
        self.voc_size = self.config['max_words']
    logger.warn('Using actual vocsize: {}'.format(self.voc_size))

    # self.params is a dictionary that will hold all the parameters in
    # the strict order defined in this __init__()
    self.params = OrderedDict()
    self.theano_params = OrderedDict()

    # build the bi-rnn encoder
    # N.B. params are added inside this method
    self.init_encoder_params(**config)

    if config['mode'] == 'imaginet':
        # build the MLP for image prediction
        self.params = Model.init_mlp_params(self.params, gain=gain, **config)

    if load:
        self.load(os.path.join(config['output_dir'], config['model_name']))

    self.init_theano_params()

    # multi-task support:
    # we replace whatever parameters we already have at this point with
    # the ones that we received as optional input;
    # this needs to be done BEFORE building the model
    if shared_params is not None:
        self.apply_shared_theano_params(shared_params)

    # compile theano functions for training the model
    trng, f_loss, f_grad_shared, f_update, raw_grads = \
        self.compile_training_functions(config)
    self.trng = trng
    self.f_loss = f_loss
    self.f_grad_shared = f_grad_shared
    self.f_update = f_update
    self.raw_grads = raw_grads

    # compile theano functions for evaluating the model
    f_encode, f_predict = self.compile_mlp_predict(config, trng=trng)
    self.f_encode = f_encode
    self.f_predict = f_predict
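# Hedged sketch of what init_theano_params() above presumably does (an
# assumption, not the project's actual implementation): wrap every numpy
# parameter in a theano shared variable so the update rules can mutate the
# values in place across calls.
from collections import OrderedDict

import numpy as np
import theano

def init_theano_params_sketch(params):
    """Turn an OrderedDict of numpy arrays into theano shared variables."""
    theano_params = OrderedDict()
    for name, value in params.items():
        theano_params[name] = theano.shared(value.astype(np.float32), name=name)
    return theano_params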