Example #1
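Training entry point: wraps the parallel training data in a bucketing loader, optionally adds a cycled loader over back-translated data, and starts train().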
def main():
    config = setup_config()

    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, dev_data, opt_data = load_data(config,
                                               vocab_src=vocab_src,
                                               vocab_tgt=vocab_tgt)
    dl = DataLoader(train_data,
                    batch_size=config["batch_size_train"],
                    shuffle=True,
                    num_workers=4)
    bucketing_dl = BucketingParallelDataLoader(dl)

    cycle_iterate_dl_back = None
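    # when back-translated data is configured, wrap it in a cycled iterator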
    if config["back_prefix"] != None:
        dl_back = DataLoader(dataset=opt_data['back'],
                             batch_size=config["batch_size_train"],
                             shuffle=True,
                             num_workers=2)
        bucketing_dl_back = BucketingParallelDataLoader(dl_back)
        cycle_iterate_dl_back = cycle(bucketing_dl_back)

    model, train_fn, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    train(model,
          train_fn,
          validate_fn,
          bucketing_dl,
          dev_data,
          vocab_src,
          vocab_tgt,
          config,
          cycle_iterate_dl_back=cycle_iterate_dl_back)
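Example #2

Sampling from a trained AEVNMT model: loads a checkpoint, takes one batch of training data, and samples sentences from the latent prior and from the approximate posterior of that batch.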
def main():
    config = setup_config()
    config["train_prefix"] = 'sample'

    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, _, _ = load_data(config,
                                 vocab_src=vocab_src,
                                 vocab_tgt=vocab_tgt)

    val_dl = DataLoader(train_data,
                        batch_size=config["batch_size_eval"],
                        shuffle=False,
                        num_workers=4)
    val_dl = BucketingParallelDataLoader(val_dl)
    sentences_x, sentences_y = next(val_dl)

    model, _, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    # checkpoint_path = "output/aevnmt_z_loss_en-de_run_0/checkpoints/aevnmt_z_loss_en-de_run_0"
    checkpoint_path = "output/aevnmt_z_loss_de-en_run_0/checkpoints/aevnmt_z_loss_de-en_run_0"
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])

    sample_from_latent(model, vocab_src, vocab_tgt, config)
    sample_from_posterior(model, sentences_x, vocab_src, vocab_tgt, config)
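Example #3

The CoAEVNMT variant of the previous example: loads both directional models from one checkpoint and samples from the prior and the posterior in both translation directions.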
def main():
    config = setup_config()
    config["train_prefix"] = 'sample'

    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, _, _ = load_data(config,
                                 vocab_src=vocab_src,
                                 vocab_tgt=vocab_tgt)

    val_dl = DataLoader(train_data,
                        batch_size=config["batch_size_eval"],
                        shuffle=False,
                        num_workers=4)
    val_dl = BucketingParallelDataLoader(val_dl)
    sentences_x, sentences_y = next(val_dl)

    model_xy, model_yx, bi_train_fn, mono_train_fn, validate_fn = create_models(
        vocab_src, vocab_tgt, config)
    model_xy.to(torch.device(config["device"]))
    model_yx.to(torch.device(config["device"]))

    checkpoint_path = "output/coaevnmt_curc_diff_greedy_lr2_en-de_run_1/checkpoints/coaevnmt_curc_diff_greedy_lr2_en-de_run_1"
    state = torch.load(checkpoint_path)
    model_xy.load_state_dict(state['state_dict_xy'])
    model_yx.load_state_dict(state['state_dict_yx'])

    print("validation: {}-{}".format(config["src"], config["tgt"]))
    sample_from_latent(model_xy, vocab_src, vocab_tgt, config)
    sample_from_posterior(model_xy, sentences_x, vocab_src, vocab_tgt, config)
    print("")
    print("validation: {}-{}".format(config["tgt"], config["src"]))
    sample_from_latent(model_yx, vocab_tgt, vocab_src, config)
    sample_from_posterior(model_yx, sentences_y, vocab_tgt, vocab_src, config)
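Example #4

Diagnostics entry point: loads a checkpoint and prints the KL term on the development set, either for a single AEVNMT model or for both directions of a CoAEVNMT pair.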
def main():
    config = setup_config()
    # evaluate on the dev set; the flickr test sets
    # ("test_2016_flickr.lc.norm.tok", "test_2017_flickr.lc.norm.tok")
    # can be substituted here
    config["dev_prefix"] = "dev"

    vocab_src, vocab_tgt = load_vocabularies(config)
    _, dev_data, _ = load_data(config,
                               vocab_src=vocab_src,
                               vocab_tgt=vocab_tgt)

    checkpoint_path = "output/aevnmt_z_loss_en-de_run_1/checkpoints/aevnmt_z_loss_en-de_run_1"

    if config["model_type"] == "coaevnmt":
        model_xy, model_yx, _, _, validate_fn = create_models(
            vocab_src, vocab_tgt, config)
        model_xy.to(torch.device(config["device"]))
        model_yx.to(torch.device(config["device"]))

        state = torch.load(checkpoint_path)
        model_xy.load_state_dict(state['state_dict_xy'])
        model_yx.load_state_dict(state['state_dict_yx'])

        printKL(model_xy,
                dev_data,
                vocab_src,
                vocab_tgt,
                config,
                direction="xy")
        printKL(model_yx,
                dev_data,
                vocab_tgt,
                vocab_src,
                config,
                direction="yx")
    elif config["model_type"] == "aevnmt":
        model, _, _ = create_model(vocab_src, vocab_tgt, config)
        model.to(torch.device(config["device"]))

        state = torch.load(checkpoint_path)
        model.load_state_dict(state['state_dict'])

        printKL(model,
                dev_data,
                vocab_src,
                vocab_tgt,
                config,
                direction="None")
Example #5
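Evaluation of a trained CoAEVNMT pair: loads both directional models from one checkpoint and runs evaluate() on the chosen test set for both translation directions.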
def main():
    config = setup_config()
    # evaluate on the 2017 flickr test set; "dev" or
    # "test_2016_flickr.lc.norm.tok" can be substituted here
    config["dev_prefix"] = "test_2017_flickr.lc.norm.tok"

    vocab_src, vocab_tgt = load_vocabularies(config)
    _, dev_data, _ = load_data(config,
                               vocab_src=vocab_src,
                               vocab_tgt=vocab_tgt)
    model_xy, model_yx, _, _, validate_fn = create_models(
        vocab_src, vocab_tgt, config)
    model_xy.to(torch.device(config["device"]))
    model_yx.to(torch.device(config["device"]))

    # checkpoint_path = "output/coaevnmt_greedy_lm_off_run_5/checkpoints/coaevnmt_greedy_lm_off_run_5"
    # checkpoint_path = "output/coaevnmt_lr3_curriculum_en-de_run_4/checkpoints/coaevnmt_lr3_curriculum_en-de_run_4"
    # checkpoint_path = "output/coaevnmt_lr3_no_curriculum_no_warmup_en-de_run_4/checkpoints/coaevnmt_lr3_no_curriculum_no_warmup_en-de_run_4"
    # checkpoint_path = "output/coaevnmt_lr3_beam_dec_3_en-de_run_3/checkpoints/coaevnmt_lr3_beam_dec_3_en-de_run_3"
    # checkpoint_path = "output/conmt_anc_en-de_run_3/checkpoints/conmt_anc_en-de_run_3"
    # checkpoint_path = "output/conmt_greedy_2en-de_run_3/checkpoints/conmt_greedy_2en-de_run_3"

    # checkpoint_path = "output/conmt_greedy_no_warmup_en-de_run_3/checkpoints/conmt_greedy_no_warmup_en-de_run_3"
    # checkpoint_path = "output/conmt_beam_dec_3_2en-de_run_1/checkpoints/conmt_beam_dec_3_2en-de_run_1"
    # checkpoint_path = "output/conmt_beam_dec_5_2en-de_run_3/checkpoints/conmt_beam_dec_5_2en-de_run_3"
    # checkpoint_path = "output/conmt_beam_dec_10_2en-de_run_3/checkpoints/conmt_beam_dec_10_2en-de_run_3"
    # checkpoint_path = "output/conmt_beam_dec_10_en-de_run_3/checkpoints/conmt_beam_dec_10_en-de_run_3"
    # checkpoint_path = "output/conmt_curc_diff_greedy_conv_yx_en-de_run_7/checkpoints/conmt_curc_diff_greedy_conv_yx_en-de_run_7"
    # checkpoint_path = "output/conmt_final_full_en-de_run_3/checkpoints/conmt_final_full_en-de_run_3"
    # checkpoint_path = "output/conmt_final_half_en-de_run_3/checkpoints/conmt_final_half_en-de_run_3"
    # checkpoint_path = "output/conmt_final_fourth_en-de_run_3/checkpoints/conmt_final_fourth_en-de_run_3"

    # checkpoint_path = "output/coaevnmt_final_full_en-de_run_3/checkpoints/coaevnmt_final_full_en-de_run_3"
    # checkpoint_path = "output/coaevnmt_final_half_en-de_run_3/checkpoints/coaevnmt_final_half_en-de_run_3"
    checkpoint_path = "output/coaevnmt_final_fourth_en-de_run_3/checkpoints/coaevnmt_final_fourth_en-de_run_3"

    state = torch.load(checkpoint_path)
    model_xy.load_state_dict(state['state_dict_xy'])
    model_yx.load_state_dict(state['state_dict_yx'])

    print("validation: {}-{}".format(config["src"], config["tgt"]))
    evaluate(model_xy, dev_data, vocab_src, vocab_tgt, config, direction="xy")

    print("validation: {}-{}".format(config["tgt"], config["src"]))
    evaluate(model_yx, dev_data, vocab_tgt, vocab_src, config, direction="yx")
Example #6
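Co-training entry point: builds a bucketing loader over the parallel data and cycled loaders over monolingual source and target text, checks that the embeddings are shared with the xy sub-model, and starts training.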
def main():
    config = setup_config()

    vocab_src, vocab_tgt = load_vocabularies(config)
    train_data, dev_data, opt_data = load_data(config,
                                               vocab_src=vocab_src,
                                               vocab_tgt=vocab_tgt)

    dl_xy = DataLoader(train_data,
                       batch_size=config["batch_size_train"],
                       shuffle=True,
                       num_workers=2)
    bucketing_dl_xy = BucketingParallelDataLoader(dl_xy)

    dl_x = DataLoader(dataset=opt_data['mono_src'],
                      batch_size=config["batch_size_train"],
                      shuffle=True,
                      num_workers=2)
    bucketing_dl_x = BucketingTextDataLoader(dl_x)
    cycle_iterate_dl_x = cycle(bucketing_dl_x)

    dl_y = DataLoader(dataset=opt_data['mono_tgt'],
                      batch_size=config["batch_size_train"],
                      shuffle=True,
                      num_workers=2)
    bucketing_dl_y = BucketingTextDataLoader(dl_y)
    cycle_iterate_dl_y = cycle(bucketing_dl_y)

    model, bi_train_fn, mono_train_fn, validate_fn = create_model(
        vocab_src, vocab_tgt, config)

    # sanity check: the embeddings must be the same objects as the ones
    # held by the xy sub-model
    assert model.emb_src is model.model_xy.emb_src
    assert model.emb_tgt is model.model_xy.emb_tgt

    model.to(torch.device(config["device"]))

    train(model, bi_train_fn, mono_train_fn, validate_fn, bucketing_dl_xy,
          dev_data, cycle_iterate_dl_x, cycle_iterate_dl_y, vocab_src,
          vocab_tgt, config)
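Example #7

Beam-search translation of a comparable dev set without the bucketing loader: batches are length-sorted manually, decoded, restored to their original order, saved, and scored with SacreBLEU.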
def main():
    config = setup_config()
    config["dev_prefix"] = "comparable"
    vocab_src, vocab_tgt = load_vocabularies(config)
    _, dev_data, _ = load_data(config,
                               vocab_src=vocab_src,
                               vocab_tgt=vocab_tgt)

    model, _, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    checkpoint_path = "{}/cond_nmt_de-en_run_7/checkpoints/cond_nmt_de-en_run_7".format(
        config["out_dir"])

    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])

    model.eval()
    device = torch.device("cpu" if config["device"] == "cpu" else "cuda:0")
    with torch.no_grad():
        model_hypotheses = []
        references = []

        val_dl = DataLoader(dev_data,
                            batch_size=config["batch_size_eval"],
                            shuffle=False,
                            num_workers=4)
        # no bucketing loader here, so each batch is sorted by source length
        # manually (longest first); the order is restored after decoding
        for sentences_x, sentences_y in tqdm(val_dl):
            sentences_x = np.array(sentences_x)
            seq_len = np.array([len(s.split()) for s in sentences_x])
            sort_keys = np.argsort(-seq_len)
            sentences_x = sentences_x[sort_keys]
            sentences_y = np.array(sentences_y)

            x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src,
                                                  device)
            x_mask = x_mask.unsqueeze(1)

            if config["model_type"] == "aevnmt":
                qz = model.inference(x_in, x_mask, x_len)
                z = qz.mean

                enc_output, enc_hidden = model.encode(x_in, x_len, z)
                dec_hidden = model.init_decoder(enc_output, enc_hidden, z)

                raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                             model.generate_tm, enc_output,
                                             dec_hidden, x_mask,
                                             vocab_tgt.size(),
                                             vocab_tgt[SOS_TOKEN],
                                             vocab_tgt[EOS_TOKEN],
                                             vocab_tgt[PAD_TOKEN], config)
            else:
                enc_output, enc_hidden = model.encode(x_in, x_len)
                dec_hidden = model.decoder.initialize(enc_output, enc_hidden)

                raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                             model.generate_tm, enc_output,
                                             dec_hidden, x_mask,
                                             vocab_tgt.size(),
                                             vocab_tgt[SOS_TOKEN],
                                             vocab_tgt[EOS_TOKEN],
                                             vocab_tgt[PAD_TOKEN], config)

            hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt)

            inverse_sort_keys = np.argsort(sort_keys)
            model_hypotheses += hypothesis[inverse_sort_keys].tolist()

            references += sentences_y.tolist()
        save_hypotheses(model_hypotheses, 0, config, None)
        model_hypotheses, references = clean_sentences(model_hypotheses,
                                                       references, config)
        bleu = sacrebleu.raw_corpus_bleu(model_hypotheses, [references]).score
        print(bleu)
Example #8
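As in Example #7, but with the bucketing parallel loader, so no manual length sorting is needed; the hypotheses are saved to disk without BLEU scoring.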
def main():
    config = setup_config()
    config["dev_prefix"] = "comparable"
    vocab_src, vocab_tgt = load_vocabularies(config)
    _, dev_data, _ = load_data(config,
                               vocab_src=vocab_src,
                               vocab_tgt=vocab_tgt)

    model, _, validate_fn = create_model(vocab_src, vocab_tgt, config)
    model.to(torch.device(config["device"]))

    checkpoint_path = "{}/cond_nmt_new_de-en_run_2/checkpoints/cond_nmt_new_de-en_run_2".format(
        config["out_dir"])
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])

    model.eval()
    device = torch.device("cpu" if config["device"] == "cpu" else "cuda:0")
    with torch.no_grad():
        model_hypotheses = []
        references = []

        val_dl = DataLoader(dev_data,
                            batch_size=config["batch_size_eval"],
                            shuffle=False,
                            num_workers=4)
        val_dl = BucketingParallelDataLoader(val_dl)
        for sentences_x, sentences_y in tqdm(val_dl):
            x_in, _, x_mask, x_len = create_batch(sentences_x, vocab_src,
                                                  device)
            x_mask = x_mask.unsqueeze(1)

            if config["model_type"] == "aevnmt":
                qz = model.inference(x_in, x_mask)
                z = qz.mean

                enc_output, enc_hidden = model.encode(x_in, z)
                dec_hidden = model.init_decoder(enc_output, enc_hidden, z)

                raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                             model.generate_tm, enc_output,
                                             dec_hidden, x_mask,
                                             vocab_tgt.size(),
                                             vocab_tgt[SOS_TOKEN],
                                             vocab_tgt[EOS_TOKEN],
                                             vocab_tgt[PAD_TOKEN], config)
            else:
                enc_output, enc_hidden = model.encode(x_in)
                dec_hidden = model.decoder.initialize(enc_output, enc_hidden)

                raw_hypothesis = beam_search(model.decoder, model.emb_tgt,
                                             model.generate, enc_output,
                                             dec_hidden, x_mask,
                                             vocab_tgt.size(),
                                             vocab_tgt[SOS_TOKEN],
                                             vocab_tgt[EOS_TOKEN],
                                             vocab_tgt[PAD_TOKEN], config)

            hypothesis = batch_to_sentences(raw_hypothesis, vocab_tgt)
            model_hypotheses += hypothesis.tolist()

            references += sentences_y.tolist()

        save_hypotheses(model_hypotheses, 0, config, None)
Example #9
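Constructor of a Theano encoder model (with an optional MLP head for image prediction in 'imaginet' mode): loads vocabularies, registers parameters, optionally restores saved weights and applies shared parameters for multi-task training, then compiles the training and prediction functions.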
    def __init__(self, config, load=False, shared_params=None):
        self.config = config

        # vocabulary
        self.w2i, self.i2w = load_vocabularies(config)
        # set config['compute_test_values'] to 'warn' to enable test values
        theano.config.compute_test_value = config['compute_test_values']

        assert config['eos_symbol'] in self.w2i, \
            'word vocabulary needs to include eos'
        assert config['unk_symbol'] in self.w2i, \
            'word vocabulary needs to include UNK'

        # make sure the output directory exists
        output_dir = config['output_dir']
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        logger.warning('Vocabulary size: {}'.format(len(self.w2i)))

        # hyper-parameters
        self.dim = config['dim']
        self.dim_emb = config['dim_emb']
        self.dim_per_factor = config['dim_per_factor']
        self.dim_v = config['dim_v']
        self.dropout = config['dropout']
        self.dropout_word = config['dropout_word']
        self.dropout_emb = config['dropout_emb']
        self.dropout_rec = config['dropout_rec']
        self.verbose = config['verbose']
        gain = 'relu' if config['activation_mlp'] == 'relu' else 1.0

        if self.config['max_words'] == -1:
            self.voc_size = len(self.w2i)
        else:
            self.voc_size = self.config['max_words']

        logger.warning('Using actual vocabulary size: {}'.format(self.voc_size))

        # self.params is a dictionary that will hold all the parameters in
        # the strict order defined in this __init__()
        self.params = OrderedDict()
        self.theano_params = OrderedDict()

        # build the bi-rnn encoder
        # N.B. params are added inside this method
        self.init_encoder_params(**config)
        if config['mode'] == 'imaginet':
            # build the MLP for image prediction
            self.params = Model.init_mlp_params(self.params,
                                                gain=gain,
                                                **config)

        if load:
            self.load(os.path.join(config['output_dir'], config['model_name']))

        self.init_theano_params()

        # multi-task support
        # we replace whatever parameters we already have at this point with
        # the ones that we received as optional input
        # this needs to be done BEFORE building the model
        if shared_params is not None:
            self.apply_shared_theano_params(shared_params)

        # compile theano functions for training the model
        trng, f_loss, f_grad_shared, f_update, raw_grads = \
            self.compile_training_functions(config)
        self.trng = trng
        self.f_loss = f_loss
        self.f_grad_shared = f_grad_shared
        self.f_update = f_update
        self.raw_grads = raw_grads

        # compile theano functions for evaluating the model
        f_encode, f_predict = self.compile_mlp_predict(config, trng=trng)

        self.f_encode = f_encode
        self.f_predict = f_predict