Example #1
def generate(args):
    """
    Use the trained model for decoding
    Args
        args (argparse.Namespace)
    """
    if args.cuda and torch.cuda.is_available():
        device = 0
        use_cuda = True
    elif args.cuda and not torch.cuda.is_available():
        print("You do not have CUDA, turning cuda off")
        device = -1
        use_cuda = False
    else:
        device = -1
        use_cuda = False

    #Load the vocab
    vocab = du.load_vocab(args.vocab)
    eos_id = vocab.stoi[EOS_TOK]
    pad_id = vocab.stoi[PAD_TOK]

    if args.ranking:  # default is the hard setting, the 'Inverse Narrative Cloze' task from the paper
        dataset = du.NarrativeClozeDataset(args.valid_data,
                                           vocab,
                                           src_seq_length=MAX_EVAL_SEQ_LEN,
                                           min_seq_length=MIN_EVAL_SEQ_LEN,
                                           LM=False)
        # Batch size during decoding is set to 1
        batches = BatchIter(dataset,
                            1,
                            sort_key=lambda x: len(x.actual),
                            train=False,
                            device=-1)
    else:
        dataset = du.SentenceDataset(args.valid_data,
                                     vocab,
                                     src_seq_length=MAX_EVAL_SEQ_LEN,
                                     min_seq_length=MIN_EVAL_SEQ_LEN,
                                     add_eos=False)  #put in filter pred later
        # Batch size during decoding comes from args.batch_size
        batches = BatchIter(dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            device=-1)

    data_len = len(dataset)

    #Create the model
    with open(args.load, 'rb') as fi:
        if not use_cuda:
            model = torch.load(fi, map_location=lambda storage, loc: storage)
        else:
            model = torch.load(fi, map_location=torch.device('cuda'))

    if not hasattr(model.latent_root, 'nohier'):
        model.latent_root.set_nohier(args.nohier)  #for backwards compatibility

    model.decoder.eval()
    model.set_use_cuda(use_cuda)

    #For reconstruction
    if args.perplexity:
        loss = calc_perplexity(args, model, batches, vocab, data_len)
        print("Loss = {}".format(loss))
    elif args.schema:
        generate_from_seed(args, model, batches, vocab, data_len)
    elif args.ranking:
        do_ranking(args, model, batches, vocab, data_len, use_cuda)
    else:
        #        sample_outputs(model, vocab)
        reconstruct(args, model, batches, vocab)
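
A minimal driver sketch, assuming the generate() function above is defined in the same module: the flag names mirror the attributes generate() actually reads (cuda, vocab, valid_data, load, batch_size, ranking, perplexity, schema, nohier), but the defaults and help strings are assumptions rather than the original repo's CLI.

import argparse

def build_generate_parser():
    # hypothetical CLI; only the flag names are taken from generate() above
    parser = argparse.ArgumentParser(description="Decode with a trained model")
    parser.add_argument('--cuda', action='store_true', help="use the GPU if available")
    parser.add_argument('--vocab', required=True, help="path to the saved vocab")
    parser.add_argument('--valid_data', required=True, help="evaluation data file")
    parser.add_argument('--load', required=True, help="path to the trained model checkpoint")
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--ranking', action='store_true', help="run the Inverse Narrative Cloze ranking")
    parser.add_argument('--perplexity', action='store_true', help="report the reconstruction loss")
    parser.add_argument('--schema', action='store_true', help="generate from a seed")
    parser.add_argument('--nohier', action='store_true')
    return parser

if __name__ == '__main__':
    generate(build_generate_parser().parse_args())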
Example #2
def classic_train(args):
    """
    Train the model in the ol' fashioned way, just like grandma used to
    Args
        args (argparse.Namespace)
    """
    if args.cuda and torch.cuda.is_available():
        print("Using cuda")
        use_cuda = True
    elif args.cuda and not torch.cuda.is_available():
        print("You do not have CUDA, turning cuda off")
        use_cuda = False
    else:
        use_cuda = False

    #Load the data
    print("\nLoading Vocab")
    vocab = du.load_vocab(args.vocab)
    print("Vocab Loaded, Size {}".format(len(vocab.stoi.keys())))

    if args.use_pretrained:
        pretrained = GloVe(name='6B',
                           dim=args.emb_size,
                           unk_init=torch.Tensor.normal_)
        vocab.load_vectors(pretrained)
        print("Vectors Loaded")

    print("Loading Dataset")
    dataset = du.SentenceDataset(args.train_data,
                                 vocab,
                                 args.src_seq_length,
                                 add_eos=False)  #put in filter pred later
    print("Finished Loading Dataset {} examples".format(len(dataset)))
    batches = BatchIter(dataset,
                        args.batch_size,
                        sort_key=lambda x: len(x.text),
                        train=True,
                        sort_within_batch=True,
                        device=-1)
    data_len = len(dataset)

    if args.load_model:
        print("Loading the Model")
        model = torch.load(args.load_model)
    else:
        print("Creating the Model")
        bidir_mod = 2 if args.bidir else 1
        latents = example_tree(
            args.num_latent_values,
            (bidir_mod * args.enc_hid_size, args.latent_dim),
            use_cuda=use_cuda)  #assume bidirectional
        hidsize = (args.enc_hid_size, args.dec_hid_size)
        model = DAVAE(args.emb_size,
                      hidsize,
                      vocab,
                      latents,
                      layers=args.nlayers,
                      use_cuda=use_cuda,
                      pretrained=args.use_pretrained,
                      dropout=args.dropout)

    #create the optimizer
    if args.load_opt:
        print("Loading the optimizer state")
        optimizer = torch.load(args.load_opt)
    else:
        print("Creating the optimizer anew")
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    start_time = time.time()  #start of epoch 1
    curr_epoch = 1
    valid_loss = [0.0]
    # BatchIter cycles through the data indefinitely (reshuffling every epoch);
    # training exits via the max-epoch check below
    for iteration, bl in enumerate(batches):
        batch, batch_lens = bl.text
        target, target_lens = bl.target

        if use_cuda:
            batch = Variable(batch.cuda())
        else:
            batch = Variable(batch)

        model.zero_grad()
        latent_values, latent_root, diff, dec_outputs = model(
            batch, batch_lens)
        # train set to True so returns total loss
        loss, _ = monolithic_compute_loss(iteration,
                                          model,
                                          target,
                                          target_lens,
                                          latent_values,
                                          latent_root,
                                          diff,
                                          dec_outputs,
                                          use_cuda,
                                          args=args)

        # backward propagation
        loss.backward()
        # Gradient clipping
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        # Optimize
        optimizer.step()

        # End of an epoch (or every validate_after iterations) - run validation
        if ((args.batch_size * iteration) % data_len == 0
                or iteration % args.validate_after == 0) and iteration != 0:
            print("\nFinished Training Epoch/iteration {}/{}".format(
                curr_epoch, iteration))

            # do validation
            print("Loading Validation Dataset.")
            val_dataset = du.SentenceDataset(
                args.valid_data, vocab, args.src_seq_length,
                add_eos=False)  #put in filter pred later
            print("Finished Loading Validation Dataset {} examples.".format(
                len(val_dataset)))
            val_batches = BatchIter(val_dataset,
                                    args.batch_size,
                                    sort_key=lambda x: len(x.text),
                                    train=False,
                                    sort_within_batch=True,
                                    device=-1)
            valid_loss = 0.0
            for v_iteration, bl in enumerate(val_batches):
                batch, batch_lens = bl.text
                target, target_lens = bl.target
                batch_lens = batch_lens.cpu()
                if use_cuda:
                    batch = Variable(batch.cuda(), volatile=True)
                else:
                    batch = Variable(batch, volatile=True)

                latent_values, latent_root, diff, dec_outputs = model(
                    batch, batch_lens)
                # train set to False so returns only CE loss
                loss, ce_loss = monolithic_compute_loss(iteration,
                                                        model,
                                                        target,
                                                        target_lens,
                                                        latent_values,
                                                        latent_root,
                                                        diff,
                                                        dec_outputs,
                                                        use_cuda,
                                                        args=args,
                                                        train=False)
                valid_loss = valid_loss + ce_loss.data.clone()

            valid_loss = valid_loss / (v_iteration + 1)
            print("**Validation loss {:.2f}.**\n".format(valid_loss[0]))

            # Check max epochs and break
            if (args.batch_size * iteration) % data_len == 0:
                curr_epoch += 1
            if curr_epoch > args.epochs:
                print("Max epoch {}-{} reached. Exiting.\n".format(
                    curr_epoch, args.epochs))
                break

        # Save the checkpoint
        if iteration % args.save_after == 0 and iteration != 0:
            print("Saving checkpoint for epoch {} at {}.\n".format(
                curr_epoch, args.save_model))
            # curr_epoch and validation stats appended to the model name
            torch.save(
                model, "{}_{}_{}_.epoch_{}.loss_{:.2f}.pt".format(
                    args.save_model, args.commit_c, args.commit2_c, curr_epoch,
                    float(valid_loss[0])))
            torch.save(
                optimizer,
                "{}.{}.epoch_{}.loss_{:.2f}.pt".format(args.save_model,
                                                       "optimizer", curr_epoch,
                                                       float(valid_loss[0])))
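
The loop above is written against pre-0.4 PyTorch: inputs are wrapped in torch.autograd.Variable, validation relies on volatile=True, and gradients are clipped with the deprecated clip_grad_norm. Below is a minimal sketch of the equivalent pattern in current PyTorch, using a generic model and loss_fn as placeholders (not the repo's DAVAE or monolithic_compute_loss):

import torch

def train_step(model, optimizer, loss_fn, batch, clip):
    # plain tensors replace Variable; autograd tracks gradients by default
    model.zero_grad()
    loss = loss_fn(model(batch))
    loss.backward()
    # in-place clip_grad_norm_ supersedes the deprecated clip_grad_norm
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
    optimizer.step()
    return loss.item()

def eval_step(model, loss_fn, batch):
    # torch.no_grad() replaces Variable(..., volatile=True) during validation
    with torch.no_grad():
        return loss_fn(model(batch)).item()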
Example #3
def classic_train(args, args_dict, args_info):
    """
    Train the model in the ol' fashioned way, just like grandma used to
    Args
        args (argparse.Namespace)
    """
    if args.cuda and torch.cuda.is_available():
        print("Using cuda")
        use_cuda = True
    elif args.cuda and not torch.cuda.is_available():
        print("You do not have CUDA, turning cuda off")
        use_cuda = False
    else:
        use_cuda = False

    #Load the data
    print("\nLoading Vocab")
    print('args.vocab: ', args.vocab)
    vocab, verb_max_idx = du.load_vocab(args.vocab)
    print("Vocab Loaded, Size {}".format(len(vocab.stoi.keys())))
    print(vocab.itos[:40])
    args_dict["vocab"] = len(vocab.stoi.keys())
    vocab2 = du.load_vocab(args.frame_vocab_address, is_Frame=True)
    print(vocab2.itos[:40])
    print("Frames-Vocab Loaded, Size {}".format(len(vocab2.stoi.keys())))
    total_frames = len(vocab2.stoi.keys())
    args.total_frames = total_frames
    args.num_latent_values = args.total_frames
    print('total frames: ', args.total_frames)
    # Note: this first experiment name is immediately overwritten by the
    # second assignment below
    experiment_name = 'SSDVAE_wotemp_{}_eps_{}_num_{}_seed_{}'.format(
        'chain_event', str(args_dict['obsv_prob']), str(args_dict['exp_num']),
        str(args_dict['seed']))

    experiment_name = '{}_eps_{}_num_{}_seed_{}'.format(
        'chain_event', str(args_dict['obsv_prob']), str(args_dict['exp_num']),
        str(args_dict['seed']))

    if args.use_pretrained:
        pretrained = GloVe(name='6B',
                           dim=args.emb_size,
                           unk_init=torch.Tensor.normal_)
        vocab.load_vectors(pretrained)
        print("Vectors Loaded")

    print("Loading Dataset")
    dataset = du.SentenceDataset(path=args.train_data,
                                 path2=args.train_frames,
                                 vocab=vocab,
                                 vocab2=vocab2,
                                 num_clauses=args.num_clauses,
                                 add_eos=False,
                                 is_ref=True,
                                 obsv_prob=args.obsv_prob)

    print("Finished Loading Dataset {} examples".format(len(dataset)))
    batches = BatchIter(dataset,
                        args.batch_size,
                        sort_key=lambda x: len(x.text),
                        train=True,
                        sort_within_batch=True,
                        device=-1)
    data_len = len(dataset)

    if args.load_model:
        print("Loading the Model")
        model = torch.load(args.load_model)
    else:
        print("Creating the Model")
        bidir_mod = 2 if args.bidir else 1
        latents = example_tree(
            args.num_latent_values,
            (bidir_mod * args.enc_hid_size, args.latent_dim),
            frame_max=args.total_frames,
            padding_idx=vocab2.stoi['<pad>'],
            use_cuda=use_cuda,
            nohier_mode=args.nohier)  #assume bidirectional

        hidsize = (args.enc_hid_size, args.dec_hid_size)
        model = SSDVAE(args.emb_size,
                       hidsize,
                       vocab,
                       latents,
                       layers=args.nlayers,
                       use_cuda=use_cuda,
                       pretrained=args.use_pretrained,
                       dropout=args.dropout,
                       frame_max=args.total_frames,
                       latent_dim=args.latent_dim,
                       verb_max_idx=verb_max_idx)

    #create the optimizer
    if args.load_opt:
        print("Loading the optimizer state")
        optimizer = torch.load(args.load_opt)
    else:
        print("Creating the optimizer anew")
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    start_time = time.time()  #start of epoch 1
    curr_epoch = 1
    valid_loss = [0.0]
    min_ppl = 1e10
    print("Loading Validation Dataset.")
    val_dataset = du.SentenceDataset(path=args.valid_data,
                                     path2=args.valid_frames,
                                     vocab=vocab,
                                     vocab2=vocab2,
                                     num_clauses=args.num_clauses,
                                     add_eos=False,
                                     is_ref=True,
                                     obsv_prob=0.0,
                                     print_valid=True)

    print("Finished Loading Validation Dataset {} examples.".format(
        len(val_dataset)))
    val_batches = BatchIter(val_dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            sort_within_batch=True,
                            device=-1)
    # Note: breaking at idx == 0 exits on the very first pass, so the
    # inspection prints below never run; use e.g. `if idx > 0: break` to
    # peek at the first validation batch
    for idx, item in enumerate(val_batches):
        if idx == 0:
            break
        token_rev = [vocab.itos[int(v.numpy())] for v in item.target[0][-1]]
        frame_rev = [vocab2.itos[int(v.numpy())] for v in item.frame[0][-1]]
        ref_frame = [vocab2.itos[int(v.numpy())] for v in item.ref[0][-1]]

        print('token_rev:', token_rev, len(token_rev), "lengths: ",
              item.target[1][-1])
        print('frame_rev:', frame_rev, len(frame_rev), "lengths: ",
              item.frame[1][-1])
        print('ref_frame:', ref_frame, len(ref_frame), "lengths: ",
              item.ref[1][-1])
        print('-' * 50)
    print('Model_named_params: {}'.format(
        [name for name, _ in model.named_parameters()]))

    # BatchIter cycles through the data indefinitely (reshuffling every epoch)
    for iteration, bl in enumerate(batches):
        batch, batch_lens = bl.text
        f_vals, f_vals_lens = bl.frame
        target, target_lens = bl.target
        f_ref, _ = bl.ref

        if use_cuda:
            batch = Variable(batch.cuda())
            f_vals = Variable(f_vals.cuda())
        else:
            batch = Variable(batch)
            f_vals = Variable(f_vals)

        model.zero_grad()
        latent_values, latent_root, diff, dec_outputs = model(batch,
                                                              batch_lens,
                                                              f_vals=f_vals)

        topics_dict, real_sentence, next_frames_dict, word_to_frame = show_inference(
            model, batch, vocab, vocab2, f_vals, f_ref, args)
        loss, _ = monolithic_compute_loss(iteration,
                                          model,
                                          target,
                                          target_lens,
                                          latent_values,
                                          latent_root,
                                          diff,
                                          dec_outputs,
                                          use_cuda,
                                          args=args,
                                          topics_dict=topics_dict,
                                          real_sentence=real_sentence,
                                          next_frames_dict=next_frames_dict,
                                          word_to_frame=word_to_frame,
                                          train=True,
                                          show=True)

        # backward propagation
        loss.backward()
        # Gradient clipping
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        # Optimize
        optimizer.step()

        # Run validation every 10 iterations
        if iteration % 10 == 0:
            print("\nFinished Training Epoch/iteration {}/{}".format(
                curr_epoch, iteration))
            # do validation
            valid_logprobs = 0.0
            valid_lengths = 0.0
            valid_loss = 0.0
            with torch.no_grad():
                for v_iteration, bl in enumerate(val_batches):
                    batch, batch_lens = bl.text
                    f_vals, f_vals_lens = bl.frame
                    target, target_lens = bl.target
                    f_ref, _ = bl.ref
                    batch_lens = batch_lens.cpu()
                    if use_cuda:
                        batch = Variable(batch.cuda())
                        f_vals = Variable(f_vals.cuda())
                    else:
                        batch = Variable(batch)
                        f_vals = Variable(f_vals)
                    latent_values, latent_root, diff, dec_outputs = model(
                        batch, batch_lens, f_vals=f_vals)
                    topics_dict, real_sentence, next_frames_dict, word_to_frame = show_inference(
                        model, batch, vocab, vocab2, f_vals, f_ref, args)
                    loss, ce_loss = monolithic_compute_loss(
                        iteration,
                        model,
                        target,
                        target_lens,
                        latent_values,
                        latent_root,
                        diff,
                        dec_outputs,
                        use_cuda,
                        args=args,
                        topics_dict=topics_dict,
                        real_sentence=real_sentence,
                        next_frames_dict=next_frames_dict,
                        word_to_frame=word_to_frame,
                        train=False,
                        show=False)

                    valid_loss = valid_loss + ce_loss.data.clone()
                    valid_logprobs += (ce_loss.data.clone().cpu().numpy() *
                                       target_lens.sum().cpu().data.numpy())
                    valid_lengths += target_lens.sum().cpu().data.numpy()
                    # print("valid_lengths: ",valid_lengths[0])

            nll = valid_logprobs / valid_lengths
            ppl = np.exp(nll)
            valid_loss = valid_loss / (v_iteration + 1)
            print("**Validation loss {:.2f}.**\n".format(valid_loss[0]))
            print("**Validation NLL {:.2f}.**\n".format(nll))
            print("**Validation PPL {:.2f}.**\n".format(ppl))
            args_dict_wandb = {
                "val_nll": nll,
                "val_ppl": ppl,
                "valid_loss": valid_loss
            }
            if ppl < min_ppl:
                min_ppl = ppl
                args_dict["min_ppl"] = min_ppl
                dir_path = os.path.dirname(os.path.realpath(__file__))
                save_file = "".join([
                    "_" + str(key) + "_" + str(value)
                    for key, value in args_dict.items() if key != "min_ppl"
                ])
                args_to_md(model="chain", args_dict=args_dict)
                model_path = os.path.join(dir_path, "saved_models",
                                          "chain_" + save_file + ".pt")
                torch.save(model, model_path)
                config_path = os.path.join(dir_path, "saved_configs",
                                           "chain_" + save_file + ".pkl")
                with open(config_path, "wb") as f:
                    pickle.dump((args_dict, args_info), f)
            print('\t==> min_ppl {:4.4f} '.format(min_ppl))
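
For reference, the NLL/PPL bookkeeping in the validation block above reduces to a token-weighted average of the per-batch cross-entropy. A standalone sketch, where ce_losses and token_counts are hypothetical stand-ins for ce_loss and target_lens.sum():

import numpy as np

def corpus_perplexity(ce_losses, token_counts):
    # total negative log-likelihood, weighting each batch by its token count
    total_nll = sum(ce * n for ce, n in zip(ce_losses, token_counts))
    nll = total_nll / sum(token_counts)  # average NLL per token
    return nll, np.exp(nll)              # perplexity = exp(NLL)

# e.g. two batches with mean CE of 2.1 and 2.3 nats over 400 and 350 tokens
nll, ppl = corpus_perplexity([2.1, 2.3], [400, 350])
print("NLL {:.3f}  PPL {:.2f}".format(nll, ppl))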
Example #4
def generate(args):
    """
    Use the trained model for decoding
    Args
        args (argparse.Namespace)
    """
    if args.cuda and torch.cuda.is_available():
        device = 0
        use_cuda = True
    elif args.cuda and not torch.cuda.is_available():
        print("You do not have CUDA, turning cuda off")
        device = -1
        use_cuda = False
    else:
        device = -1
        use_cuda = False

    #Load the vocab
    # vocab = du.load_vocab(args.vocab)
    vocab, _ = du.load_vocab(args.vocab)
    vocab2 = du.load_vocab(args.frame_vocab_address, is_Frame=True)

    eos_id = vocab.stoi[EOS_TOK]
    pad_id = vocab.stoi[PAD_TOK]
    if args.ranking:  # default is the hard setting, the 'Inverse Narrative Cloze' task from the paper
        dataset = du.NarrativeClozeDataset(args.valid_narr,
                                           vocab,
                                           src_seq_length=MAX_EVAL_SEQ_LEN,
                                           min_seq_length=MIN_EVAL_SEQ_LEN,
                                           LM=False)
        print('ranking_dataset: ', len(dataset))
        # Batch size during decoding is set to 1
        batches = BatchIter(dataset,
                            1,
                            sort_key=lambda x: len(x.actual),
                            train=False,
                            device=-1)
    else:
        # dataset = du.SentenceDataset(args.valid_data, vocab, src_seq_length=MAX_EVAL_SEQ_LEN, min_seq_length=MIN_EVAL_SEQ_LEN, add_eos=False) #put in filter pred later
        dataset = du.SentenceDataset(path=args.valid_data,
                                     path2=args.valid_frames,
                                     vocab=vocab,
                                     vocab2=vocab2,
                                     num_clauses=args.num_clauses,
                                     add_eos=False,
                                     is_ref=True,
                                     obsv_prob=0.0,
                                     print_valid=True)
        # Batch size during decoding comes from args.batch_size
        batches = BatchIter(dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            device=-1)

    data_len = len(dataset)

    #Create the model
    with open(args.load, 'rb') as fi:
        if not use_cuda:
            model = torch.load(fi, map_location=lambda storage, loc: storage)
        else:
            model = torch.load(fi, map_location=torch.device('cuda'))

    if not hasattr(model.latent_root, 'nohier'):
        model.latent_root.set_nohier(args.nohier)  #for backwards compatibility

    model.decoder.eval()
    model.set_use_cuda(use_cuda)

    #For reconstruction
    if args.perplexity:
        print('calculating perplexity')
        loss = calc_perplexity(args, model, batches, vocab, data_len)
        NLL = loss
        PPL = np.exp(loss)
        print("Chain-NLL = {}".format(NLL))
        print("Chain-PPL = {}".format(PPL))
        return PPL
    elif args.schema:
        generate_from_seed(args, model, batches, vocab, data_len)
    elif args.ranking:
        ranked_acc = do_ranking(args, model, batches, vocab, data_len,
                                use_cuda)
        return ranked_acc
    else:
        #        sample_outputs(model, vocab)
        reconstruct(args, model, batches, vocab)
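
Both generate() variants load the checkpoint with a map_location that depends on whether CUDA is in use; the helper below isolates that pattern as a small sketch (the path argument and function name are placeholders):

import torch

def load_checkpoint(path, use_cuda):
    with open(path, 'rb') as fi:
        if use_cuda and torch.cuda.is_available():
            return torch.load(fi, map_location=torch.device('cuda'))
        # remap every storage onto the CPU when CUDA is not used
        return torch.load(fi, map_location=lambda storage, loc: storage)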