Example #1
def gen_from_seed(model, vocab, eos_id, pad_id, sos_id, tup_id):

    if args.emb_type:  # GEN FROM SEED WITH ROLE EMB
        print("GEN SEED WITH ROLE EMB")
        vocab2 = du.load_vocab(args.vocab2)
        # will use this to feed in role ids in beam decode
        ROLES = [
            vocab2.stoi[TUP_TOK], vocab2.stoi[VERB], vocab2.stoi[SUB],
            vocab2.stoi[OBJ], vocab2.stoi[PREP]
        ]
        dataset = du.LMRoleSentenceDataset(
            args.data,
            vocab,
            args.role_data,
            vocab2,
            src_seq_length=MAX_EVAL_SEQ_LEN,
            min_seq_length=MIN_EVAL_SEQ_LEN)  # TODO: put in filter_pred later
        batches = BatchIter(dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            device=device)

        for iteration, bl in enumerate(batches):

            if (iteration + 1) % 25 == 0:
                print("iteration {}".format(iteration + 1))

            ## DATA STEPS
            batch, batch_lens = bl.text
            target, target_lens = bl.target
            role, role_lens = bl.role

            # volatile=True disables autograd tracking for inference (pre-0.4 PyTorch API)
            if use_cuda:
                batch = Variable(batch.cuda(), volatile=True)
                role = Variable(role.cuda(), volatile=True)
            else:
                batch = Variable(batch, volatile=True)
                role = Variable(role, volatile=True)

            ## INIT AND DECODE
            hidden = model.init_hidden(batch.size(0))  # last batch may be smaller than args.batch_size
            # run the model over the first t-1 events (all but the last word);
            # the corresponding role ids are known as well.
            seq_len = batch.size(1)
            for i in range(seq_len - 1):
                inp = batch[:, i].unsqueeze(1)  # [batch_size, 1]
                typ = role[:, i].unsqueeze(1)   # matching role id for this step
                _, hidden = model(inp, hidden, typ)

            #print("seq len {}, decode after {} steps".format(seq_len, i+1))
            # beam set current state to last word in the sequence
            beam_inp = batch[:, i + 1]
            # do not need this anymore as assuming last sequence role obj is prep.
            #role_inp = role[:, i+1]
            #           print("ROLES LIST: {}".format(ROLES))
            #           print("FIRST ID: {}".format(role[:, i+1]))

            # init beam initializes the beam with the last sequence element. ROLE is a list of roe type ids.
            outputs = beam_decode(model,
                                  beam_inp,
                                  hidden,
                                  args.max_len_decode,
                                  args.beam_size,
                                  pad_id,
                                  sos_id,
                                  eos_id,
                                  tup_idx=tup_id,
                                  init_beam=True,
                                  roles=ROLES)
            predicted_events = get_pred_events(outputs, vocab)

            print("CONTEXT: {}".format(
                transform(batch.data.squeeze(), vocab.itos)))
            print("PRED_t: {}".format(
                predicted_events))  # n_best stitched together.

            if (iteration + 1) == args.max_decode:
                print("Max decode reached. Exiting.")
                break

    else:
        print("GEN SEED WITHOUT ROLE EMB")
        dataset = du.LMSentenceDataset(
            args.data,
            vocab,
            src_seq_length=MAX_EVAL_SEQ_LEN,
            min_seq_length=MIN_EVAL_SEQ_LEN)  # TODO: put in filter_pred later
        batches = BatchIter(dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            device=device)
        for iteration, bl in enumerate(batches):

            if (iteration + 1) % 25 == 0:
                print("iteration {}".format(iteration + 1))

            ## DATA STEPS
            batch, batch_lens = bl.text
            target, target_lens = bl.target

            if use_cuda:
                batch = Variable(batch.cuda(), volatile=True)
            else:
                batch = Variable(batch, volatile=True)

            ## INIT AND DECODE
            hidden = model.init_hidden(batch.size(0))  # last batch may be smaller than args.batch_size

            # run the model over the first t-1 events (all but the last word)
            seq_len = batch.size(1)
            for i in range(seq_len - 1):
                inp = batch[:, i].unsqueeze(1)  # [batch_size, 1]
                _, hidden = model(inp, hidden)

            #print("seq len {}, decode after {} steps".format(seq_len, i+1))
            # beam set current state to last word in the sequence
            beam_inp = batch[:, i + 1]

            # init beam initializesthe beam with the last sequence element
            outputs = beam_decode(model,
                                  beam_inp,
                                  hidden,
                                  args.max_len_decode,
                                  args.beam_size,
                                  pad_id,
                                  sos_id,
                                  eos_id,
                                  tup_idx=tup_id,
                                  init_beam=True)
            predicted_events = get_pred_events(outputs, vocab)

            print("CONTEXT: {}".format(
                transform(batch.data.squeeze(), vocab.itos)))
            print("PRED_t: {}".format(
                predicted_events))  # n_best stitched together.

            if (iteration + 1) == args.max_decode:
                print("Max decode reached. Exiting.")
                break
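
The function above relies on the script's module-level globals (args, device, use_cuda, du, and the special-token constants). A minimal invocation sketch, where args.load_model, EOS_TOK, PAD_TOK, and SOS_TOK are assumed names (only TUP_TOK appears in the code above):

# Hypothetical driver for gen_from_seed; the checkpoint flag and token
# constants below are assumptions, not part of the original script.
vocab = du.load_vocab(args.vocab)
model = torch.load(args.load_model)  # args.load_model is an assumed flag
model.eval()
gen_from_seed(model, vocab,
              eos_id=vocab.stoi[EOS_TOK],
              pad_id=vocab.stoi[PAD_TOK],
              sos_id=vocab.stoi[SOS_TOK],
              tup_id=vocab.stoi[TUP_TOK])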
Example #2
def get_perplexity(model, vocab):
    total_loss = 0.0
    if args.emb_type:  # GET PERPLEXITY WITH ROLE EMB
        print("PERPLEXITY WITH ROLE EMB")
        vocab2 = du.load_vocab(args.vocab2)
        dataset = du.LMRoleSentenceDataset(
            args.data,
            vocab,
            args.role_data,
            vocab2,
            src_seq_length=MAX_EVAL_SEQ_LEN,
            min_seq_length=MIN_EVAL_SEQ_LEN)  # TODO: put in filter_pred later
        batches = BatchIter(dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            device=device)

        print("DATASET {}".format(len(dataset)))
        for iteration, bl in enumerate(batches):

            if (iteration + 1) % 25 == 0:
                print("iteration {}".format(iteration + 1))

            ## DATA STEPS
            batch, batch_lens = bl.text
            target, target_lens = bl.target
            role, role_lens = bl.role

            if use_cuda:
                batch = Variable(batch.cuda(), volatile=True)
                target = Variable(target.cuda(), volatile=True)
                role = Variable(role.cuda(), volatile=True)
            else:
                batch = Variable(batch, volatile=True)
                target = Variable(target, volatile=True)
                role = Variable(role, volatile=True)

            ## INIT AND DECODE
            hidden = model.init_hidden(batch.size(0))  # last batch may be smaller than args.batch_size
            ce_loss = calc_perplexity(args, model, batch, vocab, target,
                                      target_lens, hidden, role)
            #print("Loss {}".format(ce_loss))
            total_loss = total_loss + ce_loss.data[0]

            if (iteration + 1) == args.max_decode:
                print("Max decode reached. Exiting.")
                break

        # after iterating over all examples
        loss = total_loss / (iteration + 1)
        print("Average Loss: {}".format(loss))
        return loss

    else:
        print("PERPLEXITY WITHOUT ROLE EMB")
        dataset = du.LMSentenceDataset(
            args.data,
            vocab,
            src_seq_length=MAX_EVAL_SEQ_LEN,
            min_seq_length=MIN_EVAL_SEQ_LEN)  # TODO: put in filter_pred later
        batches = BatchIter(dataset,
                            args.batch_size,
                            sort_key=lambda x: len(x.text),
                            train=False,
                            device=device)
        for iteration, bl in enumerate(batches):

            if (iteration + 1) % 25 == 0:
                print("iteration {}".format(iteration + 1))

            ## DATA STEPS
            batch, batch_lens = bl.text
            target, target_lens = bl.target

            if use_cuda:
                batch = Variable(batch.cuda(), volatile=True)
                target = Variable(target.cuda(), volatile=True)
            else:
                batch = Variable(batch, volatile=True)
                target = Variable(target, volatile=True)

            ## INIT AND DECODE
            hidden = model.init_hidden(batch.size(0))  # last batch may be smaller than args.batch_size
            ce_loss = calc_perplexity(args, model, batch, vocab, target,
                                      target_lens, hidden)
            #print("Loss {}".format(ce_loss))
            total_loss = total_loss + ce_loss.data[0]

            if (iteration + 1) == args.max_decode:
                print("Max decode reached. Exiting.")
                break

        # after iterating over all examples
        loss = total_loss / (iteration + 1)
        print("Average Loss: {}".format(loss))
        return loss
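
Note that despite its name, get_perplexity returns an average cross-entropy loss rather than a perplexity. Assuming calc_perplexity yields a per-token cross-entropy in nats, the perplexity follows by exponentiation; a minimal sketch:

import math

avg_ce = get_perplexity(model, vocab)  # average per-token cross-entropy (assumed)
print("Perplexity: {:.2f}".format(math.exp(avg_ce)))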
Example #3
def do_training(use_cuda=True):

    # Using our data utils to load data
    vocab = du.load_vocab(args.vocab)
    nvocab = len(vocab.stoi)
    print("*Vocab Loaded, Size {}".format(nvocab))

    if args.pretrained:
        print("using pretrained vectors.")
        pretrained = GloVe(name='6B',
                           dim=args.emsize,
                           unk_init=torch.Tensor.normal_)
        vocab.load_vectors(pretrained)
        print("Vectors Loaded")

    if args.emb_type:
        vocab2 = du.load_vocab(args.vocab2)
        nvocab2 = len(vocab2.stoi)
        print("*Vocab2 Loaded, Size {}".format(nvocab2))

        dataset = du.LMRoleSentenceDataset(args.train_data, vocab,
                                           args.train_type_data, vocab2)
        print("*Train Dataset Loaded {} examples".format(len(dataset)))

        # Build the model: word emb + type emb
        model = LSTMLM(args.emsize,
                       args.nhidden,
                       args.nlayers,
                       nvocab,
                       pretrained=args.pretrained,
                       vocab=vocab,
                       type_emb=args.emb_type,
                       ninput2=args.em2size,
                       nvocab2=nvocab2,
                       dropout=args.dropout,
                       use_cuda=use_cuda)
        print("Building word+type emb model.")

    else:

        dataset = du.LMSentenceDataset(args.train_data, vocab)
        print("*Train Dataset Loaded {} examples".format(len(dataset)))

        # Build the model: word emb
        model = LSTMLM(args.emsize,
                       args.nhidden,
                       args.nlayers,
                       nvocab,
                       pretrained=args.pretrained,
                       vocab=vocab,
                       dropout=args.dropout,
                       use_cuda=use_cuda)
        print("Building word emb model.")

    data_len = len(dataset)
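    # device=-1 keeps batches on the CPU (old torchtext convention); tensors are
    # moved to the GPU manually inside the training loop below.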
    batches = BatchIter(dataset,
                        args.batch_size,
                        sort_key=lambda x: len(x.text),
                        train=True,
                        sort_within_batch=True,
                        device=-1)

    ## some checks
    tally_parameters(model)

    if use_cuda:
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    val_loss = [0.0]

    # DO TRAINING

    total_loss = 0.0
    lapse = 1
    faulty = False
    for iteration, bl in enumerate(batches):

        # batch is [batch_size, seq_len]
        batch, batch_lens = bl.text
        if args.emb_type:
            role, role_lens = bl.role

        target, target_lens = bl.target

        # init the hidden state before every batch
        hidden = model.init_hidden(batch.size(0))  # last batch may be smaller than args.batch_size

        # batch has SOS prepended to it.
        # target has EOS appended to it.
        if use_cuda:
            batch = Variable(batch.cuda())
            target = Variable(target.cuda())
            if args.emb_type:
                role = Variable(role.cuda())
        else:
            batch = Variable(batch)
            target = Variable(target)
            if args.emb_type:
                role = Variable(role)

        # Repackaging is not needed.

        # zero the gradients
        model.zero_grad()
        # run the model
        logits = []
        for i in range(batch.size(1)):
            inp = batch[:, i]
            inp = inp.unsqueeze(1)
            if args.emb_type:
                # a word/role length mismatch can raise an indexing error here;
                # skip this batch and move on to the next one.
                try:
                    typ = role[:, i]
                    typ = typ.unsqueeze(1)
                    logit, hidden = model(inp, hidden, typ)
                except Exception as e:
                    print("ALERT!! word and type batch error. {}".format(e))
                    faulty = True
                    break
            else:
                # keep updating the hidden state accordingly
                logit, hidden = model(inp, hidden)

            logits += [logit]

        # if this batch was faulty, skip to the next iteration
        if faulty:
            faulty = False
            continue

        # logits is [batch_size, seq_len, vocab_size]
        logits = torch.stack(logits, dim=1)
        if use_cuda:
            loss = masked_cross_entropy(logits, target,
                                        Variable(target_lens.cuda()))
        else:
            loss = masked_cross_entropy(logits, target, Variable(target_lens))

        loss.backward()

        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)

        # optimize
        optimizer.step()

        # aggregate the stats
        total_loss = total_loss + loss.data.clone()
        lapse += 1

        # print based on log interval
        if (iteration + 1) % args.log_interval == 0:
            print("| iteration {} | loss {:5.2f}".format(
                iteration + 1, loss.data[0]))

        # forcing buffers to write
        sys.stdout.flush()

        # saving only after specified iterations
        if (iteration + 1) % args.save_after == 0:
            # summarize every save after num iterations losses
            avg_loss = total_loss / lapse
            print("||| iteration {} | average loss {:5.2f}".format(
                iteration + 1,
                avg_loss.cpu().numpy()[0]))
            # reset values
            total_loss = 0.0
            lapse = 1

            torch.save(model,
                       "{}_.iteration_{}.pt".format(args.save, iteration + 1))
            torch.save(
                optimizer,
                "{}.{}.iteration_{}.pt".format(args.save, "optimizer",
                                               iteration + 1))
            print("model and optimizer saved for iteration {}".format(iteration + 1))
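
masked_cross_entropy is imported from elsewhere in the repository. A minimal sketch of what it presumably computes, masking out padding positions via the target lengths (an assumption about the interface, not the repository's actual implementation):

import torch
import torch.nn.functional as F

def masked_cross_entropy_sketch(logits, target, lengths):
    # logits: [batch, seq_len, vocab], target: [batch, seq_len], lengths: [batch]
    log_probs = F.log_softmax(logits, dim=-1)
    # negative log-likelihood of each gold token
    nll = -log_probs.gather(2, target.unsqueeze(2)).squeeze(2)  # [batch, seq_len]
    # zero out positions beyond each sequence's true length
    positions = torch.arange(logits.size(1)).unsqueeze(0)       # [1, seq_len]
    mask = (positions < lengths.unsqueeze(1)).float()           # [batch, seq_len]
    return (nll * mask).sum() / mask.sum()                      # mean over real tokens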