Example #1
def train(args, batch, vocab, model, save_dir):
    # args: parsed command-line arguments from main
    # batch: a Batch object
    # vocab: a Vocab object
    # model: model loaded in main
    # save_dir: directory where model checkpoints are saved

    # set loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    step = 99999  # placeholder; the real steps-per-epoch is unknown until after the first pass
    for epoch in range(args.startfrom, args.num_epochs):
        print(
            "=========================== Epoch %d ==========================="
            % (epoch + args.startfrom))

        batch.next_epoch(args.batch_size)  # initialize batch data
        batch.initialize_states(args.num_layers, args.hidden_size)
        total_steps = step
        step = 0
        while (batch.epoch_end == 0):
            step += 1
            # update the minibatch inputs / outputs
            model.zero_grad()
            # get next minibatch
            batch.get_minibatch(0)
            # get inputs and targets from batch object
            inputs_np = np.array(
                [vocab.word_list_to_idx_list(line) for line in batch.batch_in],
                dtype=int)
            targets_np = np.array([
                vocab.word_list_to_idx_list(line) for line in batch.batch_out
            ],
                                  dtype=int)
            inputs = Variable(torch.LongTensor(inputs_np)).cuda()
            targets = Variable(torch.LongTensor(targets_np)).cuda()

            # run model to get outputs
            outputs, states = model(inputs, batch.states)
            batch.states = detach(states)
            targets, outputs = pack_padding(targets, outputs)
            loss = criterion(outputs, targets.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.5)
            optimizer.step()

            batch.next_minibatch()
            if step % 10 == 0:
                print(
                    'Epoch [%d/%d], Loss: %.3f, Steps: [%d/%d], Perplexity: %5.2f'
                    % (epoch + args.startfrom, args.num_epochs, loss.data[0],
                       step, total_steps, np.exp(loss.data[0])))
        # save model at end of each epoch
        torch.save(obj=model,
                   f=os.path.join(save_dir, 'saved_model_%d_epochs.pckl'
                                  % (epoch + args.startfrom)))
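
A note on two helpers that every example above relies on but that are not part of the listing: `detach(states)` and `pack_padding(targets, outputs)`. The sketch below is one plausible implementation, assuming `states` is a tuple of hidden-state tensors and that index 0 is the padding entry; both the padding convention and the `pad_idx` parameter are assumptions, not taken from the source.

import torch

def detach(states):
    # truncate backpropagation through time: cut the autograd graph between
    # minibatches so gradients do not flow across batch boundaries
    return tuple(s.detach() for s in states)

def pack_padding(targets, outputs, pad_idx=0):
    # flatten the sequence dimension and drop padded positions so that the
    # loss and the accuracy counts ignore padding (pad_idx=0 is assumed)
    outputs = outputs.contiguous().view(-1, outputs.size(-1))
    targets = targets.contiguous().view(-1)
    keep = (targets != pad_idx).nonzero().view(-1)
    return targets[keep].unsqueeze(1), outputs[keep]
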
Example #2
def test(args, batch, vocab, model, save_dir):

    # set loss function
    criterion = nn.CrossEntropyLoss()
    total_files = len(batch.full_list)

    print("Total number of files to read: %d" % total_files)
    batch.next_epoch(args.batch_size)  # initialize batch data
    batch.initialize_states(args.num_layers, args.hidden_size)
    overall_total = 0
    overall_correct = 0
    while True:
        # update the minibatch inputs / outputs
        # get next minibatch
        total = 0
        correct = 0
        batch.get_minibatch(0)
        batch.next_minibatch()

        if batch.epoch_end == 1:
            break

        # make sure all lengths of batch_in and batch_out are same
        inputs_np = np.array([
            vocab.word_list_to_idx_list(line)
            for i, line in enumerate(batch.batch_in)
        ],
                             dtype=int)
        targets_np = np.array(
            [vocab.word_list_to_idx_list(line) for line in batch.batch_out],
            dtype=int)

        # whether to use teacher forcing
        teacher_forcing = False

        inputs = Variable(torch.LongTensor(inputs_np)).cuda()
        targets = Variable(torch.LongTensor(targets_np)).cuda()

        outputs = model(inputs, targets, teacher_forcing)
        targets, outputs = pack_padding(targets, outputs)
        loss = criterion(outputs, targets.view(-1))

        out = outputs.max(1)[1].unsqueeze(1)

        total_unk = (targets == vocab.w2i['<UNK>']).data.cpu().numpy().sum()
        correct_unk = ((targets == vocab.w2i['<UNK>']) *
                       (targets == out)).data.cpu().numpy().sum()
        total += targets.size(0) - total_unk
        correct += (targets == out).data.cpu().numpy().sum() - correct_unk
        print("%d/%d, accuracy: %1.3f, perplexity: %1.3f" \
              %(correct,total,correct/total,np.exp(loss.data[0])))

        overall_total += total
        overall_correct += correct
    # after reading all files, calculate scores
    print("Total: %d/%d, accuracy: %1.3f" %
          (overall_correct, overall_total, overall_correct / overall_total))
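
The accuracy bookkeeping in Example #2 excludes `<UNK>` positions from both the correct count and the total, so a model that always predicts the unknown token gets no credit for it. The same logic on plain NumPy arrays, with a hypothetical `unk_idx`, looks like this:

import numpy as np

def accuracy_excluding_unk(targets, predictions, unk_idx):
    # count matches, remove the ones where both sides are <UNK>, and shrink
    # the denominator by the number of <UNK> targets
    targets = np.asarray(targets)
    predictions = np.asarray(predictions)
    total_unk = (targets == unk_idx).sum()
    correct_unk = ((targets == unk_idx) & (targets == predictions)).sum()
    total = targets.size - total_unk
    correct = (targets == predictions).sum() - correct_unk
    return correct, total, correct / max(total, 1)

# accuracy_excluding_unk([4, 1, 7, 1], [4, 1, 2, 9], unk_idx=1) -> (1, 2, 0.5)
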
Example #3
def test(args, batch, vocab, model, save_dir):
    # args: parsed command-line arguments from main
    # batch: a Batch object
    # vocab: a Vocab object
    # model: model loaded in main
    # save_dir: directory where model checkpoints are saved

    print("=========================== Testing ===========================")
    initial_files = len(batch.full_list)
    overall_correct = 0
    overall_total = 0
    overall_correct_tok = 0
    overall_correct_id = 0
    overall_total_tok = 0
    overall_total_id = 0

    batch.next_epoch(args.batch_size)  # initialize batch data
    batch.initialize_states(args.num_layers, args.hidden_size)
    while (batch.epoch_end == 0):
        # get next minibatch
        batch.get_minibatch(0)
        # get inputs and targets from batch object
        inputs_np = np.array(
            [vocab.word_list_to_idx_list(line) for line in batch.batch_in],
            dtype=int)
        targets_np = np.array(
            [vocab.word_list_to_idx_list(line) for line in batch.batch_out],
            dtype=int)
        inputs = Variable(torch.LongTensor(inputs_np)).cuda()
        targets = Variable(torch.LongTensor(targets_np)).cuda()

        # run model to get outputs
        outputs, states = model(inputs, batch.states)
        input_line = vocab.idx_list_to_word_list(inputs_np[0])
        target_line = vocab.idx_list_to_word_list(targets_np[0])
        output_line = vocab.idx_list_to_word_list(
            outputs[0].max(1)[1].cpu().data.numpy())
        for tup in zip(input_line, target_line, output_line):
            print(' '.join(list(tup)))

        batch.states = detach(states)
        targets, outputs = pack_padding(targets, outputs)
        outputs = outputs.view(targets.size(0), -1).max(1)[1]
        t = targets.squeeze().data.cpu().numpy()
        o = outputs.data.cpu().numpy()
        correct = np.array(t == o, dtype=int)
        # bucket by vocabulary index range: indices below 86 are counted as
        # language tokens, indices from 1086 upward as identifiers
        correct_tok = np.array(np.multiply(correct, (t < 86)), dtype=int)
        total_tok = np.array(t < 86, dtype=int)
        correct_id = np.array(np.multiply(correct, (t >= 1086)), dtype=int)
        total_id = np.array(t >= 1086, dtype=int)
        # for tup in zip(list(correct_tok),list(total_tok),list(correct_id),list(total_id)):
        #     a,b,c,d = tup
        #     tup = (a,b,c,d)
        #     print(' '.join([str(x) for x in list(tup)]))
        total = targets.size(0)

        overall_correct += correct.sum()
        overall_correct_tok += correct_tok.sum()
        overall_correct_id += correct_id.sum()
        overall_total += total
        overall_total_tok += total_tok.sum()
        overall_total_id += total_id.sum()
        # print(targets.cpu().data==outputs.cpu().data)
        print('Overall: %d/%d, %1.3f' %
              (correct.sum(), total, correct.sum() / total * 100.0))
        print('Tokens : %d/%d, %1.3f' %
              (correct_tok.sum(), total_tok.sum(),
               (correct_tok.sum() / total_tok.sum() * 100.0)))
        print('IDs    : %d/%d, %1.3f' %
              (correct_id.sum(), total_id.sum(),
               (correct_id.sum() / total_id.sum() * 100.0)))
        print('\n\n')
        batch.next_minibatch()
        print("%d/%d files read" %
              (initial_files - len(batch.file_list), initial_files))
    print("Final..........\n")
    print('Overall: %d/%d, %1.3f' %
          (overall_correct, overall_total,
           (overall_correct / overall_total * 100.0)))
    print('Tokens : %d/%d, %1.3f' %
          (overall_correct_tok, overall_total_tok,
           (overall_correct_tok / overall_total_tok * 100.0)))
    print('IDs    : %d/%d, %1.3f' %
          (overall_correct_id, overall_total_id,
           (overall_correct_id / overall_total_id * 100.0)))
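
Example #3 splits accuracy into buckets using the hard-coded index ranges `t < 86` and `t >= 1086`. Those boundaries are specific to this vocabulary; the sketch below only names them as placeholder constants to make the bucketing easier to follow:

import numpy as np

# assumed vocabulary layout (placeholders, not taken from the source):
# indices below NUM_LANG_TOKENS are language tokens, indices from ID_START
# upward are identifier entries
NUM_LANG_TOKENS = 86
ID_START = 1086

def bucketed_accuracy(t, o):
    # t, o: 1-D integer arrays of target / predicted vocabulary indices
    correct = (t == o)
    tok_mask = t < NUM_LANG_TOKENS
    id_mask = t >= ID_START
    return {
        'overall': (correct.sum(), t.size),
        'tokens': ((correct & tok_mask).sum(), tok_mask.sum()),
        'ids': ((correct & id_mask).sum(), id_mask.sum()),
    }
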
Example #4
def train(args, batch, vocab, model, save_dir):

    # set loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    step = 99999  # placeholder; the real steps-per-epoch is unknown until after the first pass
    total_files = len(batch.full_list)

    print("Total number of files to read: %d" % total_files)
    for epoch in range(args.startfrom, args.num_epochs):
        print("===================== Epoch %d =====================" %
              (epoch + args.startfrom))

        batch.next_epoch(args.batch_size)  # initialize batch data
        batch.initialize_states(args.num_layers, args.hidden_size)
        total_steps = step
        step = 0
        while True:
            step += 1
            # update the minibatch inputs / outputs
            model.zero_grad()
            # get next minibatch
            batch.get_minibatch(0)
            batch.next_minibatch()

            if batch.epoch_end == 1:
                break

            # make sure all lengths of batch_in and batch_out are same
            inputs_np = np.array([
                vocab.word_list_to_idx_list(line)
                for i, line in enumerate(batch.batch_in)
            ],
                                 dtype=int)
            targets_np = np.array([
                vocab.word_list_to_idx_list(line) for line in batch.batch_out
            ],
                                  dtype=int)

            # whether to use teacher forcing
            if np.random.random_sample(size=1)[0] < (epoch * 1.0 /
                                                     args.num_epochs):
                teacher_force = True
            else:
                teacher_force = False

            inputs = Variable(torch.LongTensor(inputs_np)).cuda()
            targets = Variable(torch.LongTensor(targets_np)).cuda()

            outputs = model(inputs, targets)

            targets, outputs = pack_padding(targets, outputs)
            loss = criterion(outputs, targets.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.5)
            optimizer.step()

            print(
                'Epoch [%d/%d], Files: [%d/%d], Loss: %.3f, Steps: [%d/%d], Perplexity: %5.2f'
                % (epoch + args.startfrom, args.num_epochs,
                   total_files - len(batch.file_list), total_files,
                   loss.data[0], step, total_steps, np.exp(loss.data[0])))
            if step % 100 == 0:
                torch.save(obj=model,
                           f=os.path.join(save_dir, 'saved_model_%d_epochs.pckl'
                                          % (epoch + args.startfrom)))
        # save model at end of each epoch
        torch.save(obj=model,
                   f=os.path.join(save_dir, 'saved_model_%d_epochs.pckl'
                                  % (epoch + args.startfrom)))
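
The teacher-forcing decision in Example #4 compares a uniform random draw against `epoch / num_epochs`, so the probability of teacher forcing grows linearly over training. In isolation:

import numpy as np

def use_teacher_forcing(epoch, num_epochs):
    # probability of feeding ground-truth tokens rises linearly with the
    # epoch index: ~0 in the first epoch, approaching 1 near the last one
    return np.random.random_sample() < (epoch * 1.0 / num_epochs)

Note that in Example #4 the resulting flag is never passed to `model(inputs, targets)`, so it appears to have no effect there; Example #6 computes the same flag but its teacher-forcing branch is commented out, so it always feeds the ground truth.
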
Example #5
def test(args, batch, vocab, model, save_dir):

    # split model into encoder and decoder
    encoder, decoder = model

    # set loss function
    criterion = nn.NLLLoss()
    step = 99999  # placeholder; the real steps-per-epoch is unknown until after the first pass
    total_files = len(batch.full_list)
    print("Total number of files to read: %d" % total_files)
    for epoch in range(1):
        batch.next_epoch(args.batch_size)  # initialize batch data
        batch.initialize_states(args.num_layers, args.hidden_size)
        total_steps = step
        step = 0
        while (batch.epoch_end == 0):
            step += 1
            total = 0
            correct = 0
            # get next minibatch
            batch.get_minibatch(0)
            batch.next_minibatch()

            if batch.epoch_end == 1:
                break

            # build per-line OOV maps, reset for every minibatch so that the
            # per-line indices below stay aligned with the current batch
            batch.oov2idx_list = []
            batch.idx2oov_list = []
            for i in range(len(batch.batch_in)):
                in_line = batch.batch_in[i]
                out_line = batch.batch_out[i]
                oov2idx, idx2oov = vocab.create_oov_list(
                    in_line + out_line, batch.max_oovs)
                batch.oov2idx_list.append(oov2idx)
                batch.idx2oov_list.append(idx2oov)

            # make sure all lengths of batch_in and batch_out are same
            inputs_oov_np = np.array([
                vocab.word_list_to_idx_list(line, batch.oov2idx_list[i])
                for i, line in enumerate(batch.batch_in)
            ],
                                     dtype=int)
            targets_oov_np = np.array([
                vocab.word_list_to_idx_list(line, batch.oov2idx_list[i])
                for i, line in enumerate(batch.batch_out)
            ],
                                      dtype=int)
            inputs_unk_np = np.array([
                vocab.word_list_to_idx_list(line)
                for i, line in enumerate(batch.batch_in)
            ],
                                     dtype=int)
            targets_unk_np = np.array([
                vocab.word_list_to_idx_list(line)
                for i, line in enumerate(batch.batch_out)
            ],
                                      dtype=int)

            teacher_force = False

            inputs = Variable(torch.LongTensor(inputs_unk_np)).cuda()
            targets = Variable(torch.LongTensor(targets_unk_np)).cuda()

            # run model to get outputs
            encoded, _ = encoder(inputs)

            decoder_in, s, w = decoder_initial(inputs.size(0),
                                               vocab.w2i['<SOS>'])
            #             decoder_in = targets[:,0]

            for j in range(targets.size(1)):
                if j == 0:
                    outputs, s, w = decoder(input_idx=decoder_in,
                                            encoded=encoded,
                                            encoded_idx=inputs_oov_np,
                                            prev_state=s,
                                            weighted=w,
                                            order=j)
                else:
                    tmp_out, s, w = decoder(input_idx=decoder_in,
                                            encoded=encoded,
                                            encoded_idx=inputs_oov_np,
                                            prev_state=s,
                                            weighted=w,
                                            order=j)
                    outputs = torch.cat([outputs, tmp_out], dim=1)

                if teacher_force:
                    decoder_in = targets[:, j]  # train with ground truth
                else:
                    decoder_in = outputs[:, -1, :].max(1)[1].squeeze()

            targets = Variable(torch.LongTensor(targets_oov_np)).cuda()
            out = outputs.max(2)[1]  # purely for printing purposes
            # print("======================= Inputs  =======================")
            # print(batch.batch_in[0])
            # print('\n')
            # print(' '.join([str(x) for x in inputs_oov_np[0]]))
            # print(' '.join(vocab.idx_list_to_word_list(inputs_oov_np[0],
            #                                            batch.idx2oov_list[0])))
            # print("======================= Targets =======================")
            # print(batch.idx2oov_list[0])
            # print(batch.oov2idx_list[0])
            # print(batch.batch_out[0])
            # print('\n')
            # print(' '.join([str(x) for x in targets_oov_np[0]]))
            # print(' '.join(vocab.idx_list_to_word_list(targets_oov_np[0],
            #                                            batch.idx2oov_list[0])))
            # print("======================= Predict =======================")
            # print(' '.join([str(x) for x in out[0].data.cpu().numpy()]))
            # print(' '.join(vocab.idx_list_to_word_list(out[0].data.cpu().numpy(),
            #                                            batch.idx2oov_list[0])))
            # print('\n')

            targets, outputs = pack_padding(targets, outputs)
            loss = criterion(torch.log(outputs), targets.view(-1))

            out = outputs.max(1)[1].unsqueeze(1)
            total_unk = (
                targets == vocab.w2i['<UNK>']).data.cpu().numpy().sum()
            correct_unk = ((targets == vocab.w2i['<UNK>']) *
                           (targets == out)).data.cpu().numpy().sum()
            total += targets.size(0) - total_unk
            correct += (targets == out).data.cpu().numpy().sum() - correct_unk

            print(
                'Epoch [%d/%d], Files: [%d/%d],  Accuracy: %.3f, Perplexity: %5.2f'
                % (epoch + args.startfrom, args.num_epochs,
                   total_files - len(batch.file_list), total_files,
                   correct / total, np.exp(loss.data[0])))
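
`decoder_initial` is another helper that is not shown. From its call site it appears to return a batch of `<SOS>` start tokens plus empty initial decoder state and attention/copy context; the sketch below is a guess under exactly those assumptions (the decoder is assumed to accept `None` for both on the first step):

import torch
from torch.autograd import Variable

def decoder_initial(batch_size, sos_idx):
    # first decoder input: one <SOS> index per sequence in the batch
    decoder_in = Variable(torch.LongTensor([sos_idx] * batch_size)).cuda()
    # no previous decoder state and no previous weighted (copy/attention)
    # context before the first step
    return decoder_in, None, None
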
Example #6
def train(args, batch, vocab, model, save_dir):

    # split model into encoder and decoder
    encoder, decoder = model

    # set loss function and optimizer
    criterion = nn.NLLLoss()
    opt_e = torch.optim.Adam(encoder.parameters(), lr=args.lr)
    opt_d = torch.optim.Adam(decoder.parameters(), lr=args.lr)

    step = 99999  # placeholder; the real steps-per-epoch is unknown until after the first pass
    # batch.full_list = batch.full_list[-1*args.batch_size:]
    total_files = len(batch.full_list)
    print("Total number of files to read: %d" % total_files)
    for epoch in range(args.startfrom, args.num_epochs):
        print("===================== Epoch %d =====================" % (epoch))
        batch.seq_length = args.seq_length + epoch * 3 - 30
        random.shuffle(batch.full_list)
        batch.next_epoch(args.batch_size)  # initialize batch data
        batch.initialize_states(args.num_layers, args.hidden_size)
        total_steps = step
        step = 0
        while (batch.epoch_end == 0):
            step += 1

            # print(' '.join([str(len(x)) for x in batch.batch_data]))
            # update the minibatch inputs / outputs
            encoder.zero_grad()
            decoder.zero_grad()
            # get next minibatch
            batch.get_minibatch(0)

            batch.next_minibatch()

            if batch.epoch_end == 1:
                break

            # get inputs and targets from batch object
            batch.oov2idx_list = [dict() for i in range(len(batch.batch_in))]
            batch.idx2oov_list = [dict() for i in range(len(batch.batch_in))]

            for i in range(len(batch.batch_in)):
                in_line = batch.batch_in[i]
                out_line = batch.batch_out[i]
                oov2idx, idx2oov = vocab.create_oov_list(
                    in_line + out_line, batch.max_oovs)
                batch.oov2idx_list[i] = oov2idx
                batch.idx2oov_list[i] = idx2oov

            # make sure all lengths of batch_in and batch_out are same
            inputs_oov_np = np.array([
                vocab.word_list_to_idx_list(line, batch.oov2idx_list[i])
                for i, line in enumerate(batch.batch_in)
            ],
                                     dtype=int)
            targets_oov_np = np.array([
                vocab.word_list_to_idx_list(line, batch.oov2idx_list[i])
                for i, line in enumerate(batch.batch_out)
            ],
                                      dtype=int)
            inputs_unk_np = np.array([
                vocab.word_list_to_idx_list(line)
                for i, line in enumerate(batch.batch_in)
            ],
                                     dtype=int)
            targets_unk_np = np.array([
                vocab.word_list_to_idx_list(line) for line in batch.batch_out
            ],
                                      dtype=int)

            # whether to use teacher forcing
            if np.random.random_sample(size=1)[0] < (epoch * 1.0 /
                                                     args.num_epochs):
                teacher_force = True
            else:
                teacher_force = False

            inputs = Variable(torch.LongTensor(inputs_unk_np)).cuda()
            targets = Variable(torch.LongTensor(targets_unk_np)).cuda()

            # run model to get outputs
            encoded, _ = encoder(inputs)

            decoder_in, s, w = decoder_initial(inputs.size(0),
                                               vocab.w2i['<SOS>'])
            #             decoder_in = targets[:,0]
            for j in range(targets.size(1)):
                if j == 0:
                    outputs, s, w = decoder(input_idx=decoder_in,
                                            encoded=encoded,
                                            encoded_idx=inputs_oov_np,
                                            prev_state=s,
                                            weighted=w,
                                            order=j)
                else:
                    tmp_out, s, w = decoder(input_idx=decoder_in,
                                            encoded=encoded,
                                            encoded_idx=inputs_oov_np,
                                            prev_state=s,
                                            weighted=w,
                                            order=j)
                    outputs = torch.cat([outputs, tmp_out], dim=1)

                # if teacher_force:
                #     decoder_in = out[:,-1,:].max(1)[1].squeeze()
                # else:
                decoder_in = targets[:, j]  # train with ground truth

            targets = Variable(torch.LongTensor(targets_oov_np)).cuda()
            targets, outputs = pack_padding(targets, outputs)

            loss = criterion(torch.log(outputs), targets.view(-1))

            loss.backward()

            torch.nn.utils.clip_grad_norm(encoder.parameters(), 0.5)
            torch.nn.utils.clip_grad_norm(decoder.parameters(), 0.5)

            opt_e.step()
            opt_d.step()

            print(
                'Epoch [%d/%d], Files: [%d/%d],  Loss: %.3f, Steps: [%d/%d], Perplexity: %5.2f'
                % (epoch, args.num_epochs, total_files - len(batch.file_list),
                   total_files, loss.data[0], step, total_steps,
                   np.exp(loss.data[0])))
            # save intermediate model
            if step % 100 == 0:
                print("Saving model at %d steps..." % step)
                torch.save(obj=encoder,
                           f=os.path.join(save_dir,
                                          'saved_encoder_%d_epochs.pckl' % epoch))
                torch.save(obj=decoder,
                           f=os.path.join(save_dir,
                                          'saved_decoder_%d_epochs.pckl' % epoch))
                print("Model saved.\n")
        # save model at end of each epoch
        torch.save(obj=encoder,
                   f=os.path.join(save_dir,
                                  'saved_encoder_%d_epochs.pckl' % epoch))
        torch.save(obj=decoder,
                   f=os.path.join(save_dir,
                                  'saved_decoder_%d_epochs.pckl' % epoch))
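
All six examples use pre-0.4 PyTorch idioms (`Variable(...).cuda()`, `loss.data[0]`, `torch.nn.utils.clip_grad_norm`). The self-contained toy step below maps those calls to their current equivalents (`torch.as_tensor`, `loss.item()`, `clip_grad_norm_`); the tiny stand-in model exists only to make the snippet runnable and is not the repository's model:

import numpy as np
import torch
import torch.nn as nn

# stand-in model and data, just to make the example self-contained
model = nn.Sequential(nn.Embedding(100, 16), nn.Flatten(), nn.Linear(16 * 5, 100))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
inputs_np = np.random.randint(0, 100, size=(4, 5))
targets_np = np.random.randint(0, 100, size=(4,))

# torch.as_tensor(...) replaces Variable(torch.LongTensor(...)).cuda();
# add device='cuda' when a GPU is available
inputs = torch.as_tensor(inputs_np, dtype=torch.long)
targets = torch.as_tensor(targets_np, dtype=torch.long)

model.zero_grad()
loss = criterion(model(inputs), targets)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)  # in-place variant since 0.4
optimizer.step()
print('loss %.3f, perplexity %5.2f' % (loss.item(), np.exp(loss.item())))  # .item() replaces .data[0]
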