예제 #1
0
def test():
    """Load the saved baseline BIMPM weights and print the test accuracy.

    The configuration object is imported locally, extended with the
    vocabulary/label sizes derived from the SNLI data, and then used to
    build the model before evaluation.
    """
    from args import conf

    data = SNLI(conf)

    # Fields the model constructor reads from the config but that are only
    # known once the dataset (and its vocabularies) has been built.
    derived_fields = (
        ('char_vocab_size', len(data.char_vocab)),
        ('word_vocab_size', len(data.TEXT.vocab)),
        ('class_size', len(data.LABEL.vocab)),
        ('max_word_len', data.max_word_len),
    )
    for field_name, field_value in derived_fields:
        setattr(conf, field_name, field_value)

    model = BIMPM(conf, data)
    state_dict = torch.load('results/baseline.pt')
    model.load_state_dict(state_dict)
    model = model.to(conf.device)

    # evaluate() returns a pair; only the second element (accuracy) is shown.
    _unused, accuracy = evaluate(model, conf, data)
    print(f'test acc: {accuracy:.3f}')
예제 #2
0
파일: rawr.py 프로젝트: ihsgnef/pathologies
def test():
    """Smoke-test input reduction on a single dev batch.

    Loads the baseline BIMPM model, records its predictions on the first
    dev batch, reduces the hypotheses with ``get_rawr`` and prints whether
    the model's predictions on the reduced hypotheses all match the
    predictions on the original ones.
    """
    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len

    model = BIMPM(conf, data)
    model.load_state_dict(torch.load('results/baseline.pt'))
    # Enable gradients on the word embeddings before calling get_rawr.
    # NOTE(review): presumably get_rawr ranks words by embedding gradient --
    # confirm against its implementation.
    model.word_emb.weight.requires_grad = True
    model = model.to(conf.device).eval()

    batch = next(iter(data.dev_iter))

    # Predictions on the unmodified batch.
    output = F.softmax(model(batch.premise, batch.hypothesis), 1)
    original_scores, original_predictions = torch.max(output, 1)
    original_scores = original_scores.detach().cpu().numpy()
    original_predictions = original_predictions.detach().cpu().numpy()

    reduced, removed_indices = get_rawr(
        model,
        batch,
        max_beam_size=rawr_conf.max_beam_size,
        conf_threshold=rawr_conf.conf_threshold,
        p_not_h=False,
    )

    # Take the first reduction of every example and pad them into one batch.
    reduced_hypothesis = padding_tensor(
        [torch.LongTensor(r[0]) for r in reduced])
    reduced_hypothesis = reduced_hypothesis.to(conf.device)

    # Score the *reduced* hypotheses. The original code passed
    # batch.hypothesis here, which made the comparison below trivially true.
    output = F.softmax(model(batch.premise, reduced_hypothesis), 1)
    reduced_scores, reduced_predictions = torch.max(output, 1)
    reduced_scores = reduced_scores.detach().cpu().numpy()
    reduced_predictions = reduced_predictions.detach().cpu().numpy()

    print(all(reduced_predictions == original_predictions))
예제 #3
0
파일: rawr.py 프로젝트: ihsgnef/pathologies
def main():
    """Generate reduced ("rawr") examples for one SNLI fold and pickle them.

    Command-line arguments:
        --fold      which iterator to process: 'train' or 'dev' (required).
        --baseline  path of the model state dict to load.
        --pnoth     reduce the premise instead of the hypothesis.
        --truth     parsed but not used anywhere in this function.

    For every example the original inputs/prediction and every reduced
    variant returned by ``get_rawr`` (re-scored by the model) are collected.
    The collected records are written to the output directory in chunks,
    one pickle file per 1000 batches plus one for the remainder.
    """
    parser = argparse.ArgumentParser()
    # Restricting the choices turns an invalid fold into an immediate usage
    # error instead of a KeyError after the data and model have been loaded.
    parser.add_argument('--fold', required=True, choices=['train', 'dev'])
    parser.add_argument('--baseline', default='results/baseline.pt')
    parser.add_argument('--pnoth',
                        default=False,
                        action='store_true',
                        help='reduce premise instead of hypothesis')
    parser.add_argument('--truth',
                        default=False,
                        action='store_true',
                        help='use label instead of prediction as target')
    args = parser.parse_args()

    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len
    q_vocab = data.TEXT.vocab.itos
    a_vocab = data.LABEL.vocab.itos

    out_dir = prepare_output_dir(conf, 'results', 'rawr')
    print('Generating [{}] rawr data from [{}].'.format(
        args.fold, args.baseline))
    print(out_dir)

    model = BIMPM(conf, data)
    model.load_state_dict(torch.load(args.baseline))
    model.word_emb.weight.requires_grad = True
    model.to(conf.device)

    datasets = {'train': data.train_iter, 'dev': data.dev_iter}
    iterator = datasets[args.fold]
    num_batches = len(iterator)

    which_reduced = 'premise' if args.pnoth else 'hypothesis'
    fname = 'rawr.{}.{}.pkl'.format(args.fold, which_reduced)

    def dump(records, suffix):
        # Write one chunk of records; the batch index is the file suffix.
        out_path = os.path.join(out_dir, '{}.{}'.format(fname, suffix))
        with open(out_path, 'wb') as f:
            pickle.dump(records, f)

    checkpoint = []
    for batch_i, batch in enumerate(tqdm(iterator)):
        # The train iterator may repeat forever, so stop after one pass.
        # batch_i is 0-based: index == num_batches is already the first
        # batch of the second pass (the original `>` processed it again).
        if batch_i >= num_batches:
            break
        batch_size = batch.hypothesis.shape[0]
        model.eval()
        output = F.softmax(model(batch.premise, batch.hypothesis), 1)
        original_scores, original_predictions = torch.max(output, 1)
        original_scores = original_scores.detach().cpu().numpy()
        original_predictions = original_predictions.detach().cpu().numpy()
        batch_cpu = Batch(batch.premise.data.cpu(),
                          batch.hypothesis.data.cpu(), batch.label.data.cpu())

        reduced, removed_indices = get_rawr(
            model,
            batch,
            max_beam_size=rawr_conf.max_beam_size,
            conf_threshold=rawr_conf.conf_threshold,
            p_not_h=args.pnoth)
        for i in range(batch_size):
            label = batch_cpu.label[i]
            label_readable = a_vocab[label]
            og = {
                'premise': batch_cpu.premise[i],
                'hypothesis': batch_cpu.hypothesis[i],
                'premise_readable': to_text(batch_cpu.premise[i], q_vocab),
                'hypothesis_readable': to_text(batch_cpu.hypothesis[i],
                                               q_vocab),
                'prediction': original_predictions[i],
                'prediction_readable': a_vocab[original_predictions[i]],
                'score': original_scores[i],
                'label': label,
                'label_readable': label_readable,
            }
            reduced_records = []
            checkpoint.append({'original': og, 'reduced': reduced_records})

            # s1 is the sentence left untouched; s2 iterates over the
            # reduced versions of the other sentence.
            s1 = batch.hypothesis[i] if args.pnoth else batch.premise[i]
            s1 = s1.to(conf.device)
            for j, s2 in enumerate(reduced[i]):
                s2 = torch.LongTensor(s2).to(conf.device)
                # Re-assert eval mode before every forward pass.
                # NOTE(review): presumably get_rawr may change the model's
                # mode -- confirm before removing this call.
                model.eval()
                if args.pnoth:
                    output = model(s2.unsqueeze(0), s1.unsqueeze(0))
                else:
                    output = model(s1.unsqueeze(0), s2.unsqueeze(0))
                output = F.softmax(output, 1)
                pred_scores, pred = torch.max(output, 1)
                pred = pred.detach().cpu().numpy()[0]
                pred_scores = pred_scores.detach().cpu().numpy()[0]
                if args.pnoth:
                    hypo, prem = s1.cpu(), s2.cpu()
                else:
                    prem, hypo = s1.cpu(), s2.cpu()
                reduced_records.append({
                    'premise': prem,
                    'hypothesis': hypo,
                    'premise_readable': to_text(prem, q_vocab),
                    'hypothesis_readable': to_text(hypo, q_vocab),
                    'prediction': pred,
                    'prediction_readable': a_vocab[pred],
                    'score': pred_scores,
                    'label': label,
                    'label_readable': label_readable,
                    'removed_indices': removed_indices[i][j],
                    'which_reduced': which_reduced,
                })
        # Flush periodically so the in-memory list stays bounded.
        if batch_i % 1000 == 0 and batch_i > 0:
            dump(checkpoint, batch_i)
            checkpoint = []

    # Write whatever is left after the last periodic flush. batch_i is only
    # read when at least one batch was processed (checkpoint is non-empty).
    if checkpoint:
        dump(checkpoint, batch_i)
예제 #4
0
def train(conf, data):
    """Train a BIMPM model and return the best one seen on the dev set.

    Args:
        conf: configuration object; this function reads ``device``, ``lr``,
            ``epoch``, ``max_sent_len``, ``use_char_emb`` and ``print_freq``.
        data: dataset object providing ``train_iter`` and ``characterize``;
            it is also passed to ``BIMPM`` and ``evaluate``.

    Returns:
        A deep copy of the model taken at the evaluation step with the
        highest dev accuracy. If no evaluation step ever improved on the
        initial accuracy of 0 (or none ran), the final model is returned
        instead of raising ``UnboundLocalError``.
    """
    import os

    model = BIMPM(conf, data)
    model = model.to(conf.device)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=conf.lr)
    criterion = nn.CrossEntropyLoss()

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0
    best_model = None

    ckp_path = 'results/baseline_checkpoints/baseline_epoch_{}.pt'
    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(ckp_path), exist_ok=True)

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        # Stop once the iterator reports the configured number of epochs.
        if present_epoch == conf.epoch:
            break
        if present_epoch > last_epoch:
            # First batch of a new epoch: announce it and save the weights.
            print('epoch:', present_epoch + 1)
            torch.save(model.state_dict(), ckp_path.format(present_epoch + 1))
        last_epoch = present_epoch

        s1, s2 = batch.premise, batch.hypothesis

        # limit the lengths of input sentences up to max_sent_len
        if conf.max_sent_len >= 0:
            if s1.size()[1] > conf.max_sent_len:
                s1 = s1[:, :conf.max_sent_len]
            if s2.size()[1] > conf.max_sent_len:
                s2 = s2[:, :conf.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        # NOTE(review): kwargs (including the character tensors built here)
        # is never passed to the model below, so this work is currently
        # unused. Kept as-is because data.characterize is not visible here;
        # confirm it has no side effects before deleting.
        if conf.use_char_emb:
            char_p = torch.LongTensor(data.characterize(s1))
            char_h = torch.LongTensor(data.characterize(s2))

            char_p = char_p.to(conf.device)
            char_h = char_h.to(conf.device)

            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = model(s1, s2)

        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.data.item()
        batch_loss.backward()
        optimizer.step()

        if (i + 1) % conf.print_freq == 0:
            dev_loss, dev_acc = evaluate(model, conf, data, mode='dev')
            test_loss, test_acc = evaluate(model, conf, data)

            # Adjacent f-strings instead of backslash continuations inside
            # the literal, which leaked source indentation into the output.
            print(f'train loss: {loss:.3f} / '
                  f'dev loss: {dev_loss:.3f} / '
                  f'test loss: {test_loss:.3f} / '
                  f'dev acc: {dev_acc:.3f} / '
                  f'test acc: {test_acc:.3f}')

            if dev_acc > max_dev_acc:
                max_dev_acc = dev_acc
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)

            # Restart loss accumulation and return to training mode after
            # the evaluation calls.
            loss = 0
            model.train()

    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')
    if best_model is None:
        # No evaluation step produced an improvement; fall back to the
        # model as it stands at the end of training.
        best_model = model
    return best_model