Esempio n. 1
0
def test():
    """Load the saved baseline checkpoint, evaluate it and print the accuracy.

    The SNLI data is built from the project configuration, the vocabulary
    sizes and maximum word length taken from the data are stored on the
    configuration, and a BIMPM model is restored from
    ``results/baseline.pt`` before being passed to ``evaluate``.
    """
    from args import conf

    snli = SNLI(conf)

    # Store the dataset-derived sizes on the config before building the model.
    conf.char_vocab_size = len(snli.char_vocab)
    conf.word_vocab_size = len(snli.TEXT.vocab)
    conf.class_size = len(snli.LABEL.vocab)
    conf.max_word_len = snli.max_word_len

    net = BIMPM(conf, snli)
    weights = torch.load('results/baseline.pt')
    net.load_state_dict(weights)
    net = net.to(conf.device)

    # evaluate returns a pair; only the second element (accuracy) is reported.
    _unused, acc = evaluate(net, conf, snli)
    print(f'test acc: {acc:.3f}')
Esempio n. 2
0
def test():
    """Check on one dev batch that reduced hypotheses keep the prediction.

    Restores the baseline BIMPM model, predicts on the first dev batch, runs
    ``get_rawr`` to reduce the hypotheses (``p_not_h=False``), re-predicts on
    the first reduced hypothesis of every example and prints whether all
    predictions are unchanged.

    Fix over the previous version: the second forward pass now receives the
    reduced hypotheses. It used to be fed ``batch.hypothesis`` again, which
    made the printed comparison trivially ``True``.
    """
    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len

    model = BIMPM(conf, data)
    model.load_state_dict(torch.load('results/baseline.pt'))
    # NOTE(review): presumably get_rawr needs gradients w.r.t. the word
    # embeddings - confirm against its implementation.
    model.word_emb.weight.requires_grad = True
    model = model.to(conf.device).eval()

    batch = next(iter(data.dev_iter))

    # Predictions on the unmodified batch.
    output = F.softmax(model(batch.premise, batch.hypothesis), 1)
    _, original_predictions = torch.max(output, 1)
    original_predictions = original_predictions.detach().cpu().numpy()

    reduced, _ = get_rawr(
        model,
        batch,
        max_beam_size=rawr_conf.max_beam_size,
        conf_threshold=rawr_conf.conf_threshold,
        p_not_h=False,
    )

    # get_rawr is handed the model; re-assert eval mode in case it changed it.
    model.eval()

    # Keep the first reduced hypothesis of each example and pad to a batch.
    reduced_hypothesis = padding_tensor(
        [torch.LongTensor(r[0]) for r in reduced])
    reduced_hypothesis = reduced_hypothesis.to(conf.device)

    # Predictions on (original premise, reduced hypothesis).
    output = F.softmax(model(batch.premise, reduced_hypothesis), 1)
    _, reduced_predictions = torch.max(output, 1)
    reduced_predictions = reduced_predictions.detach().cpu().numpy()

    print(all(reduced_predictions == original_predictions))
Esempio n. 3
0
def main():
    """Fine-tune a BIMPM baseline with an additional entropy objective.

    Every pass of the outer loop runs up to ``tune_conf.n_ent`` steps that
    maximise the output entropy on the pickled "entropy" examples (the loss
    is ``-tune_conf.gamma * entropy``), followed by up to ``tune_conf.n_mle``
    cross-entropy steps on the regular SNLI training iterator. Messages are
    logged to stdout and to ``output.log`` in the output directory. A
    checkpoint is written at every evaluation and the model with the best dev
    accuracy is saved as ``best_model.pt``. Training stops once ``epoch``
    exceeds 40.

    Fixes over the previous version:

    * ``args.root_dir`` was read although no such option was declared, so the
      script failed with ``AttributeError`` before doing any work. A
      ``--root-dir`` option is now declared.
    * Running sums used ``.data.cpu().numpy()[0]`` on scalar tensors, which
      raises ``IndexError`` for 0-dim tensors; ``.item()`` is used instead.
    """
    from args import conf, tune_conf
    parser = argparse.ArgumentParser()
    parser.add_argument('--baseline', default='results/baseline.pt')
    # NOTE(review): --ent-train / --ent-dev are parsed but never read; the
    # data paths below come from tune_conf.ent_train / tune_conf.ent_dev.
    # Confirm which source is intended.
    parser.add_argument(
        '--ent-train',
        default='/scratch0/shifeng/rawr/new_snli/rawr.train.pkl')
    parser.add_argument('--ent-dev',
                        default='/scratch0/shifeng/rawr/new_snli/rawr.dev.pkl')
    # NOTE(review): the default 'results' is an assumption - confirm the
    # intended root directory for prepare_output_dir.
    parser.add_argument('--root-dir', default='results')
    args = parser.parse_args()

    out_dir = prepare_output_dir(args, args.root_dir)

    # Log everything to the file, INFO and above to stdout.
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(out_dir, 'output.log'))
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(message)s',
                                  datefmt='%m/%d/%Y %I:%M:%S')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    log.addHandler(fh)
    log.addHandler(ch)
    log.info('===== {} ====='.format(out_dir))

    # Load regular data and store dataset-derived sizes on the config.
    log.info('loading regular training data')
    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len

    log.info('loading entropy dev data {}'.format(tune_conf.ent_dev))
    with open(tune_conf.ent_dev, 'rb') as f:
        ent_dev = pickle.load(f)
    # The pickle may hold a list of lists; flatten it to a list of examples.
    if isinstance(ent_dev[0], list):
        ent_dev = list(itertools.chain(*ent_dev))
    log.info('{} entropy dev examples'.format(len(ent_dev)))
    ent_dev = [[
        x['data']['premise'], x['data']['hypothesis'], x['data']['label']
    ] for x in ent_dev]

    log.info('loading entropy training data {}'.format(tune_conf.ent_train))
    with open(tune_conf.ent_train, 'rb') as f:
        ent_train = pickle.load(f)
    if isinstance(ent_train[0], list):
        ent_train = list(itertools.chain(*ent_train))
    log.info('{} entropy training examples'.format(len(ent_train)))
    ent_train = [[
        x['data']['premise'], x['data']['hypothesis'], x['data']['label']
    ] for x in ent_train]

    train_ent_batches = batchify(ent_train, tune_conf.batch_size)
    log.info('{} entropy training batches'.format(len(train_ent_batches)))

    log.info('loading model from {}'.format(args.baseline))
    model = BIMPM(conf, data)
    model.load_state_dict(torch.load(args.baseline))
    # model.word_emb.weight.requires_grad = True
    model.cuda(conf.gpu)

    # Two Adam optimizers over the same trainable parameters, one per
    # objective, each with its own learning rate.
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = optim.Adam(parameters, lr=tune_conf.lr)
    ent_optimizer = optim.Adam(parameters, lr=tune_conf.ent_lr)
    criterion = nn.CrossEntropyLoss()

    init_loss, init_acc = evaluate(model, data.dev_iter)
    log.info("initial loss {:.4f} accuracy {:.4f}".format(init_loss, init_acc))
    best_acc = init_acc

    dev_ent_batches = batchify(ent_dev, tune_conf.batch_size)
    init_ent, init_ent_acc = evaluate_ent(model, dev_ent_batches)
    log.info("initial entropy {:.4f} ent_acc {:.4f}".format(
        init_ent, init_ent_acc))

    epoch = 0
    i_ent, i_mle = 0, 0  # number of examples
    train_loss, train_ent = 0, 0
    train_mle_iter = iter(data.train_iter)
    train_ent_iter = iter(train_ent_batches)
    while True:
        model.train()

        # Entropy steps on reduced examples.
        for _ in range(tune_conf.n_ent):
            try:
                prem, hypo, label = next(train_ent_iter)
            except StopIteration:
                # Entropy data exhausted: reshuffle, restart and reset stats.
                random.shuffle(train_ent_batches)
                train_ent_iter = iter(train_ent_batches)
                i_ent = 0
                train_ent = 0
                break
            output = forward(model, prem, hypo, conf.max_sent_len)
            output = F.softmax(output, 1)
            ent = entropy(output).sum()
            train_ent += ent.item()
            # Minimising the negated entropy maximises the entropy.
            loss = -tune_conf.gamma * ent
            ent_optimizer.zero_grad()
            loss.backward()
            ent_optimizer.step()
            i_ent += prem.shape[0]

        # Cross-entropy steps on the regular training data.
        end_of_epoch = False
        for _ in range(tune_conf.n_mle):
            # NOTE(review): i_mle is incremented by batch.premise.shape[0]
            # while len(data.train_iter) presumably counts batches; confirm
            # that this end-of-epoch test compares like with like.
            if i_mle >= len(data.train_iter):
                epoch += 1
                end_of_epoch = True
                data.train_iter.init_epoch()
                train_mle_iter = iter(data.train_iter)
                i_mle = 0
                train_loss = 0
                break
            batch = next(train_mle_iter)
            output = forward(model, batch.premise, batch.hypothesis,
                             conf.max_sent_len)
            loss = criterion(output, batch.label)
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            i_mle += batch.premise.shape[0]

        if i_mle % 1000 == 0:
            _loss = train_loss / i_mle if i_mle != 0 else 0
            _ent = train_ent / i_ent if i_ent != 0 else 0
            log.info(
                'epoch [{:2}] [{} / {}] loss[{:.5f}] entropy[{:.5f}]'.format(
                    epoch, i_mle, len(data.train_iter), _loss, _ent))

        if end_of_epoch or i_mle % 1e5 == 0:
            dev_loss, dev_acc = evaluate(model, data.dev_iter)
            dev_ent, dev_ent_acc = evaluate_ent(model, dev_ent_batches)
            log.info("dev acc: {:.4f} ent: {:.4f} ent_acc: {:.4f}".format(
                dev_acc, dev_ent, dev_ent_acc))
            model_path = os.path.join(out_dir,
                                      'checkpoint_epoch_{}.pt'.format(epoch))
            torch.save(model.state_dict(), model_path)
            if dev_acc > best_acc:
                best_acc = dev_acc
                model_path = os.path.join(out_dir, 'best_model.pt')
                torch.save(model.state_dict(), model_path)
                log.info("best model saved {}".format(dev_acc))

        if epoch > 40:
            break
Esempio n. 4
0
def main():
    """Generate reduced ("rawr") examples for one SNLI fold and pickle them.

    For every batch of the chosen fold the baseline model's predictions are
    recorded, ``get_rawr`` produces reduced versions of the hypothesis (or of
    the premise with ``--pnoth``), and the model is re-run on each reduced
    input. Results are accumulated as ``{'original': ..., 'reduced': [...]}``
    entries and written to ``<out_dir>/<fname>.<batch_i>`` every 1000 batches
    and once more at the end for any remainder.

    Fix over the previous version: the stop condition for a repeating
    iterator was ``batch_i > len(iterator)``. ``batch_i`` is zero-based, so
    one batch of the following epoch was processed and stored as well. The
    loop now stops at ``batch_i >= len(iterator)``. As a consequence the
    numeric suffix of the final file is one lower than before when that
    guard fires.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--fold', required=True)
    parser.add_argument('--baseline', default='results/baseline.pt')
    parser.add_argument('--pnoth',
                        default=False,
                        action='store_true',
                        help='reduce premise instead of hypothesis')
    # NOTE(review): --truth is accepted but not read anywhere in this
    # function.
    parser.add_argument('--truth',
                        default=False,
                        action='store_true',
                        help='use label instead of prediction as target')
    args = parser.parse_args()

    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len
    q_vocab = data.TEXT.vocab.itos
    a_vocab = data.LABEL.vocab.itos

    out_dir = prepare_output_dir(conf, 'results', 'rawr')
    print('Generating [{}] rawr data from [{}].'.format(
        args.fold, args.baseline))
    print(out_dir)

    model = BIMPM(conf, data)
    model.load_state_dict(torch.load(args.baseline))
    # NOTE(review): presumably get_rawr needs gradients w.r.t. the word
    # embeddings - confirm against its implementation.
    model.word_emb.weight.requires_grad = True
    model.to(conf.device)

    datasets = {'train': data.train_iter, 'dev': data.dev_iter}
    fold_iter = datasets[args.fold]
    n_batches = len(fold_iter)

    which_reduced = 'premise' if args.pnoth else 'hypothesis'
    fname = 'rawr.{}.{}.pkl'.format(args.fold, which_reduced)

    checkpoint = []
    for batch_i, batch in enumerate(tqdm(fold_iter)):
        if batch_i >= n_batches:
            # otherwise train iter will loop forever!
            break
        batch_size = batch.hypothesis.shape[0]
        model.eval()
        output = F.softmax(model(batch.premise, batch.hypothesis), 1)
        original_scores, original_predictions = torch.max(output, 1)
        original_scores = original_scores.detach().cpu().numpy()
        original_predictions = original_predictions.detach().cpu().numpy()
        batch_cpu = Batch(batch.premise.data.cpu(),
                          batch.hypothesis.data.cpu(), batch.label.data.cpu())

        reduced, removed_indices = get_rawr(
            model,
            batch,
            max_beam_size=rawr_conf.max_beam_size,
            conf_threshold=rawr_conf.conf_threshold,
            p_not_h=args.pnoth)

        for i in range(batch_size):
            label = batch_cpu.label[i]
            og = {
                'premise': batch_cpu.premise[i],
                'hypothesis': batch_cpu.hypothesis[i],
                'premise_readable': to_text(batch_cpu.premise[i], q_vocab),
                'hypothesis_readable': to_text(batch_cpu.hypothesis[i],
                                               q_vocab),
                'prediction': original_predictions[i],
                'prediction_readable': a_vocab[original_predictions[i]],
                'score': original_scores[i],
                'label': label,
                'label_readable': a_vocab[label],
            }
            entry = {'original': og, 'reduced': []}
            checkpoint.append(entry)

            # s1 is the sentence left untouched, s2 iterates over the
            # reduced versions of the other one.
            s1 = batch.hypothesis[i] if args.pnoth else batch.premise[i]
            s1 = s1.to(conf.device)
            for j, s2 in enumerate(reduced[i]):
                s2 = torch.LongTensor(s2).to(conf.device)
                # get_rawr was handed the model; re-assert eval mode before
                # scoring the reduced input.
                model.eval()
                if args.pnoth:
                    output = model(s2.unsqueeze(0), s1.unsqueeze(0))
                else:
                    output = model(s1.unsqueeze(0), s2.unsqueeze(0))
                output = F.softmax(output, 1)
                pred_scores, pred = torch.max(output, 1)
                pred = pred.detach().cpu().numpy()[0]
                pred_scores = pred_scores.detach().cpu().numpy()[0]
                if args.pnoth:
                    hypo, prem = s1.cpu(), s2.cpu()
                else:
                    prem, hypo = s1.cpu(), s2.cpu()
                entry['reduced'].append({
                    'premise': prem,
                    'hypothesis': hypo,
                    'premise_readable': to_text(prem, q_vocab),
                    'hypothesis_readable': to_text(hypo, q_vocab),
                    'prediction': pred,
                    'prediction_readable': a_vocab[pred],
                    'score': pred_scores,
                    'label': label,
                    'label_readable': a_vocab[label],
                    'removed_indices': removed_indices[i][j],
                    'which_reduced': which_reduced,
                })

        # Flush periodically so the accumulated list stays small.
        if batch_i % 1000 == 0 and batch_i > 0:
            out_path = os.path.join(out_dir, '{}.{}'.format(fname, batch_i))
            with open(out_path, 'wb') as f:
                pickle.dump(checkpoint, f)
            checkpoint = []

    # Write whatever is left after the last periodic flush.
    if checkpoint:
        out_path = os.path.join(out_dir, '{}.{}'.format(fname, batch_i))
        with open(out_path, 'wb') as f:
            pickle.dump(checkpoint, f)
Esempio n. 5
0
def main():
    """Train a BIMPM model on reduced ("rawr") examples.

    Loads the pickled reduced train/dev files, turns them into
    ``(premise, hypothesis, target)`` triples and trains with
    ``train_reduced``. The best model's weights are saved as ``best.pt`` in
    the output directory.

    Command-line options:
        --train / --dev: paths of the pickled reduced data.
        --truth: use the original example's label instead of the model
            prediction as training target.
        --ogdev: validate on the regular SNLI dev set instead of the reduced
            dev examples.
        --full: fraction of regular training batches to train on; when
            positive these batches replace the reduced training batches.

    Fixes over the previous version:

    * Without ``--ogdev`` the dev batches were built from the *training*
      triples, so validation ran on the training data and the loaded dev
      data was never used. They are now built from ``dev``.
    * With ``--full`` one batch more than the announced ``n_examples`` was
      collected (``j > n_examples``); the loop now stops at ``n_examples``.
    """
    from args import conf

    parser = argparse.ArgumentParser()
    parser.add_argument('--train', default='results/rawr.train.hypothesis.pkl')
    parser.add_argument('--dev', default='results/rawr.dev.hypothesis.pkl')
    parser.add_argument('--truth',
                        default=False,
                        action='store_true',
                        help='use label instead of prediction as target')
    parser.add_argument('--ogdev',
                        default=False,
                        action='store_true',
                        help='use original dev set instead of reduced')
    parser.add_argument('--full',
                        default=0,
                        type=float,
                        help='amount of full examples to include')
    args = parser.parse_args()

    conf.train_data = args.train
    conf.dev_data = args.dev

    print('loading regular data...')
    regular_data = SNLI(conf)
    conf.char_vocab_size = len(regular_data.char_vocab)
    conf.word_vocab_size = len(regular_data.TEXT.vocab)
    conf.class_size = len(regular_data.LABEL.vocab)
    conf.max_word_len = regular_data.max_word_len
    conf.out_dir = prepare_output_dir(conf, 'results', 'reduced')

    print('loading reduced data from [{}]'.format(conf.train_data))
    with open(conf.train_data, 'rb') as f:
        train = pickle.load(f)
    print('loading reduced data from [{}]'.format(conf.dev_data))
    with open(conf.dev_data, 'rb') as f:
        dev = pickle.load(f)

    # Training target: label or prediction of the ORIGINAL example, paired
    # with every reduced version of it.
    train_label = 'label' if args.truth else 'prediction'
    train = [(x['premise'], x['hypothesis'], ex['original'][train_label])
             for ex in train for x in ex['reduced']]
    # Dev keeps only the first reduced version of each example, with its
    # own stored label.
    dev = [(x['premise'], x['hypothesis'], x['label']) for ex in dev
           for x in ex['reduced'][:1]]

    train_batches = batchify(train, conf.batch_size)

    if args.full > 0:
        n_examples = int(len(regular_data.train_iter) * args.full)
        print('use {} ({}) full training data'.format(
            n_examples * conf.batch_size, args.full))
        full_batches = []
        for j, x in enumerate(regular_data.train_iter):
            if j >= n_examples:
                break
            full_batches.append((x.premise, x.hypothesis, x.label))
        # NOTE(review): the full batches replace the reduced ones rather
        # than being appended to them; confirm this matches the --full help.
        train_batches = full_batches

    print(len(train_batches))

    if args.ogdev:
        dev_batches = [(x.premise, x.hypothesis, x.label)
                       for x in regular_data.dev_iter]
    else:
        dev_batches = batchify(dev, conf.batch_size)

    model = BIMPM(conf, regular_data)
    if conf.gpu > -1:
        model.cuda(conf.gpu)

    print('begin training')
    best_model = train_reduced(model, train_batches, dev_batches, conf)

    torch.save(best_model.state_dict(), os.path.join(conf.out_dir, 'best.pt'))
    print('training finished!')
Esempio n. 6
0
def train(conf, data):
    """Train a BIMPM model on ``data.train_iter`` and return the best model.

    Args:
        conf: configuration object; this function reads ``device``, ``lr``,
            ``epoch``, ``max_sent_len`` and ``print_freq``.
        data: dataset wrapper providing ``train_iter`` and whatever
            ``evaluate`` needs.

    Returns:
        A deep copy of the model taken at the evaluation with the highest
        dev accuracy. If no evaluation improved on the initial accuracy of 0
        (or none ran), the model in its final state is returned.

    Changes over the previous version:

    * ``best_model`` could be unbound at ``return``; it now falls back to
      the current model.
    * The progress line no longer contains the source indentation that the
      backslash continuations embedded in the string, and the missing
      separator before ``dev acc`` is restored.
    * The character tensors and ``kwargs`` dict were built but never passed
      to the model (it is called with the two word tensors only), so that
      dead work is removed.
    * The checkpoint directory is created before the first save.
    """
    import os

    model = BIMPM(conf, data)
    model = model.to(conf.device)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=conf.lr)
    criterion = nn.CrossEntropyLoss()

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0
    best_model = None

    ckp_dir = 'results/baseline_checkpoints/baseline_epoch_{}.pt'
    os.makedirs(os.path.dirname(ckp_dir), exist_ok=True)

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == conf.epoch:
            break
        if present_epoch > last_epoch:
            # First batch of a new epoch: announce it and snapshot weights.
            print('epoch:', present_epoch + 1)
            torch.save(model.state_dict(), ckp_dir.format(present_epoch + 1))
        last_epoch = present_epoch

        s1, s2 = batch.premise, batch.hypothesis

        # limit the lengths of input sentences up to max_sent_len
        if conf.max_sent_len >= 0:
            if s1.size()[1] > conf.max_sent_len:
                s1 = s1[:, :conf.max_sent_len]
            if s2.size()[1] > conf.max_sent_len:
                s2 = s2[:, :conf.max_sent_len]

        pred = model(s1, s2)

        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.data.item()
        batch_loss.backward()
        optimizer.step()

        if (i + 1) % conf.print_freq == 0:
            dev_loss, dev_acc = evaluate(model, conf, data, mode='dev')
            test_loss, test_acc = evaluate(model, conf, data)

            print(f'train loss: {loss:.3f} / '
                  f'dev loss: {dev_loss:.3f} / '
                  f'test loss: {test_loss:.3f} / '
                  f'dev acc: {dev_acc:.3f} / '
                  f'test acc: {test_acc:.3f}')

            if dev_acc > max_dev_acc:
                max_dev_acc = dev_acc
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)

            # The running loss is reported per print_freq window. The
            # model is put back into training mode after the evaluations.
            loss = 0
            model.train()

    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')
    if best_model is None:
        # No evaluation produced an improvement; return the trained model
        # instead of failing on an unbound name.
        best_model = model
    return best_model
            "wb"))

    import json
    json.dump(
        config,
        open(
            "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/bimpm1/config.json",
            "w"))

    api = ModelAPI(
        "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/bimpm1",
        "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/bimpm1"
    )

    api.load_config()
    model = BIMPM()
    api.build_graph(model)

    api.train_step([
        train_anchor_matrix, train_check_matrix, train_label_matrix,
        train_anchor_len_matrix, train_check_len_matrix
    ], [
        dev_anchor_matrix, dev_check_matrix, dev_label_matrix,
        dev_anchor_len_matrix, dev_check_len_matrix
    ])

elif model_type == "siamese_cnn":
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    config = {
        "vocab_size": vocab_size,
        "max_length": 200,