Example #1
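Evaluates a trained SNLI model, named after the script's parent directory, on the dev and test word-sequence splits; it prints accuracy for both and dumps predicted/gold samples alongside each premise and hypothesis.
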
import os
import pickle as pkl

# TextIterator, init_params, load_params, init_tparams, build_model,
# prepare_data, pred_acc and pred_label come from the project's own modules.


def main():
    model_name = os.path.basename(os.path.dirname(os.path.realpath(__file__)))
    model = '../../models/{}.npz'.format(model_name)
    dev_datasets = [
        '../../data/word_sequence/premise_snli_1.0_dev.txt',
        '../../data/word_sequence/hypothesis_snli_1.0_dev.txt',
        '../../data/word_sequence/label_snli_1.0_dev.txt'
    ]
    test_datasets = [
        '../../data/word_sequence/premise_snli_1.0_test.txt',
        '../../data/word_sequence/hypothesis_snli_1.0_test.txt',
        '../../data/word_sequence/label_snli_1.0_test.txt'
    ]
    dictionary = '../../data/word_sequence/vocab_cased.pkl'

    # load model options
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    print(options)

    # load dictionary and invert it (id -> word)
    with open(dictionary, 'rb') as f:
        word_dict = pkl.load(f)
    word_idict = {vv: kk for kk, vv in word_dict.items()}

    dev = TextIterator(dev_datasets[0],
                       dev_datasets[1],
                       dev_datasets[2],
                       dictionary,
                       n_words=options['n_words'],
                       batch_size=options['valid_batch_size'],
                       shuffle=False)

    test = TextIterator(test_datasets[0],
                        test_datasets[1],
                        test_datasets[2],
                        dictionary,
                        n_words=options['n_words'],
                        batch_size=options['valid_batch_size'],
                        shuffle=False)

    # allocate model parameters
    params = init_params(options, word_dict)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    trng, use_noise, \
        x1, x1_mask, char_x1, char_x1_mask, x2, x2_mask, char_x2, char_x2_mask, y, \
        opt_ret, \
        cost, \
        f_pred, f_probs = \
        build_model(tparams, options)

    use_noise.set_value(0.)
    dev_acc = pred_acc(f_pred, prepare_data, options, dev, word_idict)
    test_acc = pred_acc(f_pred, prepare_data, options, test, word_idict)

    print('dev accuracy', dev_acc)
    print('test accuracy', test_acc)

    predict_labels_dev = pred_label(f_probs, prepare_data, options, dev,
                                    word_idict)
    predict_labels_test = pred_label(f_probs, prepare_data, options, test,
                                     word_idict)

    # columns: predicted label, gold label, premise, hypothesis
    with open('predict_gold_samples_dev.txt', 'w') as fw, \
            open(dev_datasets[0], 'r') as f1, \
            open(dev_datasets[1], 'r') as f2, \
            open(dev_datasets[-1], 'r') as f3:
        for a, b, c, d in zip(predict_labels_dev, f3, f1, f2):
            fw.write('{}\t{}\t{}\t{}\n'.format(a, b.rstrip(), c.rstrip(), d.rstrip()))

    with open('predict_gold_samples_test.txt', 'w') as fw, \
            open(test_datasets[0], 'r') as f1, \
            open(test_datasets[1], 'r') as f2, \
            open(test_datasets[-1], 'r') as f3:
        for a, b, c, d in zip(predict_labels_test, f3, f1, f2):
            fw.write('{}\t{}\t{}\t{}\n'.format(a, b.rstrip(), c.rstrip(), d.rstrip()))

    print('Done')
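
The dump written above is tab-separated, following the zip order (predict_labels_dev, f3, f1, f2): predicted label, gold label, premise, hypothesis. A minimal sketch for reading the file back, assuming the run above produced it:

# Read back predict_gold_samples_dev.txt written by the script above.
# Columns: predicted label, gold label, premise, hypothesis.
with open('predict_gold_samples_dev.txt') as f:
    for line in f:
        predicted, gold, premise, hypothesis = line.rstrip('\n').split('\t')
        if predicted != gold:
            print('misclassified:', premise, '||', hypothesis)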
Example #2
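A variant that additionally loads lemma vocabularies and the external knowledge base (kb_dict) recorded in the saved model options, then evaluates and dumps predictions the same way.
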
import os
import pickle as pkl


def main():
    model_name = os.path.basename(os.path.dirname(os.path.realpath(__file__)))
    model = '../../models/{}.npz'.format(model_name)
    valid_datasets   = ['../../data/sequence_and_features/premise_snli_1.0_dev_token.txt', 
                        '../../data/sequence_and_features/hypothesis_snli_1.0_dev_token.txt',
                        '../../data/sequence_and_features/premise_snli_1.0_dev_lemma.txt', 
                        '../../data/sequence_and_features/hypothesis_snli_1.0_dev_lemma.txt',
                        '../../data/sequence_and_features/label_snli_1.0_dev.txt']
    test_datasets    = ['../../data/sequence_and_features/premise_snli_1.0_test_token.txt', 
                        '../../data/sequence_and_features/hypothesis_snli_1.0_test_token.txt',
                        '../../data/sequence_and_features/premise_snli_1.0_test_lemma.txt', 
                        '../../data/sequence_and_features/hypothesis_snli_1.0_test_lemma.txt',
                        '../../data/sequence_and_features/label_snli_1.0_test.txt']
    dictionary       = ['../../data/sequence_and_features/vocab_cased.pkl',
                        '../../data/sequence_and_features/vocab_cased_lemma.pkl']
    # load model options
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    print(options)
    # load dictionary and invert
    with open(dictionary[0], 'rb') as f:
        word_dict = pkl.load(f)

    print('Loading knowledge base ...')
    kb_dicts = options['kb_dicts']
    with open(kb_dicts[0], 'rb') as f:
        kb_dict = pkl.load(f)

    n_words = options['n_words']
    valid_batch_size = options['valid_batch_size']

    valid = TextIterator(valid_datasets[0], valid_datasets[1], valid_datasets[2], valid_datasets[3], valid_datasets[4],
                         dictionary[0], dictionary[1],
                         n_words=n_words,
                         batch_size=valid_batch_size,
                         shuffle=False)
    test = TextIterator(test_datasets[0], test_datasets[1], test_datasets[2], test_datasets[3], test_datasets[4],
                        dictionary[0], dictionary[1],
                        n_words=n_words,
                        batch_size=valid_batch_size,
                        shuffle=False)

    # allocate model parameters
    params = init_params(options, word_dict)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    trng, use_noise, \
        x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att, y, \
        opt_ret, \
        cost, \
        f_pred, \
        f_probs = \
        build_model(tparams, options)

    use_noise.set_value(0.)
    valid_acc = pred_acc(f_pred, prepare_data, options, valid, kb_dict)
    test_acc = pred_acc(f_pred, prepare_data, options, test, kb_dict)

    print('valid accuracy', valid_acc)
    print('test accuracy', test_acc)

    predict_labels_valid = pred_label(f_pred, prepare_data, options, valid, kb_dict)
    predict_labels_test = pred_label(f_pred, prepare_data, options, test, kb_dict)

    # columns: predicted label, gold label, premise, hypothesis
    with open('predict_gold_samples_valid.txt', 'w') as fw, \
            open(valid_datasets[0], 'r') as f1, \
            open(valid_datasets[1], 'r') as f2, \
            open(valid_datasets[-1], 'r') as f3:
        for a, b, c, d in zip(predict_labels_valid, f3, f1, f2):
            fw.write('{}\t{}\t{}\t{}\n'.format(a, b.rstrip(), c.rstrip(), d.rstrip()))

    with open('predict_gold_samples_test.txt', 'w') as fw, \
            open(test_datasets[0], 'r') as f1, \
            open(test_datasets[1], 'r') as f2, \
            open(test_datasets[-1], 'r') as f3:
        for a, b, c, d in zip(predict_labels_test, f3, f1, f2):
            fw.write('{}\t{}\t{}\t{}\n'.format(a, b.rstrip(), c.rstrip(), d.rstrip()))

    print('Done')
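
Each of these scripts derives the model name from the directory that contains it, so the same evaluation file can be dropped into any model directory unchanged. A self-contained illustration of the idiom (the path in the comment is hypothetical):

import os

# If this file lives at .../experiments/my_model/evaluate.py, model_name
# resolves to "my_model", and the weights are then looked up at
# ../../models/my_model.npz relative to the script.
model_name = os.path.basename(os.path.dirname(os.path.realpath(__file__)))
print(model_name)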
Example #3
File: gen.py  Project: smith6036/nli
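A simpler variant that loads the weights from the script's own directory instead of ../../models/ and builds the model without character or knowledge-base inputs.
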
import os
import pickle as pkl


def main():
    model_name = os.path.basename(os.path.dirname(os.path.realpath(__file__)))
    model = "{}.npz".format(model_name)
    datasets = [
        "../../data/word_sequence/premise_snli_1.0_train.txt",
        "../../data/word_sequence/hypothesis_snli_1.0_train.txt",
        "../../data/word_sequence/label_snli_1.0_train.txt",
    ]

    valid_datasets = [
        "../../data/word_sequence/premise_snli_1.0_dev.txt",
        "../../data/word_sequence/hypothesis_snli_1.0_dev.txt",
        "../../data/word_sequence/label_snli_1.0_dev.txt",
    ]

    test_datasets = [
        "../../data/word_sequence/premise_snli_1.0_test.txt",
        "../../data/word_sequence/hypothesis_snli_1.0_test.txt",
        "../../data/word_sequence/label_snli_1.0_test.txt",
    ]
    dictionary = "../../data/word_sequence/vocab_cased.pkl"

    # load model options
    with open("%s.pkl" % model, "rb") as f:
        options = pkl.load(f)

    print(options)
    # load dictionary and invert
    with open(dictionary, "rb") as f:
        word_dict = pkl.load(f)

    n_words = options["n_words"]
    valid_batch_size = options["valid_batch_size"]

    valid = TextIterator(
        valid_datasets[0],
        valid_datasets[1],
        valid_datasets[2],
        dictionary,
        n_words=n_words,
        batch_size=valid_batch_size,
        shuffle=False,
    )
    test = TextIterator(
        test_datasets[0],
        test_datasets[1],
        test_datasets[2],
        dictionary,
        n_words=n_words,
        batch_size=valid_batch_size,
        shuffle=False,
    )

    # allocate model parameters
    params = init_params(options, word_dict)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    trng, use_noise, x1, x1_mask, x2, x2_mask, y, opt_ret, cost, f_pred = build_model(
        tparams, options)

    use_noise.set_value(0.0)
    valid_acc = pred_acc(f_pred, prepare_data, options, valid)
    test_acc = pred_acc(f_pred, prepare_data, options, test)

    print("valid accuracy", valid_acc)
    print("test accuracy", test_acc)

    predict_labels_valid = pred_label(f_pred, prepare_data, options, valid)
    predict_labels_test = pred_label(f_pred, prepare_data, options, test)

    with open("predict_gold_samples_valid.txt", "w") as fw:
        with open(valid_datasets[0], "r") as f1:
            with open(valid_datasets[1], "r") as f2:
                with open(valid_datasets[-1], "r") as f3:
                    for a, b, c, d in zip(predict_labels_valid, f3, f1, f2):
                        fw.write(
                            str(a) + "\t" + b.rstrip() + "\t" + c.rstrip() +
                            "\t" + d.rstrip() + "\n")

    with open("predict_gold_samples_test.txt", "w") as fw:
        with open(test_datasets[0], "r") as f1:
            with open(test_datasets[1], "r") as f2:
                with open(test_datasets[-1], "r") as f3:
                    for a, b, c, d in zip(predict_labels_test, f3, f1, f2):
                        fw.write(
                            str(a) + "\t" + b.rstrip() + "\t" + c.rstrip() +
                            "\t" + d.rstrip() + "\n")

    print("Done")