Esempio n. 1
0
    num_common_tokens = len(tokenset2.intersection(tokenset1))
    proportion1 = num_common_tokens / len(tokenset1)
    proportion2 = num_common_tokens / len(tokenset2)

    return (proportion1, proportion2)


if __name__ == '__main__':
    # Script entry point: takes three positional command-line paths
    # (training XML, test XML, output XML), fits one classifier and one
    # regressor on the training pairs and predicts on the test pairs.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('train', help='XML file with training data')
    parser.add_argument('test', help='XML file with test data')
    parser.add_argument('output', help='Output tagged XML file')
    args = parser.parse_args()

    # Extract features and labels from the training pairs. Both targets are
    # read from the same pair objects: `entailment` feeds the classifier and
    # `similarity` feeds the regressor.
    train_pairs = read_xml(args.train, need_labels=True)
    features = extract_features(train_pairs)
    entailment_target = np.array([pair.entailment for pair in train_pairs])
    similarity_target = np.array([pair.similarity for pair in train_pairs])

    # Train the two models on the same feature matrix.
    # NOTE(review): presumably scikit-learn estimators, where
    # class_weight='balanced' reweights classes by inverse frequency --
    # confirm against the file's imports.
    classifier = LogisticRegression(class_weight='balanced')
    classifier.fit(features, entailment_target)
    regressor = LinearRegression()
    regressor.fit(features, similarity_target)

    # Run the models on the test pairs, which are read without requiring
    # labels. `features` is rebound here to the test features; the training
    # features are not used again after the two fit() calls above.
    # NOTE(review): neither the predictions nor args.output are consumed in
    # the lines visible here -- presumably they are written out further down.
    test_pairs = read_xml(args.test, need_labels=False)
    features = extract_features(test_pairs)
    predicted_entailment = classifier.predict(features)
    predicted_similarity = regressor.predict(features)
Esempio n. 2
0
    '''
    # check if there is an entailment value
    if pairs_sys[0].similarity is None:
        print()
        print('No similarity output to evaluate')
        return
    
    gold_values = np.array([p.similarity for p in pairs_gold])
    sys_values = np.array([p.similarity for p in pairs_sys])
    pearson = pearsonr(gold_values, sys_values)[0]
    absolute_diff = gold_values - sys_values
    mse = (absolute_diff ** 2).mean()
    
    print()
    print('Similarity evaluation')
    print('Pearson\t\tMean Squared Error')
    print('-------\t\t------------------')
    print('{:7.2f}\t\t{:18.2f}'.format(pearson, mse))

if __name__ == '__main__':
    # Command-line interface: two positional paths, gold file first and
    # system file second.
    cli = argparse.ArgumentParser(description=__doc__)
    for positional, description in (
            ('gold_file', 'Gold file'),
            ('system_file', 'File produced by a system')):
        cli.add_argument(positional, help=description)
    args = cli.parse_args()

    # Both files are loaded with the same read_xml arguments, gold first.
    gold, system = [read_xml(path, True)
                    for path in (args.gold_file, args.system_file)]

    # Run eval_rte and then eval_similarity on the same two pair lists.
    for evaluate in (eval_rte, eval_similarity):
        evaluate(gold, system)
Esempio n. 3
0
    sys_values = np.array([p.similarity for p in pairs_sys])
    pearson = pearsonr(gold_values, sys_values)[0]
    absolute_diff = gold_values - sys_values
    mse = (absolute_diff**2).mean()

    print()
    print('Similarity evaluation')
    print('Pearson\t\tMean Squared Error')
    print('-------\t\t------------------')
    print('{:7.3f}\t\t{:18.2f}'.format(pearson, mse))


if __name__ == '__main__':
    # Help text for the -f switch, kept apart so the argument definitions
    # below stay on one line each.
    force_help = ('Force evaluation when some values are not set. Missing '
                  'entailment labels are considered wrong and missing '
                  'similarity values are replaced by 0')

    # Command-line interface: gold file, system file, optional -f flag.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('gold_file', help='Gold file')
    cli.add_argument('system_file', help='File produced by a system')
    cli.add_argument('-f', dest='force', action='store_true', help=force_help)
    args = cli.parse_args()

    # Both files are loaded with the same read_xml arguments, gold first;
    # the -f flag is forwarded unchanged as `force`.
    gold, system = [read_xml(path, True, force=args.force)
                    for path in (args.gold_file, args.system_file)]

    # Run eval_rte and then eval_similarity on the same two pair lists.
    for evaluate in (eval_rte, eval_similarity):
        evaluate(gold, system)
Esempio n. 4
0
    num_common_tokens = len(tokenset2.intersection(tokenset1))
    proportion1 = num_common_tokens / len(tokenset1)
    proportion2 = num_common_tokens / len(tokenset2)

    return (proportion1, proportion2)


if __name__ == '__main__':
    # Script entry point: takes three positional command-line paths
    # (training XML, test XML, output XML), fits one classifier and one
    # regressor on the training pairs and predicts on the test pairs.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('train', help='XML file with training data')
    parser.add_argument('test', help='XML file with test data')
    parser.add_argument('output', help='Output tagged XML file')
    args = parser.parse_args()

    # Extract features and labels from the training pairs. Both targets are
    # read from the same pair objects: `entailment` feeds the classifier and
    # `similarity` feeds the regressor.
    train_pairs = read_xml(args.train, need_labels=True)
    features = extract_features(train_pairs)
    entailment_target = np.array([pair.entailment for pair in train_pairs])
    similarity_target = np.array([pair.similarity for pair in train_pairs])

    # Train the two models on the same feature matrix.
    # NOTE(review): presumably scikit-learn estimators, where
    # class_weight='balanced' reweights classes by inverse frequency --
    # confirm against the file's imports.
    classifier = LogisticRegression(class_weight='balanced')
    classifier.fit(features, entailment_target)
    regressor = LinearRegression()
    regressor.fit(features, similarity_target)

    # Run the models on the test pairs, which are read without requiring
    # labels. `features` is rebound here to the test features; the training
    # features are not used again after the two fit() calls above.
    # NOTE(review): neither the predictions nor args.output are consumed in
    # the lines visible here -- presumably they are written out further down.
    test_pairs = read_xml(args.test, need_labels=False)
    features = extract_features(test_pairs)
    predicted_entailment = classifier.predict(features)
    predicted_similarity = regressor.predict(features)