Example #1
0
def main(options):
    """Run k-fold cross-validation of the fact extractor on a gold standard.

    Args:
        options: docopt-style dict with keys '<dbname>', '<gold_standard>'
            (CSV of labeled evidence) and '--k' (number of folds).

    Returns:
        (accuracy, precision, recall) tuple. precision / recall are None
        when their denominator is empty (no positive predictions / no
        actual positives, respectively).
    """
    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
    # Kept bound so the connection object stays alive for the whole run.
    connection = db.connect(options['<dbname>'])
    standard = Knowledge.load_from_csv(options['<gold_standard>'])
    logging.info("Loaded %d samples from gold standard", len(standard))
    k = int(options['--k'])

    success = total = 0
    # confusion_matrix[predicted][actual] stores the evidence items
    # themselves (not counts) so misclassified examples can be shown below.
    confusion_matrix = [[[], []], [[], []]]
    logging.info("Splitting into %d subsamples", k)
    for subsample in range(k):
        logging.debug("Subsample = %d", subsample)
        train_data = Knowledge()
        test_data = []
        test_labels = []
        # Round-robin split: every k-th item lands in this fold's test set.
        for i, (e, s) in enumerate(standard.items()):
            if i % k == subsample:
                test_data.append(e)
                test_labels.append(int(s))
            else:
                train_data[e] = s
        extractor = FactExtractorFactory(config, train_data)
        prediction = extractor.predict(test_data)
        assert len(prediction) == len(test_data)
        total += len(prediction)
        success += sum(1 for (p, e) in zip(prediction, test_labels) if p == e)
        for i, (p, e) in enumerate(zip(prediction, test_labels)):
            confusion_matrix[p][e].append(test_data[i])
    logging.info("%d values evaluated;", total)
    logging.info("%d accurate predictions (%d negative, %d positive)", success,
                 len(confusion_matrix[0][0]), len(confusion_matrix[1][1]))
    logging.info(
        "%d inaccurate predictions (%d actual positive, %d actual negative)",
        total - success, len(confusion_matrix[0][1]),
        len(confusion_matrix[1][0]))
    for e in confusion_matrix[0][1][:3]:
        logging.info("Predicted negative, actually positive: %s", e)
    for e in confusion_matrix[1][0][:3]:
        logging.info("Predicted positive, actually negative: %s", e)

    # Sum the lengths directly instead of len(a + b), which built a
    # throwaway concatenated list just to count it.
    try:
        precision = len(confusion_matrix[1][1]) / (
            len(confusion_matrix[1][0]) + len(confusion_matrix[1][1]))
    except ZeroDivisionError:
        precision = None
    try:
        recall = len(confusion_matrix[1][1]) / (
            len(confusion_matrix[0][1]) + len(confusion_matrix[1][1]))
    except ZeroDivisionError:
        recall = None
    accuracy = success / total
    return accuracy, precision, recall
Example #2
0
"""
IEPY's result evaluator w.r.t. a reference corpus.

Usage:
    eval.py <dbname> <proposed_csv> <reference_csv>
    eval.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
from docopt import docopt

from iepy.data.db import connect
from iepy.data.knowledge import Knowledge
from iepy.utils import evaluate


if __name__ == '__main__':
    # Parse CLI arguments and open the database named on the command line.
    opts = docopt(__doc__, version=0.1)
    connector = connect(opts['<dbname>'])

    # Load the proposed and reference knowledge bases from their CSV files,
    # then score the proposal against the reference.
    proposed = Knowledge.load_from_csv(opts['<proposed_csv>'])
    reference = Knowledge.load_from_csv(opts['<reference_csv>'])
    result = evaluate(proposed, reference)

    # Report the two headline metrics.
    for label, key in (("Precision", 'precision'), ("Recall", 'recall')):
        print("%s: %.2f" % (label, result[key]))
Example #3
0
Usage:
    generate_seeds.py <dbname> <relation_name> <kind_a> <kind_b> <output_filename>
    generate_seeds.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
from docopt import docopt

from iepy.data.db import connect
from iepy.data.knowledge import Knowledge
from iepy.extraction.terminal import human_oracle
from iepy.utils import save_facts_to_csv


if __name__ == u'__main__':
    # Parse CLI arguments and connect to the target database.
    opts = docopt(__doc__, version=0.1)
    connect(opts[u'<dbname>'])

    relation_name = opts[u'<relation_name>']
    kind_a = opts[u'<kind_a>']
    kind_b = opts[u'<kind_b>']
    output_filename = opts[u'<output_filename>']

    # Let the human oracle label candidate evidence, then keep only the
    # facts confirmed as positive (value == 1).
    kn = Knowledge()
    kn.extend_from_oracle(kind_a, kind_b, relation_name, human_oracle)
    # Set comprehension instead of set([...]): no intermediate list (C403).
    facts = {ev.fact for (ev, value) in kn.items() if value == 1}
    save_facts_to_csv(sorted(facts), output_filename)
Example #4
0
Options:
  -h --help             Show this screen
  --version             Version number
  --with-score          Shows colored scores
  --with-line-number    Shows each item numbered sequentially
"""
from docopt import docopt

from colorama import Back, Style

from iepy.data import db
from iepy.data.knowledge import Knowledge

if __name__ == '__main__':
    opts = docopt(__doc__, version=0.1)
    connection = db.connect(opts['<dbname>'])
    csv_file = opts['<csv_file>']
    evidence = Knowledge.load_from_csv(csv_file)

    for nr, (e, score) in enumerate(evidence.items()):
        fact = e.colored_fact()
        fact_line = []
        if opts['--with-line-number']:
            fact_line.append(str(nr + 1))
        if opts['--with-score']:
            if score == 0:
                score_color = Back.YELLOW
            elif score == 1:
                score_color = Back.MAGENTA
            else:
                score_color = Back.CYAN