import logging
import sys

from iepy.data import db
from iepy.data.knowledge import Knowledge

# NOTE: `config` and `FactExtractorFactory` are referenced below but defined
# elsewhere in the original script; their definitions are not shown here.


def main(options):
    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
    connection = db.connect(options['<dbname>'])
    standard = Knowledge.load_from_csv(options['<gold_standard>'])
    logging.info("Loaded %d samples from gold standard", len(standard))
    k = int(options['--k'])
    success = total = 0
    # confusion_matrix[predicted][actual] collects the evidence items falling
    # into each of the four prediction/label combinations.
    confusion_matrix = [[[], []], [[], []]]
    logging.info("Splitting into %d subsamples", k)
    for subsample in range(k):
        logging.debug("Subsample = %d", subsample)
        train_data = Knowledge()
        test_data = []
        test_labels = []
        # Every k-th sample goes to the test split; the rest train the extractor.
        for i, (e, s) in enumerate(standard.items()):
            if i % k == subsample:
                test_data.append(e)
                test_labels.append(int(s))
            else:
                train_data[e] = s
        extractor = FactExtractorFactory(config, train_data)
        prediction = extractor.predict(test_data)
        assert len(prediction) == len(test_data)
        total += len(prediction)
        success += sum(1 for (p, e) in zip(prediction, test_labels) if p == e)
        for i, (p, e) in enumerate(zip(prediction, test_labels)):
            confusion_matrix[p][e].append(test_data[i])
    logging.info("%d values evaluated", total)
    logging.info("%d accurate predictions (%d negative, %d positive)",
                 success,
                 len(confusion_matrix[0][0]),
                 len(confusion_matrix[1][1]))
    logging.info(
        "%d inaccurate predictions (%d actual positive, %d actual negative)",
        total - success,
        len(confusion_matrix[0][1]),
        len(confusion_matrix[1][0]))
    for e in confusion_matrix[0][1][:3]:
        logging.info("Predicted negative, actually positive: %s", e)
    for e in confusion_matrix[1][0][:3]:
        logging.info("Predicted positive, actually negative: %s", e)
    # precision = TP / (TP + FP); recall = TP / (TP + FN).
    try:
        precision = len(confusion_matrix[1][1]) / len(confusion_matrix[1][0] +
                                                      confusion_matrix[1][1])
    except ZeroDivisionError:
        precision = None
    try:
        recall = len(confusion_matrix[1][1]) / len(confusion_matrix[0][1] +
                                                   confusion_matrix[1][1])
    except ZeroDivisionError:
        recall = None
    accuracy = success / total
    return accuracy, precision, recall
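# The script's entry point is not shown above. A minimal sketch of one,
# assuming the same docopt conventions as the other scripts in this repo;
# the script name, usage string, and the --k default are assumptions.
USAGE = """
Usage:
  cross_validate.py <dbname> <gold_standard> [--k=<k>]
  cross_validate.py -h | --help | --version

Options:
  -h --help        Show this screen
  --version        Version number
  --k=<k>          Number of folds [default: 5]
"""

if __name__ == '__main__':
    from docopt import docopt
    opts = docopt(USAGE, version=0.1)
    accuracy, precision, recall = main(opts)
    print("Accuracy: %.2f" % accuracy)
    if precision is not None:
        print("Precision: %.2f" % precision)
    if recall is not None:
        print("Recall: %.2f" % recall)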
""" IEPY's result evaluator w.r.t. a reference corpus. Usage: eval.py <dbname> <proposed_csv> <reference_csv> eval.py -h | --help | --version Options: -h --help Show this screen --version Version number """ from docopt import docopt from iepy.data.db import connect from iepy.data.knowledge import Knowledge from iepy.utils import evaluate if __name__ == '__main__': opts = docopt(__doc__, version=0.1) connector = connect(opts['<dbname>']) proposed_csv = opts['<proposed_csv>'] reference_csv = opts['<reference_csv>'] proposed = Knowledge.load_from_csv(proposed_csv) reference = Knowledge.load_from_csv(reference_csv) result = evaluate(proposed, reference) print("Precision: %.2f" % result['precision']) print("Recall: %.2f" % result['recall'])
"""
Generate seed facts for a relation by querying a human oracle.

Usage:
  generate_seeds.py <dbname> <relation_name> <kind_a> <kind_b> <output_filename>
  generate_seeds.py -h | --help | --version

Options:
  -h --help        Show this screen
  --version        Version number
"""
from docopt import docopt

from iepy.data.db import connect
from iepy.data.knowledge import Knowledge
from iepy.extraction.terminal import human_oracle
from iepy.utils import save_facts_to_csv

if __name__ == '__main__':
    opts = docopt(__doc__, version=0.1)
    connect(opts['<dbname>'])
    relation_name = opts['<relation_name>']
    kind_a = opts['<kind_a>']
    kind_b = opts['<kind_b>']
    output_filename = opts['<output_filename>']
    kn = Knowledge()
    kn.extend_from_oracle(kind_a, kind_b, relation_name, human_oracle)
    # Keep only the facts the oracle confirmed (label == 1).
    facts = set(ev.fact for (ev, value) in kn.items() if value == 1)
    save_facts_to_csv(sorted(facts), output_filename)
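# human_oracle's exact signature lives in iepy.extraction.terminal and is not
# shown in this excerpt. As an illustration only, assuming the oracle is a
# callable that receives a candidate evidence and returns a truth label, a
# non-interactive stand-in for batch runs might look like this (the signature
# and the `fact` attribute access are assumptions):
def scripted_oracle(evidence):
    # Confirm candidates whose fact is already in a known-good set; reject the rest.
    known_good = set()  # hypothetical: pre-loaded trusted facts
    return 1 if getattr(evidence, 'fact', None) in known_good else 0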
"""
Print the evidence in a labeled CSV, with optional scores and line numbers.

Usage:
  print_evidence.py <dbname> <csv_file> [--with-score] [--with-line-number]
  print_evidence.py -h | --help | --version

Options:
  -h --help            Show this screen
  --version            Version number
  --with-score         Shows colored scores
  --with-line-number   Shows each item numbered sequentially
"""
# NOTE: the docstring header and usage section above were missing from this
# excerpt and were reconstructed; the script name is an assumption.
from docopt import docopt
from colorama import Back, Style

from iepy.data import db
from iepy.data.knowledge import Knowledge

if __name__ == '__main__':
    opts = docopt(__doc__, version=0.1)
    connection = db.connect(opts['<dbname>'])
    csv_file = opts['<csv_file>']
    evidence = Knowledge.load_from_csv(csv_file)
    for nr, (e, score) in enumerate(evidence.items()):
        fact = e.colored_fact()
        fact_line = []
        if opts['--with-line-number']:
            fact_line.append(str(nr + 1))
        if opts['--with-score']:
            # Highlight definite labels (0/1) differently from other scores.
            if score == 0:
                score_color = Back.YELLOW
            elif score == 1:
                score_color = Back.MAGENTA
            else:
                score_color = Back.CYAN
            # (Reconstruction: the original excerpt was cut off at this point.)
            fact_line.append(score_color + str(score) + Style.RESET_ALL)
        fact_line.append(fact)
        print(' '.join(fact_line))