def main(options):
    """Evaluate the fact extractor with k-fold cross-validation.

    Loads a gold standard of labeled evidences, splits it into ``--k``
    subsamples, and for each fold trains an extractor on the other k-1
    folds and predicts on the held-out one.

    :param options: docopt-style dict with '<dbname>', '<gold_standard>'
        and '--k' keys.
    :returns: an (accuracy, precision, recall) tuple; each metric is
        ``None`` when its denominator is zero (e.g. empty gold standard,
        no positive predictions).
    """
    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
    db.connect(options['<dbname>'])  # side effect: opens the global DB connection
    standard = Knowledge.load_from_csv(options['<gold_standard>'])
    logging.info("Loaded %d samples from gold standard", len(standard))
    k = int(options['--k'])
    success = total = 0
    # confusion_matrix[predicted][actual] holds the evidences in each cell:
    # [1][1] = true positives, [1][0] = false positives,
    # [0][1] = false negatives, [0][0] = true negatives.
    confusion_matrix = [[[], []], [[], []]]
    logging.info("Splitting into %d subsamples", k)
    for subsample in range(k):
        logging.debug("Subsample = %d", subsample)
        train_data = Knowledge()
        test_data = []
        test_labels = []
        # Every k-th sample (offset by the fold number) goes to the test set;
        # the rest is used for training.
        for i, (evidence, score) in enumerate(standard.items()):
            if i % k == subsample:
                test_data.append(evidence)
                test_labels.append(int(score))
            else:
                train_data[evidence] = score
        extractor = FactExtractorFactory(config, train_data)
        prediction = extractor.predict(test_data)
        assert len(prediction) == len(test_data)
        total += len(prediction)
        # Single pass over the fold's results: count the hits and fill the
        # confusion matrix together (previously the zip was iterated twice).
        for evidence, predicted, actual in zip(test_data, prediction,
                                               test_labels):
            if predicted == actual:
                success += 1
            confusion_matrix[predicted][actual].append(evidence)
    logging.info("%d values evaluated;", total)
    logging.info("%d accurate predictions (%d negative, %d positive)", success,
                 len(confusion_matrix[0][0]), len(confusion_matrix[1][1]))
    logging.info(
        "%d inaccurate predictions (%d actual positive, %d actual negative)",
        total - success, len(confusion_matrix[0][1]),
        len(confusion_matrix[1][0]))
    # Show a few misclassified examples of each kind for manual inspection.
    for e in confusion_matrix[0][1][:3]:
        logging.info("Predicted negative, actually positive: %s", e)
    for e in confusion_matrix[1][0][:3]:
        logging.info("Predicted positive, actually negative: %s", e)
    try:
        # precision = TP / (TP + FP)
        precision = len(confusion_matrix[1][1]) / len(confusion_matrix[1][0] +
                                                      confusion_matrix[1][1])
    except ZeroDivisionError:
        precision = None
    try:
        # recall = TP / (TP + FN)
        recall = len(confusion_matrix[1][1]) / len(confusion_matrix[0][1] +
                                                   confusion_matrix[1][1])
    except ZeroDivisionError:
        recall = None
    try:
        # Guarded like precision/recall: an empty gold standard used to
        # crash here with an unhandled ZeroDivisionError.
        accuracy = success / total
    except ZeroDivisionError:
        accuracy = None
    return accuracy, precision, recall
Options:
  -h --help             Show this screen
  --version             Version number
"""
from docopt import docopt
import logging
from iepy.core import BootstrappedIEPipeline
from iepy import db
from iepy.human_validation import TerminalInterviewer
from iepy.knowledge import Knowledge
from iepy.utils import load_facts_from_csv

if __name__ == u'__main__':
    opts = docopt(__doc__, version=0.1)
    # Side effect: opens the database the pipeline below will read from.
    connection = db.connect(opts[u'<dbname>'])
    seed_facts = load_facts_from_csv(opts[u'<seeds_file>'])
    output_file = opts[u'<output_file>']
    # --gold is optional: when given, the pipeline also gets a gold standard
    # (presumably for measuring progress — TODO confirm against pipeline docs).
    gold_standard_file = opts[u'--gold']
    if gold_standard_file:
        gold_standard = Knowledge.load_from_csv(gold_standard_file)
    else:
        gold_standard = None
    p = BootstrappedIEPipeline(connection, seed_facts, gold_standard)
    logging.basicConfig(
        level=logging.DEBUG,
        format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    # Sentinel value; NOTE(review): the rest of this script (the interactive
    # loop that presumably uses STOP and TerminalInterviewer) is not visible
    # in this chunk.
    STOP = u'STOP'
""" IEPY's result evaluator w.r.t. a reference corpus. Usage: eval.py <dbname> <proposed_csv> <reference_csv> eval.py -h | --help | --version Options: -h --help Show this screen --version Version number """ from docopt import docopt from iepy.db import connect from iepy.utils import load_evidence_from_csv, evaluate if __name__ == '__main__': opts = docopt(__doc__, version=0.1) connector = connect(opts['<dbname>']) proposed_csv = opts['<proposed_csv>'] reference_csv = opts['<reference_csv>'] proposed = load_evidence_from_csv(proposed_csv, connector) reference = load_evidence_from_csv(reference_csv, connector) result = evaluate(proposed, reference) print("Precision: %.2f" % result['precision']) print("Recall: %.2f" % result['recall'])
IEPY's seed generation utility.

Usage:
    generate_seeds.py <dbname> <relation_name> <kind_a> <kind_b> <output_filename>
    generate_seeds.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
from docopt import docopt
from iepy.db import connect
from iepy.human_validation import human_oracle
from iepy.knowledge import Knowledge
from iepy.utils import save_facts_to_csv

if __name__ == u'__main__':
    opts = docopt(__doc__, version=0.1)
    # Called for its side effect only: sets up the DB connection that the
    # oracle extension below presumably queries — TODO confirm.
    connect(opts[u'<dbname>'])
    relation_name = opts[u'<relation_name>']
    kind_a = opts[u'<kind_a>']
    kind_b = opts[u'<kind_b>']
    output_filename = opts[u'<output_filename>']
    kn = Knowledge()
    # Interactively ask a human to label candidate evidences for the relation.
    kn.extend_from_oracle(kind_a, kind_b, relation_name, human_oracle)
    # Keep only the facts whose evidence the human confirmed (value == 1).
    facts = set([ev.fact for (ev, value) in kn.items() if value == 1])
    save_facts_to_csv(sorted(facts), output_filename)
def setUpClass(cls):
    # Test-class setup: bind the suite to its test database and build the
    # manager instance the tests will exercise.
    disconnect()  # drop any connection left over from a previous test class
    # cls.mongodb_name and cls.ManagerClass are presumably provided by the
    # concrete subclass — TODO confirm against the subclasses.
    connect(cls.mongodb_name)
    cls.manager = cls.ManagerClass()