def __init__(self, conf_file='configurations.json'):
    """Build the annotator/dataset/experiment registries from a JSON file.

    The file is expected to hold three top-level arrays -- "annotators",
    "datasets" and "experiments" -- and each registry preserves the
    declaration order of its entries (OrderedDict).
    """
    self.annotators = OrderedDict()
    self.datasets = OrderedDict()
    self.experiments = OrderedDict()

    with open(conf_file, 'r') as handle:
        conf = json.load(handle)

    # Annotators: instantiate by module name, configure, register by alias.
    for spec in conf["annotators"]:
        instance = create_annotator(str(spec["name"]))
        instance.set_configuration(spec["configuration"])
        self.annotators[spec["alias"]] = instance

    # Datasets: each entry maps a symbolic name to a TSV file on disk.
    for spec in conf["datasets"]:
        self.datasets[spec["name"]] = Dataset.load_tsv(spec["file"])

    # Experiments: built by a factory keyed on the experiment name,
    # which also receives the raw configuration entry.
    for spec in conf["experiments"]:
        self.experiments[spec["name"]] = \
            create_experiment(str(spec["name"]), spec)
def reshape_dataset(annotator, src_file_name, output):
    """Reshape a dataset once and for all, then persist it.

    A ``.pkl`` source is assumed to be already reshaped and is loaded
    as-is; any other source is read as an AIDA dataset and reshaped
    through *annotator*.  The output format is chosen by extension:
    ``.pkl`` (pickle), ``.xml``, or TSV for anything else.

    :param annotator: object exposing ``reshape(dataset)``
    :param src_file_name: path of the input dataset
    :param output: path of the file to write
    """
    # NOTE: print() with a single argument behaves identically under
    # Python 2 and Python 3; the original used Py2-only print statements.
    print("Reading dataset from %s" % src_file_name)
    needs_reshape = True
    if src_file_name.endswith(".pkl"):
        # Pickled datasets were saved post-reshape; skip the reshape step.
        dataset = Dataset.load(src_file_name)
        needs_reshape = False
    else:
        dataset = AIDADataset.read(src_file_name)
    print("Dataset loaded: %s" % dataset)

    if needs_reshape:
        print("Reshaping dataset using %s" % annotator)
        annotator.reshape(dataset)

    # Dispatch on the *output* extension, independent of the input format.
    if output.endswith(".pkl"):
        Dataset.save(dataset, output)
    elif output.endswith(".xml"):
        Dataset.save_xml(dataset, output)
    else:
        Dataset.save_tsv(dataset, output)
    print("Dataset successfully saved in %s" % output)
# NOTE(review): the enclosing method's header is outside this chunk;
# `count_msg` is presumably built in the missing earlier part — confirm.
micro_msg = "[micro P: %.3f R: %.3f F1: %.3f]" % (
    self.metrics.precision(),
    self.metrics.recall(),
    self.metrics.f1()
)
# Macro-averaged metrics are optional: only append them when the
# metrics object reports that they are available.
if self.metrics.has_macro():
    macro_msg = " [macro P: %.3f R: %.3f F1: %.3f]" % (
        self.metrics.macro_precision(),
        self.metrics.macro_recall(),
        self.metrics.macro_f1()
    )
else:
    macro_msg = ""
# One log line: counts, micro metrics, then (optionally) macro metrics.
self.log.info("%s %s%s" % (count_msg, micro_msg, macro_msg))


if __name__ == "__main__":
    # Command-line entry point. Positional arguments:
    #   argv[1] = benchmark name, argv[2] = annotator name,
    #   argv[3] = dataset file; argv[4:] are benchmark-specific options.
    import sys
    from wikibench.dataset import Dataset
    from wikibench.utils import create_annotator, create_benchmark

    benchmark_name, annotator_name, dataset = sys.argv[1:4]
    # `dataset` is rebound from path string to the loaded Dataset object.
    dataset = Dataset.load(dataset)
    annotator = create_annotator(annotator_name)
    benchmark = create_benchmark(benchmark_name)
    benchmark.parse_arguments(sys.argv[4:])
    benchmark.run(dataset, annotator)
    benchmark.summary()
def __init__(self):
    """Parse command-line options and load the JSON configuration.

    Populates reporting flags (recap/detailed), matching mode,
    threshold-optimization settings and the table format, then builds
    ``self.conf`` from the configuration file given with ``-c``.
    """
    # optparse is deprecated in favour of argparse, but kept here to
    # preserve the existing CLI behaviour exactly.
    parser = OptionParser()
    parser.add_option("-c", "--conf", dest="configuration",
                      help="Configuration file in json format",
                      metavar="FILE", default="configurations.json")
    parser.add_option("-s", "--strong", dest="strong_match",
                      default=False, action="store_true",
                      help="Use strong mention match")
    parser.add_option("-b", "--best", dest="best", default=None,
                      help="Optimize metrics looking for best threshold")
    parser.add_option("--threshold", dest="threshold", default=0.0,
                      type="float", help="Threshold instances")
    parser.add_option("--optimize", dest="optimize", default="macro_f1",
                      help="Target attribute to optimize")
    parser.add_option("--tablefmt", dest="tablefmt", default="simple",
                      help="Format for the tables")
    parser.add_option("--recap", dest="recap", default=False,
                      action="store_true",
                      help="Report metrics for each document")
    parser.add_option("--detailed", dest="detailed", default=False,
                      action="store_true",
                      help="Print a detailed report for every document")
    (options, args) = parser.parse_args()

    self.print_recap = options.recap
    self.print_report = options.detailed
    self.use_strong_match = options.strong_match
    self.best = options.best
    self.threshold = options.threshold
    self.optimize = options.optimize
    self.tablefmt = options.tablefmt
    self.conf = conf = Configurations(options.configuration)

    # A threshold only makes sense together with --best.
    if self.threshold > 0.0 and self.best is None:
        # Fixed: Py2-only `print` statement -> parenthesized call, which
        # prints identically under Python 2 and Python 3.
        print("You did specify a threshold without best.")
# NOTE(review): this chunk continues the method started in the previous
# chunk; `self.threshold = 0.0` presumably belongs to the preceding
# "threshold without best" branch (resetting the ignored threshold) —
# confirm indentation against the original file.
self.threshold = 0.0
# One summary table per experiment: a header row followed by one row per
# (dataset, annotator) pair.
for experiment_name, experiment in conf.experiments.items():
    rows = [[
        u"Dataset", u"Annotator", u"Total",
        u"TP", u"TN", u"FP", u"FN",
        u"\u03bcP", u"\u03bcR", u"\u03bcF1",
        u"P", u"R", u"F1",
    ]]
    # When optimizing/thresholding, two extra columns are spliced in
    # after "Annotator" (insert order makes "Attr" precede "Threshold").
    if self.best or self.threshold:
        rows[-1].insert(2, "Threshold")
        rows[-1].insert(2, "Attr")
    for dataset_name, dataset in conf.datasets.items():
        for annotator_name, annotator in conf.annotators.items():
            # Results are laid out on disk as experiment/dataset/annotator.
            result_directory = os.path.join(
                experiment.file, dataset_name, annotator_name
            )
            results = Dataset.load_results(result_directory)
            t, m = self.report_for(experiment, results, dataset.instances)
            rows.append([
                dataset_name, annotator_name,
                m.tp + m.fn,  # total positives in the gold standard
                m.tp, m.tn, m.fp, m.fn,
                m.precision(), m.recall(), m.f1(),
                m.macro_precision(), m.macro_recall(), m.macro_f1()
            ])
            if self.best or self.threshold:
                rows[-1].insert(2, t)
                rows[-1].insert(2, self.best)
    # Fixed: Py2-only `print` statement -> parenthesized call, identical
    # output under Python 2 and forward-compatible with Python 3.
    print(tabulate(rows, headers="firstrow", floatfmt=".3f",
                   tablefmt=self.tablefmt))