def __init__(self, extractors, iob_test_file, culling_size=None,
             fold_number=10, evaluation_dir="./", label_index=-1):
    """
    Configure a cross evaluation and trigger the creation of its datasets.

    `extractors`, `iob_test_file` and `label_index` are forwarded unchanged
    to the parent initialiser; the other arguments are stored on the
    instance.

    :param extractors: forwarded to the parent initialiser.
    :param iob_test_file: forwarded to the parent initialiser.
    :param culling_size: when not None, `self.test_instances` is shuffled
        in place and its first `culling_size` items are stored as
        `self.culled_instances`. When None, the instances are left
        untouched and `self.culled_instances` is not set.
    :param fold_number: number of folds, stored as `self.fold_number` and
        reported in the log.
    :param evaluation_dir: stored as `self.evaluation_dir`.
    :param label_index: forwarded to the parent initialiser.
    """
    super(CrossEvaluator, self).__init__(
        extractors, iob_test_file, label_index=label_index
    )

    self.culling_size = culling_size
    self.fold_number = fold_number
    self.evaluation_dir = evaluation_dir

    # Local import kept from the original implementation; the name is not
    # referenced again inside this method.
    import logging
    self.logger = init_logger(verbose=True, log_name='CREX.CROSSEVAL')
    log_info = self.logger.info

    # `self.test_instances` is not assigned in this method; presumably the
    # parent initialiser populates it -- confirm against the base class.
    if self.culling_size is None:
        log_info("Culling not set.")
    else:
        log_info("Culling set at %i" % self.culling_size)
        import random
        # Shuffle in place so the culled subset is a random sample.
        random.shuffle(self.test_instances)
        self.culled_instances = self.test_instances[:self.culling_size]

    log_info(
        "Evaluation type: %i-fold cross evaluations" % self.fold_number
    )
    log_info(
        "Training/Test set contains %i instances." % len(self.test_instances)
    )
    self.create_datasets()
def __init__(self, extractors, iob_test_file, culling_size=None,
             fold_number=10, evaluation_dir="./", label_index=-1):
    """
    Initialise the cross evaluator, optionally cull the instances, and
    build the datasets.

    :param extractors: handed to the parent initialiser as-is.
    :param iob_test_file: handed to the parent initialiser as-is.
    :param culling_size: if given (not None), the instances in
        `self.test_instances` are shuffled in place and the leading
        `culling_size` of them are kept in `self.culled_instances`;
        if None, no shuffling happens and `self.culled_instances` is
        left unset.
    :param fold_number: kept in `self.fold_number`; written to the log as
        the number of folds.
    :param evaluation_dir: kept in `self.evaluation_dir`.
    :param label_index: handed to the parent initialiser as-is.
    """
    parent = super(CrossEvaluator, self)
    parent.__init__(extractors, iob_test_file, label_index=label_index)

    self.culling_size = culling_size
    self.fold_number = fold_number
    self.evaluation_dir = evaluation_dir

    # Function-scope import retained from the original; `logging` itself
    # is not used further down in this method.
    import logging
    self.logger = init_logger(verbose=True, log_name='CREX.CROSSEVAL')

    culling_requested = self.culling_size is not None
    if culling_requested:
        culling_message = "Culling set at %i" % self.culling_size
        self.logger.info(culling_message)
        import random
        # NOTE: the shuffle mutates `self.test_instances` itself (assumed
        # to have been set by the parent initialiser -- TODO confirm).
        random.shuffle(self.test_instances)
        self.culled_instances = self.test_instances[:self.culling_size]
    else:
        self.logger.info("Culling not set.")

    fold_message = "Evaluation type: %i-fold cross evaluations" % self.fold_number
    self.logger.info(fold_message)
    size_message = "Training/Test set contains %i instances." % len(self.test_instances)
    self.logger.info(size_message)

    self.create_datasets()
# -*- coding: utf-8 -*-
# author: Matteo Romanello, [email protected]

"""
Module containing classes and functions to perform the evaluation of the
various steps of the pipeline (NER, RelEx, NED).

Example (IPython session) showing how to evaluate the NED step:

    %load_ext autoreload
    %autoreload 2
    import logging
    import tabulate
    from citation_extractor.Utils.IO import init_logger
    init_logger(loglevel=logging.DEBUG)
    import pickle
    import codecs
    import pkg_resources
    import pandas as pd
    from citation_extractor.eval import evaluate_ned
    with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_gold_dataframe.pkl"),"rb") as pickle_file:
        testset_gold_df = pd.read_pickle(pickle_file)
    with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_target_dataframe_cm1.pkl"),"rb") as pickle_file:
        testset_target_df = pd.read_pickle(pickle_file)
    ann_dir = "/Users/rromanello/Documents/crex/citation_extractor/citation_extractor/data/aph_corpus/testset/ann/"
    scores, error_types, errors = evaluate_ned(testset_gold_df, ann_dir, testset_target_df, strict=True)
"""

from __future__ import division
# -*- coding: utf-8 -*-
# author: Matteo Romanello, [email protected]

"""
Module containing classes and functions to perform the evaluation of the
various steps of the pipeline (NER, RelEx, NED).

Example (IPython session) showing how to evaluate the NED step:

    %load_ext autoreload
    %autoreload 2
    import logging
    import tabulate
    from citation_extractor.Utils.IO import init_logger
    init_logger(loglevel=logging.DEBUG)
    import pickle
    import codecs
    import pkg_resources
    import pandas as pd
    from citation_extractor.eval import evaluate_ned
    with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_gold_dataframe.pkl"),"rb") as pickle_file:
        testset_gold_df = pd.read_pickle(pickle_file)
    with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_target_dataframe_cm1.pkl"),"rb") as pickle_file:
        testset_target_df = pd.read_pickle(pickle_file)
    ann_dir = "/Users/rromanello/Documents/crex/citation_extractor/citation_extractor/data/aph_corpus/testset/ann/"
    scores, error_types, errors = evaluate_ned(testset_gold_df, ann_dir, testset_target_df, strict=True)
"""
# NOTE(review): the statements down to the `elif task == "ned"` branch are the
# tail of a definition whose header lies outside the visible chunk. `task` and
# `configuration_parameters` are presumably its parameters, and the
# `is_valid_configuration_*` helpers are presumably defined just above --
# confirm, and restore the indentation of the enclosing definition.
pass
# Task names this validation step recognises.
valid_tasks = ["all", "ner", "ned", "relex"]
# NOTE(review): `task in valid_tasks` is a bare expression whose result is
# discarded, and a membership test does not raise merely because the value is
# absent from the list. As written, this try/except therefore does not reject
# unknown task names.
try:
    task in valid_tasks
except Exception, e:
    raise e # custom exception
# Only the "all" task performs any check: it asserts that the NER, RelEx and
# NED validators all return a truthy value for the configuration.
# NOTE(review): `assert` statements are removed when Python runs with -O.
if task == "all":
    assert is_valid_configuration_ner(
        configuration_parameters) and is_valid_configuration_relex(
        configuration_parameters) and is_valid_configuration_ned(
        configuration_parameters)
# The task-specific branches are placeholders and currently do nothing.
elif task == "ner":
    pass
elif task == "relex":
    pass
elif task == "ned":
    pass


def run_pipeline(configuration_file): #TODO: implement
    """
    Placeholder: not implemented yet.

    The body is a bare `pass`, so the call ignores `configuration_file`
    and returns None.

    :param configuration_file: currently unused; presumably the path to the
        pipeline configuration -- confirm once implemented.
    """
    pass


# Script entry point: parse the command line with docopt and log the result.
if __name__ == "__main__":
    from docopt import docopt
    # docopt builds the command-line interface from the module docstring.
    # NOTE(review): confirm that this module's docstring contains the
    # "Usage:" section docopt expects, and that `citation_extractor` is
    # imported elsewhere in the file (no import is visible in this chunk).
    arguments = docopt(__doc__, version=citation_extractor.__version__)
    logger = init_logger()
    logger.info(arguments)
    # TODO: validate configuration file based on task at hand