Example #1
0
 def __init__(self,
              extractors,
              iob_test_file,
              culling_size=None,
              fold_number=10,
              evaluation_dir="./",
              label_index=-1):
     """Set up an n-fold cross evaluation over the given IOB test file.

     :param extractors: citation extractors to evaluate.
     :param iob_test_file: path to the IOB file with the test instances.
     :param culling_size: if not ``None``, randomly cull the test set down
         to this many instances before creating the folds.
     :param fold_number: number of folds for the cross evaluation.
     :param evaluation_dir: directory where evaluation output is written.
     :param label_index: index of the label column in the IOB data.
     """
     super(CrossEvaluator, self).__init__(extractors,
                                          iob_test_file,
                                          label_index=label_index)
     self.culling_size = culling_size
     self.fold_number = fold_number
     self.evaluation_dir = evaluation_dir
     self.logger = init_logger(verbose=True, log_name='CREX.CROSSEVAL')
     if self.culling_size is not None:
         self.logger.info("Culling set at %i", self.culling_size)
         # Shuffle before slicing so the culled subset is a random sample.
         import random
         random.shuffle(self.test_instances)
         self.culled_instances = self.test_instances[:self.culling_size]
     else:
         self.logger.info("Culling not set.")
     # Lazy %-args: the logger formats only if the record is actually emitted.
     self.logger.info("Evaluation type: %i-fold cross evaluations",
                      self.fold_number)
     self.logger.info("Training/Test set contains %i instances.",
                      len(self.test_instances))
     self.create_datasets()
Example #2
0
 def __init__(self, extractors, iob_test_file, culling_size=None,
              fold_number=10, evaluation_dir="./", label_index=-1):
     """Prepare an n-fold cross evaluation over the instances in *iob_test_file*."""
     super(CrossEvaluator, self).__init__(
         extractors, iob_test_file, label_index=label_index)
     self.culling_size = culling_size
     self.fold_number = fold_number
     self.evaluation_dir = evaluation_dir
     import logging
     self.logger = init_logger(verbose=True, log_name='CREX.CROSSEVAL')
     if self.culling_size is None:
         self.logger.info("Culling not set.")
     else:
         self.logger.info("Culling set at %i" % self.culling_size)
         # keep only a random sample of the requested size
         import random
         random.shuffle(self.test_instances)
         self.culled_instances = self.test_instances[:self.culling_size]
     self.logger.info(
         "Evaluation type: %i-fold cross evaluations" % self.fold_number)
     self.logger.info(
         "Training/Test set contains %i instances." % len(self.test_instances))
     self.create_datasets()
Example #3
0
# -*- coding: utf-8 -*-
# author: Matteo Romanello, [email protected]
"""

Module containing classes and functions to perform the evaluation of the various steps of the pipeline (NER, RelEx, NED).

%load_ext autoreload
%autoreload 2

import logging
import tabulate
from citation_extractor.Utils.IO import init_logger
init_logger(loglevel=logging.DEBUG)
import pickle
import codecs
import pkg_resources
import pandas as pd
from citation_extractor.eval import evaluate_ned

with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_gold_dataframe.pkl"),"rb") as pickle_file:
    testset_gold_df = pd.read_pickle(pickle_file)

with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_target_dataframe_cm1.pkl"),"rb") as pickle_file:
    testset_target_df = pd.read_pickle(pickle_file)

ann_dir = "/Users/rromanello/Documents/crex/citation_extractor/citation_extractor/data/aph_corpus/testset/ann/"

scores, error_types, errors = evaluate_ned(testset_gold_df, ann_dir, testset_target_df, strict=True)

"""
from __future__ import division
Example #4
0
# -*- coding: utf-8 -*-
# author: Matteo Romanello, [email protected]

"""

Module containing classes and functions to perform the evaluation of the various steps of the pipeline (NER, RelEx, NED).

%load_ext autoreload
%autoreload 2

import logging
import tabulate
from citation_extractor.Utils.IO import init_logger
init_logger(loglevel=logging.DEBUG)
import pickle
import codecs
import pkg_resources
import pandas as pd
from citation_extractor.eval import evaluate_ned

with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_gold_dataframe.pkl"),"rb") as pickle_file:
    testset_gold_df = pd.read_pickle(pickle_file)

with codecs.open(pkg_resources.resource_filename("citation_extractor", "data/pickles/test_target_dataframe_cm1.pkl"),"rb") as pickle_file:
    testset_target_df = pd.read_pickle(pickle_file)

ann_dir = "/Users/rromanello/Documents/crex/citation_extractor/citation_extractor/data/aph_corpus/testset/ann/"

scores, error_types, errors = evaluate_ned(testset_gold_df, ann_dir, testset_target_df, strict=True)

"""
Example #5
0
        pass

    valid_tasks = ["all", "ner", "ned", "relex"]
    # BUG FIX: the original `try: task in valid_tasks / except ...: raise e`
    # evaluated the membership test and discarded the result -- an expression
    # cannot raise here, so an invalid task silently fell through every branch
    # below. Validate for real and raise on bad input.
    if task not in valid_tasks:
        raise ValueError(
            "Invalid task \"%s\": expected one of %s" % (task, valid_tasks))
    if task == "all":
        # "all" requires every pipeline stage's configuration to be valid.
        # NOTE(review): `assert` is stripped under `python -O`; consider
        # raising an explicit exception instead.
        assert is_valid_configuration_ner(
            configuration_parameters) and is_valid_configuration_relex(
                configuration_parameters) and is_valid_configuration_ned(
                    configuration_parameters)
    elif task == "ner":
        pass
    elif task == "relex":
        pass
    elif task == "ned":
        pass


def run_pipeline(configuration_file):  # TODO: implement
    """Run the processing pipeline described by *configuration_file*.

    Not implemented yet: currently a no-op that returns ``None``.
    """


if __name__ == "__main__":
    # Command-line entry point: docopt parses sys.argv against the module
    # docstring used as the usage specification.
    from docopt import docopt
    arguments = docopt(__doc__, version=citation_extractor.__version__)
    logger = init_logger()  # project logger; configuration defined elsewhere
    logger.info(arguments)
    # TODO: validate configuration file based on task at hand