def __init__(self, extractors, iob_directories=None, iob_file=None, label_index=-1):
    """
    Set up the evaluator and load the test instances.

    Args:
        extractors: the list of canonical citation extractors to evaluate
        iob_directories: directories whose ".txt" files (IOB format) are
            read and concatenated into the test set. Only consulted when
            `iob_file` is None. Defaults to no directories.
        iob_file: a single file in IOB format to be used for testing and
            evaluating the extractors. When given, it takes precedence
            over `iob_directories`.
        label_index: stored as-is on `self.label_index`
            (presumably the position of the label within a token -- TODO confirm).
    """
    import logging
    self.logger = logging.getLogger("CREX.SIMPLEVAL")
    if iob_file is None:
        # `None` stands in for the former mutable `[]` default, which would
        # have been shared between every call of this method.
        directories = [] if iob_directories is None else iob_directories
        self.logger.debug(directories)
        # read the test instances from a list of directories containing the test data
        data = []
        for directory in directories:
            data += IO.read_iob_files(directory, ".txt")
        self.test_instances = data
    else:
        self.test_instances = IO.file_to_instances(iob_file)
    # Lazy %-args: the message is only formatted if DEBUG is enabled.
    self.logger.debug("Found %i instances for test", len(self.test_instances))
    self.extractors = extractors
    self.output = {}
    self.error_matrix = None
    self.label_index = label_index
def __init__(self, extractors, iob_directories=None, iob_file=None, label_index=-1):
    """
    Build the evaluator: remember the extractors and load the test data.

    Args:
        extractors: the list of canonical citation extractors to evaluate
        iob_directories: list of directories; every ".txt" file in IOB
            format found there contributes test instances. Ignored when
            `iob_file` is provided. Defaults to an empty list.
        iob_file: one file in IOB format used for testing and evaluating
            the extractors; if not None it is used instead of
            `iob_directories`.
        label_index: kept unchanged in `self.label_index`
            (NOTE(review): looks like an index into each token's fields -- confirm).
    """
    import logging
    self.logger = logging.getLogger("CREX.SIMPLEVAL")
    if iob_file is not None:
        self.test_instances = IO.file_to_instances(iob_file)
    else:
        # A fresh list per call replaces the old shared mutable `[]` default.
        if iob_directories is None:
            iob_directories = []
        self.logger.debug(iob_directories)
        # read the test instances from a list of directories containing the test data
        instances = []
        for directory in iob_directories:
            instances += IO.read_iob_files(directory, ".txt")
        self.test_instances = instances
    # Arguments are passed separately so formatting is deferred to the logger.
    self.logger.debug("Found %i instances for test", len(self.test_instances))
    self.extractors = extractors
    self.output = {}
    self.error_matrix = None
    self.label_index = label_index
def get_extractor(settings):
    """
    Instantiate, train and return a Citation_Extractor.

    Args:
        settings: configuration object. This function reads
            `settings.DATA_DIRS` (iterable of directories holding ".txt"
            IOB files) and `settings.CLASSIFIER`; the whole object is also
            handed to the `citation_extractor` constructor.

    Returns:
        The `citation_extractor` instance, or None when any step failed
        (the failure is logged with its traceback instead of being raised).
    """
    import citation_extractor as citation_extractor_module
    from citation_extractor.core import citation_extractor
    from citation_extractor.Utils import IO

    ce = None
    try:
        logger.info("Using CitationExtractor v. %s", citation_extractor_module.__version__)
        # The instances read here only feed the statistics in the log
        # message below; they are not passed to the extractor.
        train_instances = []
        for directory in settings.DATA_DIRS:
            train_instances += IO.read_iob_files(directory, extension=".txt")
        logger.info(
            "Training data: found %i directories containing %i sentences and %i tokens",
            len(settings.DATA_DIRS),
            len(train_instances),
            IO.count_tokens(train_instances),
        )
        if settings.CLASSIFIER is None:
            ce = citation_extractor(settings)
        else:
            ce = citation_extractor(settings, settings.CLASSIFIER)
    except Exception:
        # Best-effort by design: keep going, but record the full traceback
        # rather than printing only the exception message.
        logger.exception("Unable to instantiate the citation extractor")
    # The original fell off the end and always returned None.
    return ce
def get_extractor(settings):
    """
    Instantiate, train and return a Citation_Extractor.

    Args:
        settings: configuration object. `settings.DATA_DIRS` (iterable of
            directories holding ".txt" IOB files) is read here, and the
            whole object is passed to the `citation_extractor` constructor.

    Returns:
        The `citation_extractor` instance, or None if an exception occurred
        along the way (it is logged with its traceback, not re-raised).
    """
    import citation_extractor as citation_extractor_module
    from citation_extractor.core import citation_extractor
    from citation_extractor.eval import IO

    ce = None
    try:
        logger.info("Using CitationExtractor v. %s", citation_extractor_module.__version__)
        # Read purely to report corpus statistics in the log; the extractor
        # itself is built from `settings` alone.
        train_instances = []
        for directory in settings.DATA_DIRS:
            train_instances += IO.read_iob_files(directory, extension=".txt")
        logger.info(
            "Training data: found %i directories containing %i sentences and %i tokens",
            len(settings.DATA_DIRS),
            len(train_instances),
            IO.count_tokens(train_instances),
        )
        ce = citation_extractor(settings)
    except Exception:
        # Still best-effort, but the traceback now reaches the log instead
        # of only the exception message reaching stdout.
        logger.exception("Unable to instantiate the citation extractor")
    # Previously missing: without it callers always received None.
    return ce
def read_instances(directories):
    """Return one flat list with the IOB instances read from each directory, in order."""
    instances = []
    for path in directories:
        # Append this directory's instances to the running list.
        instances.extend(IO.read_iob_files(path))
    return instances