def __init__(self, extractors, iob_directories=None, iob_file=None, label_index=-1):
    """Load the test instances and initialise the evaluator state.

    Test data is taken from ``iob_file`` when one is given; otherwise every
    directory in ``iob_directories`` is read and the results are
    concatenated in the order the directories are listed.

    Args:
        extractors: the list of canonical citation extractors to evaluate.
        iob_directories: directories passed one by one to
            ``IO.read_iob_files(directory, ".txt")``. Only used when
            ``iob_file`` is None. ``None`` (the default) means no
            directories, which yields an empty list of test instances.
        iob_file: the file in IOB format to be used for testing and
            evaluating the extractors; read with ``IO.file_to_instances``.
            When given, ``iob_directories`` is ignored.
        label_index: stored unchanged as ``self.label_index``.
            NOTE(review): presumably the position of the label inside a
            token -- confirm against the code that reads it.
    """
    import logging

    self.logger = logging.getLogger("CREX.SIMPLEVAL")

    # A None default replaces the former shared mutable ``[]`` default.
    if iob_directories is None:
        iob_directories = []

    if iob_file is None:
        # Read the test instances from the directories containing the data.
        self.logger.debug(iob_directories)
        data = []
        for directory in iob_directories:
            data += IO.read_iob_files(directory, ".txt")
        self.test_instances = data
    else:
        self.test_instances = IO.file_to_instances(iob_file)

    # Lazy %-args: the message is only formatted if DEBUG is enabled.
    self.logger.debug("Found %i instances for test", len(self.test_instances))

    self.extractors = extractors
    self.output = {}
    self.error_matrix = None
    self.label_index = label_index
def read_instances(directories):
    """Gather what ``IO.read_iob_files`` yields for each directory.

    Args:
        directories: an iterable of directories; each one is handed to
            ``IO.read_iob_files`` in turn.

    Returns:
        A new flat list holding the items read from every directory, in the
        order the directories were given. Empty when ``directories`` is
        empty.
    """
    return [
        instance
        for folder in directories
        for instance in IO.read_iob_files(folder)
    ]