def loadAllStandard(path):
    """
    Load every standard markup document found in a directory.

    The document name is the part of each file name before its first dot, so
    several files sharing that prefix yield a single Standard object.

    path - directory containing the standard markup files

    Returns a list of Standard objects ordered by the integer that follows
    the first five characters of each name (names are expected to look like
    'book_XXX').
    """
    # Deduplicate: several files may belong to the same document
    doc_names = {entry.split('.')[0] for entry in os.listdir(path)}
    documents = [Standard(doc_name, path) for doc_name in doc_names]
    # book_XXX - order by the numeric suffix rather than alphabetically
    documents.sort(key=lambda doc: int(doc.name[5:]))
    return documents
def _doEvaluate(self, std_dir, test_dir, is_locorg_allowed=True):
    """
    Evaluate the submission by comparing it to the standard.

    Returns a tuple (metrics, doc_results):
      metrics     - dictionary keyed by mention type; each value is the
                    result of calcMetrics() applied to the true positive
                    count, standard size and test size summed over all
                    documents
      doc_results - dictionary keyed by document name; each value is what
                    self.evaluateDocument() returned for that document

    Each per-document result is indexed by mention type, and positions
    3, 4 and 5 of the entry are read as true positive count, standard size
    and test size (the expected layout is precision, recall, f1, true
    positive count, standard size and test size in that order).

    std_dir - standard markup files directory
    test_dir - response files directory; only '*.task1' files are considered
    is_locorg_allowed - enables/disables 'locorg' mention type

    Raises Exception listing the document names for which the response has
    no '.task1' file. Response files that match no standard document are
    ignored.
    """
    # A document name is the file name without its last extension
    std_names = {filename.rpartition('.')[0] for filename in os.listdir(std_dir)}

    # validate the submission integrity:
    missing = set(std_names)
    for filename in os.listdir(test_dir):
        name, _, extension = filename.rpartition('.')
        if extension != 'task1':
            continue
        # discard() rather than remove(): an extra response file that has no
        # counterpart in the standard must not abort the run with a KeyError
        missing.discard(name)

    if missing:
        raise Exception('Missing files in the response:\n'
                        + '\n'.join(sorted(missing)))

    allowed_tags = ['per', 'org', 'loc']
    if is_locorg_allowed:
        allowed_tags.append('locorg')

    doc_results = {}

    # calculate partial metrics:
    tp = {tag: 0.0 for tag in allowed_tags}
    n_std = {tag: 0.0 for tag in allowed_tags}
    n_test = {tag: 0.0 for tag in allowed_tags}

    # Sorted so documents are processed in a reproducible order
    for name in sorted(std_names):
        std = Standard(name, std_dir)
        test = Test(name, test_dir)
        res = self.evaluateDocument(std, test, is_locorg_allowed)
        doc_results[name] = res
        for tag in allowed_tags:
            tp[tag] += res[tag][3]
            n_std[tag] += res[tag][4]
            n_test[tag] += res[tag][5]

    # calculate global metrics:
    metrics = {tag: calcMetrics(tp[tag], n_std[tag], n_test[tag])
               for tag in allowed_tags}
    return metrics, doc_results
def generate(self, std_dir, out_dir):
    """
    Produce one response file per standard document found in a directory.

    A document name is a file name from std_dir with its last extension
    removed; files sharing that name are treated as a single document. Each
    document is loaded as a Standard and handed to self.generateDoc together
    with the output path '<out_dir>/<name>.task1'.

    std_dir - standard markup directory
    out_dir - directory where the generated response will be saved to
    """
    # rpartition keeps everything before the LAST dot (empty if no dot)
    doc_names = {filename.rpartition('.')[0] for filename in os.listdir(std_dir)}

    for doc_name in doc_names:
        reference = Standard(doc_name, std_dir)
        target_path = os.path.join(out_dir, doc_name + '.task1')
        self.generateDoc(reference, target_path)
def createResponse(self, std_path, test_path):
    """
    Create a response for every standard document in a directory.

    The output directory is created first if it does not exist yet. A
    document name is the part of a file name before its first dot; each
    distinct name is loaded as a Standard and passed to
    self.createDocumentResponse along with the output directory.

    std_path - directory containing the standard markup files
    test_path - directory the responses are written to
    """
    os.makedirs(test_path, exist_ok=True)

    doc_names = {entry.split('.')[0] for entry in os.listdir(std_path)}
    for doc_name in doc_names:
        self.createDocumentResponse(Standard(doc_name, std_path), test_path)