def evaluate(self, std_path, test_path, output_path, is_silent=False):
    """Run evaluation on all files in the given directories"""
    if not is_silent:
        print('Running evaluation, this might take a while...')
    std = loadAllStandard(std_path)
    test = loadAllTest(test_path)
    diff = {x.name for x in std}.symmetric_difference(
        {y.name for y in test})
    assert len(diff) == 0
    res = dict((x, Metrics()) for x in Evaluator.stat_tags)
    for i, s in enumerate(std):
        if not s.has_facts:
            # do not compare documents without a .facts file;
            # this is just for convenience
            continue
        metrics = self.evaluateDocument(s, test[i])
        self.printReport(s.name, output_path)
        for tag in Evaluator.stat_tags:
            res[tag].add(metrics[tag])
    if not is_silent:
        print('TAG ' + Metrics.header())
        for tag in Evaluator.stat_tags:
            print('{:15} '.format(tag) + res[tag].toLine())
    return res
def evaluate(self, clusters):
    """Evaluate a set of clusters"""
    # -1 marks objects whose quality has not been assigned yet
    q_std = dict((s, -1) for s in self.std)
    q_test = dict((t, -1) for t in self.test)
    for c in clusters:
        c.calculateQuality()
        q_std[c.std] = c.quality
        for t in c.test:
            assert q_test[t] == -1
            q_test[t] = c.quality
    assert -1 not in q_std.values()
    assert -1 not in q_test.values()
    tp_std = sum(q_std.values())
    tp_test = sum(q_test.values())
    n_std = 0
    n_test = 0
    for cluster in clusters:
        if cluster.std is None:
            n_test += 1
            continue
        if not cluster.std.is_ignored:
            n_std += 1
            n_test += len(cluster.test)
    return Metrics.create(tp_std, tp_test, n_std, n_test)
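# Hedged sketch: the cluster evaluation above ends with
# Metrics.create(tp_std, tp_test, n_std, n_test). A plausible reading of
# those four counts, assuming standard precision/recall/F1 semantics, is
# sketched below. MetricsSketch and its fields are hypothetical stand-ins;
# the real Metrics class is defined elsewhere in the repo and may differ.
class MetricsSketch:
    def __init__(self, precision=0.0, recall=0.0, f1=0.0):
        self.precision = precision
        self.recall = recall
        self.f1 = f1

    @staticmethod
    def create(tp_std, tp_test, n_std, n_test):
        # recall: quality credited to standard objects, over relevant standard objects
        recall = tp_std / n_std if n_std > 0 else 0.0
        # precision: quality credited to test objects, over counted test objects
        precision = tp_test / n_test if n_test > 0 else 0.0
        denom = precision + recall
        f1 = 2 * precision * recall / denom if denom > 0 else 0.0
        return MetricsSketch(precision, recall, f1)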
def evaluate(self, std_path, test_path, output_path=''):
    """Run evaluation on all files in the given directories"""
    std = loadAllStandard(std_path)
    test = loadAllTest(test_path)
    diff = {x.name for x in std}.symmetric_difference(
        {y.name for y in test})
    if len(diff) > 0:
        print('WARNING: missing files:')
        print('\n'.join(sorted(diff, key=lambda x: int(x[5:]))))
    std_by_name = dict((x.name, x) for x in std)
    test_by_name = dict((y.name, y) for y in test)
    names = sorted({x.name for x in std}.intersection(
        {y.name for y in test}), key=lambda x: int(x[5:]))
    res = [Metrics() for _ in self.tags]
    for name in names:
        s = std_by_name[name]
        t = test_by_name[name]
        m_tuple = self.evaluateDocument(s, t)
        self.printReport(s.name, output_path)
        for i, val in enumerate(res):
            val.add(m_tuple[i])
    print(self.buildMetricsTable(res))
def evaluate(self, pairs, unmatched_std, unmatched_test):
    """Evaluate the matching. Returns metrics"""
    tp = 0
    matched_std_objects = set()
    for s, t in pairs:
        tp += self.quality(s, t)
        matched_std_objects.add(s)
    n_test = len(pairs) + len(unmatched_test)
    n_std = len(pairs)
    for obj in unmatched_std:
        is_relevant = True
        if obj not in matched_std_objects:
            # this is the logic used to skip unmatched embedded organizations
            # in the standard markup, but only if the larger organization is
            # correctly matched
            for parent in obj.parents:
                if parent in matched_std_objects:
                    is_relevant = False
            # alternatively, check if the object has no valuable spans:
            # unmatched objects with no spans marked by a positive number
            total_mark = sum(obj.mark(token) for token in obj.tokens)
            if total_mark == 0.0:
                is_relevant = False
        if is_relevant:
            n_std += 1
    return Metrics.createSimple(tp, n_std, n_test)
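# Illustrative (hypothetical) example of the skip logic above: if the
# standard markup contains an organization "X University Press" with the
# embedded organization "X University", and the system matched only the
# larger object, the unmatched embedded "X University" has a matched parent
# and is therefore not counted towards n_std. Likewise, an unmatched
# standard object whose tokens all carry a zero mark has no valuable spans
# and is skipped.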
def evaluate(self, std_path, test_path, output_path='', is_silent=False):
    """Run evaluation on all files in the given directories.

    If output_path is provided, evaluation reports will be written there.
    is_silent determines if the result is printed to the output"""
    std = loadAllStandard(std_path)
    test = loadAllTest(test_path)
    diff = {x.name for x in std}.symmetric_difference(
        {y.name for y in test})
    if len(diff) > 0:
        print('WARNING: missing files:')
        print('\n'.join(sorted(diff, key=lambda x: int(x[5:]))))
    std_by_name = dict((x.name, x) for x in std)
    test_by_name = dict((y.name, y) for y in test)
    names = sorted({x.name for x in std}.intersection(
        {y.name for y in test}), key=lambda x: int(x[5:]))
    res = dict((tag, Metrics()) for tag in self.tags)
    for name in names:
        s = std_by_name[name]
        t = test_by_name[name]
        m = self.evaluateDocument(s, t)
        self.metrics_dict = dict((x, m[x]) for x in self.tags)
        self.printReport(s.name, output_path)
        for key in res:
            res[key].add(self.metrics_dict[key])
    if not is_silent:
        print(self.buildMetricsTable(res))
    return res
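# Hypothetical usage of the directory-level evaluate above. The Evaluator
# construction and the paths are illustrative assumptions, not taken from
# the repo:
#
#   evaluator = Evaluator()
#   res = evaluator.evaluate('devset/std', 'devset/test',
#                            output_path='reports', is_silent=True)
#   for tag in evaluator.tags:
#       print(tag, res[tag].toLine())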
def evaluate(self, std_path, test_path, output_path=''):
    """Run evaluation on all files in the given directories"""
    std = loadAllStandard(std_path)
    test = loadAllTest(test_path)
    diff = {x.name for x in std}.symmetric_difference(
        {y.name for y in test})
    if len(diff) > 0:
        print('Warning: missing files:\n {}'.format('\n '.join(diff)))
        std = [s for s in std if s.name not in diff]
        test = [t for t in test if t.name not in diff]
    res = [Metrics() for _ in Evaluator.stat_tags]
    for i, s in enumerate(std):
        if not s.has_coref:
            # do not compare documents without a .coref file;
            # this is just for convenience
            continue
        m_tuple = self.evaluateDocument(s, test[i])
        self.printReport(s.name, output_path)
        for j, val in enumerate(res):
            val.add(m_tuple[j])
    print(self.buildMetricsTable(res))
def evaluate(self, pairs, unmatched_std, unmatched_test):
    """Evaluate the matching. Returns metrics"""
    tp = 0
    matching = {}
    for s, t in pairs:
        matching[s] = t
        matching[t] = s
    n_relevant_pairs = 0
    for s, t in pairs:
        if not self.isIgnored(s, t, matching):
            n_relevant_pairs += 1
            tp += self.quality(s, t)
    fn = len([s for s in unmatched_std
              if not self.isStandardIgnored(s, matching)])
    fp = len([t for t in unmatched_test
              if not self.isTestIgnored(t, matching)])
    n_std = n_relevant_pairs + fn
    n_test = n_relevant_pairs + fp
    return Metrics.createSimple(tp, n_std, n_test)
def evaluate(self, std_path, test_path, output_path='', is_silent=False):
    """Run evaluation on all files in the given directories.

    If output_path is provided, evaluation reports will be written there.
    is_silent determines if the result is printed to the output"""
    std = loadAllStandard(std_path)
    test = loadAllTest(test_path)
    diff = {x.name for x in std}.symmetric_difference(
        {y.name for y in test})
    if len(diff) > 0:
        print('Warning: missing files:\n {}'.format('\n '.join(diff)))
        std = [s for s in std if s.name not in diff]
        test = [t for t in test if t.name not in diff]
    res = dict((tag, Metrics()) for tag in Evaluator.stat_tags)
    for i, s in enumerate(std):
        if not s.has_coref:
            # do not compare documents without a .coref file;
            # this is just for convenience
            continue
        m_tuple = self.evaluateDocument(s, test[i])
        self.printReport(s.name, output_path)
        for j, tag in enumerate(Evaluator.stat_tags):
            res[tag].add(m_tuple[j])
    if not is_silent:
        print(self.buildMetricsTable(res))
    return res
def evaluate(self, pairs, unmatched_std, unmatched_test):
    """Evaluate the matching. Returns metrics"""
    matching = {}
    for s, t in pairs:
        matching[s] = t
        matching[t] = s
    tp = 0
    n_relevant_pairs = 0
    matched_std_objects = set()
    for s, t in pairs:
        if not self.isIgnored(s, t, matching):
            tp += self.quality(s, t)
            matched_std_objects.add(s)
            n_relevant_pairs += 1
    # in this task no unmatched test object can be ignored
    n_test = n_relevant_pairs + len(unmatched_test)
    n_std = n_relevant_pairs
    for obj in unmatched_std:
        if obj not in matched_std_objects:
            if not self.isStandardIgnored(obj, matching):
                n_std += 1
    return Metrics.createSimple(tp, n_std, n_test)
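# Hedged note: Metrics.createSimple(tp, n_std, n_test), as called above,
# appears to be the special case of the Metrics.create sketch given earlier
# in which the same summed quality tp is credited to both sides, i.e.
# roughly Metrics.create(tp, tp, n_std, n_test), so that
# recall = tp / n_std and precision = tp / n_test. This is an assumption;
# the real implementation lives elsewhere in the repo.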
def buildMetricsTable(self, metrics_list):
    """Build a table from the provided metrics for the output"""
    assert len(metrics_list) == len(Evaluator.stat_tags)
    res = 'Type ' + Metrics.header()
    for i, tag in enumerate(Evaluator.stat_tags):
        res += '\n{:8} '.format(tag.upper()) + metrics_list[i].toLine()
    return res
def buildMetricsTable(self, metrics_dict):
    """Build a table from the provided metrics for the output"""
    assert len(metrics_dict) == len(Evaluator.stat_tags)
    res = 'Type ' + Metrics.header()
    for tag in Evaluator.stat_tags:
        res += '\n{:8} '.format(tag.upper()) + metrics_dict[tag].toLine()
    return res
def buildMetricsTable(self, metrics_list):
    """Build a table from the provided metrics for the output"""
    assert len(metrics_list) == len(self.tags)
    res = 'Type ' + Metrics.header()
    for i, tag in enumerate(self.tags):
        res += '\n{:8} '.format(tag.upper()) + metrics_list[i].toLine()
    return res
def describeMatching(self):
    """Returns a string description of the matching this optimizer built"""
    res = '{:4}\t{:4}\t{:4}\t{:8}\n'.format('Res', 'Q_A', 'Q_Id', 'Facts')
    res += '\n'.join(c.toInlineString() for c in self.clusters)
    res += '\n\n'
    res += '-------METRICS------\n'
    res += Metrics.header() + '\n'
    res += self.metrics.toLine()
    return res
def evaluate(self, pairs, unmatched_std, unmatched_test):
    """Evaluate the matching. Returns metrics"""
    tp = 0
    for s, t in pairs:
        tp += self.quality(s, t)
    n_std = len(pairs) + len(unmatched_std)
    n_test = len(pairs) + len(unmatched_test)
    return Metrics.createSimple(tp, n_std, n_test)
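# Worked example (hypothetical numbers) for the simple variant above:
# three matched pairs with qualities 1.0, 0.5 and 1.0 give tp = 2.5;
# with one unmatched standard object, n_std = 3 + 1 = 4, and with two
# unmatched test objects, n_test = 3 + 2 = 5. Under the usual reading
# this yields recall = 2.5 / 4 = 0.625 and precision = 2.5 / 5 = 0.5.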
def describeMatching(self, clusters, metrics):
    """Returns a string description of the matching this optimizer built"""
    res = ''
    for i, c in enumerate(clusters):
        res += '---- #{} ----\n'.format(i + 1)
        res += c.toInlineString() + '\n'
    res += '\n\n'
    res += '-------METRICS------\n'
    res += 'TAG ' + Metrics.header() + '\n'
    for tag in Evaluator.stat_tags:
        res += '{:15} '.format(tag) + metrics[tag].toLine() + '\n'
    return res
def evaluate(self, std_path, test_path, output_path):
    """Run evaluation on all files in the given directories"""
    print('Running evaluation, this might take a while...')
    std = loadAllStandard(std_path)
    test = loadAllTest(test_path)
    diff = {x.name for x in std}.symmetric_difference(
        {y.name for y in test})
    assert len(diff) == 0
    res = Metrics()
    for i, s in enumerate(std):
        if not s.has_facts:
            # do not compare documents without a .facts file;
            # this is just for convenience
            continue
        metrics = self.evaluateDocument(s, test[i])
        self.printReport(s.name, output_path)
        res.add(metrics)
    print(Metrics.header())
    print(res.toLine())
def evaluateDocument(self, std, test):
    """Match the facts of a single document tag by tag and collect metrics"""
    self.metrics = dict((x, Metrics()) for x in Evaluator.stat_tags)
    self.clusters = []
    for tag in Evaluator.stat_tags:
        if tag == 'overall':
            # 'overall' is an aggregate over all tags, not a fact tag itself
            continue
        tag_std = [s for s in std.facts if s.tag == tag]
        tag_test = [t for t in test.facts if t.tag == tag]
        self.optimizer = Optimizer(tag_std, tag_test, self.hard_mode)
        self.optimizer.findSolution()
        self.metrics[tag].add(self.optimizer.metrics)
        self.metrics['overall'].add(self.optimizer.metrics)
        self.clusters.extend(self.optimizer.clusters)
    return self.metrics