# Evaluation helpers. `F1`, `predict`, `model`, `iterview`, `colors`,
# `extract_contiguous`, `alphabet`, and `N_FEATURES` are supplied by the
# surrounding project.

def f1(name, data, w):
    """Instance-level F1 of a linear model with weights `w` on `data`."""
    print()
    print(name)
    f = F1()
    for i, x in enumerate(data):
        f.report(i, predict(w, x.features), x.label)
    f.scores()
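# The `predict` used above comes from the surrounding project and is not shown
# here. A minimal sketch of what it plausibly does, assuming a multiclass
# linear model over sparse binary features where `w` maps each label to a dict
# of feature weights (the layout is an assumption, not the original):
def predict(w, features):
    # Score each label by summing the weights of the active features and
    # return the best-scoring label.
    return max(w, key=lambda label: sum(w[label].get(k, 0.0) for k in features))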
def f1(data, name):
    """Phrase-based F1 of the model's predicted spans against gold spans."""
    print()
    print('Phrase-based F1:', name)
    f1 = F1()
    for i, x in enumerate(iterview(data)):
        predict = extract_contiguous(model(x))
        truth = extract_contiguous(x.truth)
        # (i, begin, end) uniquely identifies the span
        for (label, begins, ends) in truth:
            f1.add_relevant(label, (i, begins, ends))
        for (label, begins, ends) in predict:
            f1.add_retrieved(label, (i, begins, ends))
    print()
    return f1.scores(verbose=True)
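# `extract_contiguous` is project code used by both phrase-based evaluators.
# A minimal sketch of the behavior they rely on: collapse a label sequence
# into (label, begin, end) triples, one per maximal run of identical labels.
# The end-exclusive convention is an assumption, and a real tagger would
# typically also skip the outside label 'O'.
from itertools import groupby

def extract_contiguous(labels):
    spans = []
    pos = 0
    for label, run in groupby(labels):
        n = len(list(run))
        spans.append((label, pos, pos + n))
        pos += n
    return spans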
from scipy.sparse import dok_matrix

def _f1(name, data, c, verbose=True):
    """Instance-level F1 of classifier `c` on `data` with hashed features."""
    if verbose:
        print()
        print(name)
    f = F1()
    for i, x in enumerate(data):
        # Binary feature vector via the hashing trick: each feature name is
        # mapped to an integer id and folded into N_FEATURES buckets.
        phi = dok_matrix((1, N_FEATURES))
        for k in x.features:
            phi[0, alphabet[k] % N_FEATURES] = 1.0
        [y] = c.predict(phi)
        f.report(i, y, x.label)
    f.scores(verbose=verbose)
    return f
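# Hypothetical usage of `_f1` above, assuming an sklearn-style classifier and
# the same hashing scheme; `train`, `dev`, `alphabet`, and `N_FEATURES` are
# stand-ins for whatever the surrounding project defines.
from scipy.sparse import vstack
from sklearn.linear_model import LogisticRegression

def hashed(x):
    # Same feature-hashing encoding that `_f1` applies at prediction time.
    phi = dok_matrix((1, N_FEATURES))
    for k in x.features:
        phi[0, alphabet[k] % N_FEATURES] = 1.0
    return phi

def train_and_eval(train, dev):
    c = LogisticRegression()
    c.fit(vstack([hashed(x) for x in train]), [x.label for x in train])
    _f1('train', train, c)
    return _f1('dev', dev, c)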
def evaluate(self, predict, data, name, verbosity=1):
    if not data:
        return
    if verbosity:
        print()
        print('Phrase-based F1:', name)
    f1 = F1()
    for i, x in enumerate(iterview(data, msg='Eval %s' % name)):
        pred = extract_contiguous(predict(x))
        gold = extract_contiguous(self.Y.lookup_many(x.tags))
        # (i, begin, end) uniquely identifies the span
        for (label, begins, ends) in gold:
            f1.add_relevant(label, (i, begins, ends))
        for (label, begins, ends) in pred:
            f1.add_retrieved(label, (i, begins, ends))
    if verbosity:
        print()
    return f1.scores(verbose=verbosity >= 1)
from collections import Counter

def evaluate(self, predict, data, msg, verbosity=2):
    "Run prediction function `predict` on `data`; report accuracy by error class."
    if not data:
        return float('nan')
    ff = F1()
    correct = Counter()
    total = Counter()
    for ii, x in enumerate(iterview(data, colors.blue % ('Eval (%s)' % msg))):
        y = predict(x)
        gold = self.Y.lookup_many(x.tags)
        for t, (got, want) in enumerate(zip(y, gold)):
            if verbosity >= 2:
                ff.report(instance=(ii, t), prediction=got, target=want)
            for c in self.error_classifications(x, t):
                if got == want:
                    correct[c] += 1
                total[c] += 1
    # print('sentences:', len(data), 'tokens:', total['overall'])
    c = 'overall'
    acc = '%s: %.2f' % (colors.light_yellow % c, 100 * correct[c] / total[c])
    other = list(total.keys())
    other.remove(c)
    breakdown = ', '.join('%s: %.2f' % (c, 100 * correct[c] / total[c])
                          for c in sorted(other))
    print('%s (%s)' % (acc, breakdown))
    if verbosity >= 2:
        print()
        print('F1 breakdown')
        print('============')
        ff.scores()
    return correct['overall'] / total['overall']
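# All of the evaluators above share an `F1` accumulator from the surrounding
# project. A minimal sketch of the interface they use (set-based per-label
# precision/recall/F1; the internals are assumptions, not the original):
from collections import defaultdict

class F1(object):
    def __init__(self):
        self.relevant = defaultdict(set)    # gold items per label
        self.retrieved = defaultdict(set)   # predicted items per label

    def add_relevant(self, label, item):
        self.relevant[label].add(item)

    def add_retrieved(self, label, item):
        self.retrieved[label].add(item)

    def report(self, instance, prediction, target):
        # Instance mode: a prediction is retrieved under its predicted label
        # and relevant under its gold label.
        self.add_retrieved(prediction, instance)
        self.add_relevant(target, instance)

    def scores(self, verbose=True):
        f1s = {}
        for label in sorted(set(self.relevant) | set(self.retrieved)):
            R, T = self.retrieved[label], self.relevant[label]
            p = len(R & T) / len(R) if R else 0.0
            r = len(R & T) / len(T) if T else 0.0
            f = 2 * p * r / (p + r) if p + r else 0.0
            f1s[label] = f
            if verbose:
                print('%-15s P: %.2f R: %.2f F1: %.2f' % (label, p, r, f))
        return f1s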