def test_spans_doesnt_change_tree(self):
    """``spans`` must leave its argument equal to a deep copy taken beforehand."""
    tree = Tree.fromstring(
        """ (S (NP (Det el) (Noun gato)) (VP (Verb come) (NP (Noun pescado) (Adj crudo))) ) """
    )
    snapshot = tree.copy(deep=True)

    # The return value is deliberately discarded: only the effect of the
    # call on ``tree`` is under test.
    spans(tree)

    self.assertEqual(tree, snapshot)
def test_spans_with_unary(self):
    """With ``unary=True`` the single-child NP is expected as ('NP', 3, 3)."""
    tree = Tree.fromstring(
        """ (S (NP (Det el) (Noun gato)) (VP (Verb come) (NP (Noun pescado))) ) """
    )

    computed = spans(tree, unary=True)

    # Triples of (label, first leaf index, last leaf index).
    expected = {
        ('S', 0, 3),
        ('NP', 0, 1),
        ('VP', 2, 3),
        ('NP', 3, 3),
    }
    self.assertEqual(computed, expected)
def test_spans(self):
    """Check the spans returned for a five-leaf tree with default options."""
    tree = Tree.fromstring(
        """ (S (NP (Det el) (Noun gato)) (VP (Verb come) (NP (Noun pescado) (Adj crudo))) ) """
    )

    computed = spans(tree)

    # Triples of (label, first leaf index, last leaf index).
    expected = {
        ('S', 0, 4),
        ('NP', 0, 1),
        ('VP', 2, 4),
        ('NP', 3, 4),
    }
    self.assertEqual(computed, expected)
# Evaluate `model` against the gold trees of the corpus, reporting running
# labeled precision / recall / F1 through `progress` after every sentence.
corpus = SimpleAncoraCorpusReader('ancora/ancora-2.0/', files)
# Materialise the sentences so their count is known before parsing starts.
parsed_sents = list(corpus.parsed_sents())

print('Parsing...')
hits, total_gold, total_model = 0, 0, 0
n = len(parsed_sents)
format_str = '{:3.1f}% ({}/{}) (P={:2.2f}%, R={:2.2f}%, F1={:2.2f}%)'
progress(format_str.format(0.0, 0, n, 0.0, 0.0, 0.0))
for i, gold_parsed_sent in enumerate(parsed_sents):
    tagged_sent = gold_parsed_sent.pos()

    # parse
    model_parsed_sent = model.parse(tagged_sent)

    # compute labeled scores
    gold_spans = spans(gold_parsed_sent, unary=False)
    model_spans = spans(model_parsed_sent, unary=False)
    hits += len(gold_spans & model_spans)
    total_gold += len(gold_spans)
    total_model += len(model_spans)

    # compute labeled partial results
    # Every ratio falls back to 0.0 when its denominator is zero. That
    # happens while no spans have been accumulated yet, and for F1 whenever
    # there are no hits so far (prec == rec == 0); the unguarded divisions
    # would raise ZeroDivisionError in those cases.
    prec = float(hits) / total_model * 100 if total_model > 0 else 0.0
    rec = float(hits) / total_gold * 100 if total_gold > 0 else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.0

    progress(
        format_str.format(float(i + 1) * 100 / n, i + 1, n, prec, rec, f1))

print('')
print('Parsed {} sentences'.format(n))
# Parse every gold sentence with `model` and accumulate the number of spans
# the model gets right, both with labels and ignoring them.
# NOTE(review): `labeled_hits` and `unlabeled_hits` are only incremented here,
# so they must already be initialised before this point — confirm.
n = len(parsed_sents)
# Built from adjacent literals instead of a backslash continuation inside the
# string: the continuation leaks the backslash or the next source line's
# leading whitespace into the displayed text.
format_str = (
    '{:3.1f}% ({}/{}) | Labeled: (P={:2.2f}%, R={:2.2f}%, '
    'F1={:2.2f}%) | Unlabeled: (P={:2.2f}%, R={:2.2f}%, F1={:2.2f}%)'
)
progress(format_str.format(0.0, 0, n, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
for i, gold_parsed_sent in enumerate(parsed_sents):
    tagged_sent = gold_parsed_sent.pos()

    # Parse
    model_parsed_sent = model.parse(tagged_sent)

    # Labeled spans
    labeled_gold_spans = spans(gold_parsed_sent, unary=False)
    labeled_model_spans = spans(model_parsed_sent, unary=False)

    # Unlabeled spans: each labeled span with its first field dropped.
    # Going through a set merges spans that differ only in that field.
    unlabeled_gold_spans = {span[1:] for span in labeled_gold_spans}
    unlabeled_model_spans = {span[1:] for span in labeled_model_spans}

    # Compute hits
    labeled_hits += len(labeled_gold_spans & labeled_model_spans)
    unlabeled_hits += len(unlabeled_gold_spans & unlabeled_model_spans)