Beispiel #1
0
    def test_spans_doesnt_change_tree(self):
        """Calling spans() on a tree must leave that tree unmodified."""
        bracketed = """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """
        tree = Tree.fromstring(bracketed)
        # Deep copy taken before the call; it is the reference the tree is
        # compared against afterwards.
        snapshot = tree.copy(deep=True)

        spans(tree)

        self.assertEqual(tree, snapshot)
Beispiel #2
0
    def test_spans_with_unary(self):
        """Check the span set returned by spans(tree, unary=True).

        The tree contains an NP with a single child (Noun pescado); the
        expected set includes the one-token span ('NP', 3, 3) for it.
        """
        bracketed = """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado)))
                )
            """
        tree = Tree.fromstring(bracketed)

        found = spans(tree, unary=True)

        # Each span is a (label, first token index, last token index) triple.
        expected = {
            ('S', 0, 3),
            ('NP', 0, 1),
            ('VP', 2, 3),
            ('NP', 3, 3),
        }
        self.assertEqual(found, expected)
Beispiel #3
0
    def test_spans(self):
        """Check the span set returned by spans() with default arguments."""
        bracketed = """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """
        tree = Tree.fromstring(bracketed)

        found = spans(tree)

        # Each span is a (label, first token index, last token index) triple
        # over the five tokens of the sentence.
        expected = {
            ('S', 0, 4),
            ('NP', 0, 1),
            ('VP', 2, 4),
            ('NP', 3, 4),
        }
        self.assertEqual(found, expected)
Beispiel #4
0
    # Read the gold parse trees and materialise them in a list so that they
    # can be counted before the evaluation loop starts.
    corpus = SimpleAncoraCorpusReader('ancora/ancora-2.0/', files)
    parsed_sents = list(corpus.parsed_sents())

    print('Parsing...')
    # Running totals accumulated over all sentences processed so far.
    hits, total_gold, total_model = 0, 0, 0
    # Bind the scores up front so they exist even when the corpus is empty
    # and the loop body never runs.
    prec, rec, f1 = 0.0, 0.0, 0.0
    n = len(parsed_sents)
    format_str = '{:3.1f}% ({}/{}) (P={:2.2f}%, R={:2.2f}%, F1={:2.2f}%)'
    progress(format_str.format(0.0, 0, n, 0.0, 0.0, 0.0))
    for i, gold_parsed_sent in enumerate(parsed_sents):
        # Input for the model, taken from the gold tree.
        # NOTE(review): presumably (word, tag) pairs as returned by NLTK's
        # Tree.pos() -- confirm against the corpus reader.
        tagged_sent = gold_parsed_sent.pos()

        # parse
        model_parsed_sent = model.parse(tagged_sent)

        # compute labeled scores
        gold_spans = spans(gold_parsed_sent, unary=False)
        model_spans = spans(model_parsed_sent, unary=False)
        hits += len(gold_spans & model_spans)
        total_gold += len(gold_spans)
        total_model += len(model_spans)

        # compute labeled partial results
        # Every denominator can be zero early on (no spans yet, or no span
        # matched yet); report 0.0 instead of raising ZeroDivisionError.
        prec = float(hits) / total_model * 100 if total_model else 0.0
        rec = float(hits) / total_gold * 100 if total_gold else 0.0
        f1 = 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.0

        progress(
            format_str.format(float(i + 1) * 100 / n, i + 1, n, prec, rec, f1))

    print('')
    print('Parsed {} sentences'.format(n))
Beispiel #5
0
    n = len(parsed_sents)

    # Progress line template: percentage done, sentence counter, then the
    # labeled and the unlabeled precision / recall / F1 triples.
    format_str = (
        '{:3.1f}% ({}/{}) | Labeled: (P={:2.2f}%, R={:2.2f}%, '
        'F1={:2.2f}%) | Unlabeled: (P={:2.2f}%, R={:2.2f}%, F1={:2.2f}%)'
    )

    progress(format_str.format(0.0, 0, n, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))

    for i, gold_parsed_sent in enumerate(parsed_sents):
        tagged_sent = gold_parsed_sent.pos()

        # Parse
        model_parsed_sent = model.parse(tagged_sent)

        # Labeled spans of the gold tree and of the model's tree.
        labeled_gold_spans = spans(gold_parsed_sent, unary=False)
        labeled_model_spans = spans(model_parsed_sent, unary=False)

        # Unlabeled spans: the same spans with their first component (the
        # label) dropped.
        unlabeled_gold_spans = {span[1:] for span in labeled_gold_spans}
        unlabeled_model_spans = {span[1:] for span in labeled_model_spans}

        # Hits are the spans present in both the gold and the model sets.
        labeled_common = labeled_gold_spans & labeled_model_spans
        unlabeled_common = unlabeled_gold_spans & unlabeled_model_spans
        labeled_hits += len(labeled_common)
        unlabeled_hits += len(unlabeled_common)