예제 #1
0
파일: conll.py 프로젝트: rgeorgi/intent
    def __init__(self):
        """Create the empty counters and the list of reported metric keys."""
        # Two-level tallies, one for dependency results and one for head
        # results (first-level key is presumably the POS tag -- see names).
        self.dep_acc_by_pos, self.head_acc_by_pos = (
            TwoLevelCountDict(),
            TwoLevelCountDict(),
        )

        # Flat tallies kept separately for the "long" and "short" groups.
        self.long_sent_stats, self.short_sent_stats = CountDict(), CountDict()

        # Counter keys reported as accuracies.
        self.fields = ["pos_acc", "ul_acc", "l_acc"]
예제 #2
0
    def parse_test(self):
        """Check the token counts ``process_wsj_file`` yields for ``wsj_0001.mrg``.

        The treebank file sits at a machine-specific absolute path, so the
        test is skipped (instead of erroring out) when that file is absent.
        """
        import os

        path = '/Users/rgeorgi/Documents/treebanks/LDC95T07/RAW/combined/wsj/00/wsj_0001.mrg'
        if not os.path.exists(path):
            self.skipTest('WSJ treebank file not found: {}'.format(path))

        tc = CountDict()

        def count_tokens(tokens):
            # Callback handed to process_wsj_file: tally every token's label.
            for token in tokens:
                tc.add(token.label)

        process_wsj_file(path, count_tokens)

        # There should be 31 total tokens in this file.
        self.assertEqual(31, tc.total())

        # Two of those tokens carry the '.' label.
        self.assertEqual(tc['.'], 2)
예제 #3
0
파일: conll.py 프로젝트: rgeorgi/intent
class ConllEval(object):
    """Accumulate evaluation counters and report them as percentages.

    Every key passed to :meth:`add` is tallied in ``long_sent_stats``; it is
    additionally tallied in ``short_sent_stats`` when the accompanying
    sentence has fewer than ``SHORT_SENT_LEN`` elements.
    """

    # Sentences whose len() is below this are also counted as "short".
    SHORT_SENT_LEN = 10

    def __init__(self):
        """Create the empty counters and the list of reported metric keys."""
        self.dep_acc_by_pos = TwoLevelCountDict()
        self.head_acc_by_pos = TwoLevelCountDict()

        self.long_sent_stats = CountDict()
        self.short_sent_stats = CountDict()

        # Counter keys that long_stats() / short_stats() turn into accuracies.
        self.fields = ["pos_acc", "ul_acc", "l_acc"]

    def add(self, k, sent):
        """Tally key ``k`` for ``sent``.

        :param k: counter key to increment.
        :param sent: the sentence the count belongs to; only its ``len()``
            is used, to decide whether it also counts as short.
        """
        self.long_sent_stats.add(k)
        if len(sent) < self.SHORT_SENT_LEN:
            self.short_sent_stats.add(k)

    def pos_stats(self):
        """Print one ``key,dep,head`` comma-separated line per first-level key.

        Keys are the sorted union of the keys of ``dep_acc_by_pos`` and
        ``head_acc_by_pos``; each value is the ``True`` entry of that key's
        sub-distribution, or 0.0 when there is none.
        """
        all_keys = set(self.dep_acc_by_pos.keys()).union(
            set(self.head_acc_by_pos.keys())
        )
        for pos in sorted(all_keys):
            dep = self.dep_acc_by_pos.sub_distribution(pos).get(True, 0.0)
            head = self.head_acc_by_pos.sub_distribution(pos).get(True, 0.0)
            print(",".join([pos, str(dep), str(head)]))

    def acc(self, d, k):
        """Return ``d[k]`` as a percentage of ``d["words"]``.

        :param d: mapping of counter key to count.
        :param k: the counter key to report.
        :return: the percentage, or 0.0 when the ``"words"`` count is zero
            (previously this raised ``ZeroDivisionError``).
        """
        count = d[k]
        words = d["words"]
        if not words:
            # Nothing was tallied (e.g. no short sentences were seen).
            return 0.0
        return count / words * 100

    def long_stats(self):
        """Return the accuracy for every key in ``fields`` over all sentences."""
        return [self.acc(self.long_sent_stats, k) for k in self.fields]

    def short_stats(self):
        """Return the accuracy for every key in ``fields`` over short sentences."""
        return [self.acc(self.short_sent_stats, k) for k in self.fields]

    def short_ul(self):
        """Return the ``"ul_acc"`` accuracy over short sentences."""
        return self.acc(self.short_sent_stats, "ul_acc")

    def short_ul_count(self):
        """Return the raw ``"ul_acc"`` count for short sentences (0 if absent)."""
        return self.short_sent_stats.get("ul_acc", 0)

    def short_words(self):
        """Return the raw ``"words"`` count for short sentences (0 if absent)."""
        return self.short_sent_stats.get("words", 0)

    def long_ul(self):
        """Return the ``"ul_acc"`` accuracy over all sentences."""
        return self.acc(self.long_sent_stats, "ul_acc")

    def long_ul_count(self):
        """Return the raw ``"ul_acc"`` count for all sentences (0 if absent)."""
        return self.long_sent_stats.get("ul_acc", 0)

    def long_words(self):
        """Return the raw ``"words"`` count for all sentences (0 if absent)."""
        return self.long_sent_stats.get("words", 0)
예제 #4
0
 def __init__(self, gold=None):
     """Run the ``CountDict`` initializer and store ``gold`` on the instance.

     :param gold: kept unchanged as ``self._gold``; presumably the
         gold-standard reference these counts relate to -- TODO confirm
         against callers.
     """
     CountDict.__init__(self)
     self._gold = gold