Ejemplo n.º 1
0
    def batchEvaluate(self, parsed_filename: str,
                      gold_filename: str) -> PARSEVALResult:
        """Evaluate a file of parsed trees against a file of gold trees.

        Both files are read in lockstep, one bracketed tree per line. Each
        pair of lines is turned into two ``ParseTree`` objects and scored
        with ``self.evaluate``; the per-pair precision and recall are then
        macro-averaged (plain mean over pairs).

        Args:
            parsed_filename: Path of the file holding the parser output.
            gold_filename: Path of the file holding the reference trees.

        Returns:
            A ``PARSEVALResult`` built from the mean precision and the mean
            recall. When no pair of lines could be read (either file is
            empty) both values are ``0.0`` instead of dividing by zero.
        """
        evaluation_results = list()

        # Context managers close both files even if building or evaluating
        # a tree raises.
        with open(parsed_filename, 'r') as parsed_file, \
                open(gold_filename, 'r') as gold_file:
            # zip() stops as soon as either file runs out of lines, so any
            # surplus lines in the longer file are ignored.
            for parsed_brackets, gold_brackets in zip(parsed_file, gold_file):
                parsed = ParseTree(parsed_brackets)
                parsed.makeTree()
                gold = ParseTree(gold_brackets)
                gold.makeTree()
                evaluation_results.append(self.evaluate(parsed, gold))

        n = len(evaluation_results)
        if n == 0:
            # Nothing was evaluated: avoid ZeroDivisionError.
            return PARSEVALResult(0.0, 0.0)

        avgPrecision = sum((result.precision for result in evaluation_results),
                           0.0)
        avgRecall = sum((result.recall for result in evaluation_results), 0.0)

        return PARSEVALResult(avgPrecision / n,
                              avgRecall / n)  # Using macro evaluation
    def separate(self, output_train: str, output_test: str, test_instances=-1):
        """Split ``self.treebank`` into a training file and a test file.

        Every line of ``self.treebank`` is parsed into a ``ParseTree``. When
        ``self.hasOOV`` reports true for the tree's word tokens, the line is
        written to the training file and its tokens are added to
        ``self.vocabularies``; otherwise the line is written to the test
        file. Lines are written verbatim, with no newline appended.

        Args:
            output_train: Path of the training file to (over)write.
            output_test: Path of the test file to (over)write.
            test_instances: Stop as soon as this many lines have been
                written to the test file. The default ``-1`` means no
                limit. ``0`` also never triggers the stop, because the
                counter is at least 1 when it is compared.
        """
        count_test = 0
        # Context managers flush and close both outputs on every exit path
        # (normal end, early break, or an exception while parsing).
        with open(output_train, 'w') as train, open(output_test, 'w') as test:
            for line in self.treebank:
                pt = ParseTree(line)
                pt.makeTree()
                wordTokens = pt.getWordQueue()
                if self.hasOOV(wordTokens):
                    print(line, file=train, end='')
                    for token in wordTokens:
                        self.vocabularies.add(token)
                else:
                    print(line, file=test, end='')
                    count_test += 1
                    if test_instances != -1 and test_instances == count_test:
                        break
Ejemplo n.º 3
0
    def __updateScoreText(self, gold_tree: str, parsed_tree: str, idx: int):
        """Score ``parsed_tree`` against ``gold_tree`` and show it in SCORE.

        Both bracket strings are built into ``ParseTree`` objects, scored
        with a fresh ``PARSEVALEvaluator``, and the result is written to the
        ``self.SCORE`` widget as ``[idx] <result>``.
        """
        reference = ParseTree(gold_tree)
        reference.makeTree()
        candidate = ParseTree(parsed_tree)
        candidate.makeTree()

        score = PARSEVALEvaluator().evaluate(candidate, reference)
        label = f'[{idx!s}] {score!s}'
        self.SCORE.configure(text=label)
        self.SCORE.pack(fill='x')
Ejemplo n.º 4
0
    def __process_line(self, brackets):
        """Check one bracketed tree and route it to accepted or rejected.

        If the text starts with ``(``, its first and last characters are
        dropped before parsing. A tree is accepted when the checker approves
        it and its height is greater than 2. Otherwise it is written to
        ``self.rejected`` and ``self.count_rejected`` is incremented. A
        ``ValueError`` raised anywhere in the process also counts as a
        rejection, and its message is logged next to the tree.
        """
        if brackets[0] == '(':
            brackets = brackets[1:-1]

        tree = ParseTree(brackets)
        try:
            tree.makeTree()
            passed_check = ParseTreeChecker.__checkTree(tree)
            height = tree.getHeight()

            is_accepted = passed_check and height > 2
            if not is_accepted:
                self.count_rejected += 1
                print('[SAD] Found REJECTED:', self.count_rejected)
                print(brackets, file=self.rejected)
            else:
                print(brackets, file=self.accepted)

        except ValueError as reason:
            self.count_rejected += 1
            print('[OH NO] Found EXCEPTION:', self.count_rejected)
            print(brackets, file=self.rejected)
            print('REASON: ', reason, file=self.rejected)
Ejemplo n.º 5
0
def runVisualize(bracket: str, useNLTK: bool):
    """Display the tree described by ``bracket``.

    With ``useNLTK`` false, the tree is built with ``ParseTree`` and
    pretty-printed with arguments ``(4, 2)``. Otherwise a
    ``ParseTreeVisualizer`` is created for the bracket string and started.
    Each backend is imported only on the branch that uses it.
    """
    if not useNLTK:
        from parsetree.ParseTree import ParseTree
        tree = ParseTree(bracket)
        tree.makeTree()
        tree.printPretty(4, 2)
        return

    from parsetree.ParseTreeVisualizer import ParseTreeVisualizer
    viewer = ParseTreeVisualizer(bracket)
    viewer.start()
Ejemplo n.º 6
0
    def getConstituents(self, pt: ParseTree) -> set:
        """Return the set of labelled spans for the non-leaf nodes of ``pt``.

        The tree is traversed four times with ``pt.dfs``:

        1. leaves get consecutive indices (starting at 0) as both their
           ``BEGIN_NUM`` and ``END_NUM``;
        2. nodes whose ``BEGIN_NUM`` is still ``-1`` take it from their
           first child;
        3. nodes whose ``END_NUM`` is still ``-1`` take it from their last
           child;
        4. every non-leaf node contributes a ``PARSEVALConstituentTokens``
           made of its tag and its begin/end indices.

        The ``BEGIN_NUM``/``END_NUM`` attributes of the nodes are modified
        in place.
        """
        next_leaf = 0

        # Pass 1: number the words (leaf nodes).
        def numberingLeaves(node: ParseTree):
            nonlocal next_leaf
            if not node.IS_LEAF:
                return
            node.BEGIN_NUM = node.END_NUM = next_leaf
            next_leaf += 1

        pt.dfs(numberingLeaves)

        # Passes 2 and 3: propagate span boundaries up to the constituents.
        def getBegin(node: ParseTree) -> int:
            if node.BEGIN_NUM == -1:
                node.BEGIN_NUM = getBegin(node.CHILDREN[0])
            return node.BEGIN_NUM

        def getEnd(node: ParseTree) -> int:
            if node.END_NUM == -1:
                node.END_NUM = getEnd(node.CHILDREN[-1])
            return node.END_NUM

        pt.dfs(getBegin)
        pt.dfs(getEnd)

        # Pass 4: collect one entry per non-leaf node.
        constituents = set()

        def extractConstituent(node: ParseTree):
            if node.IS_LEAF:
                return
            span = PARSEVALConstituentTokens(node.NODE_TAG, node.BEGIN_NUM,
                                             node.END_NUM)
            constituents.add(span)

        pt.dfs(extractConstituent)

        return constituents
Ejemplo n.º 7
0
 def getBegin(pt: ParseTree) -> int:
     """Return ``pt.BEGIN_NUM``, taking it from the first child if unset.

     A value of ``-1`` is treated as unset; the resolved value is stored
     back on the node.
     """
     if pt.BEGIN_NUM == -1:
         first_child = pt.CHILDREN[0]
         pt.BEGIN_NUM = getBegin(first_child)
     return pt.BEGIN_NUM
Ejemplo n.º 8
0
 def numberingLeaves(pt: ParseTree):
     """Give a leaf node the next running index as its begin and end number.

     Non-leaf nodes are left untouched. The running index is kept in the
     function attribute ``numberingLeaves.leaf_num``, which the caller must
     initialise before the first call.
     """
     if not pt.IS_LEAF:
         return
     index = numberingLeaves.leaf_num
     pt.BEGIN_NUM = pt.END_NUM = index
     numberingLeaves.leaf_num = index + 1
Ejemplo n.º 9
0
 def getEnd(pt: ParseTree) -> int:
     """Return ``pt.END_NUM``, taking it from the last child if unset.

     A value of ``-1`` is treated as unset; the resolved value is stored
     back on the node.
     """
     if pt.END_NUM == -1:
         last_child = pt.CHILDREN[-1]
         pt.END_NUM = getEnd(last_child)
     return pt.END_NUM