Esempio n. 1
0
 def check(self, derivs_file, gs_file):
     """Compare the dependencies computed from each derivation with the expected ones.

     ``derivs_file`` is consumed two lines at a time: the first line of each
     pair is ignored and the second is parsed as a derivation. ``gs_file``
     supplies one line of expected dependencies per pair. Checking stops when
     either file runs out of lines. A pair whose derivation line starts with
     '#' is skipped together with its line from ``gs_file``.

     On the first mismatch, the expected and computed dependencies and their
     differences are printed and the AssertionError is re-raised. The test
     fails explicitly if ``derivs_file`` ends with an unpaired line while
     ``gs_file`` still has lines left to check.
     """
     with open(derivs_file) as f:
         with open(gs_file) as gs:
             deriv_lines = f.readlines()
             gsdeps_lines = gs.readlines()

     # The second line of every pair holds the derivation; slicing avoids the
     # quadratic cost of repeatedly popping from the front of a list.
     derivs = deriv_lines[1::2]

     for deriv, gsdeps_line in zip(derivs, gsdeps_lines):
         if deriv.startswith('#'):
             continue

         t = naive_label_derivation(parse_tree(deriv))
         # only take the first two elements (filler lex, arg lex)
         deps = set(
             tuple(e.split(IndexSeparator)[0] for e in dep[0:2])
             for dep in mkdeps(t))
         gsdeps = parse_gsdeps(gsdeps_line)

         try:
             self.assertEqual(deps, gsdeps)
         except AssertionError:
             expected = set(gsdeps)
             print("EXPECTED\n-------")
             for depl, depr in sorted(gsdeps):
                 print("%s %s" % (depl, depr))
             print("GOT\n---")
             for depl, depr in sorted(deps):
                 print("%s %s" % (depl, depr))
             print("DIFF\n----")
             print("false negatives: %s" % ' '.join(
                 '|'.join((u, v)) for u, v in expected - deps))
             print("false positives: %s" % ' '.join(
                 '|'.join((u, v)) for u, v in deps - expected))

             raise

     # An odd number of lines leaves a final line without a partner; report it
     # clearly when there was still an expected-dependencies line to check.
     if len(deriv_lines) % 2 and len(gsdeps_lines) > len(derivs):
         self.fail("%s has an unpaired trailing line (expected line pairs)" % derivs_file)
Esempio n. 2
0
    def from_header_and_derivation(header, deriv_string):
        """Creates a Derivation object based on a header line and a derivation representation.

        This retrieves the section, document and derivation number from the
        start of the header line, expecting it to be of the form
        ID=wsj_SSDD.dd PARSER=GOLD NUMPARSE=1

        Returns a Derivation built from the three numbers (as ints) and the
        result of parse_tree(deriv_string).

        Raises CCGbankParseException if the header does not start with a
        well-formed ID field.
        """
        # The dot is escaped so that only a literal '.' may separate the
        # document number from the derivation number.
        matches = re.match(r"ID=wsj_(\d\d)(\d\d)\.(\d+)", header)
        if matches is None:
            raise CCGbankParseException("Malformed CCGbank header: %s" % header)

        sec_no, doc_no, der_no = [int(i) for i in matches.groups()]
        derivation = parse_tree(deriv_string)

        return Derivation(sec_no, doc_no, der_no, derivation)
Esempio n. 3
0
    def from_header_and_derivation(header, deriv_string):
        '''Creates a Derivation object based on a header line and a derivation representation.

        This retrieves the section, document and derivation number from the
        start of the header line, expecting it to be of the form
        ID=wsj_SSDD.dd PARSER=GOLD NUMPARSE=1

        Returns a Derivation built from the three numbers (as ints) and the
        result of parse_tree(deriv_string).

        Raises CCGbankParseException if the header does not start with a
        well-formed ID field.
        '''
        # The dot is escaped so that only a literal '.' may separate the
        # document number from the derivation number.
        matches = re.match(r'ID=wsj_(\d\d)(\d\d)\.(\d+)', header)
        if matches is None:
            raise CCGbankParseException("Malformed CCGbank header: %s" % header)

        sec_no, doc_no, der_no = [int(i) for i in matches.groups()]
        derivation = parse_tree(deriv_string)

        return Derivation(sec_no, doc_no, der_no, derivation)
Esempio n. 4
0
    def check(self, derivs_file, gs_file):
        """Compare the dependencies computed from each derivation with the expected ones.

        ``derivs_file`` is consumed two lines at a time: the first line of each
        pair is ignored and the second is parsed as a derivation. ``gs_file``
        supplies one line of expected dependencies per pair. Checking stops
        when either file runs out of lines. A pair whose derivation line starts
        with '#' is skipped together with its line from ``gs_file``.

        On the first mismatch, the expected and computed dependencies and their
        differences are printed and the AssertionError is re-raised. The test
        fails explicitly if ``derivs_file`` ends with an unpaired line while
        ``gs_file`` still has lines left to check.
        """
        with open(derivs_file) as f:
            with open(gs_file) as gs:
                deriv_lines = f.readlines()
                gsdeps_lines = gs.readlines()

        # The second line of every pair holds the derivation; slicing avoids
        # the quadratic cost of repeatedly popping from the front of a list.
        derivs = deriv_lines[1::2]

        for deriv, gsdeps_line in zip(derivs, gsdeps_lines):
            if deriv.startswith('#'):
                continue

            t = naive_label_derivation(parse_tree(deriv))
            # only take the first two elements (filler lex, arg lex)
            deps = set(
                tuple(e.split(IndexSeparator)[0] for e in dep[0:2])
                for dep in mkdeps(t))
            gsdeps = parse_gsdeps(gsdeps_line)

            try:
                self.assertEqual(deps, gsdeps)
            except AssertionError:
                expected = set(gsdeps)
                print("EXPECTED\n-------")
                for depl, depr in sorted(gsdeps):
                    print("%s %s" % (depl, depr))
                print("GOT\n---")
                for depl, depr in sorted(deps):
                    print("%s %s" % (depl, depr))
                print("DIFF\n----")
                print("false negatives: %s" % ' '.join(
                    '|'.join((u, v)) for u, v in expected - deps))
                print("false positives: %s" % ' '.join(
                    '|'.join((u, v)) for u, v in deps - expected))

                raise

        # An odd number of lines leaves a final line without a partner; report
        # it clearly when there was still an expected-dependencies line to check.
        if len(deriv_lines) % 2 and len(gsdeps_lines) > len(derivs):
            self.fail("%s has an unpaired trailing line (expected line pairs)" % derivs_file)
Esempio n. 5
0
 def testWriteDerivation(self):
     """Check that write_graph produces the file 'ccg_deriv.dot' for a parsed derivation."""
     dot_path = 'ccg_deriv.dot'
     # A file left behind by an earlier run would make the existence check
     # below pass even if write_graph wrote nothing, so remove it first.
     if os.path.exists(dot_path):
         os.remove(dot_path)

     tree = parse_tree(self.from_ccgbank)
     write_graph(tree, dot_path)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(os.path.exists(dot_path))
Esempio n. 6
0
 def testParseInverseParse(self):
     """Check that repr(self.n) and parse_tree agree on the same bracketed string."""
     # Adjacent literals concatenate to a single string with one space between nodes.
     bracketed = (
         '(<T dummycat head_index child_count> '
         '(<L Ldummycat1 Lpos1 Lpos2 Llex Lcatfix>) '
         '(<L Rdummycat1 Rpos1 Rpos2 Rlex Rcatfix>) )'
     )
     # Node -> string
     self.assertEqual(repr(self.n), bracketed)
     # String -> node
     self.assertEqual(self.n, parse_tree(bracketed))
Esempio n. 7
0
 def testWriteDerivation(self):
     """Check that write_graph produces the file 'ccg_deriv.dot' for a parsed derivation."""
     dot_path = 'ccg_deriv.dot'
     # A file left behind by an earlier run would make the existence check
     # below pass even if write_graph wrote nothing, so remove it first.
     if os.path.exists(dot_path):
         os.remove(dot_path)

     tree = parse_tree(self.from_ccgbank)
     write_graph(tree, dot_path)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(os.path.exists(dot_path))
Esempio n. 8
0
 def testParseInverseParse(self):
     """Check that repr(self.n) and parse_tree agree on the same bracketed string."""
     # Adjacent literals concatenate to a single string with one space between nodes.
     bracketed = (
         '(<T dummycat head_index child_count> '
         '(<L Ldummycat1 Lpos1 Lpos2 Llex Lcatfix>) '
         '(<L Rdummycat1 Rpos1 Rpos2 Rlex Rcatfix>) )'
     )
     # Node -> string
     self.assertEqual(repr(self.n), bracketed)
     # String -> node
     self.assertEqual(self.n, parse_tree(bracketed))