def check(self, derivs_file, gs_file):
    """Assert that computed dependencies match the expected ones, line by line.

    ``derivs_file`` is read as consecutive pairs of lines: a header line
    (ignored here) followed by a derivation line. ``gs_file`` supplies one
    line of expected dependencies per pair, in the same order.

    For every pair whose derivation line does not start with ``#``, the
    derivation is parsed and labelled, its dependencies are reduced to
    two-element tuples, and the resulting set is compared with
    ``parse_gsdeps`` applied to the matching ``gs_file`` line. A skipped
    (``#``) derivation still consumes its ``gs_file`` line.

    Surplus lines in either file are ignored, as is a trailing header line
    that has no derivation line after it.

    Raises:
        AssertionError: re-raised from ``assertEqual`` on the first
            mismatch, after printing the expected set, the computed set
            and their differences.
    """
    with open(derivs_file) as f:
        with open(gs_file) as gs:
            deriv_lines = f.readlines()
            gsdeps_lines = gs.readlines()

    # Derivations sit on every second line (each one follows its header).
    # Slicing + zip pairs them with the expected lines in linear time and
    # stops at the shorter input instead of failing on a dangling header.
    for deriv, gsdeps_line in zip(deriv_lines[1::2], gsdeps_lines):
        if deriv.startswith('#'):
            continue

        t = naive_label_derivation(parse_tree(deriv))
        # only take the first two elements (filler lex, arg lex), and drop
        # whatever follows IndexSeparator in each of them
        deps = set(
            tuple(e.split(IndexSeparator)[0] for e in v[0:2])
            for v in mkdeps(t))
        gsdeps = parse_gsdeps(gsdeps_line)

        try:
            self.assertEqual(deps, gsdeps)
        except AssertionError:
            # Dump both sides plus the set differences before re-raising,
            # so the failing derivation can be diagnosed from the output.
            print("EXPECTED\n-------")
            for depl, depr in sorted(gsdeps):
                print("%s %s" % (depl, depr))
            print("GOT\n---")
            for depl, depr in sorted(deps):
                print("%s %s" % (depl, depr))
            print("DIFF\n----")
            # Sorted so that the diagnostic output is reproducible.
            print("false negatives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in sorted(set(gsdeps) - deps)))
            print("false positives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in sorted(deps - set(gsdeps))))
            raise
def from_header_and_derivation(header, deriv_string):
    """Creates a Derivation object based on a header line and a derivation
    representation.

    This retrieves the section, document and derivation number from the
    header line, expecting it to be of the form
        ID=wsj_SSDD.dd PARSER=GOLD NUMPARSE=1

    Only the leading ``ID=wsj_SSDD.dd`` part is inspected; anything after
    the derivation number is ignored.

    Args:
        header: the header line to extract the three numbers from.
        deriv_string: the derivation text, handed to ``parse_tree``.

    Returns:
        ``Derivation(sec_no, doc_no, der_no, derivation)`` where the three
        numbers are ints and ``derivation`` is the ``parse_tree`` result.

    Raises:
        CCGbankParseException: if ``header`` does not start with an ID of
            the expected form.
    """
    # The dot before the derivation number is escaped so that it matches a
    # literal '.' only, not an arbitrary separator character.
    matches = re.match(r"ID=wsj_(\d\d)(\d\d)\.(\d+)", header)
    if matches is None:
        raise CCGbankParseException("Malformed CCGbank header: %s" % header)

    sec_no, doc_no, der_no = [int(i) for i in matches.groups()]
    derivation = parse_tree(deriv_string)
    return Derivation(sec_no, doc_no, der_no, derivation)
def from_header_and_derivation(header, deriv_string):
    '''Creates a Derivation object based on a header line and a derivation
    representation.

    This retrieves the section, document and derivation number from the
    header line, expecting it to be of the form
        ID=wsj_SSDD.dd PARSER=GOLD NUMPARSE=1

    Only the leading ``ID=wsj_SSDD.dd`` part is inspected; anything after
    the derivation number is ignored.

    Args:
        header: the header line to extract the three numbers from.
        deriv_string: the derivation text, handed to ``parse_tree``.

    Returns:
        ``Derivation(sec_no, doc_no, der_no, derivation)`` where the three
        numbers are ints and ``derivation`` is the ``parse_tree`` result.

    Raises:
        CCGbankParseException: if ``header`` does not start with an ID of
            the expected form.
    '''
    # The dot before the derivation number is escaped so that it matches a
    # literal '.' only, not an arbitrary separator character.
    matches = re.match(r'ID=wsj_(\d\d)(\d\d)\.(\d+)', header)
    if matches is None:
        raise CCGbankParseException("Malformed CCGbank header: %s" % header)

    sec_no, doc_no, der_no = [int(i) for i in matches.groups()]
    derivation = parse_tree(deriv_string)
    return Derivation(sec_no, doc_no, der_no, derivation)
def check(self, derivs_file, gs_file):
    """Assert that computed dependencies match the expected ones, line by line.

    ``derivs_file`` is read as consecutive pairs of lines: a header line
    (ignored here) followed by a derivation line. ``gs_file`` supplies one
    line of expected dependencies per pair, in the same order.

    For every pair whose derivation line does not start with ``#``, the
    derivation is parsed and labelled, its dependencies are reduced to
    two-element tuples, and the resulting set is compared with
    ``parse_gsdeps`` applied to the matching ``gs_file`` line. A skipped
    (``#``) derivation still consumes its ``gs_file`` line.

    Surplus lines in either file are ignored, as is a trailing header line
    that has no derivation line after it.

    Raises:
        AssertionError: re-raised from ``assertEqual`` on the first
            mismatch, after printing the expected set, the computed set
            and their differences.
    """
    with open(derivs_file) as f:
        with open(gs_file) as gs:
            deriv_lines = f.readlines()
            gsdeps_lines = gs.readlines()

    # Derivations sit on every second line (each one follows its header).
    # Slicing + zip pairs them with the expected lines in linear time and
    # stops at the shorter input instead of failing on a dangling header.
    for deriv, gsdeps_line in zip(deriv_lines[1::2], gsdeps_lines):
        if deriv.startswith('#'):
            continue

        t = naive_label_derivation(parse_tree(deriv))
        # only take the first two elements (filler lex, arg lex), and drop
        # whatever follows IndexSeparator in each of them
        deps = set(
            tuple(e.split(IndexSeparator)[0] for e in v[0:2])
            for v in mkdeps(t))
        gsdeps = parse_gsdeps(gsdeps_line)

        try:
            self.assertEqual(deps, gsdeps)
        except AssertionError:
            # Dump both sides plus the set differences before re-raising,
            # so the failing derivation can be diagnosed from the output.
            print("EXPECTED\n-------")
            for depl, depr in sorted(gsdeps):
                print("%s %s" % (depl, depr))
            print("GOT\n---")
            for depl, depr in sorted(deps):
                print("%s %s" % (depl, depr))
            print("DIFF\n----")
            # Sorted so that the diagnostic output is reproducible.
            print("false negatives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in sorted(set(gsdeps) - deps)))
            print("false positives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in sorted(deps - set(gsdeps))))
            raise
def testWriteDerivation(self):
    """write_graph must create its output file for a parsed derivation."""
    # Imported locally so this block does not depend on file-level imports
    # beyond the ``os`` module it already used.
    import shutil
    import tempfile

    tree = parse_tree(self.from_ccgbank)

    # Write into a fresh temporary directory: a stale 'ccg_deriv.dot' left
    # behind by an earlier run can then no longer make the existence check
    # pass vacuously, and the working directory is not littered.
    out_dir = tempfile.mkdtemp()
    try:
        dot_path = os.path.join(out_dir, 'ccg_deriv.dot')
        write_graph(tree, dot_path)
        # assertTrue replaces the deprecated alias assert_.
        self.assertTrue(os.path.exists(dot_path))
    finally:
        shutil.rmtree(out_dir, ignore_errors=True)
def testParseInverseParse(self):
    """repr() and parse_tree() must round-trip the fixture held in self.n."""
    serialised = '(<T dummycat head_index child_count> (<L Ldummycat1 Lpos1 Lpos2 Llex Lcatfix>) (<L Rdummycat1 Rpos1 Rpos2 Rlex Rcatfix>) )'

    # Direction 1: the fixture must print as exactly this string.
    self.assertEqual(repr(self.n), serialised)

    # Direction 2: parsing that string must give back an equal object.
    reparsed = parse_tree(serialised)
    self.assertEqual(self.n, reparsed)