def check(self, derivs_file, gs_file):
    """Compare dependencies built from each derivation with the gold standard.

    ``derivs_file`` is consumed two lines at a time: the first line of each
    pair is ignored and the second is parsed as a derivation. The n-th pair
    is checked against the n-th line of ``gs_file``. A pair whose derivation
    line starts with ``#`` is skipped, together with its gold-standard line.
    Checking stops as soon as either file runs out; a trailing unpaired line
    in ``derivs_file`` is ignored.

    Raises:
        AssertionError: re-raised from ``self.assertEqual`` on the first
            mismatch, after the expected set, the actual set and their
            differences have been printed to stdout.
    """
    # Read each file completely and close it before any parsing happens.
    with open(derivs_file) as f:
        deriv_lines = f.readlines()
    with open(gs_file) as gs:
        gsdeps_lines = gs.readlines()

    # Every second line (starting with the second) is a derivation. Slicing
    # avoids the quadratic cost of repeated pop(0) and cannot run off the end
    # when the file holds an odd number of lines.
    for deriv, gsdeps_line in zip(deriv_lines[1::2], gsdeps_lines):
        if deriv.startswith('#'):
            continue

        t = naive_label_derivation(parse_tree(deriv))
        # only take the first two elements (filler lex, arg lex)
        deps = set(
            tuple(e.split(IndexSeparator)[0] for e in v[0:2])
            for v in mkdeps(t))
        gsdeps = parse_gsdeps(gsdeps_line)

        try:
            self.assertEqual(deps, gsdeps)
        except AssertionError:
            # Each print takes one parenthesised argument so the line parses
            # both as a print statement and as a print() call.
            print("EXPECTED\n-------")
            for depl, depr in sorted(gsdeps):
                print("%s %s" % (depl, depr))

            print("GOT\n---")
            for depl, depr in sorted(deps):
                print("%s %s" % (depl, depr))

            print("DIFF\n----")
            # Sorted so that the report is stable from run to run.
            missing = sorted(set(gsdeps) - deps)
            spurious = sorted(deps - set(gsdeps))
            print("false negatives: %s"
                  % ' '.join('|'.join((u, v)) for u, v in missing))
            print("false positives: %s"
                  % ' '.join('|'.join((u, v)) for u, v in spurious))
            raise
def check(self, derivs_file, gs_file):
    """Compare dependencies built from each derivation with the gold standard.

    ``derivs_file`` is consumed two lines at a time: the first line of each
    pair is ignored and the second is parsed as a derivation. The n-th pair
    is checked against the n-th line of ``gs_file``. A pair whose derivation
    line starts with ``#`` is skipped, together with its gold-standard line.
    Checking stops as soon as either file runs out; a trailing unpaired line
    in ``derivs_file`` is ignored.

    Raises:
        AssertionError: re-raised from ``self.assertEqual`` on the first
            mismatch, after the expected set, the actual set and their
            differences have been printed to stdout.
    """
    # Read each file completely and close it before any parsing happens.
    with open(derivs_file) as f:
        deriv_lines = f.readlines()
    with open(gs_file) as gs:
        gsdeps_lines = gs.readlines()

    # Every second line (starting with the second) is a derivation. Slicing
    # avoids the quadratic cost of repeated pop(0) and cannot run off the end
    # when the file holds an odd number of lines.
    for deriv, gsdeps_line in zip(deriv_lines[1::2], gsdeps_lines):
        if deriv.startswith('#'):
            continue

        t = naive_label_derivation(parse_tree(deriv))
        # only take the first two elements (filler lex, arg lex)
        deps = set(
            tuple(e.split(IndexSeparator)[0] for e in v[0:2])
            for v in mkdeps(t))
        gsdeps = parse_gsdeps(gsdeps_line)

        try:
            self.assertEqual(deps, gsdeps)
        except AssertionError:
            # Each print takes one parenthesised argument so the line parses
            # both as a print statement and as a print() call.
            print("EXPECTED\n-------")
            for depl, depr in sorted(gsdeps):
                print("%s %s" % (depl, depr))

            print("GOT\n---")
            for depl, depr in sorted(deps):
                print("%s %s" % (depl, depr))

            print("DIFF\n----")
            # Sorted so that the report is stable from run to run.
            missing = sorted(set(gsdeps) - deps)
            spurious = sorted(deps - set(gsdeps))
            print("false negatives: %s"
                  % ' '.join('|'.join((u, v)) for u, v in missing))
            print("false positives: %s"
                  % ' '.join('|'.join((u, v)) for u, v in spurious))
            raise
def get_deps(root):
    """Label the derivation ``root`` and return the dependencies built from it.

    ``root`` is passed through ``naive_label_derivation`` and the labelled
    result is handed to ``mkdeps``; whatever ``mkdeps`` returns is returned
    unchanged.
    """
    labelled = naive_label_derivation(root)
    return mkdeps(labelled)
arg_index = int(arg_index) sec, doc, deriv_no = deriv_id_to_components(deriv_id) # read in the derivation if using_data_dir: reader = GuessReader(os.path.join(made_docs, deriv_id_to_filespec(deriv_id, with_deriv=True, with_section_dir=True))) else: reader = GuessReader(os.path.join('final', deriv_id_to_filespec(deriv_id, with_deriv=True, with_section_dir=True))) try: bundle = iter(reader).next() total_sents += 1 root = bundle.derivation # run mkmarked on the derivation root = naive_label_derivation(root) def extract_index(s): return int(s.split('`')[1]) # run mkdeps on the derivation try: deps = mkdeps(root, postprocessor=extract_index) except UnificationException: print "mkdeps failed on %s" % deriv_id traceback.print_exc(file=sys.stdout) deps = None def deps_match(head_index, arg_index, dep): return head_index==dep[0] and arg_index==dep[1] if deps: sys.stdout.write('%d %d -> ' % (head_index, arg_index))
# NOTE(review): these three assignments look like the tail of a definition
# whose header is outside this view -- re-indent them to match it. TODO confirm
opt = '9'
long_opt = 'mkdeps'
arg_names = 'OUTDIR'

if __name__ == '__main__':
    # psyco is optional: if it cannot be imported the script carries on
    # without it.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    from munge.ccg.parse import *

    # Fail with a readable message instead of a bare IndexError when the two
    # positional arguments are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s FILE INDEX" % sys.argv[0])

    deriv_path = "final/%s" % sys.argv[1]
    deriv_index = int(sys.argv[2])

    # Close the file as soon as its lines have been read.
    with open(deriv_path) as deriv_file:
        deriv_lines = deriv_file.readlines()

    # The derivation with index i sits on (0-based) line 2*i + 1.
    t = naive_label_derivation(parse_tree(deriv_lines[2 * deriv_index + 1]))

    # Each print takes one parenthesised argument so the line parses both as
    # a print statement and as a print() call.
    print(t)
    print("sent:")
    print("-----")
    print(' '.join(t.text()))

    deps = mkdeps(t)

    print("deps:")
    print("-----")
    for l, r in deps:
        print("%s|%s" % (l, r))

    print("leaves:")
    print("-------")
    for leaf in leaves(t):
        print("%s %s" % (leaf.lex, leaf.cat))