def check(self, derivs_file, gs_file):
    """Assert that derived dependencies equal the expected ones, record by record.

    ``derivs_file`` is read as consecutive two-line records: the first line of
    each record is ignored and the second line is the derivation text.
    ``gs_file`` supplies one line of expected dependencies per record; the two
    files are consumed in lockstep and processing stops as soon as either one
    runs out.  An incomplete trailing record in ``derivs_file`` (a first line
    with no derivation line after it) is ignored rather than raising
    ``IndexError``.

    Records whose derivation line starts with ``#`` are skipped; the matching
    line of ``gs_file`` is still consumed so later records stay aligned.

    On a mismatch, the expected set, the computed set and their differences
    are printed to stdout and the ``AssertionError`` is re-raised.
    """
    with open(derivs_file) as f:
        with open(gs_file) as gs:
            deriv_lines = f.readlines()
            gsdeps_lines = gs.readlines()

    # Derivations sit on every second line.  zip() stops at the shorter
    # sequence, which both mirrors the old "until either file is exhausted"
    # loop and drops a dangling unpaired line at the end of derivs_file.
    for deriv, gsdeps_line in zip(deriv_lines[1::2], gsdeps_lines):
        if deriv.startswith('#'):
            continue

        t = naive_label_derivation(parse_tree(deriv))
        # only take the first two elements (filler lex, arg lex)
        deps = set(
            tuple(e.split(IndexSeparator)[0] for e in v[0:2])
            for v in mkdeps(t))
        gsdeps = parse_gsdeps(gsdeps_line)

        try:
            self.assertEqual(deps, gsdeps)
        except AssertionError:
            # Single-argument print(...) behaves the same whether or not
            # print is a statement, so the report works on Python 2 and 3.
            print("EXPECTED\n-------")
            for depl, depr in sorted(gsdeps):
                print("%s %s" % (depl, depr))
            print("GOT\n---")
            for depl, depr in sorted(deps):
                print("%s %s" % (depl, depr))
            print("DIFF\n----")
            # Sorted so the report is stable from run to run.
            missing = sorted(set(gsdeps) - deps)
            spurious = sorted(deps - set(gsdeps))
            print("false negatives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in missing))
            print("false positives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in spurious))
            raise
def check(self, derivs_file, gs_file):
    """Assert that derived dependencies equal the expected ones, record by record.

    ``derivs_file`` is read as consecutive two-line records: the first line of
    each record is ignored and the second line is the derivation text.
    ``gs_file`` supplies one line of expected dependencies per record; the two
    files are consumed in lockstep and processing stops as soon as either one
    runs out.  An incomplete trailing record in ``derivs_file`` (a first line
    with no derivation line after it) is ignored rather than raising
    ``IndexError``.

    Records whose derivation line starts with ``#`` are skipped; the matching
    line of ``gs_file`` is still consumed so later records stay aligned.

    On a mismatch, the expected set, the computed set and their differences
    are printed to stdout and the ``AssertionError`` is re-raised.
    """
    with open(derivs_file) as f:
        with open(gs_file) as gs:
            deriv_lines = f.readlines()
            gsdeps_lines = gs.readlines()

    # Derivations sit on every second line.  zip() stops at the shorter
    # sequence, which both mirrors the old "until either file is exhausted"
    # loop and drops a dangling unpaired line at the end of derivs_file.
    for deriv, gsdeps_line in zip(deriv_lines[1::2], gsdeps_lines):
        if deriv.startswith('#'):
            continue

        t = naive_label_derivation(parse_tree(deriv))
        # only take the first two elements (filler lex, arg lex)
        deps = set(
            tuple(e.split(IndexSeparator)[0] for e in v[0:2])
            for v in mkdeps(t))
        gsdeps = parse_gsdeps(gsdeps_line)

        try:
            self.assertEqual(deps, gsdeps)
        except AssertionError:
            # Single-argument print(...) behaves the same whether or not
            # print is a statement, so the report works on Python 2 and 3.
            print("EXPECTED\n-------")
            for depl, depr in sorted(gsdeps):
                print("%s %s" % (depl, depr))
            print("GOT\n---")
            for depl, depr in sorted(deps):
                print("%s %s" % (depl, depr))
            print("DIFF\n----")
            # Sorted so the report is stable from run to run.
            missing = sorted(set(gsdeps) - deps)
            spurious = sorted(deps - set(gsdeps))
            print("false negatives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in missing))
            print("false positives: %s" % ' '.join(
                '|'.join((u, v)) for u, v in spurious))
            raise
if using_data_dir: reader = GuessReader(os.path.join(made_docs, deriv_id_to_filespec(deriv_id, with_deriv=True, with_section_dir=True))) else: reader = GuessReader(os.path.join('final', deriv_id_to_filespec(deriv_id, with_deriv=True, with_section_dir=True))) try: bundle = iter(reader).next() total_sents += 1 root = bundle.derivation # run mkmarked on the derivation root = naive_label_derivation(root) def extract_index(s): return int(s.split('`')[1]) # run mkdeps on the derivation try: deps = mkdeps(root, postprocessor=extract_index) except UnificationException: print "mkdeps failed on %s" % deriv_id traceback.print_exc(file=sys.stdout) deps = None def deps_match(head_index, arg_index, dep): return head_index==dep[0] and arg_index==dep[1] if deps: sys.stdout.write('%d %d -> ' % (head_index, arg_index)) try: anno_head_index, anno_arg_index = remap_indices(head_index, arg_index, deriv_id) print 'annotator: %d %d' % (anno_head_index, anno_arg_index)