def get_args(rel, comparison_arg):
    """Collect the parts of a comparison into a flat dictionary.

    The subject, vehicle and ground are looked up inside ``rel`` and
    ``comparison_arg``.  If anything goes wrong during that lookup, all
    three are reset to empty placeholders so that the caller still receives
    a (partial) result instead of an exception.

    Args:
        rel: relation node; ``_get_sbj(rel)`` and ``rel.head`` are read.
        comparison_arg: tree node of the comparison argument; it is indexed,
            its ``node`` label is inspected and ``leaves()`` is called on it.

    Returns:
        dict with the keys
        ``"T"`` (flat print of ``sbj[0]``, or ``""``),
        ``"E"`` (``rel.head``),
        ``"C"`` (first leaf of ``comparison_arg``),
        ``"V"`` (flat print of the vehicle, or ``""``) and
        ``"P"`` (flat print of the ground, or ``""``).
        Presumably topic / event / comparator / vehicle / property of a
        simile -- TODO confirm the naming with the callers.

    Raises:
        Whatever ``rel.head`` or ``comparison_arg.leaves()[0]`` raise; those
        are evaluated outside the guarded section.
    """
    try:
        sbj = _get_sbj(rel)[0].most_specific_head()

        # Default vehicle: the second child when there is one, otherwise the
        # second child of the only child.
        if len(comparison_arg) > 1:
            veh = comparison_arg[1].most_specific_head()
        else:
            veh = comparison_arg[0][1].most_specific_head()

        # When an ADJP is involved, the ground is its head and the vehicle is
        # re-read from a fixed, deeper tree position.
        if comparison_arg.node == "ADJP":
            gnd = comparison_arg.head()[0].head()
            veh = comparison_arg[1, 0, 1, 0, 1, 0].head()
        elif comparison_arg[0].node == "ADJP":
            gnd = comparison_arg.most_specific_head()
            veh = comparison_arg[0, 1, 0, 1, 0, 1, 0].head()
        else:
            gnd = None  # we accept partial matches that don't find the gnd
    except Exception:
        # Deliberate best-effort: any lookup failure degrades to empty
        # placeholders.  ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating.
        sbj = GlarfTree("", [])
        veh = GlarfTree("", [])
        gnd = None

    # NOTE(review): only the subject is indexed with [0] before printing,
    # unlike veh / gnd -- confirm this asymmetry is intended.
    # The empty placeholder trees are presumably falsy (no children), which
    # is what routes them to "" below -- TODO confirm.
    return {
        "T": sbj[0].print_flat(False, False, False, False) if sbj else "",
        "E": rel.head,
        "C": comparison_arg.leaves()[0],
        "V": veh.print_flat(False, False, False, False) if veh else "",
        "P": gnd.print_flat(False, False, False, False) if gnd else "",
    }
def get_args(rel, comparison_arg):
    """Collect the parts of a comparison into a flat dictionary.

    The subject, vehicle and ground are looked up inside ``rel`` and
    ``comparison_arg``.  If anything goes wrong during that lookup, all
    three are reset to empty placeholders so that the caller still receives
    a (partial) result instead of an exception.

    Args:
        rel: relation node; ``_get_sbj(rel)`` and ``rel.head`` are read.
        comparison_arg: tree node of the comparison argument; it is indexed,
            its ``node`` label is inspected and ``leaves()`` is called on it.

    Returns:
        dict with the keys
        ``'T'`` (flat print of ``sbj[0]``, or ``""``),
        ``'E'`` (``rel.head``),
        ``'C'`` (first leaf of ``comparison_arg``),
        ``'V'`` (flat print of the vehicle, or ``""``) and
        ``'P'`` (flat print of the ground, or ``""``).
        Presumably topic / event / comparator / vehicle / property of a
        simile -- TODO confirm the naming with the callers.

    Raises:
        Whatever ``rel.head`` or ``comparison_arg.leaves()[0]`` raise; those
        are evaluated outside the guarded section.
    """
    try:
        sbj = _get_sbj(rel)[0].most_specific_head()

        # Default vehicle: the second child when there is one, otherwise the
        # second child of the only child.
        if len(comparison_arg) > 1:
            veh = comparison_arg[1].most_specific_head()
        else:
            veh = comparison_arg[0][1].most_specific_head()

        # When an ADJP is involved, the ground is its head and the vehicle is
        # re-read from a fixed, deeper tree position.
        if comparison_arg.node == 'ADJP':
            gnd = comparison_arg.head()[0].head()
            veh = comparison_arg[1, 0, 1, 0, 1, 0].head()
        elif comparison_arg[0].node == 'ADJP':
            gnd = comparison_arg.most_specific_head()
            veh = comparison_arg[0, 1, 0, 1, 0, 1, 0].head()
        else:
            gnd = None  # we accept partial matches that don't find the gnd
    except Exception:
        # Deliberate best-effort: any lookup failure degrades to empty
        # placeholders.  ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating.
        sbj = GlarfTree('', [])
        veh = GlarfTree('', [])
        gnd = None

    # NOTE(review): only the subject is indexed with [0] before printing,
    # unlike veh / gnd -- confirm this asymmetry is intended.
    # The empty placeholder trees are presumably falsy (no children), which
    # is what routes them to "" below -- TODO confirm.
    return {
        'T': sbj[0].print_flat(False, False, False, False) if sbj else "",
        'E': rel.head,
        'C': comparison_arg.leaves()[0],
        'V': veh.print_flat(False, False, False, False) if veh else "",
        'P': gnd.print_flat(False, False, False, False) if gnd else "",
    }
def __init__(self, glarf_parses, glarf_tuples=None):
    """Fill this container with one parsed tree per GLARF parse.

    Args:
        glarf_parses: iterable of parse inputs, each passed as the first
            argument to ``GlarfTree.glarf_parse``.
        glarf_tuples: optional iterable paired element-wise with
            ``glarf_parses`` and passed as the second argument.  When it is
            missing or empty, every parse is paired with a fresh empty list.

    Each resulting tree gets a ``_forest`` attribute pointing back at this
    object and is then appended to it.
    """
    if glarf_tuples:
        # As with any zip, the shorter of the two inputs decides how many
        # trees are built.
        pairs = zip(glarf_parses, glarf_tuples)
    else:
        # Pair lazily instead of first building a list of defaults from
        # ``glarf_parses``: that extra pass would exhaust a one-shot iterator
        # (e.g. a generator) and leave nothing to parse.
        pairs = ((parse, []) for parse in glarf_parses)

    for s, t in pairs:
        tree = GlarfTree.glarf_parse(s, t)
        tree._forest = self  # back-reference from the tree to its container
        self.append(tree)
# My wife, Mary, is beautiful. from pyglarf import GlarfTree apposite = GlarfTree( 'S', [GlarfTree('S-SBJ', [GlarfTree('NP', [GlarfTree('HEAD', [GlarfTree( 'NP', [GlarfTree('T-POS', [GlarfTree('PRP$', ['My', 'i', '0'])]), GlarfTree( 'HEAD', [GlarfTree('NN', ['wife', 'wife', '1'])]), GlarfTree('PTB2-POINTER', ['|0+1|'])])]), GlarfTree('PUNCTUATION', [GlarfTree('|,|', ['|,|', '', '2']) ]), GlarfTree('APPOSITE', [GlarfTree('NP', [GlarfTree('NAME', [GlarfTree( 'NNP', ['Mary', 'mary', '3'])]), GlarfTree('PTB2-POINTER', ['|3+1|']), GlarfTree('SEM-FEATURE', ['NHUMAN']), GlarfTree('NE-TYPE', ['PERSON']), GlarfTree('PATTERN', ['NAME']), GlarfTree('INDEX', ['5'])])]), GlarfTree( 'PTB2-POINTER', ['|0+2|']), GlarfTree('INDEX', ['4'])])]), GlarfTree( 'PUNCTUATION1', [GlarfTree('|,|', ['|,|', '', '4'])]), GlarfTree('PRD', [ GlarfTree('VP', [GlarfTree('L-SBJ', [GlarfTree('NP', [GlarfTree('EC-TYPE', ['INF']), GlarfTree('INDEX', ['4'])])]), GlarfTree('HEAD', [GlarfTree('VG', [GlarfTree('HEAD', [GlarfTree('VBZ', ['is', 'be', '5'])]), GlarfTree( 'P-ARG2', [GlarfTree('ADJP', [GlarfTree('EC-TYPE', ['PB']), GlarfTree( 'INDEX', ['7'])])]), GlarfTree('P-ARG1', [GlarfTree('NP', [GlarfTree( 'EC-TYPE', ['PB']), GlarfTree('INDEX', ['4'])])]), GlarfTree('INDEX', ['6']), GlarfTree('BASE', ['BE']), GlarfTree('VERB-SENSE', ['1']), GlarfTree('SENSE-NAME', ['"COPULA"'])])]), GlarfTree('PRD', [GlarfTree( 'ADJP', [GlarfTree('HEAD', [GlarfTree('JJ', ['beautiful', 'beautiful', '6']) ]), GlarfTree('PTB2-POINTER', ['|6+1|']), GlarfTree('INDEX', ['7'])])]), GlarfTree('PTB2-POINTER', ['|5+1|']), GlarfTree('TRANSPARENT', ['T'])])]), GlarfTree('PUNCTUATION2', [GlarfTree('|.|', ['|.|', '', '7'])]), GlarfTree( 'PTB2-POINTER', ['|0+3|']), GlarfTree('TREE-NUM', ['0']), GlarfTree( 'FILE-NAME', ['"tmp"']), GlarfTree('INDEX', ['0']), GlarfTree( 'SENTENCE-OFFSET', ['0'])]) nps = [np for np, _ in apposite.nps()]
"w", encoding="utf-8") matches = 0 dep_matches = 0 for ii, (sent, ctx, gf, gt, dep) in enumerate(zip(sents, ctxs, gfs, gts, dep_parse)): dep = dep[1] if ii % 20 == 0: print('.') if f: f.close() f = open('bnc_similes/{}/{:03d}.txt'.format(sys.argv[1], ii / 20), 'w', encoding='utf-8') try: tree = GlarfTree.glarf_parse(gf, gt) args = [get_args(*node) for node in find_comparison_nodes(tree)] args = [ arg_dict for arg_dict in args if arg_dict['C'].lower() == sys.argv[1] and 'V' in arg_dict and arg_dict['V'].strip() != "" ] except ValueError: args = [] continue dep_args = [ m for pat in patterns[:2] for m in match(dep, pat) if m['C'].form.lower() == sys.argv[1] ]
encoding="utf-8") only_dep = open("bnc_similes/{}/only_dep.txt".format(sys.argv[1]), "w", encoding="utf-8") matches = 0 dep_matches = 0 for ii, (sent, ctx, gf, gt, dep) in enumerate(zip(sents, ctxs, gfs, gts, dep_parse)): dep = dep[1] if ii % 20 == 0: print '.' if f: f.close() f = open('bnc_similes/{}/{:03d}.txt'.format(sys.argv[1], ii / 20), 'w', encoding='utf-8') try: tree = GlarfTree.glarf_parse(gf, gt) args = [get_args(*node) for node in find_comparison_nodes(tree)] args = [arg_dict for arg_dict in args if arg_dict['C'].lower() == sys.argv[1] and 'V' in arg_dict and arg_dict['V'].strip() != ""] except ValueError: args = [] continue dep_args = [m for pat in patterns[:2] for m in match(dep, pat) if m['C'].form.lower() == sys.argv[1]] if args: matches += 1 if dep_args: dep_matches += 1
def test_error_parse():
    """Test that GLARF error parses raise exceptions

    Feeds the literal GLARF error marker to ``GlarfTree.glarf_parse``.
    NOTE(review): nothing in this body asserts the exception; presumably a
    decorator outside this view does -- confirm.
    """
    parse = GlarfTree.glarf_parse
    error_marker = "((***ERROR***))"
    parse(error_marker)
def test_leaves():
    """Test that all PTB leaves are captured

    Parses ``test_sentence`` and compares its flat print-out (with pos,
    lemma and structure switched off) against the expected token/index
    string.
    """
    tree = GlarfTree.glarf_parse(test_sentence)
    flat = tree.print_flat(pos=False, lemma=False, structure=False)
    expected = ('In/0 |2003|/1 |,|/2 Yahoo/3 !/4 acquired/5 Overture/6 for/7 '
                '$/8 |1.63|/9 billion/10 |.|/11')
    assert_equal(flat, expected)
def test_unsuccessful_parse():
    """...where the default parse fails

    Hands ``test_sentence`` to the plain ``GlarfTree.parse``.
    NOTE(review): the expected failure is not asserted in this body;
    presumably a decorator outside this view does so -- confirm.
    """
    default_parse = GlarfTree.parse
    default_parse(test_sentence)
def test_successful_parse():
    """Test that glarf_parse succeeds...

    Passes as long as ``GlarfTree.glarf_parse`` accepts ``test_sentence``
    without raising; the returned tree is not inspected.
    """
    glarf_parse = GlarfTree.glarf_parse
    glarf_parse(test_sentence)