def post_process(x):
    """Turn a nested ``(body, label)`` tuple structure into ``ImmutableTree``s.

    Anything that is not a tuple is treated as a leaf and returned unchanged.
    A tuple must unpack into exactly two items, the body first and the label
    second.  A tuple body is a sequence of children that are converted
    recursively; any other body becomes the single child of the new node.
    """
    if isinstance(x, tuple):
        body, label = x
        if isinstance(body, tuple):
            children = [post_process(child) for child in body]
        else:
            children = [body]
        return ImmutableTree(label, children)
    return x
def derivation(x):
    """Convert a `Viterbi` or `Point` derivation into ``ImmutableTree`` form.

    ``x`` is any object exposing its payload as ``x.d``.  A payload that is
    neither a list nor a tuple is a leaf and is returned directly.  Otherwise
    the payload must hold exactly two items: the first wraps the body and the
    second wraps the label, each again exposing its value as ``.d``.  A
    list/tuple body is a sequence of sub-derivations converted recursively;
    any other body becomes the only child of the resulting node.
    """
    payload = x.d
    if not isinstance(payload, (list, tuple)):
        return payload

    assert len(payload) == 2
    label = payload[1].d
    body = payload[0].d

    if not isinstance(body, (list, tuple)):
        return ImmutableTree(label, [body])
    return ImmutableTree(label, [derivation(child) for child in body])
def main():
    """Run a small demo: convert a tree, load rules, and print a production.

    Prints, in order: the immutable copy of a mutable ``(foo bar)`` tree, the
    set of rules loaded from ``testrules.yaml``, and the result of calling
    ``produce`` on the ``(foo bar)`` / ``(bar foo)`` tree pair from state
    ``"q0"``.
    """
    # NOTE(review): handing a bracketed string straight to the constructor
    # relies on the tree classes parsing it -- confirm against the tree
    # library version in use.
    source_tree = ImmutableTree("(foo bar)")
    target_tree = ImmutableTree("(bar foo)")

    print(immutable(Tree("(foo bar)")))

    unique_rules = set(loadrules("testrules.yaml"))
    print(unique_rules)

    # XXX(alexr): do we need to make rules be sets?  For now the rules are
    # simply frozen before being handed to produce().
    frozen_rules = frozenset(unique_rules)

    # NOTE(review): the last two arguments were written as (0) and (1), which
    # are plain ints rather than 1-tuples -- confirm that is what produce()
    # expects.
    print(produce(source_tree, target_tree, frozen_rules, "q0", 0, 1))
def test_grammar():
    """Round-trip an annotated tree through ``LexicalizedCFGEncoder``.

    Builds a ``RuleSet`` holding one binarized rule (S -> NP VP) and two unary
    rules (NP -> N, VP -> V), finalises it, then encodes a two-word tree with
    ``transform_structure`` and decodes it again with ``from_parts``.  The
    decoded tree must equal the original.  The intermediate parts and both
    trees are printed to help debugging when the assertion fails.
    """
    grammar = RuleSet()
    grammar.add(BinarizedRule("S", "NP", "VP", 0, 0, (0, 0)))
    grammar.add_unary(UnaryRule("NP", "N"))
    grammar.add_unary(UnaryRule("VP", "V"))
    grammar.finish()

    # NOTE(review): the "^n" suffixes are presumably head-position
    # annotations understood by the encoder -- confirm.
    tree = Tree.fromstring("(S^2 (NP^1 (N^1 dog^1)) (VP^2 (V^2 flies^2)))")
    encoder = LexicalizedCFGEncoder(["dog", "flies"], ["N", "V"], grammar)

    parts = encoder.transform_structure(tree)
    # Single-argument print(...) behaves identically as a Python 2 statement
    # and as a Python 3 function call.
    print(parts)

    tree2 = encoder.from_parts(parts)
    print(tree)
    print(tree2)

    assert tree == tree2
def main():
    """Run a basic REPL for testing.

    Builds a ``GoodmanDOP`` model, backed by ``BitParChartParser``, from a
    two-sentence toy treebank and prints that treebank.  It then repeatedly
    prompts for a sentence on standard input, splits it on whitespace and
    prints the n-best parses followed by the most probable tree and its
    accumulated probability mass.

    An empty line or end-of-file ends the session.  Any error raised while
    parsing a sentence is reported and the loop continues.
    """
    # The input builtin is named differently on Python 2 and Python 3.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    # An earlier "John likes Mary" style corpus used to be assigned here and
    # was immediately overwritten; only this treebank was ever in effect.
    treebank = [
        "(S (NP (DT The) (NN cat)) (VP (VBP saw) (NP (DT the) (JJ hungry) (NN dog))))",
        "(S (NP (DT The) (JJ little) (NN mouse)) (VP (VBP ate) (NP (DT the) (NN cat))))",
    ]
    # NOTE(review): Tree(<string>) and FreqDist.inc below look like the
    # pre-3.0 NLTK spellings -- confirm against the installed version.
    corpus = [Tree(a) for a in treebank]

    from bitpar import BitParChartParser
    d = GoodmanDOP(corpus, rootsymbol='TOP', wrap='TOP',
                   parser=BitParChartParser)

    print("corpus")
    for tree in corpus:
        print(tree)

    while True:
        try:
            words = read_line("sentence: ").split()
        except EOFError:
            # Ctrl-D / closed stdin ends the session instead of crashing.
            break
        if not words:
            # Blank line: quit without trying to parse an empty sentence.
            break

        try:
            p = FreqDist()
            for n, parse in enumerate(d.parser.nbest_parse(words)):
                # Same cut-off as before: indices 0..1000 are processed.
                if n > 1000:
                    break
                print(parse)
                # Sum the probability of parses that are identical once
                # removeids() has been applied.
                p.inc(ImmutableTree.convert(removeids(parse)), parse.prob())
            print("")
            best = p.max()
            print("best %s %s" % (best, p[best]))
        except Exception as exc:
            # Top-level REPL boundary: report the failure and keep going.
            print("error: %s" % (exc,))
def immutable(tr):
    """Given a Tree, return an equivalent ``ImmutableTree``.

    The conversion is structural: every subtree is rebuilt as an
    ``ImmutableTree`` and leaves are carried over as-is.  This replaces the
    earlier approach of pretty-printing the tree with a very wide margin and
    re-parsing the text, which altered leaves or labels containing
    whitespace or brackets and depended on ``pprint`` returning a string.

    Assumes ``ImmutableTree`` provides the NLTK-style ``convert`` classmethod.
    """
    return ImmutableTree.convert(tr)
def test_PreterminalJapaneseTypedVarImmutableTree(self):
    """``TreeContains`` accepts the pattern ``?x0|学生`` on ``(学生 hello)``.

    The tree is built as an ``ImmutableTree`` and the pattern goes through
    ``tree_or_string``; both contain non-ASCII (Japanese) text.
    """
    self.assertTrue(
        TreeContains(
            ImmutableTree.fromstring(u'(学生 hello)'),
            tree_or_string(u'?x0|学生')
        )
    )