def test_one_fcp_nt_context_exact1(self):
    """Expect the exact 1-FCP context check to fail for "O" in abab2.cfg.

    The context under test is the pair (("a1",), ("b1",)).
    """
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    left_side, right_side = ("a1", ), ("b1", )
    is_exact = cfgfcp.test_one_fcp_nt_context_exact(
        abab_grammar, "O", (left_side, right_side))
    self.assertFalse(is_exact)
def test_parse_other_nt(self):
    """Compare parse() with parse_start(..., "A") on cfg3.cfg."""
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/cfg3.cfg"))
    bracketed = ("ax", "c", "bx")
    double_b = ("b", "b")

    # Accepted by parse(), rejected when parsing is started from "A".
    self.assertTrue(earley.parse(bracketed))
    self.assertFalse(earley.parse_start(bracketed, "A"))

    # Accepted both by parse() and when started from "A".
    self.assertTrue(earley.parse(double_b))
    self.assertTrue(earley.parse_start(double_b, "A"))
def test_one_fcp_nt_context_exact2(self):
    """Expect the exact 1-FCP context check to succeed for "S" in cfg1.cfg.

    The context under test is the pair (("ax",), ("bx",)).
    """
    simple_grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    left_side, right_side = ("ax", ), ("bx", )
    is_exact = cfgfcp.test_one_fcp_nt_context_exact(
        simple_grammar, "S", (left_side, right_side))
    self.assertTrue(is_exact)
def testconvert_cfg_lengths(self):
    """Convert cfg1.cfg with a length distribution and bound isConsistent().

    The value returned by isConsistent() on the converted grammar must lie
    strictly between 3 and 5.
    """
    source_grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    weights_by_length = [0, 1, 0, 10, 0, 10, 0]
    converted = generatepcfg.convert_cfg_lengths(
        source_grammar, weights_by_length, 100)
    measured = converted.isConsistent()
    within_bounds = measured < 5 and measured > 3
    self.assertTrue(within_bounds)
def test_trim2(self):
    """Check the trim set and the trimmed grammar of trimerror.cfg.

    "S" must be in the computed trim set and remain a nonterminal of the
    trimmed grammar, while "Nc_0->0" must not be a nonterminal after trimming.
    """
    grammar = cfg.load_from_file("../data/cfgs/trimerror.cfg")
    trimset = grammar.compute_trim_set()
    self.assertIn("S", trimset)
    trim = grammar.trim()
    self.assertIn("S", trim.nonterminals)
    # NOTE(review): this check used to appear twice verbatim; a second,
    # different check may have been intended -- confirm with the author.
    self.assertNotIn("Nc_0->0", trim.nonterminals)
def test_smart_infix(self):
    """Build the infix grammar for ("a1", "b1") without "O" and count strings.

    The resulting grammar must have at least one nonterminal, and a uniform
    sampler over it must report a total of 2 for length 3.
    """
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    # sample from the ones that don't have
    infix = ("a1", "b1")
    excluded_nt = "O"
    restricted = abab_grammar.infix_grammar_without_nt(infix, excluded_nt)
    self.assertTrue(len(restricted.nonterminals) > 0)
    counter = uniformsampler.UniformSampler(restricted, 20)
    self.assertEqual(counter.get_total(3), 2)
def test_nullParser2(self):
    """Parse several strings, including the empty one, with null2.cfg."""
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/null2.cfg"))
    accepted = (
        (),
        ("a", "b", "c"),
        ("b", "c"),
        ("a", ),
        ("a", "c"),
    )
    for sentence in accepted:
        self.assertTrue(earley.parse(sentence))
    # Same symbols as an accepted string, but in the opposite order.
    self.assertFalse(earley.parse(("c", "a")))
def test_uniformsampler2(self):
    """Check the per-length values UniformSampler reports for "S" in count1.cfg."""
    counter = uniformsampler.UniformSampler(
        cfg.load_from_file("../data/cfgs/count1.cfg"), 10)
    # (length, expected value of get("S", length))
    expectations = ((0, 1), (1, 2), (2, 1), (3, 0), (5, 0))
    for length, expected in expectations:
        self.assertEqual(counter.get("S", length), expected)
def test_count_parses(self):
    """Count the trees in the parse forest of a 16-symbol string (sigmaplus2.cfg).

    parse_forest() must return exactly one forest, whose count_trees() is
    9694845.0.
    """
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/sigmaplus2.cfg"))
    # One single-character terminal per position.
    sentence = tuple("abaabaaaabaabaaa")
    forests = earley.parse_forest(sentence)
    self.assertEqual(1, len(forests))
    first_forest = forests[0]
    self.assertEqual(first_forest.count_trees(), 9694845.0)
def test_fkp_strong_1nt(self):
    """Set up a parser and sampler for finding strings for a given NT.

    NOTE(review): no assertions are visible in this body; as written it only
    checks that the grammar loads and the parser and sampler can be built.
    """
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    earley = earleyparser.EarleyParser(abab_grammar)
    counter = uniformsampler.UniformSampler(abab_grammar, 100)
    k, n = 2, 5
    target_nt = "O"
def test_fkp_strong_fkp_strings(self):
    """Set up strings meant to characterise a given nonterminal.

    NOTE(review): no assertions are visible in this body; as written it only
    checks that the grammar loads and the parser can be built.
    """
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    earley = earleyparser.EarleyParser(abab_grammar)
    first, second, third = (
        ("a1", "b1"),
        ("a1", "a1", "b1", "b1"),
        ("a2", "b2"),
    )
    target_nt = "O"
def test_intersect_contexts_of_strings(self):
    """Set up strings whose contexts are meant to be intersected.

    NOTE(review): no assertions are visible in this body; as written it only
    checks that the grammar loads and the parser can be built.
    """
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    earley = earleyparser.EarleyParser(abab_grammar)
    first = ("a1", "b1")
    second = ("a1", "a1", "b1", "b1")
    third = ("a1", "b1", "b1")
    fourth = ("a1", "a1", "b1", "b1", "b1", "b1")
def test_parser(self):
    """Smoke-test the Earley parser on cfg1.cfg with accepted and rejected input."""
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/cfg1.cfg"))
    for sentence in (("ax", "c", "bx"), ("c", )):
        self.assertTrue(earley.parse(sentence))
    for sentence in (("ax", ), ()):
        self.assertFalse(earley.parse(sentence))
def test_nullable1(self):
    """Load cfg2.cfg and check its symbol counts and nullable set size.

    Expects 4 terminals, 1 nonterminal and 1 entry from compute_nullable().
    """
    loaded = cfg.load_from_file("../data/cfgs/cfg2.cfg")
    terminal_count = len(loaded.terminals)
    self.assertEqual(terminal_count, 4)
    nonterminal_count = len(loaded.nonterminals)
    self.assertEqual(nonterminal_count, 1)
    nullable = loaded.compute_nullable()
    self.assertEqual(len(nullable), 1)
def test_uniformsampler(self):
    """Check UniformSampler values for "S" in cfg1.cfg and sample a width-5 tree."""
    counter = uniformsampler.UniformSampler(
        cfg.load_from_file("../data/cfgs/cfg1.cfg"), 20)
    # (length, expected value of get("S", length))
    expectations = ((0, 0), (1, 1), (2, 0), (3, 2), (5, 4))
    for length, expected in expectations:
        self.assertEqual(counter.get("S", length), expected)
    sampled = counter.sample(5)
    self.assertEqual(sampled.width(), 5)
def test_parse_context(self):
    """Check that "S" can be parsed in the context (("ax",), ("bx",)) in cfg1.cfg.

    Logs an info message before and after the check; the closing message is
    only reached when the assertion passes.
    """
    grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    parser = earleyparser.EarleyParser(grammar)
    logging.info('Started test_parse_context')
    self.assertTrue(
        parser.parse_nonterminal_context(("ax", ), "S", ("bx", )))
    logging.info('Ended test_parse_context')
def test_intersection1(self):
    """Build the prefix grammar of cfg1.cfg for ("ax",) and inspect it.

    "S" must be a nonterminal of the prefix grammar, and a uniform sampler
    over it must report 0 for "S" at length 0.
    """
    base_grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    prefix_grammar = base_grammar.prefix_grammar(("ax", ))
    self.assertTrue("S" in prefix_grammar.nonterminals)
    counter = uniformsampler.UniformSampler(prefix_grammar, 10)
    self.assertEqual(counter.get("S", 0), 0)
def test_nullable2(self):
    """Check the sizes of the nullable, coreachable and trim sets of cfg3.cfg.

    Expects 4 nullable entries, 4 coreachable entries and a trim set of 3.
    """
    loaded = cfg.load_from_file("../data/cfgs/cfg3.cfg")
    nullable = loaded.compute_nullable()
    self.assertEqual(len(nullable), 4)
    coreachable = loaded.compute_coreachable()
    self.assertEqual(len(coreachable), 4)
    trim_set = loaded.compute_trim_set()
    self.assertEqual(len(trim_set), 3)
def test_context_sampler1(self):
    """Check ContextSampler's index and sampled contexts on cfg6.cfg.

    Verifies three entries of the sampler's ``index`` table, that the context
    sampled for ("S", 0) is the empty pair, and that 100 contexts sampled for
    ("B", 2) each have left and right parts totalling 2 symbols.
    """
    grammar = cfg.load_from_file("../data/cfgs/cfg6.cfg")
    us = uniformsampler.UniformSampler(grammar, 5)
    cs = uniformsampler.ContextSampler(grammar, us, 5)
    self.assertEqual(cs.index["B"][0], 0)
    self.assertEqual(cs.index["S"][0], 1)
    self.assertEqual(cs.index["B"][2], 3)
    context0 = cs.sample_context("S", 0)
    self.assertEqual(context0, ((), ()))
    # range() works on both Python 2 and 3; the index itself is not needed.
    for _ in range(100):
        left, right = cs.sample_context("B", 2)
        self.assertEqual(len(left) + len(right), 2)
def predictor(self, item):
    """
    Incomplete item looking for a nonterminal.

    Enqueues one new item for every production of the grammar whose
    left-hand side is the nonterminal returned by ``next_category(item)``.
    If that nonterminal is in ``self.nullable``, a copy of ``item`` with its
    third field incremented is enqueued as well.

    NOTE(review): items look like 5-tuples (lhs, rhs, dot, start, end) --
    confirm against the item constructor.
    """
    nonterminal = next_category(item)
    if nonterminal in self.nullable:
        logging.info("Nullable item %s", nonterminal)
        # Aycock and Horspool: presumably this steps over the nullable
        # nonterminal without waiting for it to be completed.
        new_item = (item[0], item[1], item[2] + 1, item[3], item[4])
        self.enqueue(new_item)
    for prod in self.grammar.productions:
        if prod[0] == nonterminal:
            # The fresh item takes both of its last two fields from item[4].
            new_item = (prod[0], prod[1], 0, item[4], item[4])
            logging.info("Predicting %s", to_string(new_item))
            self.enqueue(new_item)


if __name__ == '__main__':
    # Interactive driver: load a grammar, then parse whitespace-separated
    # lines typed by the user until EOF or Ctrl-C.
    arg_parser = argparse.ArgumentParser(description=""" Interactive Earley parser for an arbitrary CFG. """)
    arg_parser.add_argument("grammar", help="File containing pcfg")
    args = arg_parser.parse_args()
    grammar = cfg.load_from_file(args.grammar)
    parser = EarleyParser(grammar)
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    while True:
        try:
            line = read_line("Type in line:")
        except (EOFError, KeyboardInterrupt):
            # End the session quietly instead of dying with a traceback.
            break
        print(parser.parse(tuple(line.split())))
if nonterminal in self.nullable: logging.info("Nullable item %s", nonterminal) # Aycock and Horspool new_item = (item[0],item[1],item[2]+1, item[3],item[4]) self.enqueue(new_item) for prod in self.grammar.productions: if prod[0] == nonterminal: new_item = (prod[0],prod[1],0,item[4],item[4]) logging.info("Predicting %s", to_string(new_item)) self.enqueue(new_item) if __name__ == '__main__': parser = argparse.ArgumentParser(description=""" Interactive Earley parser for an arbitrary CFG. """) parser.add_argument("grammar", help="File containing pcfg") args = parser.parse_args() grammar = cfg.load_from_file(args.grammar) parser = EarleyParser(grammar) while True: line = raw_input("Type in line:") print parser.parse(tuple(line.split()))
def test_errormay(self):
    """Expect ('pl', 'bc', 'zd', 'cl') to be accepted under parseerror1.cfg."""
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/parseerror1.cfg"))
    sentence = ('pl', 'bc', 'zd', 'cl')
    accepted = earley.parse(sentence)
    self.assertTrue(accepted)
def test_rightrecursion(self):
    """Parse with rr.cfg: the empty string is rejected, ("a","x","x","b") accepted."""
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/rr.cfg"))
    empty_sentence = ()
    self.assertFalse(earley.parse(empty_sentence))
    nested_sentence = ("a", "x", "x", "b")
    self.assertTrue(earley.parse(nested_sentence))
def test_context_sampler(self):
    """Build a CrudeContextSampler for "S" in cfg1.cfg and check get_total(1) == 1."""
    base_grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    context_sampler = uniformsampler.CrudeContextSampler(
        base_grammar, "S", 20)
    inner_sampler = context_sampler.sampler
    self.assertEqual(inner_sampler.get_total(1), 1)
def test_nullParser(self):
    """Expect the empty string to be accepted under null1.cfg."""
    earley = earleyparser.EarleyParser(
        cfg.load_from_file("../data/cfgs/null1.cfg"))
    empty_sentence = ()
    self.assertTrue(earley.parse(empty_sentence))
def test_look_at_distribution_of_nt(self):
    """Expect look_at_distribution_of_nt for "S" in cfg1.cfg to return less than 10."""
    base_grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    earley = earleyparser.EarleyParser(base_grammar)
    result = cfgfcp.look_at_distribution_of_nt(
        base_grammar, earley, "S", 10)
    below_limit = result < 10
    self.assertTrue(below_limit)
def test_one_fkp_nt_string_exact1(self):
    """Expect the exact 1-FKP string check to fail for "O" and ("a1", "b1") in abab2.cfg."""
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    candidate = ("a1", "b1")
    is_exact = cfgfcp.test_one_fkp_nt_string_exact(
        abab_grammar, "O", candidate)
    self.assertFalse(is_exact)
def test_full_1fcp_exact2(self):
    """Expect test_one_fcp_exact(grammar, 10) to be false for abab2.cfg."""
    abab_grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    outcome = cfgfcp.test_one_fcp_exact(abab_grammar, 10)
    self.assertFalse(outcome)
def test_full_1fcp_exact(self):
    """Expect test_one_fcp_exact(grammar, 1) to be true for cfg1.cfg."""
    base_grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    outcome = cfgfcp.test_one_fcp_exact(base_grammar, 1)
    self.assertTrue(outcome)
def test_full_1fkp_exact3(self):
    """Expect test_one_fkp_exact(grammar, 10) to be true for atwice.cfg."""
    atwice_grammar = cfg.load_from_file("../data/cfgs/atwice.cfg")
    outcome = cfgfcp.test_one_fkp_exact(atwice_grammar, 10)
    self.assertTrue(outcome)
def test_one_fkp_nt_string_exact3(self):
    """Expect the exact 1-FKP string check to fail for "S" and ("ax", "bx") in cfg5.cfg."""
    loaded = cfg.load_from_file("../data/cfgs/cfg5.cfg")
    candidate = ("ax", "bx")
    is_exact = cfgfcp.test_one_fkp_nt_string_exact(
        loaded, "S", candidate)
    self.assertFalse(is_exact)