def test_parse_other_nt(self):
    """Parsing relative to a specific start nonterminal (cfg3.cfg)."""
    grammar = cfg.load_from_file("../data/cfgs/cfg3.cfg")
    parser = earleyparser.EarleyParser(grammar)
    # Accepted from the grammar's own start symbol, rejected from "A".
    self.assertTrue(parser.parse(("ax", "c", "bx")))
    self.assertFalse(parser.parse_start(("ax", "c", "bx"), "A"))
    # Accepted both from the start symbol and from "A".
    self.assertTrue(parser.parse(("b", "b")))
    self.assertTrue(parser.parse_start(("b", "b"), "A"))
def test_nullParser2(self):
    """Parsing with a grammar containing nullable rules (null2.cfg)."""
    grammar = cfg.load_from_file("../data/cfgs/null2.cfg")
    parser = earleyparser.EarleyParser(grammar)
    # The empty tuple is the empty string; it must be accepted here.
    self.assertTrue(parser.parse(()))
    # A few strings in the language ...
    self.assertTrue(parser.parse(("a", "b", "c")))
    self.assertTrue(parser.parse(("b", "c")))
    self.assertTrue(parser.parse(("a", )))
    self.assertTrue(parser.parse(("a", "c")))
    # ... and one that is not.
    self.assertFalse(parser.parse(("c", "a")))
def test_parser(self):
    """Basic tests of the Earley parser against cfg1.cfg."""
    grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    parser = earleyparser.EarleyParser(grammar)
    # Two strings in the language, two outside it.
    self.assertTrue(parser.parse(("ax", "c", "bx")))
    self.assertTrue(parser.parse(("c", )))
    self.assertFalse(parser.parse(("ax", )))
    self.assertFalse(parser.parse(()))
def test_fkp_strong_1nt(self):
    """Test finding strings for a given NT.

    NOTE(review): as visible here this test only builds its fixtures and
    makes no assertion -- it may have been truncated; confirm against
    version control before relying on it.
    """
    grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    parser = earleyparser.EarleyParser(grammar)
    sampler = uniformsampler.UniformSampler(grammar, 100)
    k = 2
    n = 5
    nonterminal = "O"
def test_fkp_strong_fkp_strings(self):
    """Test whether given strings characterise a given nonterminal.

    NOTE(review): as visible here this test only builds its fixtures and
    makes no assertion -- it may have been truncated; confirm against
    version control before relying on it.
    """
    grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    parser = earleyparser.EarleyParser(grammar)
    w1 = ("a1", "b1")
    w2 = ("a1", "a1", "b1", "b1")
    w3 = ("a2", "b2")
    nonterminal = "O"
def test_intersect_contexts_of_strings(self):
    """Test that we can intersect the contexts of a set of strings correctly.

    NOTE(review): as visible here this test only builds its fixtures and
    makes no assertion -- it may have been truncated; confirm against
    version control before relying on it.
    """
    grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
    parser = earleyparser.EarleyParser(grammar)
    w1 = ("a1", "b1")
    w2 = ("a1", "a1", "b1", "b1")
    w3 = ("a1", "b1", "b1")
    w4 = ("a1", "a1", "b1", "b1", "b1", "b1")
def test_count_parses(self):
    """Counting trees in the packed parse forest of an ambiguous string."""
    grammar = cfg.load_from_file("../data/cfgs/sigmaplus2.cfg")
    parser = earleyparser.EarleyParser(grammar)
    sentence = ("a", "b", "a", "a", "b", "a", "a", "a",
                "a", "b", "a", "a", "b", "a", "a", "a")
    forest = parser.parse_forest(sentence)
    # One forest root, packing 9694845 distinct derivation trees.
    self.assertEqual(1, len(forest))
    self.assertEqual(forest[0].count_trees(), 9694845.0)
def test_parse_context(self):
    """Parsing a nonterminal inside a left/right context (cfg1.cfg)."""
    grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    parser = earleyparser.EarleyParser(grammar)
    # NOTE(review): the root logger is fetched but its level is never
    # configured here, so the info() calls may be filtered out.
    root = logging.getLogger()
    logging.info('Started test_parse_context')
    # The context ("ax", _, "bx") with gap nonterminal S must be accepted.
    self.assertTrue(
        parser.parse_nonterminal_context(("ax", ), "S", ("bx", )))
    logging.info('Ended test_parse_context')
def test_strong_fkp_full(grammar, k):
    """
    Main entry point for the primal tester.

    Returns a dict mapping each nonterminal to the strings found for it,
    or False as soon as any nonterminal cannot be characterised.
    """
    characterisers = dict()
    parser = earleyparser.EarleyParser(grammar)
    # max_substring_length is presumably a module-level constant -- confirm.
    sampler = uniformsampler.UniformSampler(grammar, max_substring_length)
    ncontexts = 25
    for nt in grammar.nonterminals:
        strings = test_strong_fkp_nt(grammar, parser, sampler, nt, k,
                                     ncontexts, stop=True)
        # Fail fast: one uncharacterisable nonterminal sinks the grammar.
        if not strings:
            return False
        characterisers[nt] = strings
    return characterisers
def test_strong_fcp_full(grammar, k): """ Main entry point for the dual tester. Method: """ result = dict() parser = earleyparser.EarleyParser(grammar) sampler = uniformsampler.UniformSampler(grammar, max_substring_length) contextsampler = uniformsampler.ContextSampler(grammar, sampler, max_context_length) ncontexts = 25 for nt in grammar.nonterminals: r = test_strong_fcp_nt(grammar, parser, sampler, contextsampler, nt, k, ncontexts, stop=True) if r: print "nt", nt, r result[nt] = r else: print "Fail ", nt return False return result
def string_density(self, length, samples): """ return an estimate of the proportion of strings of length n that are in the grammar. Do this by sampling uniformly from the derivations, and computing the number of derivations for each such string, and dividing. """ derivations = self.get_total(length) strings = 1.0 * self.vocab**length total = 0.0 parser = earleyparser.EarleyParser(self.grammar) for i in xrange(samples): tree = self.sample(length) w = tree.collectYield() print w #print w n = parser.count_parses(w) print n if n == 0: tree.dump() print "Bad number of parses", n raise ValueError(w) if n < 0: print "infinite number of trees for ", w self.grammar.dump() total += n # mean derivations per string. mean = total / samples if mean <= 0.0: raise ValueError(mean) # the mean number of derivations per string. print "l", length, "derivations ", derivations, "mean ", mean, " possible strings ", strings return (derivations / strings) / mean
def test_errormay(self):
    """Regression test for a grammar that once triggered a parse error."""
    grammar = cfg.load_from_file("../data/cfgs/parseerror1.cfg")
    parser = earleyparser.EarleyParser(grammar)
    sentence = ('pl', 'bc', 'zd', 'cl')
    self.assertTrue(parser.parse(sentence))
def test_rightrecursion(self):
    """Parsing with a right-recursive grammar (rr.cfg)."""
    grammar = cfg.load_from_file("../data/cfgs/rr.cfg")
    parser = earleyparser.EarleyParser(grammar)
    # The empty string is rejected; a recursive string is accepted.
    self.assertFalse(parser.parse(()))
    self.assertTrue(parser.parse(("a", "x", "x", "b")))
def test_nullParser(self):
    """The empty string is accepted under a nullable grammar (null1.cfg)."""
    grammar = cfg.load_from_file("../data/cfgs/null1.cfg")
    parser = earleyparser.EarleyParser(grammar)
    self.assertTrue(parser.parse(()))
def test_look_at_distribution_of_nt(self):
    """Sanity bound on the distribution statistic computed for S."""
    grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
    parser = earleyparser.EarleyParser(grammar)
    statistic = cfgfcp.look_at_distribution_of_nt(grammar, parser, "S", 10)
    # Loose upper bound only; the exact value is not pinned here.
    self.assertTrue(statistic < 10)
def test_cfg4(self):
    """Parsing with cfg4.cfg: the empty string and a short string."""
    grammar = cfg.load_from_file("../data/cfgs/cfg4.cfg")
    parser = earleyparser.EarleyParser(grammar)
    self.assertTrue(parser.parse(()))
    self.assertTrue(parser.parse(("b", "b")))