def test_to_text_cnf(self):
    """Round-trip check: a grammar converted to CNF and serialized with
    to_text() must parse back into a grammar accepting the same words.

    Fixed: removed a leftover debug print of the serialized grammar.
    """
    cfg = CFG.from_text("S -> a S b | a b")
    cnf = cfg.to_normal_form()
    self.assertTrue(cnf.contains(["a", "b"]))
    new_text = cnf.to_text()
    new_cfg = CFG.from_text(new_text)
    self.assertTrue(new_cfg.contains(["a", "b"]))
def test_from_text2(self):
    # The grammar text deliberately mixes line-separator conventions
    # (\n, \r and \n\r) to check that from_text splits productions on
    # every newline style, not just "\n".
    text = """
    S -> A B\n\rA -> a
    B -> b\r
    """
    cfg = CFG.from_text(text)
    self.assertTrue(cfg.contains(["a", "b"]))
def cfg_from_text(source: str, start_symbol: Variable = Variable("S")) -> CFG:
    """Create a context-free grammar [1]_ from text.

    Parameters
    ----------
    source : str
        The text with which the context-free grammar will be created.

    start_symbol : Variable
        Start symbol of a context-free grammar.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> cfpq_data.cfg_to_text(cfg)
    'S -> a S b S\\n'

    Returns
    -------
    cfg : CFG
        Context-free grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    # NOTE(review): the default start_symbol is a single Variable("S")
    # instance created at import time and shared across calls — safe only
    # as long as Variable is immutable (confirm against pyformlang docs).
    return CFG.from_text(source, start_symbol)
def cfg_from_cnf(cnf: CFG) -> CFG:
    """Create a context-free grammar [2]_ from given context-free grammar
    in Chomsky normal form [1]_.

    Parameters
    ----------
    cnf : CFG
        Context free grammar in Chomsky normal form.

    Examples
    --------
    >>> import cfpq_data
    >>> cnf = cfpq_data.cnf_from_text("S -> a S b S | epsilon")
    >>> cfg = cfpq_data.cfg_from_cnf(cnf)
    >>> [cfg.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    cfg : CFG
        Context-free grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    # Round-trips through the textual representation: serialize the CNF and
    # re-parse it with the same start symbol to obtain a plain CFG copy.
    return CFG.from_text(cnf.to_text(), cnf.start_symbol)
def test_get_first_set2(self):
    """FIRST sets for a grammar with nullable non-terminals (Є = epsilon).

    Example from:
    https://www.geeksforgeeks.org/first-set-in-syntax-analysis/
    """
    grammar = """
    S -> A C B | C b b | B a
    A -> d a | B C
    B -> g | Є
    C -> h | Є
    """
    parser = LLOneParser(CFG.from_text(grammar))
    first_set = parser.get_first_set()
    # Every variable here is nullable, so each FIRST set includes epsilon.
    expected = {
        "S": {"d", "g", "h", "b", "a"},
        "A": {"d", "g", "h"},
        "B": {"g"},
        "C": {"h"},
    }
    for head, letters in expected.items():
        self.assertEqual(
            first_set[Variable(head)],
            {Terminal(x) for x in letters}.union({Epsilon()}))
def test_get_follow_set(self):
    """FOLLOW sets for the classic left-factored expression grammar.

    Example from:
    https://www.geeksforgeeks.org/follow-set-in-syntax-analysis/
    """
    grammar = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    parser = LLOneParser(CFG.from_text(grammar, start_symbol="E"))
    follow_set = parser.get_follow_set()
    # "$" marks end-of-input in the FOLLOW sets.
    expected = {
        "E": {"$", Terminal(")")},
        "E’": {"$", Terminal(")")},
        "T": {"$", Terminal("+"), Terminal(")")},
        "T’": {"$", Terminal("+"), Terminal(")")},
        "F": {"$", Terminal("+"), Terminal("*"), Terminal(")")},
    }
    for head, follows in expected.items():
        self.assertEqual(follow_set[Variable(head)], follows)
def test_get_llone_table(self):
    """Checks production counts in selected LL(1) parsing-table cells.

    Example from:
    https://www.geeksforgeeks.org/construction-of-ll1-parsing-table/
    """
    grammar = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    llone_parser = LLOneParser(CFG.from_text(grammar, start_symbol="E"))
    parsing_table = llone_parser.get_llone_parsing_table()
    # (variable, lookahead terminal, expected number of productions)
    cases = [
        ("E", "id", 1),
        ("E", "+", 0),
        ("T’", ")", 1),
        ("F", "(", 1),
        ("F", "id", 1),
    ]
    for head, lookahead, count in cases:
        cell = parsing_table.get(Variable(head), dict()).get(
            Terminal(lookahead), [])
        self.assertEqual(len(cell), count)
def test_infinite_recursion(self):
    # Left recursion (S -> S E) makes a naive top-down parser recurse
    # forever on the leftmost symbol: the default (left-first) strategy
    # must hit Python's recursion limit, while the right-first strategy
    # (left=False) terminates and simply rejects the word.
    cfg = CFG.from_text("""
        S -> S E
    """)
    parser = RecursiveDecentParser(cfg)
    with self.assertRaises(RecursionError):
        parser.is_parsable([")"])
    self.assertFalse(parser.is_parsable([")"], left=False))
def setUp(self) -> None:
    # Small parenthesized-arithmetic grammar shared by the
    # recursive-descent parser tests in this case class.
    cfg = CFG.from_text("""
        E -> S + S
        E -> S * S
        S -> ( E )
        S -> int
    """)
    self.parser = RecursiveDecentParser(cfg)
def from_file(path: str, start_symbol=Variable("S"), is_reduced: bool = False):
    """Read a grammar file and build a CNF object from it.

    Each non-blank line has the form "<head> <body tokens...>"; it is
    rewritten as "<head> -> <body>" so CFG.from_text can parse it.  A line
    containing only a head becomes an epsilon production.

    Fixes over the previous version: the file is closed even if parsing
    raises (context manager); blank lines — e.g. a trailing newline — no
    longer produce a junk " -> epsilon" production; productions are
    collected in a list and joined once instead of quadratic `+=`.

    :param path: path to the grammar file
    :param start_symbol: start variable of the grammar
    :param is_reduced: forwarded to the CNF constructor
    :return: CNF built from the parsed grammar
    """
    productions = []
    with open(path) as file:
        for production in file.read().split('\n'):
            if not production.strip():
                continue  # skip blank lines (e.g. the trailing newline)
            if ' ' in production:
                head, product = production.split(' ', 1)
            else:
                head, product = production, 'epsilon'
            productions.append(f'{head} -> {product}')
    cfg = CFG.from_text('\n'.join(productions), start_symbol)
    return CNF(cfg, is_reduced)
def test_is_not_normal_form(self):
    """The raw expression grammar (long bodies, mixed terminal/variable
    right-hand sides) is not in Chomsky normal form."""
    grammar = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    self.assertFalse(
        CFG.from_text(grammar, start_symbol="E").is_normal_form())
def test_is_normal_form(self):
    """A grammar whose productions are all two-variable or single-terminal
    bodies is recognized as Chomsky normal form."""
    grammar = """
    E -> T E’
    E’ -> T E’
    T -> F T’
    T’ -> *
    F -> ( | id
    """
    self.assertTrue(
        CFG.from_text(grammar, start_symbol="E").is_normal_form())
def test_from_text(self):
    """Two productions over variables S, A, B, Bobo and terminal r."""
    cfg = CFG.from_text("""
    S -> A B
    A -> Bobo r
    """)
    # Capitalized tokens parse as variables, lowercase as terminals.
    self.assertEqual(len(cfg.variables), 4)
    self.assertEqual(len(cfg.productions), 2)
    self.assertEqual(len(cfg.terminals), 1)
    self.assertEqual(cfg.start_symbol, Variable("S"))
def test_is_not_llone_parsable(self):
    """S -> A | a together with A -> a has a FIRST/FIRST conflict on "a",
    so the grammar is not LL(1).

    Example from:
    https://www.geeksforgeeks.org/construction-of-ll1-parsing-table/

    Fixed: the start symbol used to be "E", which does not occur in this
    grammar at all (copy-paste from the expression-grammar tests).
    """
    text = """
    S -> A | a
    A -> a
    """
    cfg = CFG.from_text(text, start_symbol="S")
    llone_parser = LLOneParser(cfg)
    self.assertFalse(llone_parser.is_llone_parsable())
def test_from_cfg(self):
    """Recursive automata built from a CFG agree with those built
    directly from regexes."""
    # g1: S -> a S b | a b
    from_grammar = RecursiveAutomaton.from_cfg(
        CFG.from_text("S -> a S b | a b"))
    from_regex = RecursiveAutomaton.from_regex(
        Regex("a S b | a b"), Symbol("S"))
    self.assertEqual(from_grammar, from_regex)
    # g2: S -> a V b
    #     V -> c S d | c d
    rsa_g2 = RecursiveAutomaton.from_cfg(
        CFG.from_text("S -> a V b\nV -> c S d | c d"))
    self.assertEqual(rsa_g2.get_number_of_boxes(), 2)
    self.assertEqual(rsa_g2.labels, {Symbol("S"), Symbol("V")})
    # Each variable's box is the minimized DFA of its right-hand side.
    for label, rhs in (("S", "a V b"), ("V", "c S d | c d")):
        dfa = Regex(rhs).to_epsilon_nfa().minimize()
        self.assertEqual(rsa_g2.get_box(Symbol(label)),
                         Box(dfa, Symbol(label)))
def read_grammar_from_file(file_path):
    """Read a grammar whose lines are "<head> <body tokens...>" (no arrow)
    and convert it to pyformlang's "head -> body" text format.

    Fixed: blank lines (including a trailing newline) previously raised
    IndexError on `raw_current_production[0]`; they are now skipped.

    :param file_path: path to the grammar file
    :return: the parsed CFG
    """
    productions = []
    with open(file_path, 'r') as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                continue  # blank line — nothing to convert
            # "S a S b S" to "S -> a S b S"
            productions.append(tokens[0] + ' -> ' + ' '.join(tokens[1:]))
    return CFG.from_text('\n'.join(productions))
def test_is_llone_parsable(self):
    """The left-factored expression grammar is LL(1).

    Example from:
    https://www.geeksforgeeks.org/construction-of-ll1-parsing-table/
    """
    grammar = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    parser = LLOneParser(CFG.from_text(grammar, start_symbol="E"))
    self.assertTrue(parser.is_llone_parsable())
def test_save_tree(self):
    """An LL(1) parse tree can be serialized to a DOT file on disk."""
    grammar = """
    E -> T E'
    E' -> + T E' | epsilon
    T -> F T'
    T' -> * F T' | epsilon
    F -> ( E ) | id
    """
    parser = LLOneParser(CFG.from_text(grammar, start_symbol="E"))
    tree = parser.get_llone_parse_tree(["id", "+", "id", "*", "id"])
    tree.write_as_dot("parse_tree.dot")
    self.assertTrue(path.exists("parse_tree.dot"))
def test_to_text(self):
    """to_text() emits every production of the grammar, with alternatives
    split into separate lines."""
    text = """E -> T E’
    E’ -> T E’
    T -> F T’
    T’ -> *
    F -> ( | id
    """
    text_result = CFG.from_text(text, start_symbol="E").to_text()
    # "F -> ( | id" must come back as two separate productions.
    for production in ("E -> T E’",
                       "E’ -> T E’",
                       "T -> F T’",
                       "T’ -> *",
                       "F -> (",
                       "F -> id"):
        self.assertIn(production, text_result)
def test_get_llone_parse_tree(self):
    """The parse tree of "id + id * id" is rooted at E and splits into
    the two children of the production E -> T E’."""
    grammar = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    parser = LLOneParser(CFG.from_text(grammar, start_symbol="E"))
    tree = parser.get_llone_parse_tree(["id", "+", "id", "*", "id"])
    self.assertEqual(tree.value, Variable("E"))
    self.assertEqual(len(tree.sons), 2)
def test_from_text(automatic_suite):
    """MyCNF.from_text must accept every word (length <= 13) generated by
    the reference pyformlang CFG built from the same text."""
    grammar = automatic_suite
    reference = CFG.from_text(grammar)
    my_cnf = MyCNF.from_text(grammar)

    def accepted(word):
        # The empty word additionally requires generate_epsilon() to be
        # exactly True; every word must also satisfy contains().
        if len(word) == 0 and my_cnf.generate_epsilon() is not True:
            return False
        return my_cnf.contains(word) is True

    assert all(accepted(word) for word in reference.get_words(13))
def from_text(cls, text: str, start_symbol = "S") -> "ContextFreeQuery":
    """Build a ContextFreeQuery from grammar text of "head -> body" lines.

    "eps" is rewritten to the epsilon marker "$"; bodies of repeated heads
    are merged with "|"; each merged body is compiled to a Regex and the
    result is converted back to CFG text via _from_heads_and_regexes.

    Improvements: single pass over the lines instead of three parallel
    lists indexed by range(len(...)); `in dict` instead of `.keys()`
    membership; a fresh dict comprehension instead of mutating the dict
    while iterating its items().

    :param text: grammar text, one production per line
    :param start_symbol: name of the start variable (default "S")
    :return: the constructed ContextFreeQuery
    """
    res = cls()
    text = text.replace("eps", "$")
    heads_and_bodies = dict()
    for line in text.splitlines():
        # Same parsing as before: fragment left of the first "->" is the
        # head, the fragment right of it is the body.
        parts = line.split("->")
        head = parts[0].strip()
        body = parts[1].strip()
        if head in heads_and_bodies:
            heads_and_bodies[head] += " | " + body  # merge alternatives
        else:
            heads_and_bodies[head] = body
    regexes = {head: Regex(body) for head, body in heads_and_bodies.items()}
    cfg_text = cls._from_heads_and_regexes(regexes.items())
    res._cfg = CFG.from_text(cfg_text, Variable(start_symbol))
    return res
def test_from_txt(automatic_suite, tmp_path):
    """MyCNF.from_txt reads the arrow-less file format and must accept
    every word (length <= 13) generated by the reference CFG."""
    grammar = automatic_suite
    grammar_file = tmp_path / 'grammar.txt'
    # The file format omits the " ->" arrows, so strip them before writing.
    grammar_file.write_text(grammar.replace(' ->', ''))
    reference = CFG.from_text(grammar)
    my_cnf = MyCNF.from_txt(grammar_file)

    def accepted(word):
        # The empty word additionally requires generate_epsilon() to be
        # exactly True; every word must also satisfy contains().
        if len(word) == 0 and my_cnf.generate_epsilon() is not True:
            return False
        return my_cnf.contains(word) is True

    assert all(accepted(word) for word in reference.get_words(13))
def test_get_first_set(self):
    """FIRST sets for the left-factored expression grammar.

    Example from:
    https://www.geeksforgeeks.org/first-set-in-syntax-analysis/

    Consistency fix: every sibling test parses this grammar with
    start_symbol="E"; this one used the default "S", which does not occur
    in the grammar at all.  FIRST sets are computed per variable, so the
    assertions are unaffected.
    """
    text = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    cfg = CFG.from_text(text, start_symbol="E")
    llone_parser = LLOneParser(cfg)
    first_set = llone_parser.get_first_set()
    self.assertEqual(first_set[Variable("E")],
                     {Terminal("("), Terminal("id")})
    self.assertEqual(first_set[Variable("E’")],
                     {Terminal("+"), Epsilon()})
    self.assertEqual(first_set[Variable("T")],
                     {Terminal("("), Terminal("id")})
    self.assertEqual(first_set[Variable("T’")],
                     {Terminal("*"), Epsilon()})
    self.assertEqual(first_set[Variable("F")],
                     {Terminal("("), Terminal("id")})
def read_cfgrammar(name):
    """Read a grammar file mixing plain CFG productions with 'regexp' lines
    whose right-hand side is a regular expression.

    'regexp' lines are converted to sub-grammars via Regex.to_cfg and their
    variables, terminals and productions are merged into the CFG parsed
    from the plain lines.

    Fixes over the previous version: the file is closed even if parsing
    raises (context manager); plain lines are collected in a list and
    joined once instead of quadratic `s += line`; the newline is removed
    with rstrip('\\n') instead of [:-1], which chopped a real character
    when the last line had no trailing newline.

    :param name: path to the grammar file
    :return: the combined CFG
    """
    plain_lines = []
    regex_cfgs = []
    with open(name, 'r') as file:
        for line in file:
            if 'regexp' in line:
                line = line.replace('regexp', "")
                head = line.split(" -> ")[0]
                regex = Regex(line.split(" -> ")[1].rstrip('\n'))
                regex_cfgs.append(regex.to_cfg(starting_symbol=head))
            else:
                plain_lines.append(line)
    cfg = CFG.from_text(''.join(plain_lines))
    # CFG objects are immutable, so merging builds a new grammar each time.
    for sub in regex_cfgs:
        cfg = CFG(cfg.variables.union(sub.variables),
                  cfg.terminals.union(sub.terminals),
                  cfg.start_symbol,
                  cfg.productions.union(sub.productions))
    return cfg
def test_get_follow_set2(self):
    """FOLLOW sets for a grammar with nullable non-terminals.

    Example from:
    https://www.geeksforgeeks.org/follow-set-in-syntax-analysis/

    Fixed: removed a leftover debug print of the whole follow set.
    """
    text = """
    S -> A C B | C b b | B a
    A -> d a | B C
    B -> g | Є
    C -> h | Є
    """
    cfg = CFG.from_text(text)
    llone_parser = LLOneParser(cfg)
    follow_set = llone_parser.get_follow_set()
    self.assertEqual(follow_set["S"], {"$"})
    self.assertEqual(follow_set["A"],
                     {"$", Terminal("h"), Terminal("g")})
    self.assertEqual(
        follow_set["B"],
        {"$", Terminal("h"), Terminal("g"), Terminal("a")})
    self.assertEqual(
        follow_set["C"],
        {"$", Terminal("h"), Terminal("g"), Terminal("b")})
def test_llone_table_non_llone(self):
    """For the ambiguous grammar S -> A | a, A -> a the LL(1) table cell
    (S, a) holds two productions, i.e. the grammar is not LL(1).

    Fixed: the start symbol used to be "E", which does not occur in this
    grammar (copy-paste from the expression-grammar tests).  The expected
    cell counts are unchanged: no production here derives epsilon, so no
    "$"-column entry appears either way.
    """
    text = """
    S -> A | a
    A -> a
    """
    cfg = CFG.from_text(text, start_symbol="S")
    llone_parser = LLOneParser(cfg)
    parsing_table = llone_parser.get_llone_parsing_table()
    self.assertEqual(
        len(parsing_table.get(Variable("S"),
                              dict()).get(Terminal("a"), [])),
        2)
    self.assertEqual(
        len(parsing_table.get(Variable("A"),
                              dict()).get(Terminal("a"), [])),
        1)
    self.assertEqual(
        len(parsing_table.get(Variable("S"),
                              dict()).get(Terminal("$"), [])),
        0)
    self.assertEqual(
        len(parsing_table.get(Variable("A"),
                              dict()).get(Terminal("$"), [])),
        0)
"""Sample grammars for RDF part of CFPQ_Data dataset. Introduced in `"Context-Free Path Queries on RDF Graphs" <https://arxiv.org/abs/1506.00743>`_ """ from pyformlang.cfg import CFG __all__ = [ "g1", "g2", "geo", ] #: $S \, \rightarrow \, \overline{subClassOf} \, S \, subClassOf \, \mid \, #: \overline{subClassOf} \, subClassOf \, \\ #: S \, \rightarrow \, \overline{type} \, S \, type \, \mid \, #: \overline{type} \, type \, \\$ g1 = CFG.from_text("S -> sco_r S sco | t_r S t | sco_r sco | t_r t") #: $S \, \rightarrow \, \overline{subClassOf} \, S \, subClassOf \, \mid \, #: \overline{subClassOf} \, subClassOf \, \\$ g2 = CFG.from_text("S -> sco_r S sco | sco") #: $S \, \rightarrow \, broaderTransitive \, S \, \overline{broaderTransitive} \, \mid \, #: broaderTransitive \, \overline{broaderTransitive} \, \\$ geo = CFG.from_text("S -> bt S bt_r | bt bt_r")
def test_sentence_cfg(self):
    """End-to-end exercise on a toy natural-language grammar: regex
    intersection, CNF conversion, LL(1) parsing and the leftmost
    derivation of a sentence."""
    cfg = CFG.from_text("""
        S -> NP VP PUNC
        PUNC -> . | !
        VP -> V NP
        V -> buys | touches | sees
        NP -> georges | jacques | léo | Det N
        Det -> a | an | the
        N -> gorilla | sky | carrots
        """)
    # Intersecting with a regex keeps only sentences matched by both.
    regex = Regex("georges touches (a|an) (sky|gorilla) !")
    cfg_inter = cfg.intersection(regex)
    self.assertFalse(cfg_inter.is_empty())
    self.assertTrue(cfg_inter.is_finite())
    # "sees" and "." are not in the regex, so this sentence is excluded.
    self.assertFalse(
        cfg_inter.contains(["georges", "sees", "a", "gorilla", "."]))
    self.assertTrue(
        cfg_inter.contains(["georges", "touches", "a", "gorilla", "!"]))
    self.assertFalse(cfg_inter.is_normal_form())
    cnf = cfg.to_normal_form()
    self.assertTrue(cnf.is_normal_form())
    llone_parser = LLOneParser(cfg)
    parse_tree = llone_parser.get_llone_parse_tree(
        ["georges", "sees", "a", "gorilla", "."])
    # Each step of the leftmost derivation expands the leftmost variable.
    self.assertEqual(
        parse_tree.get_leftmost_derivation(),
        [[Variable("S")],
         [Variable("NP"), Variable("VP"), Variable("PUNC")],
         [Terminal("georges"), Variable("VP"), Variable("PUNC")],
         [
             Terminal("georges"), Variable("V"),
             Variable("NP"), Variable("PUNC")
         ],
         [
             Terminal("georges"), Terminal("sees"),
             Variable("NP"), Variable("PUNC")
         ],
         [
             Terminal("georges"), Terminal("sees"),
             Variable("Det"), Variable("N"), Variable("PUNC")
         ],
         [
             Terminal("georges"), Terminal("sees"),
             Terminal("a"), Variable("N"), Variable("PUNC")
         ],
         [
             Terminal("georges"), Terminal("sees"),
             Terminal("a"), Terminal("gorilla"), Variable("PUNC")
         ],
         [
             Terminal("georges"), Terminal("sees"),
             Terminal("a"), Terminal("gorilla"), Terminal(".")
         ]])
    parse_tree.write_as_dot("parse_tree.dot")
def test_get_llone_rightmost_derivation(self):
    """Rightmost derivation of "id + id * id": each step expands the
    rightmost remaining variable of the sentential form."""
    text = """
    E -> T E’
    E’ -> + T E’ | Є
    T -> F T’
    T’ -> * F T’ | Є
    F -> ( E ) | id
    """
    cfg = CFG.from_text(text, start_symbol="E")
    llone_parser = LLOneParser(cfg)
    parse_tree = llone_parser.get_llone_parse_tree(
        ["id", "+", "id", "*", "id"])
    self.assertEqual(parse_tree.get_rightmost_derivation(), [
        [Variable("E")],
        [Variable("T"), Variable("E’")],
        [Variable("T"), Terminal("+"), Variable("T"), Variable("E’")],
        [Variable("T"), Terminal("+"), Variable("T")],
        [Variable("T"), Terminal("+"), Variable("F"), Variable("T’")],
        [
            Variable("T"), Terminal("+"), Variable("F"),
            Terminal("*"), Variable("F"), Variable("T’")
        ],
        [
            Variable("T"), Terminal("+"), Variable("F"),
            Terminal("*"), Variable("F")
        ],
        [
            Variable("T"), Terminal("+"), Variable("F"),
            Terminal("*"), Terminal("id")
        ],
        [
            Variable("T"), Terminal("+"), Terminal("id"),
            Terminal("*"), Terminal("id")
        ],
        [
            Variable("F"), Variable("T’"), Terminal("+"),
            Terminal("id"), Terminal("*"), Terminal("id")
        ],
        [
            Variable("F"), Terminal("+"), Terminal("id"),
            Terminal("*"), Terminal("id")
        ],
        [
            Terminal("id"), Terminal("+"), Terminal("id"),
            Terminal("*"), Terminal("id")
        ],
    ])