def _test_profiling_intersection(self):
    """Intersect a small grammar with a long chain-shaped DFA.

    The automaton is a straight chain: ``size`` transitions on "a"
    followed by ``size`` transitions on "b", with only the last state
    final.  The checks are made on the intersection of the grammar with
    that automaton.
    """
    size = 50
    chain = [State(index) for index in range(2 * size + 1)]
    symb_a, symb_b = Symbol("a"), Symbol("b")
    dfa = DeterministicFiniteAutomaton(
        chain,
        {symb_a, symb_b},
        start_state=chain[0],
        final_states={chain[-1]},
    )
    # First half of the chain reads "a", second half reads "b".
    for source, target in zip(chain[:size], chain[1:]):
        dfa.add_transition(source, symb_a, target)
    for source, target in zip(chain[size:], chain[size + 1:]):
        dfa.add_transition(source, symb_b, target)

    ter_a, ter_b = Terminal("a"), Terminal("b")
    start, inner, left = Variable("S"), Variable("S1"), Variable("L")
    rules = [
        Production(start, [left, inner]),
        Production(left, [Epsilon()]),
        Production(inner, [ter_a, inner, ter_b]),
        Production(inner, [ter_b, inner, ter_a]),
        Production(inner, []),
    ]
    grammar = CFG(productions=rules, start_symbol=start)

    intersection = grammar.intersection(dfa)
    self.assertFalse(intersection.is_empty())
    self.assertTrue(intersection.contains([ter_a] * size + [ter_b] * size))
    self.assertFalse(intersection.contains([]))
def test_intersection_dfa2(self):
    """Intersect a grammar with a one-state DFA looping on "a" and "b"."""
    only_state = State(0)
    symb_a, symb_b = Symbol("a"), Symbol("b")
    dfa = DeterministicFiniteAutomaton(
        {only_state},
        {symb_a, symb_b},
        start_state=only_state,
        final_states={only_state},
    )
    # Both symbols loop on the single (start and final) state.
    for symbol in (symb_a, symb_b):
        dfa.add_transition(only_state, symbol, only_state)
    self.assertTrue(dfa.accepts([symb_a, symb_a, symb_b, symb_b]))

    ter_a, ter_b = Terminal("a"), Terminal("b")
    start, inner, left = Variable("S"), Variable("S1"), Variable("L")
    rules = {
        Production(start, [left, inner]),
        Production(left, [Epsilon()]),
        Production(inner, [ter_a, inner, ter_b]),
        Production(inner, [ter_b, inner, ter_a]),
        Production(inner, []),
    }
    grammar = CFG(productions=rules, start_symbol=start)
    self.assertTrue(grammar.contains([ter_a, ter_a, ter_b, ter_b]))
    self.assertFalse(grammar.contains([ter_a, ter_a, ter_b]))

    intersection = grammar.intersection(dfa)
    self.assertFalse(intersection.is_empty())
    self.assertTrue(intersection.contains([ter_a, ter_a, ter_b, ter_b]))
    self.assertTrue(intersection.contains([]))
def cfg_from_cnf(cnf: CFG) -> CFG:
    """Build a context-free grammar [2]_ from a grammar given in
    Chomsky normal form [1]_.

    The grammar is rebuilt from the text representation of ``cnf`` and
    keeps ``cnf``'s start symbol.

    Parameters
    ----------
    cnf : CFG
        Context free grammar in Chomsky normal form.

    Examples
    --------
    >>> import cfpq_data
    >>> cnf = cfpq_data.cnf_from_text("S -> a S b S | epsilon")
    >>> cfg = cfpq_data.cfg_from_cnf(cnf)
    >>> [cfg.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    cfg : CFG
        Context-free grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    cnf_text = cnf.to_text()
    start = cnf.start_symbol
    return CFG.from_text(cnf_text, start)
def part2(file='input_test.txt'):
    """Count the messages generated by the patched rule set and print it.

    Rules and messages come from ``get_input(file)``.  Rules "8" and "11"
    are overwritten with their recursive variants before the rules are
    turned into a context-free grammar whose start variable is "0".
    """
    rules, messages = get_input(file)
    # 8: 42 | 42 8
    # 11: 42 31 | 42 11 31
    rules['8'] = '42 | 42 8'
    rules['11'] = '42 31 | 42 11 31'

    rule_variables = set()
    rule_products = set()
    for name in rules:
        alternatives = rules[name].split(' | ')
        rule_variables.add(Variable(name))
        for alternative in alternatives:
            if alternative in ('"a"', '"b"'):
                # A quoted letter is a terminal rule.
                body = [Terminal(alternative.replace('"', ''))]
            else:
                # Anything else is a space-separated list of rule ids.
                body = [Variable(token) for token in alternative.split(' ')]
            rule_products.add(Production(Variable(name), body))

    cfg = CFG(rule_variables, {Terminal('a'), Terminal('b')}, Variable('0'),
              rule_products)

    count = 0
    for message in messages:
        count += 1 if cfg.contains(message) else 0
    print('Part 2: Solution {}'.format(count))
def __init__(self, rules: Iterable[str], patch: bool = False):
    """Build ``self.CFG`` from textual rules of the form ``"<id>: <body>"``.

    A body that starts with a double quote is a terminal rule and its
    second character is the terminal.  Any other body is a list of
    alternatives separated by ``" | "``, each a space-separated sequence
    of rule ids.

    Parameters
    ----------
    rules : Iterable[str]
        Rule lines, e.g. ``'0: 4 1 5'`` or ``'4: "a"'``.
    patch : bool
        When true, the bodies of rules "8" and "11" are replaced with
        ``"42 | 42 8"`` and ``"42 31 | 42 11 31"``.

    Raises
    ------
    ValueError
        If no rule with id "0" (the start symbol) is present.
    """
    start_var = None
    variables: Set[Variable] = set()
    terminals: Set[Terminal] = set()
    productions: Set[Production] = set()

    for rule in rules:
        name, body = rule.split(": ")
        head = Variable(name)
        # Register every rule head so the declared variable set is not
        # left empty when the grammar is built.
        variables.add(head)
        if name == "0":
            start_var = head

        if body[0] == '"':
            ter = Terminal(body[1])
            terminals.add(ter)
            productions.add(Production(head, [ter]))
            continue

        if patch:
            if name == "8":
                body = "42 | 42 8"
            elif name == "11":
                body = "42 31 | 42 11 31"

        for alternative in body.split(" | "):
            productions.add(
                Production(head,
                           [Variable(x) for x in alternative.split(" ")]))

    if start_var is None:
        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError('rules do not define the start rule "0"')

    self.CFG = CFG(variables, terminals, start_var, productions)
def _create_cfg_from_regex(cls, head: Variable, regex: Regex, variables=None) -> CFG:
    """Build a CFG whose ``head`` derives the language of ``regex``.

    The regex is converted to a minimized DFA.  Each DFA state gets a
    fresh variable named ``"<state>:<counter>"``, and:

    * ``head -> V(start)`` is added for every start state,
    * ``V(p) -> x V(q)`` is added for every transition ``p --x--> q``,
    * ``V(f) -> epsilon`` is added for every final state ``f``.

    An edge symbol ``x`` becomes a ``Variable`` when ``variables`` is
    given (non-empty) and contains it, or, without ``variables``, when it
    is upper-case; otherwise it becomes a ``Terminal``.

    Parameters
    ----------
    head : Variable
        Start symbol of the resulting grammar.
    regex : Regex
        Regular expression describing the bodies allowed for ``head``.
    variables : optional
        Collection of symbol values to treat as variables.

    Returns
    -------
    CFG
        Grammar with start symbol ``head``.  If no production at all
        was produced, the grammar consists of ``head -> epsilon``.
    """
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    # NOTE(review): this reads private attributes of the automaton;
    # their layout is assumed to be {state: {symbol: state}}.
    transitions = dfa._transition_function._transitions
    state_to_var: Dict[State, Variable] = {}
    productions, terms, cfg_vars = set(), set(), set()

    for state in dfa.states:
        state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
        cls.__var_state_counter += 1
    # Done once after the loop instead of on every iteration.
    cfg_vars.update(state_to_var.values())

    for start_state in dfa.start_states:
        productions.add(Production(head, [state_to_var[start_state]]))

    # Every accepting state must be able to end the derivation.  The
    # earlier code only did this for states reached through a
    # transition, so a final start state without incoming edges
    # (e.g. for "a?") lost the empty word.
    for final_state in dfa.final_states:
        final_var = state_to_var.get(final_state)
        if final_var is not None:
            productions.add(Production(final_var, []))

    for state_from in transitions:
        for edge_symb in transitions[state_from]:
            state_to = transitions[state_from][edge_symb]
            value = edge_symb.value
            if variables:
                is_variable = value in variables
            else:
                is_variable = value.isupper()

            if is_variable:
                symbol = Variable(value)
                cfg_vars.add(symbol)
            else:
                symbol = Terminal(value)
                terms.add(symbol)

            productions.add(
                Production(state_to_var[state_from],
                           [symbol, state_to_var[state_to]]))

    if not productions:
        return CFG(cfg_vars, terms, head, {Production(head, [])})
    return CFG(cfg_vars, terms, head, productions)
def test_derivation_empty(self):
    """Rightmost derivation of the empty word for ``S -> epsilon``."""
    start = Variable("S")
    grammar = CFG(productions=[Production(start, [Epsilon()])],
                  start_symbol=start)
    steps = grammar.get_cnf_parse_tree([]).get_rightmost_derivation()
    self.assertEqual([[start], []], steps)
def test_to_pda(self):
    """ Tests the conversion to PDA """
    var_e, var_i = Variable("E"), Variable("I")
    ter_a, ter_b, ter_0, ter_1 = (Terminal(char) for char in "ab01")
    ter_par_open, ter_par_close = Terminal("("), Terminal(")")
    ter_mult, ter_plus = Terminal("*"), Terminal("+")

    expression_rules = {
        Production(var_e, [var_i]),
        Production(var_e, [var_e, ter_plus, var_e]),
        Production(var_e, [var_e, ter_mult, var_e]),
        Production(var_e, [ter_par_open, var_e, ter_par_close]),
    }
    identifier_rules = {
        Production(var_i, [ter_a]),
        Production(var_i, [ter_b]),
        Production(var_i, [var_i, ter_a]),
        Production(var_i, [var_i, ter_b]),
        Production(var_i, [var_i, ter_0]),
        Production(var_i, [var_i, ter_1]),
        Production(var_i, [var_i, Epsilon()]),
    }
    all_terminals = {
        ter_a, ter_b, ter_0, ter_1,
        ter_par_open, ter_par_close, ter_mult, ter_plus,
    }
    grammar = CFG({var_e, var_i}, all_terminals, var_e,
                  expression_rules | identifier_rules)

    pda = grammar.to_pda()
    self.assertEqual(len(pda.states), 1)
    self.assertEqual(len(pda.final_states), 0)
    self.assertEqual(len(pda.input_symbols), 8)
    self.assertEqual(len(pda.stack_symbols), 10)
    self.assertEqual(pda.get_number_transitions(), 19)
def part2():
    """Print how many words of ``in.txt`` the patched grammar generates.

    ``in.txt`` holds the rules and the words, separated by one blank
    line.  The rule lines ``8: 42`` and ``11: 42 31`` are replaced by
    their recursive variants before a CFG with start variable "0" is
    built from the rules.
    """
    # Context manager so the file handle is closed deterministically.
    with open("in.txt") as handle:
        rules, words = handle.read().split("\n\n")

    # Patch whole rule lines only: a substring replace would also rewrite
    # unrelated rules whose text happens to contain e.g. "8: 42"
    # (such as "18: 42 7").
    patched_lines = {
        "8: 42": "8: 42 | 42 8",
        "11: 42 31": "11: 42 31 | 42 11 31",
    }

    variables = set()
    productions = set()
    terminals = set()
    for line in rules.split("\n"):
        line = patched_lines.get(line.strip(), line)
        left, right = line.split(":")
        head = Variable(left)
        variables.add(head)
        for expression in right.split("|"):
            if '"' in expression:
                # Terminal expression
                terminal = Terminal(expression.strip('" '))
                terminals.add(terminal)
                body = [terminal]
            else:
                body = [Variable(token)
                        for token in expression.strip().split()]
            productions.add(Production(head, body))

    cfg = CFG(variables, terminals, Variable("0"), productions)
    count = sum(1 for word in words.split("\n") if cfg.contains(word))
    print(count)
def test_to_text_cnf(self):
    """Round-trip a normal-form grammar through its text form."""
    word = ["a", "b"]
    normal_form = CFG.from_text("S -> a S b | a b").to_normal_form()
    self.assertTrue(normal_form.contains(word))

    new_text = normal_form.to_text()
    print(new_text)
    reparsed = CFG.from_text(new_text)
    self.assertTrue(reparsed.contains(word))
def test_derivation_does_not_exist(self):
    """A grammar without productions must not yield a derivation."""
    start = Variable("S")
    word = [Terminal("a"), Terminal("b")]
    grammar = CFG(productions=[], start_symbol=start)
    with self.assertRaises(DerivationDoesNotExist):
        grammar.get_cnf_parse_tree(word).get_rightmost_derivation()
def cfpq_matrix_product(graph: Graph, grammar: CFG):
    """Evaluate a context-free path query by repeated matrix products.

    One boolean matrix is kept per grammar variable.  Matrices are seeded
    from the graph's label matrices through the single-symbol productions
    of the normal form, then updated with ``head += left @ right`` for
    the remaining productions until no matrix gains entries.

    Parameters
    ----------
    graph : Graph
        Provides ``vertices_count`` and ``label_matrices``
        (presumably label -> adjacency matrix; verify against Graph).
    grammar : CFG
        Query grammar; it is converted to normal form internally.

    Returns
    -------
    ``False`` when the graph has no vertices; otherwise the matrix stored
    for the start symbol, or an empty sparse matrix if there is none.
    """
    size = graph.vertices_count
    if size == 0:
        return False

    reachable = dict()
    if grammar.generate_epsilon():
        # The empty word relates every vertex to itself.
        diagonal = Matrix.sparse(BOOL, size, size)
        diagonal += Matrix.identity(BOOL, size)
        reachable[grammar.start_symbol] = diagonal

    cfg = grammar.to_normal_form()
    single_rules, pair_rules = set(), set()
    for rule in cfg.productions:
        if len(rule.body) == 1:
            single_rules.add(rule)
        else:
            pair_rules.add(rule)

    # Seed the matrices from the edges of the graph.
    for label, adjacency in graph.label_matrices.items():
        for rule in single_rules:
            if rule.body == [Terminal(label)]:
                if rule.head in reachable:
                    reachable[rule.head] += adjacency.dup()
                else:
                    reachable[rule.head] = adjacency.dup()

    changed = cfg.variables
    while len(changed) > 0:
        previous = changed
        changed = set()
        for rule in pair_rules:
            if rule.body[0] not in reachable or rule.body[1] not in reachable:
                continue
            # Recompute only when one of the operands changed last round.
            if not (rule.body[0] in previous or rule.body[1] in previous):
                continue
            current = reachable.get(rule.head, Matrix.sparse(BOOL, size, size))
            before = current.nvals
            product = reachable[rule.body[0]] @ reachable[rule.body[1]]
            reachable[rule.head] = current + product
            if reachable[rule.head].nvals != before:
                changed.add(rule.head)

    return reachable.get(cfg.start_symbol, Matrix.sparse(BOOL, size, size))
def cfpq_matrix_multiplication(grammar: CFG, graph: BMGraph):
    """Evaluate a context-free path query with matrix multiplication.

    A boolean matrix is maintained for each grammar variable.  The
    matrices are initialised from ``graph.matrices`` via the
    single-symbol productions of the normal form and then grown with
    ``head += left @ right`` under the ``LOR_LAND_BOOL`` semiring until a
    round adds no new entries.

    Parameters
    ----------
    grammar : CFG
        Query grammar; converted to normal form internally.
    graph : BMGraph
        Provides ``states_amount`` and ``matrices``
        (presumably label -> boolean matrix; verify against BMGraph).

    Returns
    -------
    The matrix stored for the start symbol, or an empty sparse matrix if
    none was produced.
    """
    order = graph.states_amount
    by_variable = dict()

    if grammar.generate_epsilon():
        # The empty word relates every state to itself.
        loops = Matrix.sparse(BOOL, order, order)
        for index in range(order):
            loops[index, index] = True
        by_variable[grammar.start_symbol] = loops

    cfg = grammar.to_normal_form()
    unit_prods, binary_prods = set(), set()
    for prod in cfg.productions:
        target = unit_prods if len(prod.body) == 1 else binary_prods
        target.add(prod)

    with semiring.LOR_LAND_BOOL:
        for label, edges in graph.matrices.items():
            for prod in unit_prods:
                if prod.body == [Terminal(label)]:
                    if prod.head in by_variable:
                        by_variable[prod.head] += edges.dup()
                    else:
                        by_variable[prod.head] = edges.dup()

    with semiring.LOR_LAND_BOOL:
        updated = cfg.variables
        while len(updated) > 0:
            last_round = updated
            updated = set()
            for prod in binary_prods:
                if (prod.body[0] not in by_variable
                        or prod.body[1] not in by_variable):
                    continue
                # Skip productions whose operands did not change.
                if not (prod.body[0] in last_round
                        or prod.body[1] in last_round):
                    continue
                current = by_variable.get(
                    prod.head, Matrix.sparse(BOOL, order, order))
                nvals_before = current.nvals
                by_variable[prod.head] = current + \
                    (by_variable[prod.body[0]] @ by_variable[prod.body[1]])
                if by_variable[prod.head].nvals != nvals_before:
                    updated.add(prod.head)

    return by_variable.get(cfg.start_symbol,
                           Matrix.sparse(BOOL, order, order))
def test_emptiness(self):
    """ Tests the emptiness of a CFG """
    # pylint: disable=too-many-locals
    start = Variable("S")
    left, right = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [left, start, right]),
        Production(start, []),
    }
    grammar = CFG({start}, {left, right}, start, rules)
    self.assertFalse(grammar.is_empty())
def test_union(self):
    """ Tests the union of two cfg """
    start = Variable("S")
    left, right = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [left, start, right]),
        Production(start, []),
    }
    grammar = CFG({start}, {left, right}, start, rules)

    united = grammar.union(grammar)
    self.assertEqual(len(united.variables), 3)
    self.assertEqual(len(united.terminals), 2)
    self.assertEqual(len(united.productions), 6)
    self.assertFalse(united.is_empty())
    self.assertTrue(united.contains([left, left, right, right]))
def test_reverse(self):
    """ Test the reversal of a CFG """
    start = Variable("S")
    left, right = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [left, start, right]),
        Production(start, []),
    }
    grammar = CFG({start}, {left, right}, start, rules)

    mirrored = grammar.reverse()
    self.assertEqual(len(mirrored.variables), 1)
    self.assertEqual(len(mirrored.terminals), 2)
    self.assertEqual(len(mirrored.productions), 2)
    self.assertFalse(mirrored.is_empty())
    self.assertTrue(mirrored.contains([right, right, left, left]))
def _test_profiling_conversions():
    """ Tests multiple conversions """
    ter_a, ter_b, ter_c = Terminal("a"), Terminal("b"), Terminal("c")
    start = Variable("S")
    rules = {
        Production(start, [ter_a, start, ter_b]),
        Production(start, [ter_c]),
    }
    grammar = CFG(productions=rules, start_symbol=start)
    # Three CFG -> PDA -> CFG round trips, each starting from the result
    # of the previous one.
    for _ in range(3):
        grammar = grammar.to_pda().to_final_state().to_empty_stack().to_cfg()
def test_nullable_object(self):
    """ Tests the finding of nullable objects """
    var_a, var_b, start = Variable("A"), Variable("B"), Variable("S")
    ter_a, ter_b = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [var_a, var_b]),
        Production(var_a, [ter_a, var_a, var_a]),
        Production(var_a, [Epsilon()]),
        Production(var_b, [ter_b, var_b, var_b]),
        Production(var_b, [Epsilon()]),
    }
    grammar = CFG({var_a, var_b, start}, {ter_a, ter_b}, start, rules)
    self.assertEqual(grammar.get_nullable_symbols(), {var_a, var_b, start})
def test_substitution(self):
    """ Tests substitutions in a CFG """
    start = Variable("S")
    left, right = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [left, start, right]),
        Production(start, []),
    }
    grammar = CFG({start}, {left, right}, start, rules)

    # Replace every "a" by the grammar itself.
    substituted = grammar.substitute({left: grammar})
    self.assertEqual(len(substituted.variables), 2)
    self.assertEqual(len(substituted.terminals), 2)
    self.assertEqual(len(substituted.productions), 4)
    self.assertFalse(substituted.is_empty())
    expected_word = [left, right, left, right, right, right]
    self.assertTrue(substituted.contains(expected_word))
def __init__(self, start_symbol=None, productions=None):
    """Initialise the grammar with the normal form of the given rules.

    The productions are normalised through a temporary ``CFG``.  If that
    grammar generates the empty word, ``start -> epsilon`` is added back
    to the normalised productions.  Productions with exactly two body
    symbols are also collected in ``self.pair_productions``.
    """
    source = CFG(start_symbol=start_symbol, productions=productions)
    normal = source.to_normal_form()
    # needed for language preservation
    if source.generate_epsilon():
        normal._productions.add(Production(normal._start_symbol, []))

    self.pair_productions = {
        production
        for production in normal._productions
        if len(production.body) == 2
    }
    super(GrammarCNF, self).__init__(start_symbol=normal._start_symbol,
                                     productions=normal._productions)
def test_get_llone_table(self):
    """Check how many productions selected LL(1) table cells contain."""
    # Example from:
    # https://www.geeksforgeeks.org/construction-of-ll1-parsing-table/
    text = """
        E -> T E’
        E’ -> + T E’ | Є
        T -> F T’
        T’ -> * F T’ | Є
        F -> ( E ) | id
    """
    cfg = CFG.from_text(text, start_symbol="E")
    llone_parser = LLOneParser(cfg)
    parsing_table = llone_parser.get_llone_parsing_table()

    def entries(variable, terminal):
        # Number of productions stored for (variable, terminal);
        # a missing row or cell counts as zero.
        row = parsing_table.get(Variable(variable), dict())
        return len(row.get(Terminal(terminal), []))

    self.assertEqual(entries("E", "id"), 1)
    self.assertEqual(entries("E", "+"), 0)
    self.assertEqual(entries("T’", ")"), 1)
    self.assertEqual(entries("F", "("), 1)
    self.assertEqual(entries("F", "id"), 1)
def cfg_to_text(cfg: CFG) -> str:
    """Return the text representation of a context-free grammar [1]_.

    Parameters
    ----------
    cfg : CFG
        Context-free grammar.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> cfpq_data.cfg_to_text(cfg)
    'S -> a S b S\\n'

    Returns
    -------
    text : str
        Context-free grammar text representation.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    text = cfg.to_text()
    return text
def cfg_from_text(source: str, start_symbol: Variable = Variable("S")) -> CFG:
    """Parse a context-free grammar [1]_ from its text form.

    Parameters
    ----------
    source : str
        The text with which the context-free grammar will be created.
    start_symbol : Variable
        Start symbol of a context-free grammar.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> cfpq_data.cfg_to_text(cfg)
    'S -> a S b S\\n'

    Returns
    -------
    cfg : CFG
        Context-free grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    grammar = CFG.from_text(source, start_symbol)
    return grammar
def from_cfg(cls, cfg: CFG):
    """
    Build RSA from a given cfpq_data context-free grammar

    The text form of the grammar is regrouped so that every nonterminal
    has a single line listing all its alternatives (an empty body is
    written as "epsilon"); that text is handed to ``rsm_from_text``.

    @param cfg: CFG on which RSA is built
    @return: initialized class
    """
    alternatives_by_head = dict()
    # The text is expected to end with a newline, hence the dropped
    # last (empty) element.
    for rule_text in cfg.to_text().split("\n")[:-1]:
        pieces = rule_text.split(" -> ")
        head, body = pieces[0], pieces[1]
        alternatives_by_head.setdefault(head, []).append(body or "epsilon")

    grammar_lines = [
        head + " -> " + " | ".join(bodies)
        for head, bodies in alternatives_by_head.items()
    ]
    return RecursiveAutomaton.from_rsm(rsm_from_text("\n".join(grammar_lines)))
def change_terminals_in_cfg(cfg: CFG, spec: Dict[str, str]) -> CFG:
    """Change terminals of a context-free grammar [1]_.

    The replacement is textual: every occurrence of a key of ``spec`` in
    ``cfg.to_text()`` is replaced by its value and the result is parsed
    again.  A key that also occurs inside another symbol's name is
    therefore replaced there too.  When several keys match at the same
    position, the longest one wins.

    Parameters
    ----------
    cfg : CFG
        Context-free grammar.
    spec: Dict
        Terminals mapping.  An empty mapping leaves the grammar text
        unchanged.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> new_cfg = cfpq_data.change_terminals_in_cfg(cfg, {"a": "b", "b": "c"})
    >>> new_cfg.to_text()
    'S -> b S c S\\n'

    Returns
    -------
    cfg : CFG
        Context-free grammar with changed terminals and the same start
        symbol as the input grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    text = cfg.to_text()
    if spec:
        # An empty alternation would match the empty string everywhere
        # and fail on spec[""], hence the guard.  Longest keys come first
        # so that a key is never shadowed by one of its own prefixes.
        keys = sorted(spec, key=len, reverse=True)
        regex = re.compile("|".join(map(re.escape, keys)))
        text = regex.sub(lambda match: spec[match.group(0)], text)

    # Keep the start symbol of the input grammar instead of silently
    # falling back to the default one.
    start_symbol = cfg.start_symbol
    if start_symbol is None:
        return cfg_from_text(text)
    return cfg_from_text(text, start_symbol)
def test_get_follow_set(self):
    """Check the FOLLOW sets computed for an expression grammar."""
    # Example from:
    # https://www.geeksforgeeks.org/follow-set-in-syntax-analysis/
    text = """
        E -> T E’
        E’ -> + T E’ | Є
        T -> F T’
        T’ -> * F T’ | Є
        F -> ( E ) | id
    """
    cfg = CFG.from_text(text, start_symbol="E")
    follow_set = LLOneParser(cfg).get_follow_set()

    close, plus, times = Terminal(")"), Terminal("+"), Terminal("*")
    expected = [
        ("E", {"$", close}),
        ("E’", {"$", close}),
        ("T", {"$", plus, close}),
        ("T’", {"$", plus, close}),
        ("F", {"$", plus, times, close}),
    ]
    for name, symbols in expected:
        self.assertEqual(follow_set[Variable(name)], symbols)
def test_get_first_set2(self):
    """Check the FIRST sets of a grammar with nullable variables."""
    # Example from:
    # https://www.geeksforgeeks.org/first-set-in-syntax-analysis/
    text = """
        S -> A C B | C b b | B a
        A -> d a | B C
        B -> g | Є
        C -> h | Є
    """
    cfg = CFG.from_text(text)
    first_set = LLOneParser(cfg).get_first_set()

    # Variable name -> terminals expected in its FIRST set; every
    # variable of this grammar is additionally expected to have epsilon.
    expected = [
        ("S", ["d", "g", "h", "b", "a"]),
        ("A", ["d", "g", "h"]),
        ("B", ["g"]),
        ("C", ["h"]),
    ]
    for name, letters in expected:
        wanted = {Terminal(letter) for letter in letters} | {Epsilon()}
        self.assertEqual(first_set[Variable(name)], wanted)
def rsm_from_cnf(cnf: CFG) -> RSM:
    """Build a Recursive State Machine [2]_ from a context-free grammar
    in Chomsky normal form [1]_.

    The machine is created from the text representation of ``cnf`` and
    keeps ``cnf``'s start symbol.

    Parameters
    ----------
    cnf : CFG
        Context-free grammar in Chomsky normal form.

    Examples
    --------
    >>> import cfpq_data
    >>> cnf = cfpq_data.cnf_from_text("S -> a S b S | epsilon")
    >>> rsm = cfpq_data.rsm_from_cnf(cnf)
    >>> [rsm.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    rsm : RSM
        Recursive State Machine.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] Alur R., Etessami K., Yannakakis M. (2001) Analysis of Recursive State Machines. In: Berry G., Comon H., Finkel A. (eds) Computer Aided Verification. CAV 2001. Lecture Notes in Computer Science, vol 2102. Springer, Berlin, Heidelberg. https://doi.org/10.1007/3-540-44585-4_18
    """
    cnf_text = cnf.to_text()
    start = cnf.start_symbol
    return rsm_from_text(cnf_text, start)
def read_grammar(cls, name):
    """Read a grammar from a text file, one production per line.

    Each non-blank line is split on whitespace: the first token is the
    production head, the remaining tokens form the body.  A body token is
    a variable when ``str.isupper()`` holds for it and a terminal
    otherwise.  The head of the first production becomes the start
    symbol.  Blank lines are skipped.

    Parameters
    ----------
    name : str
        Path of the grammar file.

    Returns
    -------
    CFG
        The grammar described by the file (start symbol ``None`` if the
        file has no production).
    """
    terminals, variables, productions = set(), set(), set()
    start_symb = None
    with open(name, 'r') as file:
        for production_txt in file:
            tokens = production_txt.split()
            if not tokens:
                # A blank (e.g. trailing) line used to crash the
                # unpacking below with a ValueError.
                continue
            head, *body = tokens
            head_var = Variable(head)
            variables.add(head_var)
            if start_symb is None:
                start_symb = head_var

            body_cfg = []
            for letter in body:
                if letter.isupper():
                    symbol = Variable(letter)
                    variables.add(symbol)
                else:
                    symbol = Terminal(letter)
                    terminals.add(symbol)
                body_cfg.append(symbol)
            productions.add(Production(head_var, body_cfg))

    return CFG(variables, terminals, start_symb, productions)
def read_grammar_with_regex(cls, name):
    """Read a grammar whose production bodies are regular expressions.

    Each non-blank line is split on whitespace: the first token is the
    production head and the rest, joined with single spaces, is the regex
    of its body.  Every line is expanded by
    ``CFGrammar.read_production_regex``, which also threads a running id
    from one line to the next.  The head of the first production becomes
    the start symbol.  Blank lines are skipped.

    Parameters
    ----------
    name : str
        Path of the grammar file.

    Returns
    -------
    CFG
        The grammar assembled from all expanded productions.
    """
    next_id = 0  # renamed from ``id`` so the builtin is not shadowed
    terminals, variables, productions = set(), set(), set()
    start_symb = None
    with open(name, 'r') as file:
        for production_txt in file:
            tokens = production_txt.split()
            if not tokens:
                # A blank (e.g. trailing) line used to crash with an
                # IndexError on the head lookup.
                continue
            head = Variable(tokens[0])
            body = ' '.join(tokens[1:])
            if start_symb is None:
                start_symb = head

            (new_productions, new_variables, new_terminals,
             next_id) = CFGrammar.read_production_regex(
                 head, Regex(body), next_id)
            productions |= new_productions
            variables |= new_variables
            terminals |= new_terminals

    return CFG(variables, terminals, start_symb, productions)