def __add_single_movement_restore_configs(self, lba: TuringMachine):
    """
    Add to the Context Sensitive Grammar the productions that collapse an
    accepting configuration of the Linear Bounded Automaton on a
    one-character tape $t# into the terminal t

    For every accepting state q, every t in lba.sigma and every g in
    lba.gamma the following productions are appended, in this order:
        [q,$,g,t,#] -> t
        [$,q,g,t,#] -> t
        [$,g,t,q,#] -> t
    $ --- left end marker of lba, # --- right end marker of lba

    :param lba: The Linear Bounded Automaton from which the Context Sensitive Grammar is built
    :return: None
    """
    for final_state in lba.accept_states:
        for letter in lba.sigma:
            for tape_symbol in lba.gamma:
                # The three heads differ only in where the accepting state
                # appears inside the bracketed variable name.
                head_names = (
                    f'[{final_state},$,{tape_symbol},{letter},#]',
                    f'[$,{final_state},{tape_symbol},{letter},#]',
                    f'[$,{tape_symbol},{letter},{final_state},#]',
                )
                for head_name in head_names:
                    self.productions.append(
                        Production((cfg.Variable(head_name), ),
                                   (cfg.Terminal(f'{letter}'), )))
def _create_cfg_from_regex(cls, head, regex, track_variables):
    """
    Build a right-linear CFG with start symbol ``head`` from ``regex``

    The regex is converted to a minimized DFA. Every DFA state gets a fresh
    variable named '<state>:<counter>' (the class-level counter is advanced
    for each state). The productions are:
        head -> V(start)                 for every start state
        V(from) -> symbol V(to)          for every transition
        V(final) -> (empty body)         for every final state that is a
                                         start state or a transition target

    A transition symbol becomes a cfg.Variable when its value contains an
    uppercase letter and ``track_variables`` is falsy; otherwise it becomes
    a cfg.Terminal unless its value is 'eps', in which case the body holds
    only the target state variable.

    :param head: Start symbol of the resulting grammar
    :param regex: Object providing to_epsilon_nfa()
    :param track_variables: When truthy, uppercase symbols are NOT turned into variables
    :return: cfg.CFG built from the automaton; if no production was
             produced at all, a grammar with the single production
             head -> (empty body)
    """
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    # NOTE: relies on private attributes of the automaton's transition function.
    transitions = dfa._transition_function._transitions
    productions, terminals, variables = set(), set(), set()

    state_to_var = {}
    for state in dfa.states:
        state_to_var[state] = cfg.Variable(f'{state}:{cls.__var_state_counter}')
        cls.__var_state_counter += 1
    variables.update(state_to_var.values())

    for start_state in dfa.start_states:
        start_var = state_to_var[start_state]
        productions.add(cfg.Production(head, [start_var]))
        # A final start state may have no incoming transition, so the loop
        # over transitions below would never give it its empty production
        # and the grammar would lose the empty word (e.g. for "a?").
        if start_state in dfa.final_states:
            productions.add(cfg.Production(start_var, []))

    for state_from, outgoing in transitions.items():
        current_prod_head = state_to_var[state_from]
        for edge_symb, state_to in outgoing.items():
            current_prod_body = []
            if any(letter.isupper() for letter in edge_symb.value) and not track_variables:
                var = cfg.Variable(edge_symb.value)
                variables.add(var)
                current_prod_body.append(var)
            elif edge_symb.value != 'eps':
                term = cfg.Terminal(edge_symb.value)
                terminals.add(term)
                current_prod_body.append(term)
            current_prod_body.append(state_to_var[state_to])
            productions.add(cfg.Production(current_prod_head, current_prod_body))
            if state_to in dfa.final_states:
                productions.add(cfg.Production(state_to_var[state_to], []))

    if not productions:
        return cfg.CFG(variables, terminals, head, {cfg.Production(head, [])})
    return cfg.CFG(variables, terminals, head, productions)
def from_lba(cls, lba: TuringMachine):
    """
    Construct a Context Sensitive Grammar from a Linear Bounded Automaton

    The grammar starts from the variable 'S1' and its terminals are the
    symbols of lba.sigma. Productions are added first by the "single"
    helpers, then by the "general" helpers; afterwards the grammar is
    optimized until a full optimization round no longer changes the number
    of productions.

    :param lba: The Linear Bounded Automaton from which the Context Sensitive Grammar is built
    :return: The Context Sensitive Grammar built from lba
    """
    start_name = 'S1'
    grammar = ContextSensitiveGrammar()
    grammar.terminals = {cfg.Terminal(symbol) for symbol in lba.sigma}
    grammar.start_symbol = cfg.Variable(start_name)

    # Productions added by the "single" helpers.
    grammar.__add_initial_configs_single(lba, start_name)
    grammar.__add_single_movement_configs(lba)
    grammar.__add_single_movement_restore_configs(lba)

    # Productions added by the "general" helpers.
    grammar.__add_initial_configs_general(lba, start_name, 'S2')
    for add_productions in (
            grammar.__add_general_movement_configs_left,
            grammar.__add_general_movement_configs_center,
            grammar.__add_general_movement_configs_right,
            grammar.__add_general_movement_restore_configs,
            grammar.__add_general_movement_restore_word,
    ):
        add_productions(lba)

    grammar = grammar.nonterminals_optimization()
    # Repeat the optimization round until the production count is stable.
    size_before_round = None
    while size_before_round != len(grammar.productions):
        size_before_round = len(grammar.productions)
        grammar = grammar.deep_optimization(max_cnt=5)
        grammar = grammar.substitutions_optimization()
    return grammar
def from_turing_machine(cls, turing_machine: TuringMachine):
    """
    Construct an Unrestricted Grammar from a Turing Machine

    Productions are added for the initial configurations (using the
    variables 'S1' and 'S2'), for the movements and for restoring the word.
    The terminals are the symbols of turing_machine.sigma and the start
    symbol is 'S1'. The grammar is then optimized until a full optimization
    round no longer changes the number of productions.

    :param turing_machine: The Turing Machine from which the Unrestricted Grammar is built
    :return: The Unrestricted Grammar built from turing_machine
    """
    start_name = 'S1'
    grammar = UnrestrictedGrammar()

    grammar.__add_initial_configs(turing_machine, start_name, 'S2')
    for add_productions in (
            grammar.__add_movement_configs,
            grammar.__add_restore_configs,
    ):
        add_productions(turing_machine)

    grammar.terminals = {
        cfg.Terminal(symbol) for symbol in turing_machine.sigma
    }
    grammar.start_symbol = cfg.Variable(start_name)

    grammar = grammar.nonterminals_optimization()
    # Repeat the optimization round until the production count is stable.
    size_before_round = None
    while size_before_round != len(grammar.productions):
        size_before_round = len(grammar.productions)
        grammar = grammar.deep_optimization(max_cnt=4)
        grammar = grammar.substitutions_optimization()
    return grammar
def from_txt(cls, path: pathlib.Path):
    """
    Loads an instance of a Grammar from a txt file

    Expected layout of the file:
        start_symbol: <symbol>
        nonterminals: <symbols separated by whitespace>
        terminals: <symbols separated by whitespace>
        <head symbols> -> <body symbols>      (one production per line)

    A symbol of a production is read as a terminal when it was listed on
    the 'terminals:' line and as a variable otherwise. Blank lines in the
    production section are ignored, and an empty 'nonterminals:' or
    'terminals:' list yields an empty set.

    :param path: Path to a txt file
    :return: Grammar instance
    :raises ValueError: if a non-blank production line does not contain
                        exactly one ' -> ' separator
    """
    grammar = Grammar()

    def field_value(raw_line, label):
        # Drop the leading '<label>:' if present. Stripping first and then
        # removing '<label>: ' (with the space) would leave the label in
        # place whenever the list after it is empty.
        text = raw_line.strip()
        prefix = f'{label}:'
        if text.startswith(prefix):
            text = text[len(prefix):]
        return text.strip()

    def to_symbols(text):
        # Classify each whitespace-separated token against the declared terminals.
        return tuple(
            cfg.Terminal(token)
            if cfg.Terminal(token) in grammar.terminals else cfg.Variable(token)
            for token in text.split())

    with open(path, 'r') as input_file:
        grammar.start_symbol = cfg.Variable(
            field_value(input_file.readline(), 'start_symbol'))
        grammar.nonterminals = {
            cfg.Variable(name)
            for name in field_value(input_file.readline(), 'nonterminals').split()
        }
        grammar.terminals = {
            cfg.Terminal(name)
            for name in field_value(input_file.readline(), 'terminals').split()
        }
        for production_line in input_file:
            if not production_line.strip():
                # e.g. a trailing empty line at the end of the file
                continue
            head, body = production_line.split(' -> ')
            grammar.productions.append(
                Production(to_symbols(head), to_symbols(body)))
    return grammar
def _read_line(cls, line, productions, terminals, variables):
    """
    Parse one production line and record it in the given sets

    The text before the first space is the head (always a variable); the
    rest is split on whitespace into body symbols. A body symbol whose
    first character is an ASCII uppercase letter is a variable, any other
    symbol is a terminal. A line without a space yields an empty body.

    :param line: Text of the form '<head> <symbol> <symbol> ...'
    :param productions: Set that receives the parsed cfg.Production
    :param terminals: Set that receives every terminal seen in the body
    :param variables: Set that receives the head and every body variable
    :return: None
    """
    head_text, _, body_text = line.partition(' ')

    head = cfg.Variable(head_text.strip())
    variables.add(head)

    body = []
    for token in body_text.split():
        if token[0] in string.ascii_uppercase:
            symbol, registry = cfg.Variable(token), variables
        else:
            symbol, registry = cfg.Terminal(token), terminals
        registry.add(symbol)
        body.append(symbol)

    productions.add(cfg.Production(head, body))
def accepts(
    self, word: str
) -> Tuple[List[Production], List[Tuple[Union[cfg.Variable, cfg.Terminal],
                                        ...]]]:
    """
    Search for a derivation of the given word from the start symbol

    Sentences are explored from a queue; every production is tried at every
    position where its head matches, and each sentence is expanded at most
    once.

    :param word: Sequence of symbols; every element is wrapped in cfg.Terminal
    :return: Tuple(used productions, sentences) if the grammar generates the
             given word, else an empty tuple. 'sentences' lists the
             derivation from the start sentence to the word.
    """
    word = tuple(cfg.Terminal(x) for x in word)
    # used[s]: productions applied, in order, to derive sentence s
    used: Dict[Tuple[Union[cfg.Variable, cfg.Terminal], ...],
               List[Production]] = dict()
    # parent[s]: the sentence from which s was derived in one step
    parent: Dict[Tuple[Union[cfg.Variable, cfg.Terminal], ...],
                 Tuple[Union[cfg.Variable, cfg.Terminal], ...]] = dict()
    # The search starts from the one-symbol sentence holding the start symbol.
    queue: Deque[Tuple[Union[cfg.Variable, cfg.Terminal], ...]] = \
        deque([(cfg.Variable(self.start_symbol),)])
    while len(queue) != 0:
        sentence: Tuple[Union[cfg.Variable, cfg.Terminal],
                        ...] = queue.popleft()
        if sentence not in used:
            # Only the start sentence gets here without an entry.
            used[sentence] = list()
        if all(isinstance(x, cfg.Terminal) for x in sentence):
            if sentence == word:
                # Walk the parent links back to the start sentence, then
                # reverse so the trace reads from start to word.
                trace = list()
                prev = word
                while prev in parent:
                    trace.append(prev)
                    prev = parent[prev]
                trace.append(prev)
                trace.reverse()
                return used[word], trace
            # NOTE(review): gives up on the whole search as soon as one
            # sentence longer than the word is dequeued; this presumably
            # relies on the queue ordering below -- confirm the intended
            # nesting of this check and that it cannot reject too early.
            if len(sentence) > len(word):
                return tuple()
        for production in self.productions:
            # Try the production at every offset where its head could fit.
            for i in range(len(sentence) - len(production.head) + 1):
                if production.head == sentence[i:i + len(production.head)]:
                    new_sentence: Tuple[Union[cfg.Variable, cfg.Terminal], ...] = \
                        sentence[:i] + production.body + sentence[i + len(production.head):]
                    if new_sentence not in used:
                        used[new_sentence] = used[sentence].copy() + [
                            production
                        ]
                        parent[new_sentence] = sentence
                        # Sentences that already contain a terminal go to
                        # the front of the queue, the others to the back.
                        if any(
                                isinstance(x, cfg.Terminal)
                                for x in new_sentence):
                            queue.appendleft(new_sentence)
                        else:
                            queue.append(new_sentence)
        # Re-order the queue so that sentences with fewer variables are
        # dequeued first; sorted() is stable, so the front/back placement
        # above only decides ties.
        queue = deque(
            sorted(queue,
                   key=lambda y: sum(1 for x in y
                                     if isinstance(x, cfg.Variable))))
    return tuple()
def _prepend_input_symbol_to_the_bodies(bodies, transition):
    """
    Put the input symbol of the transition, as a terminal, in front of every body

    Each element of ``bodies`` is modified in place through ``insert(0, ...)``;
    the same cfg.Terminal object is inserted into all of them.

    :param bodies: Iterable of mutable sequences (production bodies)
    :param transition: Indexable object; the symbol is read from transition[INPUT][INPUT_SYMBOL]
    :return: None
    """
    input_symbol = transition[INPUT][INPUT_SYMBOL]
    leading_terminal = cfg.Terminal(input_symbol.value)
    for production_body in bodies:
        production_body.insert(0, leading_terminal)
def __add_general_movement_restore_word(self, lba: TuringMachine):
    """
    Add to the Context Sensitive Grammar the productions that restore the
    word accepted by the Linear Bounded Automaton, one cell at a time, next
    to an already restored terminal

    For all t, l in lba.sigma and all g in lba.gamma the following
    productions are appended, in this order:
        l [g,t]   -> l t
        l [g,t,#] -> l t
        [g,l] t   -> l t
        [$,g,l] t -> l t
    $ --- left end marker of lba, # --- right end marker of lba

    :param lba: The Linear Bounded Automaton from which the Context Sensitive Grammar is built
    :return: None
    """
    triples = itertools.product(lba.gamma, lba.sigma, lba.sigma)
    for tape_symbol, left_letter, right_letter in triples:
        heads = (
            # restored terminal on the left, cell to restore on the right
            (cfg.Terminal(f'{left_letter}'),
             cfg.Variable(f'[{tape_symbol},{right_letter}]')),
            (cfg.Terminal(f'{left_letter}'),
             cfg.Variable(f'[{tape_symbol},{right_letter},#]')),
            # cell to restore on the left, restored terminal on the right
            (cfg.Variable(f'[{tape_symbol},{left_letter}]'),
             cfg.Terminal(f'{right_letter}')),
            (cfg.Variable(f'[$,{tape_symbol},{left_letter}]'),
             cfg.Terminal(f'{right_letter}')),
        )
        for head in heads:
            # Every production rewrites its head to the same pair "l t".
            self.productions.append(
                Production(head, (
                    cfg.Terminal(f'{left_letter}'),
                    cfg.Terminal(f'{right_letter}'),
                )))
def __add_restore_configs(self, turing_machine: TuringMachine):
    """
    Add to the Unrestricted Grammar the productions that restore the word
    accepted by the Turing Machine

    For every accepting state q the following productions are appended:
        [s,l] q -> q s q   and   q [s,l] -> q s q
            for all s in turing_machine.sigma plus the empty string,
            for all l in turing_machine.gamma
        [,l] q -> q        and   q [,l] -> q
            for all l in turing_machine.gamma
        q s -> s           and   s q -> s
            for all s in turing_machine.sigma

    :param turing_machine: The Turing Machine from which the Unrestricted Grammar is built
    :return: None
    """
    for final_state in turing_machine.accept_states:
        # NOTE(review): the empty string is part of this iteration, so
        # productions with cfg.Terminal('') in the body are generated for
        # the '[,l]' variables in addition to the erasing ones below --
        # confirm this is intended.
        pairs = itertools.product(turing_machine.sigma | {''},
                                  turing_machine.gamma)
        for letter, tape_symbol in pairs:
            cell_name = f'[{letter},{tape_symbol}]'
            heads = (
                (cfg.Variable(cell_name), cfg.Variable(final_state)),
                (cfg.Variable(final_state), cfg.Variable(cell_name)),
            )
            for head in heads:
                self.productions.append(
                    Production(head, (cfg.Variable(final_state),
                                      cfg.Terminal(letter),
                                      cfg.Variable(final_state))))

        # Cells whose first component is empty are absorbed by the state.
        for tape_symbol in turing_machine.gamma:
            empty_cell_name = f'[,{tape_symbol}]'
            heads = (
                (cfg.Variable(empty_cell_name), cfg.Variable(final_state)),
                (cfg.Variable(final_state), cfg.Variable(empty_cell_name)),
            )
            for head in heads:
                self.productions.append(
                    Production(head, (cfg.Variable(final_state), )))

        # The accepting state disappears next to a terminal.
        for letter in turing_machine.sigma:
            heads = (
                (cfg.Variable(final_state), cfg.Terminal(letter)),
                (cfg.Terminal(letter), cfg.Variable(final_state)),
            )
            for head in heads:
                self.productions.append(
                    Production(head, (cfg.Terminal(letter), )))