def _create_cfg_from_regex(cls, head, regex, track_variables):
    """Build a right-linear CFG rooted at ``head`` from ``regex``.

    The regex is turned into a minimized DFA. Every DFA state receives a
    fresh variable named ``<state>:<counter>`` (the counter is class-level
    and incremented once per state), and every transition ``p --x--> q``
    becomes the production ``P -> x Q``. Each final state additionally gets
    an empty-body production.

    Args:
        head: Start symbol of the resulting grammar; also the head of the
            productions that lead to the DFA start states.
        regex: Object exposing ``to_epsilon_nfa()`` (presumably a
            pyformlang ``Regex`` -- TODO confirm against callers).
        track_variables: When falsy, an edge symbol containing at least one
            uppercase letter is emitted as a ``cfg.Variable``. Otherwise
            (or when the symbol has no uppercase letter) it is emitted as a
            ``cfg.Terminal``, unless its value is ``'eps'``.

    Returns:
        A ``cfg.CFG`` with start symbol ``head``. If no production at all
        was generated, the grammar consists of the single production
        ``head -> <empty body>``.
    """
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    transitions = dfa._transition_function._transitions
    productions, terminals, variables = set(), set(), set()

    # One fresh, globally unique variable per DFA state.
    state_to_var = {}
    for state in dfa.states:
        state_to_var[state] = cfg.Variable(
            f'{state}:{cls.__var_state_counter}')
        cls.__var_state_counter += 1
    variables.update(state_to_var.values())

    for start_state in dfa.start_states:
        productions.add(cfg.Production(head, [state_to_var[start_state]]))

    for state_from, edges in transitions.items():
        for edge_symb, state_to in edges.items():
            body = []
            label = edge_symb.value
            if any(letter.isupper() for letter in label) \
                    and not track_variables:
                var = cfg.Variable(label)
                variables.add(var)
                body.append(var)
            elif label != 'eps':
                term = cfg.Terminal(label)
                terminals.add(term)
                body.append(term)
            # An 'eps' edge contributes no symbol, leaving the unit
            # production ``P -> Q``.
            body.append(state_to_var[state_to])
            productions.add(cfg.Production(state_to_var[state_from], body))

    # Emit the accepting (empty-body) production for EVERY final state, not
    # only for those reached by some transition: a final start state without
    # incoming edges would otherwise never be able to derive the empty word.
    final_states = dfa.final_states
    for state, state_var in state_to_var.items():
        if state in final_states:
            productions.add(cfg.Production(state_var, []))

    if not productions:
        return cfg.CFG(variables, terminals, head,
                       {cfg.Production(head, [])})
    return cfg.CFG(variables, terminals, head, productions)
def to_normal_form(self):
    """Return the parent class's normal form wrapped as a ``custom_CFG``.

    When ``self.is_eps_reachable`` is truthy, an empty-body production for
    this grammar's start symbol is added to the parent's result before it
    is converted with ``custom_CFG.from_CFG``.
    """
    normal_form = super().to_normal_form()
    if not self.is_eps_reachable:
        return custom_CFG.from_CFG(normal_form)

    # Re-attach ``start -> <empty body>`` to the grammar returned by the
    # parent implementation.
    normal_form.productions.add(cfg.Production(self.start_symbol, []))
    return custom_CFG.from_CFG(normal_form)
def _initialize_production_from_start_in_to_cfg(self, start):
    """Create the productions whose head is ``start``.

    For each state in ``self._states`` one production is built. Its body is
    a single symbol: the combined variable obtained from the start state,
    the start stack symbol and that state.

    Args:
        start: Head used for every generated production.

    Returns:
        A list of ``cfg.Production`` objects, one per state, in the
        iteration order of ``self._states``.
    """
    return [
        cfg.Production(
            start,
            [
                self._cfg_variable_converter.to_cfg_combined_variable(
                    self._start_state,
                    self._start_stack_symbol,
                    reached,
                )
            ],
        )
        for reached in self._states
    ]
def _process_transition_and_state_to_cfg_safe(self, productions, state,
                                              transition):
    """Append the productions derived from ``transition`` and ``state``.

    A single head and a collection of bodies are computed from the pair.
    If the transition's input symbol is not ``Epsilon()``, that symbol is
    prepended to the bodies first. One production per body is then appended
    to ``productions`` (each created with ``filtering=False``).

    Args:
        productions: Collection extended in place through ``append``.
        state: State combined with the transition to build head and bodies.
        transition: Transition record, indexed with ``INPUT`` and
            ``INPUT_SYMBOL`` to reach its input symbol.
    """
    production_head = self._get_head_from_state_and_transition(
        state, transition)
    candidate_bodies = self._get_all_bodies_from_state_and_transition(
        state, transition)

    consumes_input = transition[INPUT][INPUT_SYMBOL] != Epsilon()
    if consumes_input:
        _prepend_input_symbol_to_the_bodies(candidate_bodies, transition)

    for symbols in candidate_bodies:
        productions.append(
            cfg.Production(production_head, symbols, filtering=False))
def _read_line(cls, line, productions, terminals, variables):
    """Parse one grammar line and record the production it describes.

    The first whitespace-separated token is the head variable and the
    remaining tokens form the body. A body token whose first character is
    an ASCII uppercase letter becomes a ``cfg.Variable``; any other token
    becomes a ``cfg.Terminal``. A line without body tokens yields a
    production with an empty body.

    Args:
        line: Text of a single production.
        productions: Collection updated in place (via ``add``) with the
            parsed production.
        terminals: Collection updated in place with every terminal seen.
        variables: Collection updated in place with the head and every
            body variable seen.
    """
    # Tokenize on arbitrary whitespace so that leading blanks or a tab
    # between head and body cannot end up inside (or replace) the head
    # name; the body was already split this way.
    tokens = line.split()
    head_s = tokens[0] if tokens else ''

    head = cfg.Variable(head_s)
    variables.add(head)

    body = []
    for body_component in tokens[1:]:
        if body_component[0] in string.ascii_uppercase:
            body_var = cfg.Variable(body_component)
            variables.add(body_var)
            body.append(body_var)
        else:
            body_ter = cfg.Terminal(body_component)
            terminals.add(body_ter)
            body.append(body_ter)

    productions.add(cfg.Production(head, body))