def test__linearize_regular_expression(self):
    # Mixed expression: check the returned index, the size of the
    # linearized alphabet and the exact (space-insensitive) repr.
    mixed = parse_regular_expression('a (a + b + ε)* a')
    linearized, next_index = _linearize_regular_expression(mixed)
    self.assertEqual(next_index, 4)
    self.assertEqual(len(linearized.alphabet()), 4)
    expected_repr = 'CONCAT(CONCAT(a0, STAR(PLUS(PLUS(a1, b2), ε))), a3)'
    self.assertEqual(
        repr(linearized).replace(' ', ''),
        expected_repr.replace(' ', ''))

    # Seven occurrences of the same letter: the index and the alphabet
    # size of the linearized expression are both expected to be 7.
    repeated = parse_regular_expression('a a a a a a a')
    linearized, next_index = _linearize_regular_expression(repeated)
    self.assertEqual(next_index, 7)
    self.assertEqual(len(linearized.alphabet()), 7)
def residual_automaton(
        regular_expression: RegularExpression) -> FiniteAutomaton:
    """Build a finite automaton from a regular expression using residuals.

    Each state is the identifier returned by ``_state_identifier`` for a
    residual reached from ``regular_expression``. Starting from the
    identifier of the expression itself, states are explored in FIFO
    order: a state is re-parsed into an expression, marked accepting when
    that expression accepts epsilon, and for every letter of the alphabet
    whose residual is not ``None`` a transition to the residual's
    identifier is recorded.

    Args:
        regular_expression: The expression to convert.

    Returns:
        A ``FiniteAutomaton`` over the expression's alphabet whose single
        initial state is the identifier of ``regular_expression``.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    initial_state = _state_identifier(regular_expression)
    accepting_states = []
    alphabet = regular_expression.alphabet()
    transitions: Dict[State, List[Tuple[Letter, State]]] = {}
    # FIFO work list; a deque pops from the left in O(1), unlike list.pop(0).
    unexplored_states = deque([initial_state])
    while unexplored_states:
        state = unexplored_states.popleft()
        state_re = parse_regular_expression(state)
        if state_re.accepts_epsilon():
            accepting_states.append(state)
        transitions[state] = []
        for letter in alphabet:
            next_residual = residual(state_re, letter)
            if next_residual is None:
                # No residual for this letter: no transition, and no need
                # to compute an identifier for it.
                continue
            next_state = _state_identifier(next_residual)
            transitions[state].append((letter, next_state))
            # Equivalent expressions can have different string
            # representations, so they may show up as distinct states.
            if next_state not in transitions:
                # Registering the key right away prevents queuing the same
                # state twice before it is explored.
                transitions[next_state] = []
                unexplored_states.append(next_state)
    return FiniteAutomaton(alphabet=alphabet,
                           states=set(transitions.keys()),
                           initial_states={initial_state},
                           accepting_states=set(accepting_states),
                           transitions=transitions)
def test_thompson_letter(self):
    # Automaton built by thompson() for the one-letter expression 'a'
    # over the alphabet {a, b}.
    single_letter = thompson(parse_regular_expression('a'), {'a', 'b'})
    drawing = single_letter.draw(name='ThompsonTest.test_thompson_letter')
    drawing.render(directory='out/', format='pdf')

    # Only the word 'a' is expected to be read successfully.
    self.assertTrue(single_letter.read('a'))
    for rejected_word in ('b', ''):
        self.assertFalse(single_letter.read(rejected_word))
def test_glushkov(self):
    # Each case: (expression, drawing name, accepted words, rejected words).
    # The automaton is built, rendered to out/ as a PDF, then checked
    # against the accepted words followed by the rejected ones.
    cases = (
        ('a b',
         'GlushkovTest.test_glushkov.1',
         ("ab",),
         ("", "a", "b", "ba", "aba")),
        ('(a b)* (c + ε) d',
         'GlushkovTest.test_glushkov.2',
         ("d", "abababd", "ababcd"),
         ("", "ab", "abccd", "ccd")),
        ('(a (ab)*)*',
         'GlushkovTest.test_glushkov.3',
         ("", "aaaaaa", "aababaaaabab"),
         ("b", "bab")),
        ('(a (b + bbabb)* c)*',
         'GlushkovTest.test_glushkov.4',
         ("", "ac", "acac", "abbcabbabbc"),
         ("aac", "abbabc")),
    )
    for expression, drawing_name, accepted, rejected in cases:
        automaton = glushkov(parse_regular_expression(expression))
        automaton.draw(name=drawing_name).render(
            directory='out/', format='pdf')
        for word in accepted:
            self.assertTrue(automaton.read(word))
        for word in rejected:
            self.assertFalse(automaton.read(word))
def test_accepts_epsilon(self):
    # Expressions expected to accept the empty word.
    for expression in ('ε', 'a*', 'a + ε', '(a + b)*'):
        parsed = parse_regular_expression(expression)
        self.assertTrue(parsed.accepts_epsilon())

    # Expressions expected to require at least one letter.
    for expression in ('a', 'a* b'):
        parsed = parse_regular_expression(expression)
        self.assertFalse(parsed.accepts_epsilon())
def test_successors(self):
    # Each case: (expression, ((letter, expected successor set), ...)).
    # Every expression is parsed once and queried letter by letter.
    cases = (
        ('a b', (
            ('a', {'b'}),
            ('b', set()),
            ('c', set()),
        )),
        ('(a + b)*', (
            ('a', {'a', 'b'}),
            ('b', {'a', 'b'}),
            ('c', set()),
        )),
        ('a b a c', (
            ('a', {'b', 'c'}),
            ('b', {'a'}),
            ('c', set()),
        )),
        ('(a b)* (c + ε) d', (
            ('a', {'b'}),
            ('b', {'a', 'c', 'd'}),
            ('c', {'d'}),
            ('d', set()),
        )),
        ('(a+ε)(b+ε)(c+ε)(d+ε)', (
            ('a', {'b', 'c', 'd'}),
            ('b', {'c', 'd'}),
            ('c', {'d'}),
            ('d', set()),
        )),
        ('(a (bc)*)*', (
            ('a', {'a', 'b'}),
            ('b', {'c'}),
            ('c', {'a', 'b'}),
        )),
    )
    for expression, expectations in cases:
        parsed = parse_regular_expression(expression)
        for letter, expected in expectations:
            self.assertEqual(parsed.successors(letter), expected)
def test_alphabet(self):
    # Each case: (expression, expected set of letters). Epsilon is not
    # expected to show up in any alphabet.
    cases = (
        ('ε', set()),
        ('a*', {'a'}),
        ('a + ε', {'a'}),
        ('(a + b)*', {'a', 'b'}),
        ('a', {'a'}),
        ('a* b', {'a', 'b'}),
    )
    for expression, expected in cases:
        parsed = parse_regular_expression(expression)
        self.assertEqual(parsed.alphabet(), expected)
def test_brozozwski_minimize(self):
    # Minimize the Thompson automaton of 'abcd' over {a, b, c, d}.
    thompson_automaton = thompson(
        parse_regular_expression('abcd'), {'a', 'b', 'c', 'd'})
    minimized = brozozwski_minimize(thompson_automaton)
    drawing = minimized.draw(
        name='BrozozwskiMinimizeTest.test_brozozwski_minimize.automaton1')
    drawing.render(directory='out/', format='pdf')

    # The minimized automaton is expected to have 5 states and to read
    # exactly the word 'abcd' among the words tried here.
    self.assertEqual(len(minimized.states), 5)
    self.assertTrue(minimized.read('abcd'))
    for rejected_word in ('a', 'ab', 'abc', 'bcda', 'dcbaa'):
        self.assertFalse(minimized.read(rejected_word))
def test_initial_letters(self):
    # Each case: (expression, expected set of letters a word can start with).
    cases = (
        ("ε", set()),
        ("a", {'a'}),
        ("a b", {'a'}),
        ("a + b", {'a', 'b'}),
        ("a* b", {'a', 'b'}),
        ("a b*", {'a'}),
        ("(a + b)* (c + ε)", {'a', 'b', 'c'}),
        ("(c + ε) (a + b)*", {'a', 'b', 'c'}),
    )
    for expression, expected in cases:
        parsed = parse_regular_expression(expression)
        self.assertEqual(parsed.initial_letters(), expected)
def test_parse(self):
    # Maps each input expression to the repr expected from the parser.
    # Insertion order is the order in which the cases are checked.
    expected_reprs = {
        "ε": "ε",
        "a": "a",
        "a b": "CONCAT(a, b)",
        "ab": "CONCAT(a, b)",
        "ε b": "b",
        "b ε": "b",
        "ε b ε": "b",
        "ε ε ε": "ε",
        "b ε ε": "b",
        "a*": "STAR(a)",
        "a**": "STAR(a)",
        "a + b": "PLUS(a, b)",
        "(a + b)": "PLUS(a, b)",
        "a + ab": "PLUS(a, CONCAT(a, b))",
        "ba + ab": "PLUS(CONCAT(b, a), CONCAT(a, b))",
        "a (a + ε) b": "CONCAT(CONCAT(a, PLUS(a, ε)), b)",
        "(a + b)*": "STAR(PLUS(a, b))",
        "(a + b*)aa b":
            "CONCAT(CONCAT(CONCAT(PLUS(a, STAR(b)), a), a), b)",
        "a (a + b*)*": "CONCAT(a, STAR(PLUS(a, STAR(b))))",
        "((a))": "a",
    }
    for problem, solution in expected_reprs.items():
        # Whitespace is stripped on both sides so only structure matters.
        actual = repr(parse_regular_expression(problem)).replace(" ", "")
        self.assertEqual(
            actual,
            solution.replace(" ", ""),
            f'Failed regular expression: {problem}')
def brozozwski(automaton: FiniteAutomaton) -> RegularExpression:
    """Turn a finite automaton into a regular expression by state elimination.

    A table of textual expressions labels every edge between states. Two
    extra nodes, ``'init'`` and ``'acc'``, are linked by ``'ε'`` to the
    initial and from the accepting states. The automaton's states are then
    removed one at a time, in descending sorted order, and every remaining
    pair ``(k, l)`` is relabelled with ``e_kl + e_ki (e_ii)* e_il``, built
    through the ``_plus``, ``_concat`` and ``_star`` helpers. What is left
    on the ``init -> acc`` edge is parsed and returned.

    Args:
        automaton: Automaton providing ``states``, ``initial_states``,
            ``accepting_states`` and ``transitions``.

    Returns:
        The result of parsing the final ``init -> acc`` label.
    """
    # NOTE(review): an automaton state literally named 'init' or 'acc'
    # would share a table row with these extra nodes -- confirm callers
    # never use those names.
    q_init, q_acc = 'init', 'acc'
    table: Dict[State, Dict[State, str]] = {q_acc: {}, q_init: {}}

    for entry in automaton.initial_states:
        table[q_init][entry] = 'ε'

    for source in automaton.states:
        outgoing = table[source] = {}
        for letter, target in automaton.transitions.get(source, []):
            # Parallel edges are merged into a single 'x+y' label.
            outgoing[target] = (
                outgoing[target] + '+' + letter
                if target in outgoing else letter)

    for final in automaton.accepting_states:
        table[final][q_acc] = 'ε'

    pending = sorted(automaton.states)
    while pending:
        q_i = pending.pop()
        row_i = table[q_i]
        # Nodes still present once q_i is gone, extra endpoints included.
        survivors = [q for q in pending + [q_init, q_acc] if q != q_i]
        for q_k in survivors:
            row_k = table[q_k]
            for q_l in survivors:
                # Missing edges are passed to the helpers as ''
                # (presumably "no path" -- confirm against the helpers).
                detour = _concat(
                    _concat(row_k.get(q_i, ''), _star(row_i.get(q_i, ''))),
                    row_i.get(q_l, ''))
                row_k[q_l] = _plus(row_k.get(q_l, ''), detour)

    return parse_regular_expression(table[q_init].get(q_acc, ''))
def test_residual_automaton(self):
    # Each case: (expression, drawing name, accepted words, rejected words).
    # The automaton is built, rendered to out/ as a PDF, then checked
    # against the accepted words followed by the rejected ones.
    cases = (
        ('a',
         'ResidualTest.test_residual_automaton.automaton1',
         ('a',),
         ('', 'aa', 'b', 'ab')),
        ('ab',
         'ResidualTest.test_residual_automaton.automaton2',
         ('ab',),
         ('', 'a', 'aa', 'b', 'aba')),
        ('a*',
         'ResidualTest.test_residual_automaton.automaton3',
         ('', 'a', 'aa', 'aaa'),
         ('b', 'aaaab')),
        ('a + b',
         'ResidualTest.test_residual_automaton.automaton4',
         ('a', 'b'),
         ('', 'ab', 'ba')),
        ('(ab + c)* d',
         'ResidualTest.test_residual_automaton.automaton5',
         ('d', 'abd', 'cd', 'abcd', 'ababd'),
         ('', 'ab', 'ad', 'abad')),
        ('(a b b*)*',
         'ResidualTest.test_residual_automaton.automaton6',
         ('', 'ab', 'abb', 'abab', 'abbab'),
         ('a', 'b', 'c', 'abababababaabababab')),
    )
    for expression, drawing_name, accepted, rejected in cases:
        automaton = residual_automaton(parse_regular_expression(expression))
        automaton.draw(name=drawing_name).render(
            directory='out/', format='pdf')
        for word in accepted:
            self.assertTrue(automaton.read(word))
        for word in rejected:
            self.assertFalse(automaton.read(word))
def test_residual(self):
    # The residual of a missing (None) expression is expected to be None.
    for word in ('', 'a'):
        self.assertIsNone(residual(None, word))

    optional_chain = '(a+ε)(b+ε)(c+ε)(d+ε)'
    # Each case: (expression, word, expected text, strip spaces from actual).
    # An expected value of None means the residual itself must be None.
    # Spaces in the expected text are always ignored; the last flag says
    # whether the actual text is stripped of spaces before comparing.
    cases = (
        ('a', 'a', 'ε', False),
        ('a', 'b', None, False),
        ('a b', 'a', 'b', False),
        ('a b', 'b', None, False),
        ('a + b', 'a', 'ε', False),
        ('a + b', 'b', 'ε', False),
        ('(a + b) c', 'a', 'c', False),
        ('(a + b) c', 'b', 'c', False),
        ('(a + b) c', 'c', None, False),
        ('(aa + bb) cc', 'a', 'acc', True),
        ('(aa + bb) cc', 'b', 'bcc', True),
        ('(aa + bb) cc', 'c', None, False),
        ('(aa + ab) cc', 'a', '(a+b)cc', True),
        ('(a + b)* a', 'a', '(a + b)* a + ε', True),
        ('(a + b)* a', 'b', '(a + b)* a', True),
        ('(a + b)* a', 'c', None, False),
        ('(abc)* a', 'a', 'b c (abc)* a + ε', True),
        ('(abc)* a', 'ab', 'c (abc)* a', True),
        (optional_chain, 'a', '(b + ε)(c + ε)(d + ε)', True),
        (optional_chain, 'b', '(c + ε)(d + ε)', True),
        (optional_chain, 'c', 'd + ε', True),
        (optional_chain, 'ab', '(c + ε) (d + ε)', True),
    )
    for expression, word, expected, strip_actual in cases:
        outcome = residual(parse_regular_expression(expression), word)
        if expected is None:
            self.assertIsNone(outcome)
            continue
        rendered = str(outcome)
        if strip_actual:
            rendered = rendered.replace(' ', '')
        self.assertEqual(rendered, expected.replace(' ', ''))
def test___eq__(self):
    # Each case: (left expression, right expression, expected to be equal).
    # 'b + a' versus 'a + b' is expected to differ, so the comparison
    # under test is not expected to treat the operands as interchangeable.
    cases = (
        ('a', 'a', True),
        ('a', 'b', False),
        ('ε', 'ε', True),
        ('ε', 'b', False),
        ('a + b', 'a + b', True),
        ('a', 'a + b', False),
        ('b + a', 'a + b', False),
        ('(a + b)* c', '(a + b)* c', True),
        ('(a + b)* c', 'a* c', False),
        ('(a + b)* c', '(a + b) c', False),
    )
    for left, right, should_match in cases:
        check = self.assertEqual if should_match else self.assertNotEqual
        check(parse_regular_expression(left),
              parse_regular_expression(right))