def test__linearize_regular_expression(self):
     """Check both values returned by ``_linearize_regular_expression``.

     For each sample expression the test verifies the second returned
     value, the size of the linearized alphabet and, for the first sample,
     the whitespace-insensitive ``repr`` of the linearized expression.
     """
     # Expression mixing concatenation, union, star and epsilon.
     linearized, counter = _linearize_regular_expression(
         parse_regular_expression('a (a + b + ε)* a'))
     self.assertEqual(counter, 4)
     self.assertEqual(len(linearized.alphabet()), 4)
     expected_repr = 'CONCAT(CONCAT(a0, STAR(PLUS(PLUS(a1, b2), ε))), a3)'
     self.assertEqual(repr(linearized).replace(' ', ''),
                      expected_repr.replace(' ', ''))

     # Seven occurrences of the same letter.
     linearized, counter = _linearize_regular_expression(
         parse_regular_expression('a a a a a a a'))
     self.assertEqual(counter, 7)
     self.assertEqual(len(linearized.alphabet()), 7)
Example #2
0
def residual_automaton(
        regular_expression: RegularExpression) -> FiniteAutomaton:
    """From a regular expression, constructs an equivalent finite automaton
    using the residuals method.

    States are the identifiers (as produced by ``_state_identifier``) of the
    residuals reachable from ``regular_expression``; they are explored in
    first-in, first-out order starting from the expression itself. A state
    is accepting when the expression parsed back from its identifier
    accepts epsilon.

    Args:
        regular_expression: The expression to convert.

    Returns:
        A ``FiniteAutomaton`` over ``regular_expression.alphabet()`` whose
        only initial state is the identifier of ``regular_expression``.
    """
    # Function-scope import keeps this block independent of the module's
    # import header; deque gives O(1) pops from the front of the queue.
    from collections import deque

    initial_state = _state_identifier(regular_expression)
    accepting_states = set()
    alphabet = regular_expression.alphabet()
    transitions: Dict[State, List[Tuple[Letter, State]]] = {}
    unexplored_states = deque([initial_state])

    while unexplored_states:
        state = unexplored_states.popleft()
        # State identifiers are parsed back into expressions.
        state_re = parse_regular_expression(state)
        if state_re.accepts_epsilon():
            accepting_states.add(state)
        transitions[state] = []
        for letter in alphabet:
            next_residual = residual(state_re, letter)
            if next_residual is None:
                # No residual for this letter: no transition, and no need
                # to compute an identifier for it.
                continue
            next_state = _state_identifier(next_residual)
            transitions[state].append((letter, next_state))
            # States are deduplicated by identifier only, so equivalent
            # expressions with different string representations yield
            # distinct states.
            if next_state not in transitions:
                # Registering the key right away marks the state as seen
                # so it is queued at most once.
                transitions[next_state] = []
                unexplored_states.append(next_state)

    return FiniteAutomaton(alphabet=alphabet,
                           states=set(transitions),
                           initial_states={initial_state},
                           accepting_states=accepting_states,
                           transitions=transitions)
 def test_thompson_letter(self):
     """The Thompson automaton built for ``a`` reads 'a' and nothing else
     among the sampled words; the automaton is also rendered to ``out/``.
     """
     expression = parse_regular_expression('a')
     machine = thompson(expression, {'a', 'b'})
     drawing = machine.draw(name='ThompsonTest.test_thompson_letter')
     drawing.render(directory='out/', format='pdf')

     self.assertTrue(machine.read('a'))
     for rejected_word in ('b', ''):
         self.assertFalse(machine.read(rejected_word))
    def test_glushkov(self):
        """Build Glushkov automata for several expressions, render each to
        ``out/`` and check a sample of accepted and rejected words.
        """
        # (expression, drawing name, accepted words, rejected words)
        scenarios = [
            ('a b',
             'GlushkovTest.test_glushkov.1',
             ["ab"],
             ["", "a", "b", "ba", "aba"]),
            ('(a b)* (c + ε) d',
             'GlushkovTest.test_glushkov.2',
             ["d", "abababd", "ababcd"],
             ["", "ab", "abccd", "ccd"]),
            ('(a (ab)*)*',
             'GlushkovTest.test_glushkov.3',
             ["", "aaaaaa", "aababaaaabab"],
             ["b", "bab"]),
            ('(a (b + bbabb)* c)*',
             'GlushkovTest.test_glushkov.4',
             ["", "ac", "acac", "abbcabbabbc"],
             ["aac", "abbabc"]),
        ]
        for expression, drawing_name, accepted, rejected in scenarios:
            machine = glushkov(parse_regular_expression(expression))
            machine.draw(name=drawing_name).render(
                directory='out/', format='pdf')
            for word in accepted:
                self.assertTrue(machine.read(word))
            for word in rejected:
                self.assertFalse(machine.read(word))
 def test_accepts_epsilon(self):
     """``accepts_epsilon`` is true for the first group of expressions and
     false for the second.
     """
     for nullable in ('ε', 'a*', 'a + ε', '(a + b)*'):
         parsed = parse_regular_expression(nullable)
         self.assertTrue(parsed.accepts_epsilon())
     for non_nullable in ('a', 'a* b'):
         parsed = parse_regular_expression(non_nullable)
         self.assertFalse(parsed.accepts_epsilon())
 def test_successors(self):
     """Check ``successors(letter)`` on several expressions.

     Each expression is parsed once and queried for every listed letter.
     """
     # (expression, [(letter, expected successor set), ...])
     scenarios = [
         ('a b',
          [('a', {'b'}), ('b', set()), ('c', set())]),
         ('(a + b)*',
          [('a', {'a', 'b'}), ('b', {'a', 'b'}), ('c', set())]),
         ('a b a c',
          [('a', {'b', 'c'}), ('b', {'a'}), ('c', set())]),
         ('(a b)* (c + ε) d',
          [('a', {'b'}), ('b', {'a', 'c', 'd'}), ('c', {'d'}),
           ('d', set())]),
         ('(a+ε)(b+ε)(c+ε)(d+ε)',
          [('a', {'b', 'c', 'd'}), ('b', {'c', 'd'}), ('c', {'d'}),
           ('d', set())]),
         ('(a (bc)*)*',
          [('a', {'a', 'b'}), ('b', {'c'}), ('c', {'a', 'b'})]),
     ]
     for expression, expectations in scenarios:
         parsed = parse_regular_expression(expression)
         for letter, expected in expectations:
             self.assertEqual(parsed.successors(letter), expected)
 def test_alphabet(self):
     """``alphabet()`` returns the expected set of letters for each sample
     expression.
     """
     expected_alphabets = [
         ('ε', set()),
         ('a*', {'a'}),
         ('a + ε', {'a'}),
         ('(a + b)*', {'a', 'b'}),
         ('a', {'a'}),
         ('a* b', {'a', 'b'}),
     ]
     for expression, letters in expected_alphabets:
         parsed = parse_regular_expression(expression)
         self.assertEqual(parsed.alphabet(), letters)
 def test_brozozwski_minimize(self):
     """Minimize the Thompson automaton of ``abcd``, render it to ``out/``
     and check its state count and a sample of words.
     """
     thompson_automaton = thompson(
         parse_regular_expression('abcd'), {'a', 'b', 'c', 'd'})
     minimized = brozozwski_minimize(thompson_automaton)
     drawing = minimized.draw(
         name='BrozozwskiMinimizeTest.test_brozozwski_minimize.automaton1')
     drawing.render(directory='out/', format='pdf')

     self.assertEqual(len(minimized.states), 5)
     self.assertTrue(minimized.read('abcd'))
     for rejected_word in ('a', 'ab', 'abc', 'bcda', 'dcbaa'):
         self.assertFalse(minimized.read(rejected_word))
 def test_initial_letters(self):
     """``initial_letters()`` returns the expected set for each sample
     expression.
     """
     expected_initials = [
         ("ε", set()),
         ("a", {'a'}),
         ("a b", {'a'}),
         ("a + b", {'a', 'b'}),
         ("a* b", {'a', 'b'}),
         ("a b*", {'a'}),
         ("(a + b)* (c + ε)", {'a', 'b', 'c'}),
         ("(c + ε) (a + b)*", {'a', 'b', 'c'}),
     ]
     for expression, letters in expected_initials:
         parsed = parse_regular_expression(expression)
         self.assertEqual(parsed.initial_letters(), letters)
 def test_parse(self):
     """Parse each sample and compare its ``repr`` with the expected tree,
     ignoring spaces on both sides.
     """
     # (source text, expected repr of the parsed expression)
     samples = (
         ("ε", "ε"),
         ("a", "a"),
         ("a b", "CONCAT(a, b)"),
         ("ab", "CONCAT(a, b)"),
         ("ε b", "b"),
         ("b ε", "b"),
         ("ε b ε", "b"),
         ("ε ε ε", "ε"),
         ("b ε ε", "b"),
         ("a*", "STAR(a)"),
         ("a**", "STAR(a)"),
         ("a + b", "PLUS(a, b)"),
         ("(a + b)", "PLUS(a, b)"),
         ("a + ab", "PLUS(a, CONCAT(a, b))"),
         ("ba + ab", "PLUS(CONCAT(b, a), CONCAT(a, b))"),
         ("a (a + ε) b", "CONCAT(CONCAT(a, PLUS(a, ε)), b)"),
         ("(a + b)*", "STAR(PLUS(a, b))"),
         ("(a + b*)aa b",
          "CONCAT(CONCAT(CONCAT(PLUS(a, STAR(b)), a), a), b)"),
         ("a (a + b*)*", "CONCAT(a, STAR(PLUS(a, STAR(b))))"),
         ("((a))", "a"),
     )
     for source, expected_tree in samples:
         actual_tree = repr(parse_regular_expression(source))
         self.assertEqual(
             actual_tree.replace(" ", ""),
             expected_tree.replace(" ", ""),
             f'Failed regular expression: {source}')
Example #11
0
def brozozwski(automaton: FiniteAutomaton) -> RegularExpression:
    """Implementation of Brozozwski's algorithm for regular expressions.

    A table of expression strings labels every edge between states. Two
    extra states, ``'init'`` and ``'acc'``, are linked by ``'ε'`` to the
    initial and from the accepting states. The automaton's states are then
    removed one at a time, in descending sorted order, and each remaining
    pair of states is relabelled with the paths that went through the
    removed state. The label left from ``'init'`` to ``'acc'`` is parsed
    and returned.
    """

    source = 'init'
    sink = 'acc'

    # labels[p][q] is the expression text labelling the edge p -> q.
    labels: Dict[State, Dict[State, str]] = {sink: {}, source: {}}
    for start in automaton.initial_states:
        labels[source][start] = 'ε'
    for origin in automaton.states:
        outgoing = labels[origin] = {}
        for symbol, target in automaton.transitions.get(origin, []):
            if target in outgoing:
                # Parallel edges are merged into a union of letters.
                outgoing[target] += '+' + symbol
            else:
                outgoing[target] = symbol
    for final in automaton.accepting_states:
        labels[final][sink] = 'ε'

    pending = sorted(automaton.states)
    while pending:
        removed = pending.pop()
        # States that survive this elimination step, plus the two
        # artificial endpoints.
        survivors = pending + [source, sink]
        for before in survivors:
            if before == removed:
                continue
            for after in survivors:
                if after == removed:
                    continue
                direct = labels[before].get(after, '')
                entering = labels[before].get(removed, '')
                looping = labels[removed].get(removed, '')
                leaving = labels[removed].get(after, '')
                # Path through the removed state: enter, loop, leave.
                detour = _concat(_concat(entering, _star(looping)), leaving)
                labels[before][after] = _plus(direct, detour)

    return parse_regular_expression(labels[source].get(sink, ''))
Example #12
0
    def test_residual_automaton(self):
        """Build residual automata for several expressions, render each to
        ``out/`` and check a sample of accepted and rejected words.
        """
        # (expression, drawing name, accepted words, rejected words)
        scenarios = [
            ('a',
             'ResidualTest.test_residual_automaton.automaton1',
             ['a'],
             ['', 'aa', 'b', 'ab']),
            ('ab',
             'ResidualTest.test_residual_automaton.automaton2',
             ['ab'],
             ['', 'a', 'aa', 'b', 'aba']),
            ('a*',
             'ResidualTest.test_residual_automaton.automaton3',
             ['', 'a', 'aa', 'aaa'],
             ['b', 'aaaab']),
            ('a + b',
             'ResidualTest.test_residual_automaton.automaton4',
             ['a', 'b'],
             ['', 'ab', 'ba']),
            ('(ab + c)* d',
             'ResidualTest.test_residual_automaton.automaton5',
             ['d', 'abd', 'cd', 'abcd', 'ababd'],
             ['', 'ab', 'ad', 'abad']),
            ('(a b b*)*',
             'ResidualTest.test_residual_automaton.automaton6',
             ['', 'ab', 'abb', 'abab', 'abbab'],
             ['a', 'b', 'c', 'abababababaabababab']),
        ]
        for expression, drawing_name, accepted, rejected in scenarios:
            machine = residual_automaton(
                parse_regular_expression(expression))
            machine.draw(name=drawing_name).render(
                directory='out/', format='pdf')
            for word in accepted:
                self.assertTrue(machine.read(word))
            for word in rejected:
                self.assertFalse(machine.read(word))
Example #13
0
    def test_residual(self):
        """Check ``residual`` on a series of expressions and words.

        Three kinds of expectation are used: no residual (``None``), an
        exact string form, and a string form compared after removing
        spaces from both sides.
        """

        def derive(expression, word):
            # Residual of the parsed expression by the given word.
            return residual(parse_regular_expression(expression), word)

        def squeeze(text):
            return text.replace(' ', '')

        def expect_none(expression, word):
            self.assertIsNone(derive(expression, word))

        def expect_exact(expression, word, expected):
            self.assertEqual(str(derive(expression, word)), expected)

        def expect_compact(expression, word, expected):
            actual = str(derive(expression, word))
            self.assertEqual(squeeze(actual), squeeze(expected))

        # A missing expression has no residual.
        for word in ('', 'a'):
            self.assertIsNone(residual(None, word))

        expect_exact('a', 'a', 'ε')
        expect_none('a', 'b')

        expect_exact('a b', 'a', 'b')
        expect_none('a b', 'b')

        expect_exact('a + b', 'a', 'ε')
        expect_exact('a + b', 'b', 'ε')

        expect_exact('(a + b) c', 'a', 'c')
        expect_exact('(a + b) c', 'b', 'c')
        expect_none('(a + b) c', 'c')

        expect_compact('(aa + bb) cc', 'a', 'acc')
        expect_compact('(aa + bb) cc', 'b', 'bcc')
        expect_none('(aa + bb) cc', 'c')
        expect_compact('(aa + ab) cc', 'a', '(a+b)cc')

        expect_compact('(a + b)* a', 'a', '(a + b)* a + ε')
        expect_compact('(a + b)* a', 'b', '(a + b)* a')
        expect_none('(a + b)* a', 'c')

        expect_compact('(abc)* a', 'a', 'b c (abc)* a + ε')

        # Residual by a two-letter word.
        expect_compact('(abc)* a', 'ab', 'c (abc)* a')

        optional_chain = '(a+ε)(b+ε)(c+ε)(d+ε)'
        expect_compact(optional_chain, 'a', '(b + ε)(c + ε)(d + ε)')
        expect_compact(optional_chain, 'b', '(c + ε)(d + ε)')
        expect_compact(optional_chain, 'c', 'd + ε')
        expect_compact(optional_chain, 'ab', '(c + ε) (d + ε)')
 def test___eq__(self):
     """Compare pairs of parsed expressions for equality or inequality.

     Note that ``b + a`` and ``a + b`` are expected to compare unequal.
     """
     # (left expression, right expression, expected to be equal?)
     comparisons = [
         ('a', 'a', True),
         ('a', 'b', False),
         ('ε', 'ε', True),
         ('ε', 'b', False),
         ('a + b', 'a + b', True),
         ('a', 'a + b', False),
         ('b + a', 'a + b', False),
         ('(a + b)* c', '(a + b)* c', True),
         ('(a + b)* c', 'a* c', False),
         ('(a + b)* c', '(a + b) c', False),
     ]
     for left, right, should_match in comparisons:
         if should_match:
             check = self.assertEqual
         else:
             check = self.assertNotEqual
         check(parse_regular_expression(left),
               parse_regular_expression(right))