Example #1
0
 def test_matches_complex_1(self):
     """Check that the NFA built from 'a?b*(c|d)' accepts sample matching strings."""
     automaton = from_ast(parse_regex('a?b*(c|d)'))
     # Each string ends in 'c' or 'd', optionally preceded by 'a' and any run of 'b'.
     accepted = ('abc', 'abd', 'bc', 'bd', 'c', 'd', 'bbbc', 'bbbd')
     for candidate in accepted:
         self.assertTrue(automaton.matches(candidate))
Example #2
0
 def test_matches_complex_2(self):
     """Check acceptance and rejection by the NFA built from '(ab)?(cde)*'."""
     automaton = from_ast(parse_regex('(ab)?(cde)*'))

     # Strings the pattern must accept, including the empty string.
     for candidate in ('', 'ab', 'cde', 'abcde', 'abcdecde', 'abcdecdecde'):
         self.assertTrue(automaton.matches(candidate))

     # Partial groups must be rejected.
     for candidate in ('a', 'c', 'cd', 'de'):
         self.assertFalse(automaton.matches(candidate))
    def test_to_regex(self):
        """Check that a parse_regex -> to_regex round trip is stable.

        For REGEX_TO_TEST ASTs obtained from generators.ast(), the regex text
        is parsed and serialised twice in a row; the output of the first
        round trip must equal the output of the second.
        """
        REGEX_TO_TEST = 1000

        if PRINT_TESTS:
            print('-----TEST: parse_regex, to_regex-----')
        for _ in range(REGEX_TO_TEST):
            ast = generators.ast()
            regex = ast.to_regex()
            try:
                # Can't test regex itself because the first parse will make optimisations
                regex_round_trip = parse_regex(regex).to_regex()
                regex_double_round_trip = parse_regex(
                    regex_round_trip).to_regex()
            except Exception:
                # Only genuine errors are reported as parse failures;
                # KeyboardInterrupt/SystemExit propagate without the
                # misleading "Can't parse" output.
                # Keep in mind it could have failed on the first or second round trip
                print("Can't parse: " + regex)
                print('Came from ast: ' + constructor_str(ast))
                raise

            if PRINT_TESTS:
                print(regex_round_trip + ' against ' + regex_double_round_trip)
            # Name the source regex so a failure is diagnosable without PRINT_TESTS.
            self.assertEqual(regex_round_trip, regex_double_round_trip,
                             'Unstable round trip for: ' + regex)
Example #4
0
    def test_parser(self):
        """Check that parse_regex builds the expected AST for each supported construct.

        Covers the empty regex, plain character sequences, alternation,
        grouping, '*', '+', '.', backslash escapes and the '{n}', '{n,}' and
        '{n,m}' quantifiers.
        """
        self.assertEqual(parse_regex(''), Epsilon())

        self.assertEqual(parse_regex('abcd'), char_sequence_from_str('abcd'),
                         'Simple')

        self.assertEqual(
            parse_regex('ab|cd'),
            Or(char_sequence_from_str('ab'), char_sequence_from_str('cd')),
            'Or')

        self.assertEqual(
            parse_regex('a(b|c)d'),
            sequence_tree_from_regexes(
                [Char('a'), Or(Char('b'), Char('c')),
                 Char('d')]), 'Or with precedence')

        self.assertEqual(
            parse_regex('a(b*|c)d'),
            sequence_tree_from_regexes(
                [Char('a'),
                 Or(ZeroOrMore(Char('b')), Char('c')),
                 Char('d')]), 'Kleene star')

        # 'b+' is expected to parse as 'b' followed by 'b*'.
        self.assertEqual(
            parse_regex('a(b+|c)d'),
            sequence_tree_from_regexes([
                Char('a'),
                Or(Sequence(Char('b'), ZeroOrMore(Char('b'))), Char('c')),
                Char('d')
            ]), 'Kleene plus')

        self.assertEqual(
            parse_regex('a(b*|.)d'),
            sequence_tree_from_regexes(
                [Char('a'),
                 Or(ZeroOrMore(Char('b')), AnyChar()),
                 Char('d')]), 'Any character')

        # Raw strings: '\.', '\+', '\(' and '\)' are not valid Python escape
        # sequences, so a non-raw literal triggers an invalid-escape warning.
        self.assertEqual(
            parse_regex(r'a(b*|\.)d'),
            sequence_tree_from_regexes(
                [Char('a'),
                 Or(ZeroOrMore(Char('b')), Char('.')),
                 Char('d')]), 'Escaping: period character')

        self.assertEqual(parse_regex(r'\+-_\.\(\)'),
                         char_sequence_from_str('+-_.()'), 'Escaping: lots')

        self.assertEqual(parse_regex('a{15}'),
                         sequence_tree_from_regexes([Char('a')] * 15),
                         'Parse {15} quantifier')

        # '{15,}' is expected as 15 mandatory copies followed by 'a*'.
        self.assertEqual(
            parse_regex('a{15,}'),
            sequence_tree_from_regexes([Char('a')] * 15 +
                                       [ZeroOrMore(Char('a'))]),
            'Parse {15,} quantifier')

        # '{15,17}' is expected as 15 mandatory copies plus 2 optional ones.
        self.assertEqual(
            parse_regex('a{15,17}'),
            sequence_tree_from_regexes([Char('a')] * 15 +
                                       [Optional(Char('a'))] * 2),
            'Parse {15,17} quantifier')
 def test_derivative_matches(self):
     """Parse 'o|oa' once, then require ast.matches('o') to succeed 100 times in a row."""
     # NOTE(review): the repetition presumably guards against nondeterministic
     # matching -- confirm against the project history.
     tree = parse_regex('o|oa')
     failure_template = 'derivative.matches attempt: {0}'
     for attempt in range(100):
         assert tree.matches('o'), failure_template.format(attempt)
 def test_full_process(self):
     """Run parse -> NFA -> DFA from scratch 100 times; each DFA must match 'o'."""
     failure_template = 'full_process attempt: {0}'
     for attempt in range(100):
         # Rebuild every stage on each iteration so all of them are exercised.
         tree = parse_regex('o|oa')
         built_nfa = nfa.from_ast(tree)
         built_dfa = dfa.from_nfa(built_nfa)
         assert built_dfa.matches('o'), failure_template.format(attempt)
 def test_dfa_from_nfa(self):
     """Build one NFA for 'o|oa', then convert it to a DFA 100 times; each DFA must match 'o'."""
     source_nfa = nfa.from_ast(parse_regex('o|oa'))
     failure_template = 'dfa.from_nfa attempt: {0}'
     for attempt in range(100):
         # Only the NFA -> DFA conversion is repeated here.
         built_dfa = dfa.from_nfa(source_nfa)
         assert built_dfa.matches('o'), failure_template.format(attempt)
 def test_nfa_from_ast(self):
     """Parse 'o|oa' once, then build an NFA from it 100 times; each NFA must match 'o'."""
     tree = parse_regex('o|oa')
     failure_template = 'nfa.from_ast attempt: {0}'
     for attempt in range(100):
         # Only the AST -> NFA construction is repeated here.
         built_nfa = nfa.from_ast(tree)
         assert built_nfa.matches('o'), failure_template.format(attempt)
 def test_parse_regex(self):
     """Parse 'o|oa' 100 times; every resulting AST must match 'o'."""
     failure_template = 'parse_regex attempt: {0}'
     for attempt in range(100):
         # A fresh parse per iteration, so the parser itself is what is repeated.
         tree = parse_regex('o|oa')
         assert tree.matches('o'), failure_template.format(attempt)
Example #10
0
 def __init__(self, unittest, regex):
     """Store ``regex`` together with the AST, NFA and DFA derived from it.

     ``unittest`` is kept as-is on the instance.
     NOTE(review): presumably the TestCase used for assertions -- confirm
     against the callers.
     """
     self.unittest = unittest
     self.regex = regex

     # Each representation is built from the previous one: regex -> AST -> NFA -> DFA.
     tree = parse_regex(regex)
     self.ast = tree
     automaton = nfa.from_ast(tree)
     self.nfa = automaton
     self.dfa = dfa.from_nfa(automaton)
Example #11
0
#!/usr/bin/env python3
import parser
import sys
import nfa
import dfa
from utils import constructor_str

# Exactly two user arguments are required: the regex and the string to match.
if len(sys.argv) != 3:
    print('Usage: main.py "<regular expression>" "<string to match>"')
    sys.exit(1)
regex_str, match_str = sys.argv[1:]

# Show the input and what the parser made of it.
print('InputRegex:  ', regex_str, sep='')
ast = parser.parse_regex(regex_str)
print('ParsedRegex: ', ast.to_regex(), sep='')
print('AST: ', constructor_str(ast), sep='')
print('English: ', ast.to_str_english(), sep='')

# Match with each engine in turn: the AST itself, then the NFA, then the DFA.
print('Full match (derivative): ', ast.matches(match_str), sep='')
nfa_ = nfa.from_ast(ast)
print('Full match (NFA): ', nfa_.matches(match_str), sep='')
dfa_ = dfa.from_nfa(nfa_)
print('Full match (DFA): ', dfa_.matches(match_str), sep='')
print('Subsets matched: ', dfa_.find_subset_matches(match_str), sep='')