def test_matching_str(self):
    """Check that generated matching strings are accepted by every matcher.

    For each of a fixed number of generated regex ASTs, an NFA and a DFA
    are built, then a batch of strings produced by
    ``generators.matching_str`` for that AST is run through the AST's own
    ``matches`` (the "derivative method" named in the failure message),
    the NFA and the DFA. All three must accept every string.
    """
    regex_count = 25
    strings_per_regex = 100

    for _ in range(regex_count):
        regex_ast = generators.ast()
        regex_nfa = nfa.from_ast(regex_ast)
        regex_dfa = dfa.from_nfa(regex_nfa)

        if PRINT_TESTS:
            print('-----TEST: matching_str-----')

        # Each matcher is paired with the tail of its failure message so
        # the three checks share a single assertion below.
        matchers = (
            (regex_ast, "were generated as a matching pair, but they were determined not to match by the derivative method."),
            (regex_nfa, "were generated as a matching pair, but they were determined not to match by the NFA method."),
            (regex_dfa, "were generated as a matching pair, but they were determined not to match by the DFA method."),
        )

        for _ in range(strings_per_regex):
            candidate = generators.matching_str(regex_ast)
            if PRINT_TESTS:
                print(regex_ast.to_regex() + ' against ' + candidate)

            for matcher, failure_tail in matchers:
                self.assertTrue(
                    matcher.matches(candidate),
                    "Regex: '{0}' and String: '{1}' ".format(
                        regex_ast.to_regex(), candidate) + failure_tail
                )
def compile(regex):
    """Compile a pattern string into a minimized DFA.

    The pattern is scanned left to right and an NFA is grown one element
    at a time. Elements are dispatched on their first character:

    * an uppercase letter -> ``parse_uppercase`` / word matcher
    * ``x``               -> ``parse_any`` / any matcher
    * ``[``               -> ``parse_alternative`` / any matcher
    * ``{``               -> ``parse_negation`` / any matcher

    ``-`` separates elements, an element may be followed by a ``(...)``
    repetition handled by ``parse_repetition``, and ``.`` (or the end of
    the string) terminates the pattern. Going by the local naming this is
    presumably PROSITE pattern syntax -- confirm against the callers.

    Args:
        regex: The pattern string to compile.

    Returns:
        The result of ``dfa.minimize(dfa.from_nfa(...))`` for the NFA
        built from ``regex``.
    """
    i = 0
    argument = None
    prosite_nfa = nfa.NFA()
    create_matcher_func = None
    # States that the next element must be attached to.
    append_to_state = [prosite_nfa.start_state]

    # Compare characters by value ('!='), not identity ('is not'):
    # identity of equal strings is an interpreter implementation detail.
    while i < len(regex) and regex[i] != '.':
        # Skip element separators without running past the end of the
        # pattern (a trailing '-' used to raise IndexError here).
        while i < len(regex) and regex[i] == '-':
            i += 1
        # The separators may have led to the terminator or the end of the
        # input; in either case there is no further element to parse.
        if i >= len(regex) or regex[i] == '.':
            break

        # Each parse_* helper returns an (index, argument) pair; the index
        # is presumably that of the last character it consumed, since it
        # is advanced by one right after the dispatch -- TODO confirm.
        if regex[i].isupper():
            i, argument = parse_uppercase(regex, i)
            create_matcher_func = prosite_nfa.create_word_matcher
        elif regex[i] == 'x':
            i, argument = parse_any(regex, i)
            create_matcher_func = prosite_nfa.create_any_matcher
        elif regex[i] == '[':
            i, argument = parse_alternative(regex, i)
            create_matcher_func = prosite_nfa.create_any_matcher
        elif regex[i] == '{':
            i, argument = parse_negation(regex, i)
            create_matcher_func = prosite_nfa.create_any_matcher
        # NOTE(review): an unrecognised symbol is not rejected. It is
        # skipped, and the matcher function and argument left over from
        # the previous element (or None at the start) are passed on again.
        i += 1

        # Default: the element occurs exactly once.
        rep = range(1, 2)
        if i < len(regex) and regex[i] == '(':
            i, repetition_range = parse_repetition(regex, i)
            # First and last entries are the inclusive bounds; +1 because
            # range() excludes its stop value.
            begin = int(repetition_range[0])
            end = int(repetition_range[-1]) + 1
            rep = range(begin, end)
            i += 1

        append_to_state = prosite_nfa.create_repetition_matcher_experimental(
            rep, create_matcher_func, argument, append_to_state)

    # Every state left open after the last element is accepting.
    for state in append_to_state:
        prosite_nfa.accept.add(state)

    return dfa.minimize(dfa.from_nfa(prosite_nfa))
def test_full_process(self):
    """Run parse -> NFA -> DFA for 'o|oa' repeatedly and match 'o'.

    The whole pipeline is rebuilt on each of the 100 attempts, and the
    attempt number is reported if the resulting DFA rejects 'o'.
    """
    for attempt in range(100):
        # Rebuild everything from the regex text on every attempt.
        built_dfa = dfa.from_nfa(nfa.from_ast(parse_regex('o|oa')))
        assert built_dfa.matches('o'), \
            'full_process attempt: {0}'.format(attempt)
def test_dfa_from_nfa(self):
    """Convert one NFA for 'o|oa' to a DFA repeatedly and match 'o'.

    The NFA is built once; only ``dfa.from_nfa`` is repeated on each of
    the 100 attempts, and the attempt number is reported on failure.
    """
    shared_nfa = nfa.from_ast(parse_regex('o|oa'))
    for attempt in range(100):
        converted = dfa.from_nfa(shared_nfa)
        assert converted.matches('o'), \
            'dfa.from_nfa attempt: {0}'.format(attempt)
def __init__(self, unittest, regex):
    """Parse *regex* and build its NFA and DFA, keeping every stage.

    Args:
        unittest: Stored unchanged as ``self.unittest``; presumably the
            test case used for assertions -- confirm against callers.
        regex: Regular expression text; stored as ``self.regex`` and
            parsed with ``parse_regex``.
    """
    self.unittest = unittest
    self.regex = regex
    # Each stage feeds the next: text -> AST -> NFA -> DFA.
    self.ast = syntax_tree = parse_regex(regex)
    self.nfa = state_machine = nfa.from_ast(syntax_tree)
    self.dfa = dfa.from_nfa(state_machine)
#!/usr/bin/env python3
# Command-line driver: parses a regular expression, prints several views of
# it, and reports whether a string matches it according to the AST's own
# matcher, the NFA and the DFA built from it.
import parser
import sys
import nfa
import dfa
from utils import constructor_str

# Exactly two arguments are expected after the script name.
if len(sys.argv) != 3:
    print('Usage: main.py "<regular expression>" "<string to match>"')
    sys.exit(1)

regex_str, match_str = sys.argv[1:3]

print('InputRegex: ' + regex_str)

# Parse once, then show the different renderings of the parsed expression.
ast = parser.parse_regex(regex_str)
parsed_regex = ast.to_regex()
print('ParsedRegex: ' + parsed_regex)
ast_repr = constructor_str(ast)
print('AST: ' + ast_repr)
english = ast.to_str_english()
print('English: ' + english)

# Full-string match through each of the three matchers.
derivative_result = ast.matches(match_str)
print('Full match (derivative): ' + str(derivative_result))

nfa_ = nfa.from_ast(ast)
nfa_result = nfa_.matches(match_str)
print('Full match (NFA): ' + str(nfa_result))

dfa_ = dfa.from_nfa(nfa_)
dfa_result = dfa_.matches(match_str)
print('Full match (DFA): ' + str(dfa_result))

# Finally, report what the DFA finds inside the string.
subset_matches = dfa_.find_subset_matches(match_str)
print('Subsets matched: ' + str(subset_matches))