def compile_regex(regex):
    """Compile a regular expression string into a DFA.

    The pattern is run through the pipeline: lexical analysis, syntactic
    analysis, NFA construction from the resulting tree,
    ``epsilon_elimination()`` on that NFA, and finally DFA construction.

    Args:
        regex: The pattern, passed as-is to ``lex.lexical_analysis``.

    Returns:
        Whatever ``automata.DFA.from_nfa`` produces for the processed NFA.
    """
    syntax_tree = syntax.syntactic_analysis(lex.lexical_analysis(regex))
    machine = automata.NFA.from_ast(syntax_tree)
    # The return value is not used, so this presumably rewrites the NFA in
    # place -- confirm against automata.NFA.
    machine.epsilon_elimination()
    return automata.DFA.from_nfa(machine)
def test_chars(self):
    """Lexing 'ab{' should give one CharToken per character.

    The expectation includes the lone '{', which the lexer is expected to
    emit as a plain character token.
    """
    pattern = 'ab{'
    produced = lex.lexical_analysis(pattern)
    # Every character of the pattern maps to its own CharToken.
    wanted = [lex.CharToken(symbol) for symbol in pattern]
    self.check_tokens(produced, wanted)
def test_brackets(self):
    """Lexing 'a[a-z].' should keep the bracket expression as one token.

    Expected tokens: CharToken('a'), CharToken('[a-z]'), CharToken('.').
    """
    produced = lex.lexical_analysis('a[a-z].')
    # The whole '[a-z]' class is expected as a single CharToken lexeme.
    lexemes = ('a', '[a-z]', '.')
    wanted = [lex.CharToken(lexeme) for lexeme in lexemes]
    self.check_tokens(produced, wanted)
def test_quantification(self):
    """Parsing '.*' should give an unbounded quantifier over any character.

    The expected tree is a QuantificationNode with lb=0 and ub=infinity
    whose operand is a CharNode holding chr(0) through chr(127).
    """
    parsed = syntax.syntactic_analysis(lex.lexical_analysis('.*'))
    # '.' is expected to expand to the 128 characters chr(0)..chr(127).
    any_char = syntax.CharNode(set(map(chr, range(128))))
    wanted = syntax.QuantificationNode(
        lb=0,
        ub=float('inf'),
        operand=any_char,
    )
    self.check_tree(parsed, wanted)
def test_concat(self):
    """Parsing '[a-c]b.' should give a three-way concatenation.

    The expected children are, in order, CharNodes for {'a', 'b', 'c'},
    {'b'} and the 128 characters chr(0)..chr(127).
    """
    parsed = syntax.syntactic_analysis(lex.lexical_analysis('[a-c]b.'))
    # One character set per regex atom, in pattern order.
    char_sets = (
        {'a', 'b', 'c'},
        {'b'},
        set(map(chr, range(128))),
    )
    wanted = syntax.ConcatenationNode(
        [syntax.CharNode(chars) for chars in char_sets]
    )
    self.check_tree(parsed, wanted)
def test_union(self):
    """Parsing '(a|b)|c+' should give a nested union.

    The expected tree is a UnionNode whose first child is the UnionNode of
    'a' and 'b' and whose second child is 'c' quantified with lb=1 and
    ub=infinity.
    """
    parsed = syntax.syntactic_analysis(lex.lexical_analysis('(a|b)|c+'))

    # Left alternative: the parenthesised group '(a|b)'.
    a_or_b = syntax.UnionNode([
        syntax.CharNode({'a'}),
        syntax.CharNode({'b'}),
    ])
    # Right alternative: 'c+'.
    one_or_more_c = syntax.QuantificationNode(
        lb=1,
        ub=float('inf'),
        operand=syntax.CharNode({'c'}),
    )

    wanted = syntax.UnionNode([a_or_b, one_or_more_c])
    self.check_tree(parsed, wanted)
def test_parens(self):
    """Lexing a group with an escaped bracket, a range and an escaped plus.

    The expected token stream is: an open paren, the bracket expression
    (including its escaped closing bracket) as one CharToken, the
    '{1,2}' quantifier, 'b', '.', a close paren, the '+' quantifier, and
    finally the escaped plus as a plain CharToken('+').
    """
    # Raw strings keep the backslashes literal. The non-raw spellings only
    # worked because Python passes unrecognised escapes (\] and \+) through
    # unchanged, which is deprecated and emits a warning at compile time.
    regex = r'([a-z\]]{1,2}b.)+\+'
    actual_tokens = lex.lexical_analysis(regex)
    expected_tokens = [
        lex.OpenParToken('('),
        lex.CharToken(r'[a-z\]]'),
        lex.QuantificationToken('{1,2}'),
        lex.CharToken('b'),
        lex.CharToken('.'),
        lex.CloseParToken(')'),
        lex.QuantificationToken('+'),
        lex.CharToken('+'),
    ]
    self.check_tokens(actual_tokens, expected_tokens)
def test_grouping(self):
    """Parsing '((ab?)c)+' should preserve the nesting of both groups.

    The expected tree is a QuantificationNode (lb=1, ub=infinity) over a
    concatenation of the inner group 'ab?' and 'c', where the inner group
    is itself a concatenation of 'a' and an optional 'b' (lb=0, ub=1).
    """
    parsed = syntax.syntactic_analysis(lex.lexical_analysis('((ab?)c)+'))

    # Inner group: 'ab?'.
    inner_group = syntax.ConcatenationNode([
        syntax.CharNode({'a'}),
        syntax.QuantificationNode(
            lb=0,
            ub=1,
            operand=syntax.CharNode({'b'}),
        ),
    ])
    # Outer group: '(ab?)c'.
    outer_group = syntax.ConcatenationNode([
        inner_group,
        syntax.CharNode({'c'}),
    ])

    wanted = syntax.QuantificationNode(
        lb=1,
        ub=float('inf'),
        operand=outer_group,
    )
    self.check_tree(parsed, wanted)
def test_empty(self):
    """Lexing the empty pattern should give an empty token list."""
    # NOTE(review): a second method named test_empty (the parser variant)
    # appears in this source; if both live in the same class the later
    # definition replaces this one -- confirm they are in separate classes.
    self.check_tokens(lex.lexical_analysis(''), [])
def test_empty(self):
    """Parsing the empty pattern should give a lone EpsilonNode."""
    parsed = syntax.syntactic_analysis(lex.lexical_analysis(''))
    self.check_tree(parsed, syntax.EpsilonNode())
def __init__(self):
    """Set up the instance's initial state.

    Attributes set here:
        text: starts as the empty string.
        lex: the result of calling ``lex.lexical_analysis()``.
        input: starts as an empty list.
        parse: starts as the one-element list ``[0]``.
        table: starts as an empty list.
    """
    self.text = ""
    # NOTE(review): lexical_analysis is called with no arguments here, while
    # the other callers visible in this source pass a regex string -- confirm
    # this is the intended signature/module.
    self.lex = lex.lexical_analysis()
    # Remaining state; each attribute gets its own fresh list object.
    self.input, self.parse, self.table = [], [0], []