def test_tokenize_complex(self):
    # Three bracket groups in a row, each with one empty alternative; the
    # expected list has no "^"/"$" entries and uses "" for an empty option.
    pattern = "^ENS(NEWS|)SSSEEN(WNSE|)EE(|SWEN)NNN$"
    expected_tokens = [
        "ENS",
        "(", "NEWS", "|", "", ")",
        "SSSEEN",
        "(", "WNSE", "|", "", ")",
        "EE",
        "(", "", "|", "SWEN", ")",
        "NNN",
    ]
    self.assertListEqual(tokenize(pattern), expected_tokens)
def parse(regex: str) -> "Node":
    """Build a tree of ``Node`` objects from a bracketed regex.

    The regex is split with ``tokenize`` and the tokens are folded into a
    tree: "(" opens a new ``Node``, ")" closes the current one and appends it
    to its parent's ``children``, and every other token (including the "|"
    separator) is appended to the current node's ``children`` unchanged.

    Args:
        regex: The pattern to parse; it is handed to ``tokenize`` as-is.

    Returns:
        The root ``Node``. Its ``children`` hold string tokens and nested
        ``Node`` objects in the order they appear in the token stream.

    Raises:
        ValueError: If the parentheses in the token stream are unbalanced.
    """
    root = Node()
    # The last element is always the node currently being filled.
    stack: List[Node] = [root]
    for token in tokenize(regex):
        if token == "(":
            stack.append(Node())
        elif token == ")":
            # Only the root left means there is no open group to close.
            if len(stack) == 1:
                raise ValueError(f"unbalanced ')' in regex: {regex!r}")
            finished_node = stack.pop()
            stack[-1].children.append(finished_node)
        else:
            # Plain text and the "|" separator are stored as-is.
            stack[-1].children.append(token)
    # Anything above the root was opened with "(" but never closed; returning
    # the top of the stack here would silently drop the enclosing content.
    if len(stack) != 1:
        raise ValueError(f"unclosed '(' in regex: {regex!r}")
    return root
def test_tokenize_partly_empty_bracket(self):
    # A group whose second alternative is empty should yield "" for it.
    pattern = "^ENWW(NE|)$"
    expected_tokens = ["ENWW", "(", "NE", "|", "", ")"]
    self.assertListEqual(tokenize(pattern), expected_tokens)
def test_tokenize_brackets(self):
    # A single group with two non-empty alternatives.
    pattern = "^ENWW(NE|EW)$"
    expected_tokens = ["ENWW", "(", "NE", "|", "EW", ")"]
    self.assertListEqual(tokenize(pattern), expected_tokens)
def test_tokenize_simple(self):
    # No brackets: the whole body between "^" and "$" is one token.
    pattern = "^ENWW$"
    expected_tokens = ["ENWW"]
    self.assertListEqual(tokenize(pattern), expected_tokens)