Beispiel #1
0
 def test_tokenize_complex(self):
     """Several bracket groups in a row, each with one empty alternative."""
     # The "^" and "$" anchors are expected to be absent from the result,
     # and an empty alternative is expected to show up as "".
     expected = (
         ["ENS", "(", "NEWS", "|", "", ")"]
         + ["SSSEEN", "(", "WNSE", "|", "", ")"]
         + ["EE", "(", "", "|", "SWEN", ")"]
         + ["NNN"]
     )
     actual = tokenize("^ENS(NEWS|)SSSEEN(WNSE|)EE(|SWEN)NNN$")
     self.assertListEqual(actual, expected)
Beispiel #2
0
def parse(regex: str) -> "Node":
    """Build a tree of ``Node`` objects from a bracketed expression.

    The expression is split with ``tokenize`` and the tokens are consumed
    left to right. ``"("`` opens a new ``Node``; ``")"`` closes the
    innermost open node and appends it to its parent's ``children``. Every
    other token, including the ``"|"`` separator, is appended unchanged to
    the ``children`` of the innermost open node, so alternatives stay in
    source order with ``"|"`` markers between them.

    Args:
        regex: The expression to parse.

    Returns:
        The root ``Node``; its ``children`` hold tokens and nested nodes.

    Raises:
        ValueError: If the parentheses in ``regex`` are not balanced.
    """
    tokens = tokenize(regex)
    # stack[0] is the root; stack[-1] is always the innermost open group.
    stack: List[Node] = [Node()]
    for token in tokens:
        if token == "(":
            stack.append(Node())
        elif token == ")":
            # Only the root left means there is no open group to close.
            if len(stack) == 1:
                raise ValueError(f"unbalanced ')' in expression: {regex!r}")
            finished_node = stack.pop()
            stack[-1].children.append(finished_node)
        else:
            # Plain text and the "|" separator are both stored as-is.
            stack[-1].children.append(token)
    # Anything above the root is a group that was opened but never closed;
    # returning it would silently hand back a subtree instead of the root.
    if len(stack) != 1:
        raise ValueError(f"unclosed '(' in expression: {regex!r}")
    return stack.pop()
Beispiel #3
0
 def test_tokenize_partly_empty_bracket(self):
     """A bracket group whose second alternative is empty."""
     # The empty alternative after "|" is expected as an explicit "".
     expected = ["ENWW", "(", "NE", "|", "", ")"]
     actual = tokenize("^ENWW(NE|)$")
     self.assertListEqual(actual, expected)
Beispiel #4
0
 def test_tokenize_brackets(self):
     """A single bracket group with two non-empty alternatives."""
     expected = ["ENWW", "(", "NE", "|", "EW", ")"]
     actual = tokenize("^ENWW(NE|EW)$")
     self.assertListEqual(actual, expected)
Beispiel #5
0
 def test_tokenize_simple(self):
     """An expression without brackets yields one token, anchors removed."""
     expected = ["ENWW"]
     actual = tokenize("^ENWW$")
     self.assertListEqual(actual, expected)