def _parse_char(l: _Lexer, in_set=False) -> Char:
    """Read one character from the lexer ``l`` and wrap it in a ``Char``.

    A backslash escapes the character that follows it: that following
    character is returned as-is, whatever it is. When ``in_set`` is true,
    any unescaped character is accepted literally; otherwise an unescaped
    character found in ``META_CHARACTERS`` is rejected.

    Raises:
        ValueError: if a backslash is not followed by another character, or
            if an unescaped meta character is found outside of a set.
    """
    current = l.next()

    if current == "\\":
        # An escape needs something after it to escape.
        if l.peek() is None:
            raise ValueError(
                "Unexpectedly reached end of string. Expected escaped character."
            )
        return Char(l.next())

    # Meta characters are only special outside of a set.
    if not in_set and current in META_CHARACTERS:
        raise ValueError(
            f'Found "{current}" at {l.i-1}. The character is not valid in this context'
        )

    return Char(current)
def test_char():
    """Plain and backslash-escaped characters each parse to a single ``Char``."""
    # (pattern given to the parser, character expected inside the Char)
    cases = [
        ("\n", "\n"),
        ("a", "a"),
        ("b", "b"),
        ("-", "-"),
        ("\\a", "a"),
        ("\\$", "$"),
        ("\\\\", "\\"),
    ]
    for pattern, literal in cases:
        assert from_string(pattern) == Char(literal)
def _split_labels(
    label1: TransitionLabel,
    states1: Set[State],
    label2: TransitionLabel,
    states2: Set[State],
) -> List[Tuple[TransitionLabel, Set[State]]]:
    """Split two overlapping labels into disjoint labels with their states.

    The inputs must be either two partially overlapping (non-identical)
    ``Range`` labels, or one ``Range`` and one ``Char`` that lies inside it.
    The result lists the pieces in ascending character order. A piece covered
    by only one of the input labels keeps that label's states; the piece
    covered by both gets the union ``states1 | states2``.

    Args:
        label1: First label (``Range`` or ``Char``).
        states1: States reached through ``label1``.
        label2: Second label (``Range`` or ``Char``).
        states2: States reached through ``label2``.

    Returns:
        A list of ``(label, states)`` pairs for the disjoint pieces.

    Raises:
        AssertionError: if the labels are not one of the supported
            combinations (e.g. two ``Char`` labels, two identical ranges, or
            a ``Char`` outside the ``Range``).
    """

    def before(c: Char) -> Char:
        # Character immediately preceding ``c`` by code point.
        return Char(chr(ord(c.s) - 1))

    def after(c: Char) -> Char:
        # Character immediately following ``c`` by code point.
        return Char(chr(ord(c.s) + 1))

    if isinstance(label1, Range) and isinstance(label2, Range):
        # Normalise the order so that label1 starts first (ties: ends first).
        if (label1.start.s, label1.end.s) > (label2.start.s, label2.end.s):
            label1, states1, label2, states2 = label2, states2, label1, states1

        if label1.start == label2.start:
            assert label1.end.s < label2.end.s
            # ex: l1 = [a-f], l2 = [a-z]
            return [
                (label1, states1 | states2),
                (Range(after(label1.end), label2.end), states2),
            ]

        assert label1.start.s < label2.start.s

        if label1.end.s < label2.end.s:
            # ex: l1 = [a-d], l2 = [c-f]
            return [
                (Range(label1.start, before(label2.start)), states1),
                (Range(label2.start, label1.end), states1 | states2),
                (Range(after(label1.end), label2.end), states2),
            ]

        if label1.end == label2.end:
            # ex: l1 = [a-z], l2 = [x-z]
            return [
                (Range(label1.start, before(label2.start)), states1),
                (label2, states1 | states2),
            ]

        assert label1.end.s > label2.end.s
        # ex: l1 = [a-f], l2 = [c-d]
        # The tail after label2 is covered by label1 only, so it must carry
        # states1 (it was previously paired with states2 by mistake).
        return [
            (Range(label1.start, before(label2.start)), states1),
            (label2, states1 | states2),
            (Range(after(label2.end), label1.end), states1),
        ]

    # One Range that contains the Char: work out which argument is which.
    if isinstance(label1, Char) and isinstance(label2, Range):
        char_label, char_states = label1, states1
        range_label, range_states = label2, states2
    elif isinstance(label1, Range) and isinstance(label2, Char):
        char_label, char_states = label2, states2
        range_label, range_states = label1, states1
    else:
        # Explicit raise so the guard is not stripped under ``python -O``.
        raise AssertionError("Bad logic")

    assert range_label.start.s <= char_label.s <= range_label.end.s

    labels: List[Tuple[TransitionLabel, Set[State]]] = []
    if range_label.start != char_label:
        # Part of the range strictly below the char.
        labels.append((Range(range_label.start, before(char_label)), range_states))
    labels.append((char_label, char_states | range_states))
    if range_label.end != char_label:
        # Part of the range strictly above the char.
        labels.append((Range(after(char_label), range_label.end), range_states))
    return labels
def test_group_char():
    """A parenthesised character parses to a ``Group`` around that ``Char``."""
    parsed = from_string("(a)")
    expected = Group(Char("a"))
    assert parsed == expected
def test_maybe_char():
    """A character followed by ``?`` parses to ``Maybe`` of that ``Char``."""
    parsed = from_string("a?")
    expected = Maybe(Char("a"))
    assert parsed == expected
def test_plus_char():
    """A character followed by ``+`` parses to ``Plus`` of that ``Char``."""
    parsed = from_string("a+")
    expected = Plus(Char("a"))
    assert parsed == expected
def test_kleene_star_char():
    """A character followed by ``*`` parses to ``Kleene`` of that ``Char``."""
    parsed = from_string("a*")
    expected = Kleene(Char("a"))
    assert parsed == expected
def test_or_chars():
    """``|`` parses to an ``Or`` of whatever is parsed on each side."""
    # Single characters on both sides.
    single = from_string("a|b")
    assert single == Or(Char("a"), Char("b"))

    # Multi-character alternatives: each side equals its own standalone parse.
    multi = from_string("ab|01")
    assert multi == Or(from_string("ab"), from_string("01"))
def test_concat_chars():
    """Adjacent characters parse to right-nested ``Concat`` nodes."""
    pair = from_string("aa")
    assert pair == Concat(Char("a"), Char("a"))

    # Three characters: the tail is itself a Concat.
    triple = from_string("aba")
    assert triple == Concat(Char("a"), Concat(Char("b"), Char("a")))
def test_plus_char(): assert from_string("a+") == Plus(Char("a")) def test_maybe_char(): assert from_string("a?") == Maybe(Char("a")) def test_group_char(): assert from_string("(a)") == Group(Char("a")) @pytest.mark.parametrize( "regex,expected", [ ("[a]", PosSet([Char("a")])), ("[-]", PosSet([Char("-")])), ("[()]", PosSet([Char("("), Char(")")])), ("[a-z]", PosSet([Range(Char("a"), Char("z"))])), ("[a^]", PosSet([Char("a"), Char("^")])), ("[[]", PosSet([Char("[")])), ("[\\^]", PosSet([Char("^")])), ("[\\]]", PosSet([Char("]")])), ], ) def test_posset(regex, expected): assert from_string(regex) == expected @pytest.mark.parametrize( "regex,expected",