def test_getlower(self): import _sre self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
def test_getlower(self): import _sre assert _sre.getlower(ord('A'), 0) == ord('a') assert _sre.getlower(ord('A'), re.LOCALE) == ord('a') assert _sre.getlower(ord('A'), re.UNICODE) == ord('a') assert re.match("abc", "ABC", re.I).group(0) == "ABC" assert re.match("abc", u"ABC", re.I).group(0) == "ABC"
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append for op, av in pattern: if op in (LITERAL, NOT_LITERAL): if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = lambda x: x skip = len(code); emit(0) _compile_charset(av, flags, code, fixup) code[skip] = len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): if flags & SRE_FLAG_TEMPLATE: raise error, "internal: unsupported template operator" emit(OPCODES[REPEAT]) skip = len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif _simple(av) and op != REPEAT: if op == MAX_REPEAT: emit(OPCODES[REPEAT_ONE]) else: emit(OPCODES[MIN_REPEAT_ONE]) skip = len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip else: emit(OPCODES[REPEAT]) skip = len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = len(code) - skip if op == MAX_REPEAT: emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: if av[0]: emit(OPCODES[MARK]) emit((av[0]-1)*2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((av[0]-1)*2+1) elif op in (SUCCESS, FAILURE): emit(OPCODES[op]) elif op in (ASSERT, ASSERT_NOT): emit(OPCODES[op]) skip = len(code); emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error, "look-behind requires fixed-width pattern" emit(lo) # look behind _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif op is CALL: emit(OPCODES[op]) skip = len(code); emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif op is AT: emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) tail = [] for av in av[1]: skip = len(code); emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(OPCODES[JUMP]) tail.append(len(code)); emit(0) code[skip] = len(code) - skip emit(0) # end of branch for tail in tail: code[tail] = len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit(av-1) else: raise ValueError, ("unsupported operand type", op)
def assertLowerEqual(self, tests, flags): for arg, expected in tests: self.assertEquals(ord(expected), _sre.getlower(ord(arg), flags))
def assert_lower_equal(tests, flags): for arg, expected in tests: assert ord(expected) == _sre.getlower(ord(arg), flags)
def _compile(code, pattern, flags): emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES ASSERT_CODES = _ASSERT_CODES for op, av in pattern: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = _identityfunction skip = _len(code) emit(0) _compile_charset(av, flags, code, fixup) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error, 'internal: unsupported template operator' emit(OPCODES[REPEAT]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(OPCODES[REPEAT_ONE]) else: emit(OPCODES[MIN_REPEAT_ONE]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip else: emit(OPCODES[REPEAT]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2 + 1) elif op in SUCCESS_CODES: emit(OPCODES[op]) elif op in ASSERT_CODES: emit(OPCODES[op]) skip = _len(code) emit(0) if av[0] >= 0: emit(0) else: lo, hi = av[1].getwidth() if lo != hi: raise error, 'look-behind requires fixed-width pattern' emit(lo) _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is CALL: emit(OPCODES[op]) skip = _len(code) emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is AT: emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code) emit(0) _compile(code, av, flags) emit(OPCODES[JUMP]) tailappend(_len(code)) emit(0) code[skip] = _len(code) - skip emit(0) for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit(av - 1) elif op is GROUPREF_EXISTS: emit(OPCODES[op]) emit(av[0] - 1) skipyes = _len(code) emit(0) _compile(code, av[1], flags) if av[2]: emit(OPCODES[JUMP]) skipno = _len(code) emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 else: raise ValueError, ('unsupported operand type', op)
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES ASSERT_CODES = _ASSERT_CODES for op, av in pattern: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = _identityfunction skip = _len(code) emit(0) _compile_charset(av, flags, code, fixup) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator") emit(OPCODES[REPEAT]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(OPCODES[REPEAT_ONE]) else: emit(OPCODES[MIN_REPEAT_ONE]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip else: emit(OPCODES[REPEAT]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2 + 1) elif op in SUCCESS_CODES: emit(OPCODES[op]) elif op in ASSERT_CODES: emit(OPCODES[op]) skip = _len(code) emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error("look-behind requires fixed-width pattern") emit(lo) # look behind _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is CALL: emit(OPCODES[op]) skip = _len(code) emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is AT: emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code) emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(OPCODES[JUMP]) tailappend(_len(code)) emit(0) code[skip] = _len(code) - skip emit(0) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit(av - 1) elif op is GROUPREF_EXISTS: emit(OPCODES[op]) emit(av[0] - 1) skipyes = _len(code) emit(0) _compile(code, av[1], flags) if av[2]: emit(OPCODES[JUMP]) skipno = _len(code) emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 else: raise ValueError("unsupported operand type", op)
#
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES ASSERT_CODES = _ASSERT_CODES # Pyston change: iterating over a SubPattern is fairly expensive: # - SubPattern defines __getitem__ but not __iter__, so we create a seqiter # - for each iteration, we call instance.getitem, which looks up # SubPattern.__getitem__, and calls it # - SubPattern.__getitem__ just does `return self.data[idx]`, which throws an # IndexError to signal that the iteration is done. # All the exceptions and extra layers of indirection add up. # Cut that out by simply iterating over pattern.data, which looks like is # usually a list, and we can use our fast iteration. if isinstance(pattern, sre_parse.SubPattern): pattern_iter = pattern.data else: pattern_iter = pattern for op, av in pattern_iter: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = _identityfunction skip = _len(code) emit(0) _compile_charset(av, flags, code, fixup) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error, "internal: unsupported template operator" emit(OPCODES[REPEAT]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(OPCODES[REPEAT_ONE]) else: emit(OPCODES[MIN_REPEAT_ONE]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip else: emit(OPCODES[REPEAT]) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2 + 1) elif op in SUCCESS_CODES: emit(OPCODES[op]) elif op in ASSERT_CODES: emit(OPCODES[op]) skip = _len(code) emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error, "look-behind requires fixed-width pattern" emit(lo) # look behind _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is CALL: emit(OPCODES[op]) skip = _len(code) emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is AT: emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code) emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(OPCODES[JUMP]) tailappend(_len(code)) emit(0) code[skip] = _len(code) - skip emit(0) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit(av - 1) elif op is GROUPREF_EXISTS: emit(OPCODES[op]) emit(av[0] - 1) skipyes = _len(code) emit(0) _compile(code, av[1], flags) if av[2]: emit(OPCODES[JUMP]) skipno = _len(code) emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 else: raise ValueError, ("unsupported operand type", op)
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES ASSERT_CODES = _ASSERT_CODES if (flags & SRE_FLAG_IGNORECASE and not (flags & SRE_FLAG_LOCALE) and flags & SRE_FLAG_UNICODE): fixes = _ignorecase_fixes else: fixes = None for op, av in pattern: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: lo = _sre.getlower(av, flags) if fixes and lo in fixes: emit(IN_IGNORE) skip = _len(code) emit(0) if op is NOT_LITERAL: emit(NEGATE) for k in (lo, ) + fixes[lo]: emit(LITERAL) emit(k) emit(FAILURE) code[skip] = _len(code) - skip else: emit(OP_IGNORE[op]) emit(lo) else: emit(op) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OP_IGNORE[op]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(op) fixup = None skip = _len(code) emit(0) _compile_charset(av, flags, code, fixup, fixes) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(ANY_ALL) else: emit(ANY) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator %r" % (op, )) elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(REPEAT_ONE) else: emit(MIN_REPEAT_ONE) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(SUCCESS) code[skip] = _len(code) - skip else: emit(REPEAT) skip = _len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: emit(MAX_UNTIL) else: emit(MIN_UNTIL) elif op is SUBPATTERN: if av[0]: emit(MARK) emit((av[0] - 1) * 2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: emit(MARK) emit((av[0] - 1) * 2 + 1) elif op in SUCCESS_CODES: emit(op) elif op in ASSERT_CODES: emit(op) skip = _len(code) emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error("look-behind requires fixed-width pattern") emit(lo) # look behind _compile(code, av[1], flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op is CALL: emit(op) skip = _len(code) emit(0) _compile(code, av, flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op is AT: emit(op) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(av) elif op is BRANCH: emit(op) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code) emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(JUMP) tailappend(_len(code)) emit(0) code[skip] = _len(code) - skip emit(FAILURE) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: emit(op) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(av) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OP_IGNORE[op]) else: emit(op) emit(av - 1) elif op is GROUPREF_EXISTS: emit(op) emit(av[0] - 1) skipyes = _len(code) emit(0) _compile(code, av[1], flags) if av[2]: emit(JUMP) skipno = _len(code) emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 else: raise error("internal: unsupported operand type %r" % (op, ))
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append for op, av in pattern: if op in (LITERAL, NOT_LITERAL): if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = lambda x: x skip = len(code) emit(0) _compile_charset(av, flags, code, fixup) code[skip] = len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): if flags & SRE_FLAG_TEMPLATE: raise error, "internal: unsupported template operator" emit(OPCODES[REPEAT]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif _simple(av) and op == MAX_REPEAT: emit(OPCODES[REPEAT_ONE]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip else: emit(OPCODES[REPEAT]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = len(code) - skip if op == MAX_REPEAT: emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((av[0] - 1) * 2 + 1) elif op in (SUCCESS, FAILURE): emit(OPCODES[op]) elif op in (ASSERT, ASSERT_NOT): emit(OPCODES[op]) skip = len(code) emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error, "look-behind requires fixed-width pattern" emit(lo) # look behind _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif op is CALL: emit(OPCODES[op]) skip = len(code) emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif op is AT: emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) tail = [] for av in av[1]: skip = len(code) emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(OPCODES[JUMP]) tail.append(len(code)) emit(0) code[skip] = len(code) - skip emit(0) # end of branch for tail in tail: code[tail] = len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit(av - 1) else: raise ValueError, ("unsupported operand type", op)
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES ASSERT_CODES = _ASSERT_CODES # Pyston change: iterating over a SubPattern is fairly expensive: # - SubPattern defines __getitem__ but not __iter__, so we create a seqiter # - for each iteration, we call instance.getitem, which looks up # SubPattern.__getitem__, and calls it # - SubPattern.__getitem__ just does `return self.data[idx]`, which throws an # IndexError to signal that the iteration is done. # All the exceptions and extra layers of indirection add up. # Cut that out by simply iterating over pattern.data, which looks like is # usually a list, and we can use our fast iteration. if isinstance(pattern, sre_parse.SubPattern): pattern_iter = pattern.data else: pattern_iter = pattern for op, av in pattern_iter: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = _identityfunction skip = _len(code); emit(0) _compile_charset(av, flags, code, fixup) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error, "internal: unsupported template operator" emit(OPCODES[REPEAT]) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(OPCODES[REPEAT_ONE]) else: emit(OPCODES[MIN_REPEAT_ONE]) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip else: emit(OPCODES[REPEAT]) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: if av[0]: emit(OPCODES[MARK]) emit((av[0]-1)*2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((av[0]-1)*2+1) elif op in SUCCESS_CODES: emit(OPCODES[op]) elif op in ASSERT_CODES: emit(OPCODES[op]) skip = _len(code); emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error, "look-behind requires fixed-width pattern" emit(lo) # look behind _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is CALL: emit(OPCODES[op]) skip = _len(code); emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = _len(code) - skip elif op is AT: emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code); emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(OPCODES[JUMP]) tailappend(_len(code)); emit(0) code[skip] = _len(code) - skip emit(0) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit(av-1) elif op is GROUPREF_EXISTS: emit(OPCODES[op]) emit(av[0]-1) skipyes = _len(code); emit(0) _compile(code, av[1], flags) if av[2]: emit(OPCODES[JUMP]) skipno = _len(code); emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 else: raise ValueError, ("unsupported operand type", op)
def _compile(code, pattern, flags): emit = code.append for ( op, av, ) in pattern: if (op in (LITERAL, NOT_LITERAL)): if (flags & SRE_FLAG_IGNORECASE): emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif (op is IN): if (flags & SRE_FLAG_IGNORECASE): emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = lambda x: x skip = len(code) emit(0) _compile_charset(av, flags, code, fixup) code[skip] = (len(code) - skip) elif (op is ANY): if (flags & SRE_FLAG_DOTALL): emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif (op in (REPEAT, MIN_REPEAT, MAX_REPEAT)): if (flags & SRE_FLAG_TEMPLATE): raise error, 'internal: unsupported template operator' emit(OPCODES[REPEAT]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) elif (_simple(av) and (op == MAX_REPEAT)): emit(OPCODES[REPEAT_ONE]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) else: emit(OPCODES[REPEAT]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = (len(code) - skip) if (op == MAX_REPEAT): emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif (op is SUBPATTERN): if av[0]: emit(OPCODES[MARK]) emit(((av[0] - 1) * 2)) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((((av[0] - 1) * 2) + 1)) elif (op in (SUCCESS, FAILURE)): emit(OPCODES[op]) elif (op in (ASSERT, ASSERT_NOT)): emit(OPCODES[op]) skip = len(code) emit(0) if (av[0] >= 0): emit(0) else: ( lo, hi, ) = av[1].getwidth() if (lo != hi): raise error, 'look-behind requires fixed-width pattern' emit(lo) _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) elif (op is CALL): emit(OPCODES[op]) skip = len(code) emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) elif (op is AT): emit(OPCODES[op]) if (flags & SRE_FLAG_MULTILINE): av = AT_MULTILINE.get(av, av) if (flags & SRE_FLAG_LOCALE): av = AT_LOCALE.get(av, av) elif (flags & SRE_FLAG_UNICODE): av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif (op is BRANCH): emit(OPCODES[op]) tail = [] for av in av[1]: skip = len(code) emit(0) _compile(code, av, flags) emit(OPCODES[JUMP]) tail.append(len(code)) emit(0) code[skip] = (len(code) - skip) emit(0) for tail in tail: code[tail] = (len(code) - tail) elif (op is CATEGORY): emit(OPCODES[op]) if (flags & SRE_FLAG_LOCALE): av = CH_LOCALE[av] elif (flags & SRE_FLAG_UNICODE): av = CH_UNICODE[av] emit(CHCODES[av]) elif (op is GROUPREF): if (flags & SRE_FLAG_IGNORECASE): emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit((av - 1)) else: raise ValueError, ('unsupported operand type', op)
def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES ASSERT_CODES = _ASSERT_CODES if (flags & SRE_FLAG_IGNORECASE and not (flags & SRE_FLAG_LOCALE) and flags & SRE_FLAG_UNICODE and not (flags & SRE_FLAG_ASCII)): fixes = _ignorecase_fixes else: fixes = None for op, av in pattern: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: lo = _sre.getlower(av, flags) if fixes and lo in fixes: emit(IN_IGNORE) skip = _len(code); emit(0) if op is NOT_LITERAL: emit(NEGATE) for k in (lo,) + fixes[lo]: emit(LITERAL) emit(k) emit(FAILURE) code[skip] = _len(code) - skip else: emit(OP_IGNORE[op]) emit(lo) else: emit(op) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OP_IGNORE[op]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: emit(op) fixup = None skip = _len(code); emit(0) _compile_charset(av, flags, code, fixup, fixes) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(ANY_ALL) else: emit(ANY) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator %r" % (op,)) elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(REPEAT_ONE) else: emit(MIN_REPEAT_ONE) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(SUCCESS) code[skip] = _len(code) - skip else: emit(REPEAT) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: emit(MAX_UNTIL) else: emit(MIN_UNTIL) elif op is SUBPATTERN: group, add_flags, del_flags, p = av if group: emit(MARK) emit((group-1)*2) # _compile_info(code, p, (flags | add_flags) & ~del_flags) _compile(code, p, (flags | add_flags) & ~del_flags) if group: emit(MARK) emit((group-1)*2+1) elif op in SUCCESS_CODES: emit(op) elif op in ASSERT_CODES: emit(op) skip = _len(code); emit(0) if av[0] >= 0: emit(0) # look ahead else: lo, hi = av[1].getwidth() if lo != hi: raise error("look-behind requires fixed-width pattern") emit(lo) # look behind _compile(code, av[1], flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op is CALL: emit(op) skip = _len(code); emit(0) _compile(code, av, flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op is AT: emit(op) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif (flags & SRE_FLAG_UNICODE) and not (flags & SRE_FLAG_ASCII): av = AT_UNICODE.get(av, av) emit(av) elif op is BRANCH: emit(op) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code); emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) emit(JUMP) tailappend(_len(code)); emit(0) code[skip] = _len(code) - skip emit(FAILURE) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: emit(op) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif (flags & SRE_FLAG_UNICODE) and not (flags & SRE_FLAG_ASCII): av = CH_UNICODE[av] emit(av) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OP_IGNORE[op]) else: emit(op) emit(av-1) elif op is GROUPREF_EXISTS: emit(op) emit(av[0]-1) skipyes = _len(code); emit(0) _compile(code, av[1], flags) if av[2]: emit(JUMP) skipno = _len(code); emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 else: raise error("internal: unsupported operand type %r" % (op,))
def fixup(literal, flags=flags): return _sre.getlower(literal, flags)
def _compile(code, pattern, flags): emit = code.append for (op, av,) in pattern: if (op in (LITERAL, NOT_LITERAL)): if (flags & SRE_FLAG_IGNORECASE): emit(OPCODES[OP_IGNORE[op]]) emit(_sre.getlower(av, flags)) else: emit(OPCODES[op]) emit(av) elif (op is IN): if (flags & SRE_FLAG_IGNORECASE): emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags = flags): return _sre.getlower(literal, flags) else: emit(OPCODES[op]) fixup = lambda x:x skip = len(code) emit(0) _compile_charset(av, flags, code, fixup) code[skip] = (len(code) - skip) elif (op is ANY): if (flags & SRE_FLAG_DOTALL): emit(OPCODES[ANY_ALL]) else: emit(OPCODES[ANY]) elif (op in (REPEAT, MIN_REPEAT, MAX_REPEAT)): if (flags & SRE_FLAG_TEMPLATE): raise error, 'internal: unsupported template operator' emit(OPCODES[REPEAT]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) elif (_simple(av) and (op == MAX_REPEAT)): emit(OPCODES[REPEAT_ONE]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) else: emit(OPCODES[REPEAT]) skip = len(code) emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = (len(code) - skip) if (op == MAX_REPEAT): emit(OPCODES[MAX_UNTIL]) else: emit(OPCODES[MIN_UNTIL]) elif (op is SUBPATTERN): if av[0]: emit(OPCODES[MARK]) emit(((av[0] - 1) * 2)) _compile(code, av[1], flags) if av[0]: emit(OPCODES[MARK]) emit((((av[0] - 1) * 2) + 1)) elif (op in (SUCCESS, FAILURE)): emit(OPCODES[op]) elif (op in (ASSERT, ASSERT_NOT)): emit(OPCODES[op]) skip = len(code) emit(0) if (av[0] >= 0): emit(0) else: (lo, hi,) = av[1].getwidth() if (lo != hi): raise error, 'look-behind requires fixed-width pattern' emit(lo) _compile(code, av[1], flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) elif (op is CALL): emit(OPCODES[op]) skip = len(code) emit(0) _compile(code, av, flags) emit(OPCODES[SUCCESS]) code[skip] = (len(code) - skip) elif (op is AT): emit(OPCODES[op]) if (flags & SRE_FLAG_MULTILINE): av = AT_MULTILINE.get(av, av) if (flags & SRE_FLAG_LOCALE): av = AT_LOCALE.get(av, av) elif (flags & SRE_FLAG_UNICODE): av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif (op is BRANCH): emit(OPCODES[op]) tail = [] for av in av[1]: skip = len(code) emit(0) _compile(code, av, flags) emit(OPCODES[JUMP]) tail.append(len(code)) emit(0) code[skip] = (len(code) - skip) emit(0) for tail in tail: code[tail] = (len(code) - tail) elif (op is CATEGORY): emit(OPCODES[op]) if (flags & SRE_FLAG_LOCALE): av = CH_LOCALE[av] elif (flags & SRE_FLAG_UNICODE): av = CH_UNICODE[av] emit(CHCODES[av]) elif (op is GROUPREF): if (flags & SRE_FLAG_IGNORECASE): emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) emit((av - 1)) else: raise ValueError, ('unsupported operand type', op)