def setup_cgi(interp, params, argv, post_data=None):
    """Build the CGI request configuration from the server parameters.

    ``params`` is read through ``get_param`` / ``all_keys_from``: every key
    it exposes is wrapped and copied into the initial server dict, and
    ``SCRIPT_NAME`` additionally populates ``PHP_SELF``.  ``QUERY_STRING``
    is unpacked into the GET dict.

    A request body is only considered when both ``CONTENT_TYPE`` and
    ``CONTENT_LENGTH`` are present:

    * ``x-www-form-urlencoded`` / ``application/x-www-form-urlencoded``
      bodies are unpacked into the POST dict;
    * ``multipart/form-data`` bodies are handed to ``parse_multipart``,
      which yields the POST and FILES dicts.  If no boundary can be
      extracted a warning is issued and the body is ignored;
    * any other content type triggers a warning and the body is ignored.

    When ``post_data`` is None the body is read from the interpreter's
    stdin stream, using ``CONTENT_LENGTH`` as the read size.  ``argv`` is
    accepted for interface compatibility and is not used here.

    Returns a ``CGIConfig`` built from the GET, POST and FILES arrays, the
    initial server dict and the ``HTTP_COOKIE`` value (None when absent).
    Raises ValueError if ``CONTENT_LENGTH`` is not a valid integer and the
    body has to be measured.
    """
    get = OrderedDict()
    post = OrderedDict()
    files = OrderedDict()
    space = interp.space

    query = get_param(params, 'QUERY_STRING')
    if query is not None:
        unpack_query(get, query, space)

    script_name = get_param(params, 'SCRIPT_NAME')
    initial_server_dict = OrderedDict()
    if script_name is not None:
        initial_server_dict['PHP_SELF'] = space.wrap(script_name)

    cookie = get_param(params, 'HTTP_COOKIE')
    content_length = get_param(params, 'CONTENT_LENGTH')
    content_type_set = get_param(params, 'CONTENT_TYPE')
    content_type = ""
    boundary = ""
    for k in all_keys_from(params):
        initial_server_dict[k] = space.wrap(get_param(params, k))

    if content_type_set is not None and content_length is not None:
        # The media type is everything before the first parameter
        # separator (';', ' ' or ','); it is compared case-insensitively.
        m = search("[; ,]", content_type_set)
        if m:
            start = m.start(0)
            assert start >= 0
            content_type = content_type_set[:start]
            content_type = content_type.lower()
        else:
            content_type = content_type_set.lower()

        if (content_type == 'x-www-form-urlencoded' or
                content_type == 'application/x-www-form-urlencoded'):
            content_length = int(content_length)
            if post_data is None:
                stdin = interp.open_stdin_stream()
                post_data = stdin.read(content_length)
            unpack_query(post, post_data, space)
        elif content_type == 'multipart/form-data':
            m = search("boundary", content_type_set)
            if m:
                end = m.end(0)
                assert end >= 0
                # Skip the character that follows "boundary" (the '=').
                boundary = content_type_set[end + 1:]
            if not boundary:
                # Without a boundary the body cannot be split into parts,
                # so it really is ignored, as the warning states.
                interp.warn("Missing boundary, ignoring post")
            else:
                if post_data is None:
                    # Same fallback as the urlencoded branch: no body was
                    # handed in, so take it from stdin.
                    stdin = interp.open_stdin_stream()
                    post_data = stdin.read(int(content_length))
                fp = RStringIO()
                fp.write(post_data)
                fp.seek(0)
                post, files = parse_multipart(space, fp, boundary,
                                              OrderedDict(), OrderedDict())
                fp.close()
        else:
            interp.warn("Unknown content type: %s, ignoring post" %
                        content_type)

    return CGIConfig(space.new_array_from_rdict(get),
                     space.new_array_from_rdict(post),
                     space.new_array_from_rdict(files),
                     initial_server_dict, cookie)
def trim_comment(source):
    """Return *source* with every ``#`` comment removed.

    A comment runs from ``#`` up to the end of its line (or the end of the
    text).  The newline that ends the line is kept.
    """
    pattern = r'(#.*)(?:\n|\Z)'
    while True:
        found = re.search(pattern, source)
        if found is None:
            return source
        begin, stop = found.span(1)
        # Slice bounds are asserted non-negative before slicing.
        assert begin >= 0 and stop >= 0
        # Drop the comment text, keep everything around it.
        source = source[:begin] + source[stop:]
def trim_multiline(source):
    """Return *source* without whitespace runs that precede a newline.

    Any run of whitespace (blank lines included) that is immediately
    followed by a newline is deleted; that final newline is kept.
    """
    pattern = r'([\s]+)(?:\n)'
    while True:
        found = re.search(pattern, source)
        if found is None:
            return source
        begin, stop = found.span(1)
        # Slice bounds are asserted non-negative before slicing.
        assert begin >= 0 and stop >= 0
        # Cut the whitespace run out, keep the text on both sides.
        source = source[:begin] + source[stop:]
def trim_multiline(source):
    """Strip whitespace runs that are directly followed by a newline.

    This removes trailing whitespace and empty lines; the newline that
    follows each removed run stays in place.
    """
    blank_re = r'([\s]+)(?:\n)'
    hit = re.search(blank_re, source)
    while hit is not None:
        lo, hi = hit.span(1)
        # Slice bounds are asserted non-negative before slicing.
        assert lo >= 0 and hi >= 0
        head = source[:lo]
        tail = source[hi:]
        source = head + tail  # remove empty lines
        hit = re.search(blank_re, source)
    return source
def trim_comment(source):
    """Strip ``#`` comments from *source*.

    Everything from a ``#`` to the end of that line (or of the text) is
    deleted; line breaks themselves are preserved.
    """
    comment_re = r'(#.*)(?:\n|\Z)'
    hit = re.search(comment_re, source)
    while hit is not None:
        lo, hi = hit.span(1)
        # Slice bounds are asserted non-negative before slicing.
        assert lo >= 0 and hi >= 0
        head = source[:lo]
        tail = source[hi:]
        source = head + tail  # remove comments
        hit = re.search(comment_re, source)
    return source
def test_search_star_plus(self): assert re.search('x*', 'axx').span(0) == (0, 0) assert re.search('x*', 'axx').span() == (0, 0) assert re.search('x+', 'axx').span(0) == (1, 3) assert re.search('x+', 'axx').span() == (1, 3) assert re.search('x', 'aaa') == None assert re.match('a*', 'xxx').span(0) == (0, 0) assert re.match('a*', 'xxx').span() == (0, 0) assert re.match('x*', 'xxxa').span(0) == (0, 3) assert re.match('x*', 'xxxa').span() == (0, 3) assert re.match('a+', 'xxx') == None
def rsre_example():
    """Print the results of search, match, split and findall examples."""
    # search(): scan through the string looking for the first location
    # where the pattern produces a match.
    first_span = re.search('x*', 'axx').span(0)
    print(first_span)

    # match(): only succeeds if zero or more characters at the beginning
    # of the string match the pattern.
    anchored = re.match('a+', 'xxx')
    print(anchored)

    # split(): split the string by the occurrences of the pattern.
    pieces = re.split(":", ":a:b::c")
    print(pieces)

    # findall(): all non-overlapping matches of the pattern, as a list of
    # strings.
    colon_runs = re.findall(":+", "a:b::c:::d")
    print(colon_runs)
def f(i):
    """Exercise match, search, findall, finditer and split on one string.

    The subject is "aaaaaa" when *i* is true and "caaaaa" otherwise.  Every
    result (or every item of an iterable result) is printed.  Always
    returns 0.
    """
    s = "aaaaaa" if i else "caaaaa"

    # Single-result calls.
    print(rsre_re.match("(a|b)aa", s))
    print(rsre_re.match("a{4}", s))
    print(rsre_re.search("(a|b)aa", s))
    print(rsre_re.search("a{4}", s))

    # Multi-result calls: print one item per line.
    for found in rsre_re.findall("(a|b)a", s):
        print(found)
    for found in rsre_re.findall("a{2}", s):
        print(found)
    for match in rsre_re.finditer("(a|b)a", s):
        print(match)
    for match in rsre_re.finditer("a{2}", s):
        print(match)
    for piece in rsre_re.split("(a|b)a", s):
        print(piece)
    for piece in rsre_re.split("a{2}", s):
        print(piece)
    return 0
def rsre_example():
    """Print three regex demos: scanf-style groups, a phone book, adverbs."""
    # simulate scanf, %s - %d errors, %d warnings
    print(re.search(r"(\S+) - (\d+) errors, (\d+) warnings",
                    "/usr/sbin/sendmail - 0 errors, 4 warnings").groups())

    # making a phone book: one entry per line, entries separated by one or
    # more newlines.  The line breaks are written as explicit "\n" escapes
    # so the split below always has separators to work with.
    text = ("Ross McFluff: 834.345.1254 155 Elm Street\n"
            "\n"
            "Ronald Heathmore: 892.345.3428 436 Finley Avenue\n"
            "Frank Burger: 925.541.7625 662 South Dogwood Way\n"
            "\n"
            "\n"
            "Heather Albrecht: 548.326.4584 919 Park Place")
    entries = re.split("\n+", text)
    # At most 4 splits per entry, so the street address stays in one piece.
    print([re.split(":? ", entry, 4) for entry in entries])

    # finding all adverbs
    text = "He was carefully disguised but captured quickly by police."
    print(re.findall(r"\w+ly", text))
def test_bug_418626(self): # bugs 418626 at al. -- Testing Greg Chapman's addition of op code # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of # pattern '*?' on a long string. assert re.match('.*?c', 10000*'ab'+'cd').end(0) == 20001 assert re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0) == ( 20003) assert re.match('.*?cd', 20000*'abc'+'de').end(0) == 60001 # non-simple '*?' still used to hit the recursion limit, before the # non-recursive scheme was implemented. assert re.search('(a|b)*?c', 10000*'ab'+'cd').end(0) == 20001
def test_bug_418626(self): # bugs 418626 at al. -- Testing Greg Chapman's addition of op code # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of # pattern '*?' on a long string. assert re.match('.*?c', 10000 * 'ab' + 'cd').end(0) == 20001 assert re.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' + 'cde').end(0) == (20003) assert re.match('.*?cd', 20000 * 'abc' + 'de').end(0) == 60001 # non-simple '*?' still used to hit the recursion limit, before the # non-recursive scheme was implemented. assert re.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0) == 20001
def lex(source):
    """Strip comments and blank whitespace from *source*, then lex it.

    First every ``#`` comment (up to the end of its line) is removed, then
    every whitespace run that directly precedes a newline.  The cleaned
    text is passed to ``lexer.lex`` and its result is returned.
    """
    comments = r'(#.*)(?:\n|\Z)'
    multiline = r'([\s]+)(?:\n)'

    # Pass 1: remove the string parts that are comments.
    while True:
        found = re.search(comments, source)
        if found is None:
            break
        begin, stop = found.span(1)
        assert begin >= 0 and stop >= 0
        source = source[:begin] + source[stop:]

    # Pass 2: remove the string parts that are empty lines.
    while True:
        found = re.search(multiline, source)
        if found is None:
            break
        begin, stop = found.span(1)
        assert begin >= 0 and stop >= 0
        source = source[:begin] + source[stop:]

    return lexer.lex(source)
def lex(source):
    """Clean *source* of comments and empty lines and hand it to the lexer.

    ``#`` comments are deleted up to the end of their line, then whitespace
    runs that are followed by a newline are deleted.  Returns whatever
    ``lexer.lex`` returns for the cleaned text.
    """
    comment_re = r'(#.*)(?:\n|\Z)'
    blank_re = r'([\s]+)(?:\n)'

    hit = re.search(comment_re, source)
    while hit is not None:
        lo, hi = hit.span(1)
        assert lo >= 0 and hi >= 0
        head = source[:lo]
        tail = source[hi:]
        source = head + tail  # remove string part that was a comment
        hit = re.search(comment_re, source)

    hit = re.search(blank_re, source)
    while hit is not None:
        lo, hi = hit.span(1)
        assert lo >= 0 and hi >= 0
        head = source[:lo]
        tail = source[hi:]
        source = head + tail  # remove string part that was an empty line
        hit = re.search(blank_re, source)

    return lexer.lex(source)
def setup_cgi(interp, argv):
    """Build the CGI request configuration from the process environment.

    ``QUERY_STRING`` is unpacked into the GET dict, ``SCRIPT_NAME`` becomes
    ``PHP_SELF`` in the initial server dict, and a non-empty *argv* adds
    ``argc`` / ``argv`` entries.  When both ``CONTENT_TYPE`` and
    ``CONTENT_LENGTH`` are set, a url-encoded form body is read from the
    interpreter's stdin stream and unpacked into the POST dict; any other
    content type only produces a warning.

    Returns a ``CGIConfig`` built from the GET and POST arrays, the initial
    server dict and the ``HTTP_COOKIE`` value (None when unset).
    """
    space = interp.space
    get = OrderedDict()
    post = OrderedDict()

    query_string = os.environ.get('QUERY_STRING')
    if query_string is not None:
        unpack_query(get, query_string, space)

    php_self = os.environ.get('SCRIPT_NAME')
    initial_server_dict = OrderedDict()
    if php_self is not None:
        initial_server_dict['PHP_SELF'] = space.wrap(php_self)
    if argv:
        initial_server_dict['argc'] = space.wrap(len(argv))
        w_args = [space.wrap(arg) for arg in argv]
        initial_server_dict['argv'] = space.new_array_from_list(w_args)

    cookie = os.environ.get('HTTP_COOKIE')
    content_length = os.environ.get('CONTENT_LENGTH')
    content_type = os.environ.get('CONTENT_TYPE')
    if content_type is not None and content_length is not None:
        # Keep only the media type: cut at the first ';', ' ' or ','.
        separator = search("[; ,]", content_type)
        if separator:
            cut = separator.start(0)
            assert cut >= 0
            content_type = content_type[:cut]
        content_type = content_type.lower()

        is_urlencoded = (
            content_type == 'application/x-www-form-urlencoded' or
            content_type == 'x-www-form-urlencoded')
        if not is_urlencoded:
            interp.warn("Unknown content type: %s, ignoring post" %
                        content_type)
        else:
            length = int(content_length)
            stdin = interp.open_stdin_stream()
            body = stdin.read(length)
            unpack_query(post, body, space)

    w_get = space.new_array_from_rdict(get)
    w_post = space.new_array_from_rdict(post)
    return CGIConfig(w_get, w_post, initial_server_dict, cookie)
def test_special_escapes(self): assert re.search(r"\b(b.)\b", "abcd abc bcd bx").group(1) == "bx" assert re.search(r"\B(b.)\B", "abc bcd bc abxd").group(1) == "bx" assert re.search(r"\b(b.)\b", "abcd abc bcd bx", re.LOCALE).group(1) == "bx" assert re.search(r"\B(b.)\B", "abc bcd bc abxd", re.LOCALE).group(1) == "bx" assert re.search(r"\b(b.)\b", "abcd abc bcd bx", re.UNICODE).group(1) == "bx" assert re.search(r"\B(b.)\B", "abc bcd bc abxd", re.UNICODE).group(1) == "bx" assert re.search(r"^abc$", "\nabc\n", re.M).group(0) == "abc" assert re.search(r"^\Aabc\Z$", "abc", re.M).group(0) == "abc" assert re.search(r"^\Aabc\Z$", "\nabc\n", re.M) == None assert re.search(r"\b(b.)\b", u"abcd abc bcd bx").group(1) == "bx" assert re.search(r"\B(b.)\B", u"abc bcd bc abxd").group(1) == "bx" assert re.search(r"^abc$", u"\nabc\n", re.M).group(0) == "abc" assert re.search(r"^\Aabc\Z$", u"abc", re.M).group(0) == "abc" assert re.search(r"^\Aabc\Z$", u"\nabc\n", re.M) == None assert re.search(r"\d\D\w\W\s\S", "1aa! a").group(0) == "1aa! a" assert re.search(r"\d\D\w\W\s\S", "1aa! a", re.LOCALE).group(0) == "1aa! a" assert re.search(r"\d\D\w\W\s\S", "1aa! a", re.UNICODE).group(0) == "1aa! a"
def test_not_literal(self): assert re.search("\s([^a])", " b").group(1) == "b" assert re.search("\s([^a]*)", " bb").group(1) == "bb"
def test_search_coverage(self): assert re.search("\s(b)", " b").group(1) == "b" assert re.search("a\s", "a ").group(0) == "a "