def test_regularexpression_compile_unclosed_charclass_exception():
    """Compiling a pattern with an unterminated character class fails.

    The failure must surface as a ``ValueError`` whose first argument
    contains the text ``regex parse error``.
    """
    unclosed_pattern = "[0123"  # the character class is never closed
    with pytest.raises(ValueError) as raised:
        regular.compile(unclosed_pattern)

    assert raised.type is ValueError
    message = raised.value.args[0]
    assert "regex parse error" in message
def test_match_as_str_method():
    """``Match.as_str`` returns the matched text of the first hit.

    Two different patterns are searched in the same text; each result must
    be a ``regular.Match`` and report the first matching character.
    """
    regex = regular.compile("[01]")
    regex2 = regular.compile("[34]")
    text = "10234510"
    m = regex.find(text)
    m2 = regex2.find(text)
    assert type(m) is regular.Match
    # Check the second result's type as well, mirroring the range test, so a
    # missing match is reported as a type failure rather than an
    # AttributeError on the as_str() call below.
    assert type(m2) is regular.Match
    assert m.as_str() == "1"
    assert m2.as_str() == "3"
def test_regularexpression_compile_exception_unrecognized_escape_raw_string():
    """An unnecessary escape inside a raw-string pattern is a parse error.

    The pattern escapes a forward slash, which does not need escaping.
    Compilation must raise ``ValueError`` carrying ``regex parse error``.
    """
    # A raw string is used on purpose: outside of a raw string Python itself
    # emits a deprecation warning for this escape.
    needless_escape = r"\/\d"
    with pytest.raises(ValueError) as raised:
        regular.compile(needless_escape)

    assert raised.type is ValueError
    message = raised.value.args[0]
    assert "regex parse error" in message
def test_regularexpression_richcmp_eq_method():
    """``==`` on compiled expressions compares by pattern, not identity."""
    first = regular.compile("[^01]")
    twin = regular.compile("[^01]")
    different = regular.compile("[10101]")

    for compiled in (first, twin, different):
        assert type(compiled) is RegularExpression

    # Same pattern text: equal, yet two distinct objects.
    assert first == twin
    assert first is not twin
    # Different pattern text: the comparison yields exactly False.
    assert (first == different) is False
def test_match_range_method():
    """``Match.range`` returns the ``(start, end)`` tuple of the first hit."""
    haystack = "10234510"
    binary_hit = regular.compile("[01]").find(haystack)
    three_four_hit = regular.compile("[34]").find(haystack)

    assert type(binary_hit) is regular.Match
    assert type(three_four_hit) is regular.Match

    # "1" sits at index 0 and "3" at index 3; the end index is exclusive.
    assert binary_hit.range() == (0, 1)
    assert three_four_hit.range() == (3, 4)
def test_regularexpression_richcmp_ne_method():
    """``!=`` on compiled expressions compares by pattern, not identity."""
    first = regular.compile("[10101]")
    different = regular.compile("[^01]")
    twin = regular.compile("[10101]")

    for compiled in (first, different, twin):
        assert type(compiled) is RegularExpression

    # Different pattern text: unequal.
    assert first != different
    # Same pattern text: the inequality yields exactly False.
    assert (first != twin) is False
    assert first is not different
def test_regularexpression_split_iter_no_match():
    """``split_iter`` yields the whole text once when nothing matches."""
    splitter = regular.compile(r"[01]")
    text = "a b \t c\td e"

    pieces = splitter.split_iter(text)
    # The only item is the unsplit input string.
    assert next(pieces) == text
    # After that single item the iterator is exhausted.
    with pytest.raises(StopIteration):
        next(pieces)
def test_regularexpression_find_iter():
    """``find_iter`` yields every match in order through all iteration styles.

    Covers manual ``next`` calls (including exhaustion), a ``for`` loop, and
    conversion to a list.
    """
    pattern = regular.compile("[01]")
    haystack = "0123410"

    # Manual stepping with next(): four matches, then StopIteration.
    matches = pattern.find_iter(haystack)
    assert type(matches) is regular.MatchesIterator
    expected_hits = [(0, 1, "0"), (1, 2, "1"), (5, 6, "1"), (6, 7, "0")]
    for start, end, matched in expected_hits:
        hit = next(matches)
        assert (hit.start, hit.end, hit.text) == (start, end, matched)
    with pytest.raises(StopIteration):
        next(matches)

    # A for loop over a fresh iterator yields Match objects.
    assert all(
        type(hit) is regular.Match for hit in pattern.find_iter(haystack)
    )

    # Casting a fresh iterator to a list collects all four matches.
    collected = list(pattern.find_iter(haystack))
    assert len(collected) == 4
    assert all(type(hit) is regular.Match for hit in collected)
def test_match_richcmp_ne_method():
    """``!=`` on matches considers both the matched text and its location."""
    binary = regular.compile("[01]")
    three_four = regular.compile("[34]")
    binary_twin = regular.compile("[01]")
    text = "01234501"
    shifted_text = "23450101"

    hit = binary.find(text)
    other_hit = three_four.find(text)
    twin_hit = binary_twin.find(text)
    # Same matched string as `hit`, found at a different position.
    shifted_hit = binary.find(shifted_text)

    for found in (hit, other_hit, twin_hit, shifted_hit):
        assert type(found) is Match

    # Equal pattern, text and position: the inequality yields exactly False.
    assert (hit != twin_hit) is False
    assert hit != other_hit
    # Identical matched text alone does not make two matches equal.
    assert shifted_hit.text == hit.text
    assert hit != shifted_hit
def test_regularexpression_find():
    """``find`` reports the position and text of a 13-letter word."""
    test_string = "I categorically deny having triskaidekaphobia."
    hit = regular.compile(r"\b\w{13}\b").find(test_string)

    assert hit.start == 2
    assert hit.end == 15
    assert hit.range() == (2, 15)
    assert hit.text == "categorically"
    # start/end are usable directly as slice bounds on the searched string.
    sliced = test_string[hit.start:hit.end]
    assert sliced == "categorically"
def test_regularexpression_splitn():
    """``splitn`` returns at most the requested number of pieces as a list."""
    splitter = regular.compile(r"[ \t]+")
    text = "a b \t c\td e"

    # Limit of 2: one split, the remainder is left untouched.
    two_pieces = splitter.splitn(text, 2)
    assert type(two_pieces) is list
    assert two_pieces == ["a", "b \t c\td e"]

    # Limit of 3: two splits, the remainder is left untouched.
    three_pieces = splitter.splitn(text, 3)
    assert type(three_pieces) is list
    assert three_pieces == ["a", "b", "c\td e"]

    # Limit of 0: nothing is returned.
    assert splitter.splitn(text, 0) == []
def test_regularexpression_split_iter():
    """``split_iter`` lazily yields each whitespace-separated piece as str."""
    splitter = regular.compile(r"[ \t]+")
    text = "a b \t c\td e"

    # Manual stepping: five pieces in order, then exhaustion.
    pieces = splitter.split_iter(text)
    for expected in ("a", "b", "c", "d", "e"):
        assert next(pieces) == expected
    with pytest.raises(StopIteration):
        next(pieces)

    # Iterating a fresh iterator yields plain strings.
    assert all(type(piece) is str for piece in splitter.split_iter(text))
def test_regularexpression_find_iter_no_match():
    """``find_iter`` behaves as an empty iterator when nothing matches."""
    pattern = regular.compile("[ab]")
    haystack = "01234510"

    # The iterator type is still returned, and it is exhausted immediately.
    empty_iter = pattern.find_iter(haystack)
    assert type(empty_iter) is regular.MatchesIterator
    with pytest.raises(StopIteration):
        next(empty_iter)

    # A for loop over zero matches completes without raising.
    for _unused in pattern.find_iter(haystack):
        pass

    # Casting zero matches to a list does not raise and gives an empty list.
    assert len(list(pattern.find_iter(haystack))) == 0
def test_regularexpression_find_all():
    """``find_all`` returns a list holding every match in order."""
    found = regular.compile("[01]").find_all("0123410")

    # The result is a plain list of four regular.Match objects.
    assert type(found) is list
    assert len(found) == 4
    assert all(type(hit) is regular.Match for hit in found)

    # Each entry carries the expected position and matched text.
    expected_hits = [(0, 1, "0"), (1, 2, "1"), (5, 6, "1"), (6, 7, "0")]
    for index, (start, end, matched) in enumerate(expected_hits):
        hit = found[index]
        assert (hit.start, hit.end, hit.text) == (start, end, matched)
def profile():
    """Profile repeated ``replace_all`` calls and print the report.

    Reads ``1468-6708-3-4.txt`` relative to the current working directory,
    then, under ``cProfile``, compiles ``[Tt]o`` and calls
    ``regex.replace_all(text, "01")`` 10000 times. The collected statistics
    are printed to stdout with directory names stripped, sorted by the
    ``time`` key.
    """
    # --------------------------------------------------------------------------
    # Setup (not profiled)
    # --------------------------------------------------------------------------
    pr = cProfile.Profile()
    with open("1468-6708-3-4.txt", "r") as f:
        text = f.read()

    # --------------------------------------------------------------------------
    # Profiled code block
    # --------------------------------------------------------------------------
    # NOTE: the original carried commented-out stdlib calls,
    # re.search("[Tt]o", text) and re.sub("[Tt]o", "01", text), presumably
    # as baselines to swap in for comparison.
    pr.enable()
    try:
        regex = regular.compile("[Tt]o")
        for _ in range(10000):
            regex.replace_all(text, "01")
    finally:
        # Always stop the profiler, even if the profiled code raises.
        pr.disable()

    # --------------------------------------------------------------------------
    # Report
    # --------------------------------------------------------------------------
    s = StringIO()
    # strip_dirs() resets any earlier ordering, so only one sort is applied.
    ps = pstats.Stats(pr, stream=s)
    ps.strip_dirs().sort_stats("time").print_stats()
    print(s.getvalue())
def test_regularexpression_find_no_match():
    """``find`` returns ``None`` when the pattern occurs nowhere in the text."""
    test_string = "I categorically deny having triskaidekaphobia."
    result = regular.compile("[01]").find(test_string)
    assert result is None
def test_regularexpression_replace():
    """``replace`` removes the run of non-binary digits from the input."""
    non_binary_run = regular.compile("[^01]+")
    replaced = non_binary_run.replace("1078910", "")
    assert replaced == "1010"
def test_regularexpression_as_str():
    """``as_str`` returns the pattern text the expression was compiled from."""
    pattern_text = "[^01]"
    compiled = regular.compile(pattern_text)
    assert compiled.as_str() == pattern_text
def test_regularexpression_compile_success():
    """Compiling a well-formed pattern completes without raising."""
    valid_pattern = "[^01]"
    # The test passes as long as this call does not raise.
    regular.compile(valid_pattern)
def test_regularexpression_splitn_no_match():
    """``splitn`` returns the whole text as one item when nothing matches."""
    splitter = regular.compile(r"[01]")
    text = "a b \t c\td e"

    pieces = splitter.splitn(text, 3)
    assert type(pieces) is list
    # The single element is the unsplit input string.
    assert pieces == [text]
def test_regularexpression_replace_capture_groups_escapes():
    """Braced group references can be followed directly by literal text.

    ``${first}`` is written with braces so the underscore after it is kept
    as literal output; ``$second`` uses the unbraced form.
    """
    two_words = regular.compile(r"(?P<first>\w+)\s+(?P<second>\w+)")
    template = "${first}_$second"
    assert two_words.replace("deep fried", template) == "deep_fried"
def test_regularexpression_split():
    """``split`` returns a list of the pieces between whitespace runs."""
    splitter = regular.compile(r"[ \t]+")
    pieces = splitter.split("a b \t c\td e")

    assert type(pieces) is list
    assert pieces == ["a", "b", "c", "d", "e"]
def test_regularexpression_is_match_false():
    """``is_match`` is exactly ``False`` when the text lacks the pattern."""
    three_binary_digits = regular.compile("[01]{3}")
    # Only a single "0" is present, so three in a row cannot match.
    outcome = three_binary_digits.is_match("This string includes 0")
    assert outcome is False
def test_regularexpression_replacen_no_match():
    """``replacen`` returns the input unchanged when nothing matches."""
    lowercase = regular.compile("[a-z]")
    digits_only = "1078910"
    assert lowercase.replacen(digits_only, 2, "") == digits_only
def test_regularexpression_replacen():
    """``replacen`` with a limit of 2 replaces only the first two matches."""
    binary_digit = regular.compile("[01]")
    # The leading "1" and "0" are removed; the trailing "10" is kept.
    replaced = binary_digit.replacen("1078910", 2, "")
    assert replaced == "78910"
def test_regularexpression_find_all_no_match():
    """``find_all`` returns an empty list when nothing matches."""
    found = regular.compile("[ab]").find_all("0123410")

    assert type(found) is list
    assert len(found) == 0
def test_regularexpression_compile_fail():
    """Compiling a pattern made up solely of backslashes must fail.

    The expected exception is ``ValueError``, matching the other
    compile-failure tests in this module. Expecting the bare ``Exception``
    base class would let any unrelated error (for example a ``NameError``
    from a broken import) pass the test.
    """
    with pytest.raises(ValueError):
        regular.compile("\\\\\\\\\\\\\\")
def test_regularexpression_replace_n_1():
    """``replace`` performs a single replacement, not one per match."""
    binary_digit = regular.compile("[01]")
    # Only the leading "1" is removed; later matches stay in place.
    replaced = binary_digit.replace("1078910", "")
    assert replaced == "078910"
def test_regularexpression_replace_no_match():
    """``replace`` returns the input unchanged when nothing matches."""
    digit = regular.compile(r"\d")
    letters_only = "abcdefg"
    assert digit.replace(letters_only, "") == letters_only
def test_regularexpression_replace_capture_groups():
    """Named capture groups can be reordered through the replacement text."""
    last_comma_first = regular.compile(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)")
    template = "$first $last"
    reordered = last_comma_first.replace("Springsteen, Bruce", template)
    assert reordered == "Bruce Springsteen"