class StateSar:
    """State-gated wrapper around a single ``RegexpSar`` matcher.

    Every regexp is registered together with a state; its callback is
    only invoked while the wrapper's current state equals that state.
    """

    def __init__(self, init_state):
        """Create the wrapper with ``init_state`` as the current state."""
        super().__init__()
        self._sar = RegexpSar()
        self._state = init_state
        # The initial state is a legitimate target for set_state() even
        # when no regexp has been registered for it.
        self._all_states = {init_state}

    def add_regexp(self, state, regexp, regexp_callback):
        """Register ``regexp`` so that it only fires while in ``state``.

        ``regexp_callback`` is called as ``regexp_callback(from_pos, to_pos)``.
        """
        self._all_states.add(state)

        def inner_callback(from_pos, to_pos):
            # Matches reported while another state is active are ignored.
            if self._state == state:
                regexp_callback(from_pos, to_pos)
                # NOTE(review): the resume is issued only when the user
                # callback fired -- confirm against the intended layout.
                self._sar.continue_from(to_pos)

        self._sar.add_regexp(regexp, inner_callback)

    def match(self, match_str):
        """Run all registered regexps over ``match_str``."""
        self._sar.match(match_str)

    def set_state(self, new_state):
        """Switch the current state.

        Raises:
            ValueError: if ``new_state`` is neither the initial state nor
                a state passed to ``add_regexp``.
        """
        if new_state not in self._all_states:
            raise ValueError("Unknown state: " + str(new_state))
        self._state = new_state

    def get_state(self):
        """Return the current state."""
        return self._state
def test18():
    """Two titled patterns, each resuming after its own match.

    Expects exactly two recorded matches: "abbbbc" for the first pattern
    and "123" for the second, in that order.
    """
    text = "first abbbbc second 123 end"
    recorded = []
    sar = RegexpSar()

    def make_recorder(title):
        # A factory binds ``title`` per pattern; a closure created directly
        # in the loop body would only ever see the last title.
        def record(from_pos, to_pos):
            recorded.append(title + ": " + text[from_pos:to_pos])
            sar.continue_from(to_pos)
        return record

    titled_patterns = (
        ("ab+c", "First Match"),
        ("\\d+", "Second Match"),
    )
    for pattern, title in titled_patterns:
        sar.add_regexp(pattern, make_recorder(title))

    sar.match(text)

    validate(2, len(recorded))
    validate("First Match: abbbbc", recorded[0])
    validate("Second Match: 123", recorded[1])
def test14():
    """Cut out the text between the 5th and 6th ':' of a passwd-style line.

    The callback counts ':' matches, remembers where the 5th one ends and
    where the 6th one starts, then stops matching.
    """
    line = "backup:x:34:34:backup:/var/backups:/usr/sbin/nologin"
    wanted_colon = 6
    sar = RegexpSar()
    colons_seen = VarPointer()
    field_start = VarPointer()
    field_end = VarPointer()

    def on_colon(from_pos, to_pos):
        colons_seen.append(1)
        sar.continue_from(to_pos)
        count = colons_seen.val
        if count == wanted_colon - 1:
            # The field begins right after this separator.
            field_start.set(to_pos)
        elif count == wanted_colon:
            # The field ends where the next separator begins.
            field_end.set(from_pos)
            sar.stop_match()

    sar.add_regexp(":", on_colon)
    sar.match(line)

    if not (field_end.val > 0):
        # fail on purpose
        validate(0 == 1, "no match found")
        return
    validate("/var/backups", line[field_start.val:field_end.val])
def test5():
    """A literal pattern embedded in a longer string fires its callback once."""
    hits = VarPointer()
    sar = RegexpSar()

    def count_hit(from_pos, to_pos):
        return hits.append(1)

    sar.add_regexp("abcd", count_hit)
    sar.match("qabcdef")
    validate(1, hits.val)
def test10():
    """``\\w+`` without ``continue_from``: expects a match from each start index.

    For "abc" the recorded slices are expected to be "abc", "bc", "c".
    """
    text = "abc"
    slices = []
    sar = RegexpSar()

    def record(from_pos, to_pos):
        return slices.append(text[from_pos:to_pos])

    sar.add_regexp("\\w+", record)
    sar.match(text)

    # Index access (not zip) so a missing match still raises as before.
    for index, expected in enumerate(("abc", "bc", "c")):
        validate(expected, slices[index])
    validate(3, len(slices))
def test15():
    """Two patterns sharing one callback; the callback must have set the flag."""
    flag = VarPointer()
    sar = RegexpSar()

    def mark(from_pos, to_pos):
        flag.set(1)

    for pattern in ("\\d+", "\\w+"):
        sar.add_regexp(pattern, mark)

    sar.match("123abc")
    validate(1, flag.val)
def test11():
    """``\\a+`` on "123abc456", resuming one past each match end.

    Expects a single recorded match, "abc".
    """
    text = "123abc456"
    found = []
    sar = RegexpSar()

    def record_and_skip(from_pos, to_pos):
        found.append(text[from_pos:to_pos])
        resume_at = to_pos + 1
        sar.continue_from(resume_at)

    sar.add_regexp("\\a+", record_and_skip)
    sar.match(text)

    validate("abc", found[0])
    validate(1, len(found))
def test17():
    """At least one of ``\\d+`` / ``\\a+`` must fire on "mm abc nn"."""
    flag = VarPointer()
    sar = RegexpSar()

    def mark(from_pos, to_pos):
        flag.set(1)

    for pattern in ("\\d+", "\\a+"):
        sar.add_regexp(pattern, mark)
    sar.match("mm abc nn")

    if flag.val == 1:
        return
    # Deliberately failing validation: the callback never ran.
    validate(False, "regexp should match")
def test9():
    """``\\d+`` on an all-digit string, stopping at the first callback.

    The recorded span is expected to cover the whole input.
    """
    text = "1234"
    span_from = VarPointer()
    span_to = VarPointer()
    sar = RegexpSar()

    def capture_and_stop(from_pos, to_pos):
        span_from.set(from_pos)
        span_to.set(to_pos)
        sar.stop_match()

    sar.add_regexp("\\d+", capture_and_stop)
    sar.match(text)

    captured = text[span_from.val:span_to.val]
    validate(text, captured)
def test22():
    """Find a digit that directly follows an alpha character.

    One callback tracks the last ``\\a`` position, the other records a
    ``\\d`` position when it is exactly one past it. Expects position 5
    in "aa bb2cc dd".
    """
    sar = RegexpSar()
    last_alpha = VarPointer()
    last_alpha.val = -1  # sentinel: no alpha character seen yet
    anchor = VarPointer()

    def on_alpha(from_pos, to_pos):
        return last_alpha.set(from_pos)

    def on_digit(digit_pos, to_pos):
        if last_alpha.val == -1:
            return
        if digit_pos - last_alpha.val == 1:
            anchor.set(digit_pos)

    sar.add_regexp("\\a", on_alpha)
    sar.add_regexp("\\d", on_digit)
    sar.match("aa bb2cc dd")
    validate(5, anchor.val)
def test21():
    """Get a match only at a certain position.

    The outer matcher finds ``a+`` and, from its callback, runs a second
    matcher with ``match_at`` at the end of that match, then stops.
    Expects the inner ``b+c`` callback to have fired once.
    """
    text = "aaaaaaaaaaaaaaaaaaaaaaaaaabbbc"
    inner_hits = VarPointer()
    outer = RegexpSar()
    inner = RegexpSar()

    def count_inner(from_pos, to_pos):
        return inner_hits.append(1)

    def on_outer_match(from_pos, to_pos):
        inner.match_at(text, to_pos)
        outer.stop_match()

    inner.add_regexp("b+c", count_inner)
    outer.add_regexp("a+", on_outer_match)
    outer.match(text)

    validate(1, inner_hits.val)
def test16():
    """``\\d+`` and ``\\a+`` share a callback that resumes one past each match.

    On "123abc" the recorded slices are expected to be "123" then "bc".
    """
    text = "123abc"
    found = []
    sar = RegexpSar()

    def record_and_skip(from_pos, to_pos):
        found.append(text[from_pos:to_pos])
        sar.continue_from(to_pos + 1)

    for pattern in ("\\d+", "\\a+"):
        sar.add_regexp(pattern, record_and_skip)
    sar.match(text)

    validate("123", found[0])
    validate("bc", found[1])
    validate(2, len(found))
def match_test(all_regexp, match_string, match_expected):
    """Assert which of ``all_regexp`` fire on ``match_string``.

    The pattern at index ``i`` adds ``2**i`` to a running total every time
    its callback is invoked; the total must equal ``match_expected``.

    Raises:
        AssertionError: if the total differs from ``match_expected``.
    """
    sar = RegexpSar()
    total = 0

    def make_callback(weight):
        # A factory binds ``weight`` per pattern; a closure created in the
        # loop body would see only the value from the last iteration.
        def on_match(from_pos, to_pos):
            nonlocal total
            total += weight
        return on_match

    for index, pattern in enumerate(all_regexp):
        sar.add_regexp(pattern, make_callback(1 << index))

    sar.match(match_string)
    assert match_expected == total, f"Match fail for [{', '.join(all_regexp)}] in >>{match_string}<<"
def test13():
    """``\\d+`` and ``\\w+`` share a callback that resumes at each match end.

    On "123abc" the recorded slices are expected to be "123" and "123abc".
    """
    text = "123abc"
    found = []
    sar = RegexpSar()

    def record_and_resume(from_pos, to_pos):
        found.append(text[from_pos:to_pos])
        sar.continue_from(to_pos)

    for pattern in ("\\d+", "\\w+"):
        sar.add_regexp(pattern, record_and_resume)
    sar.match(text)

    validate("123", found[0])
    # TODO check if this is a real error?
    validate("123abc", found[1])
    validate(2, len(found))
def test7():
    """Count literal matches, first unrestricted and then stopping after two."""
    text = "qabcdeabcdkabcdf"

    # Phase 1: every occurrence is counted.
    all_hits = VarPointer()
    counting_sar = RegexpSar()

    def count_hit(from_pos, to_pos):
        return all_hits.append(1)

    counting_sar.add_regexp("abcd", count_hit)
    counting_sar.match(text)
    validate(3, all_hits.val)

    # Phase 2: the callback stops the matcher on its second invocation.
    limited_hits = VarPointer()
    stopping_sar = RegexpSar()

    def count_until_two(from_pos, to_pos):
        limited_hits.append(1)
        if limited_hits.val == 2:
            stopping_sar.stop_match()

    stopping_sar.add_regexp("abcd", count_until_two)
    stopping_sar.match(text)
    validate(2, limited_hits.val)
def test19():
    """Get the third match and stop.

    Each ``\\w+`` callback bumps a counter; the first two resume at the
    match end, the third stores its slice and stops the matcher.
    """
    text = "aa11 bb22 cc33 dd44"
    seen = VarPointer()
    third = VarPointer()
    sar = RegexpSar()

    def on_word(from_pos, to_pos):
        seen.append(1)
        if seen.val != 3:
            sar.continue_from(to_pos)
            return
        third.val = text[from_pos:to_pos]
        sar.stop_match()

    sar.add_regexp("\\w+", on_word)
    sar.match(text)

    validate(3, seen.val)
    validate("cc33", third.val)
def test8():
    """Callback positions delimit the match; the matcher is reusable.

    The same ``RegexpSar`` instance is run on a second string after the
    recorded positions have been reset.
    """
    pattern = "abcd"
    text = "qabcdef"
    start = VarPointer()
    stop = VarPointer()
    sar = RegexpSar()

    def capture(from_pos, to_pos):
        start.set(from_pos)
        stop.set(to_pos)

    sar.add_regexp(pattern, capture)

    # First run.
    sar.match(text)
    validate(1, start.val)
    validate(5, stop.val)
    validate(pattern, text[start.val:stop.val])

    # Second run on different input with cleared positions.
    start.set(0)
    stop.set(0)
    sar.match("qqqqabcdttt")
    validate(4, start.val)
    validate(8, stop.val)
def test3():
    """A literal pattern absent from the input must never fire its callback."""
    hits = VarPointer()
    sar = RegexpSar()

    def count_hit(from_pos, to_pos):
        return hits.append(1)

    sar.add_regexp("abcd", count_hit)
    sar.match("qqqqqqqqqqqq")
    validate(0, hits.val)
# reset words in sentence to false is_alice_in_sentence = False is_cat_in_sentence = False is_rabbit_in_sentence = False # get Alice In Wonderland book content with open("./alice_in_wonderland.txt", "r") as alice_book: text = alice_book.read() # add alice/cat/rabbit regexps sar.add_regexp('\\^\\walice\\^\\w', find_alice) sar.add_regexp('\\^\\wAlice\\^\\w', find_alice) sar.add_regexp('\\^\\wcat\\^\\w', find_cat) sar.add_regexp('\\^\\wCat\\^\\w', find_cat) sar.add_regexp('\\^\\wrabbit\\^\\w', find_rabbit) sar.add_regexp('\\^\\wRabbit\\^\\w', find_rabbit) # add end of sentence regexps, # NOTE that they point to the same callback for cur_re in ['\\.', '\\?', '!', ';']: sar.add_regexp(cur_re, end_of_sentence) # run the regexps on the text sar.match(text) # the matching has completed, show the results: print("Alice + Cat Matches: " + str(alice_and_cat_position)) print("Alice + Rabbit Matches: " + str(alice_and_rabbit_position))
from regexp_sar import RegexpSar

# Minimal example: two patterns over one string, each printing its matches.
sar = RegexpSar()
match_str = "hello world abc"

# Raw string: '\w' in a plain literal is an invalid escape sequence and
# triggers a warning on current Python versions. The value is unchanged.
sar.add_regexp(
    r'\w+',
    lambda from_pos, to_pos: print("Match Word: " + match_str[from_pos: to_pos]))

sar.add_regexp(
    'world',
    lambda from_pos, to_pos: print("Found world from: " + str(
        from_pos) + " to: " + str(to_pos)))

'''
Output:
Match Word: hello
Match Word: ello
Match Word: llo
Match Word: lo
Match Word: o
Match Word: world
Found world from: 6 to: 11
Match Word: orld
Match Word: rld
Match Word: ld
Match Word: d
Match Word: abc
Match Word: bc
Match Word: c
'''
sar.match(match_str)
# Name of the function whose body is currently being scanned; stays None
# until the first top-level "{" is seen.
inside_function_name = None
# Number of "{" seen minus number of "}" seen so far.
curly_bracket_count = 0


def handle_open_curly_bracket(from_pos, to_pos):
    """Track a "{"; at depth 0 it marks entry into the last found function."""
    global curly_bracket_count, inside_function_name
    entering_top_level = curly_bracket_count == 0
    curly_bracket_count = curly_bracket_count + 1
    if entering_top_level:
        inside_function_name = last_found_function


def handle_close_curly_bracket(from_pos, to_pos):
    """Track a "}" by lowering the bracket depth."""
    global curly_bracket_count
    curly_bracket_count = curly_bracket_count - 1


def handle_required_method_found(from_pos, to_pos):
    """Report the function body in which the required method was found."""
    print(f"found at: {inside_function_name}")


# Registration order is the same as in the original script.
sar.add_regexp("{", handle_open_curly_bracket)
sar.add_regexp("}", handle_close_curly_bracket)
sar.add_regexp(f"\\^\\w{required_method}\\^\\w", handle_required_method_found)

sar.match(c_content)