Ejemplo n.º 1
0
def test4():
    """Exercise ``match_from`` with two patterns and two start offsets.

    Each pattern's callback appends its own amount (1 for ``"abcd"``, 2 for
    ``"nm"``) to a shared ``VarPointer``.  The expected totals are 3 when
    scanning from offset 1 and 2 when scanning from offset 2.
    """
    total = VarPointer()
    matcher = RegexpSar()

    def appender(amount):
        # Bind ``amount`` now so every callback keeps its own value.
        return lambda from_pos, to_pos: total.append(amount)

    for pattern, amount in (("abcd", 1), ("nm", 2)):
        matcher.add_regexp(pattern, appender(amount))

    text = "qabcdefnmq"
    matcher.match_from(text, 1)
    validate(3, total.val)

    # Reset the accumulator before the second scan.
    total.set(0)
    matcher.match_from(text, 2)
    validate(2, total.val)
Ejemplo n.º 2
0
def test21():
    """Chain two matchers: the end of the first match anchors the second.

    The ``"a+"`` callback runs the second matcher with ``match_at`` at the
    position where its own match ended and then stops the first matcher.
    The ``"b+c"`` pattern is expected to be reported exactly once.
    """
    text = "aaaaaaaaaaaaaaaaaaaaaaaaaabbbc"
    hits = VarPointer()
    lead_sar = RegexpSar()
    tail_sar = RegexpSar()

    def count_tail_hit(from_pos, to_pos):
        return hits.append(1)

    def hand_over(from_pos, to_pos):
        # Try the tail pattern exactly where this match ended, then quit.
        tail_sar.match_at(text, to_pos)
        lead_sar.stop_match()

    tail_sar.add_regexp("b+c", count_tail_hit)
    lead_sar.add_regexp("a+", hand_over)
    lead_sar.match(text)

    validate(1, hits.val)
Ejemplo n.º 3
0
class StateSar:
    """State-aware wrapper around ``RegexpSar``.

    Every regexp is registered together with a state.  When the regexp
    matches, its callback is invoked only if the wrapper is currently in
    that state.
    """

    def __init__(self, init_state):
        """Create the wrapper and start in ``init_state``.

        Args:
            init_state: Hashable value used as the initial current state.
        """
        super().__init__()
        self._sar = RegexpSar()
        self._state = init_state
        # The initial state is a known state too; otherwise set_state() could
        # never return to it unless a regexp happened to be registered for it.
        self._all_states = {init_state}

    def add_regexp(self, state, regexp, regexp_callback):
        """Register ``regexp`` so its callback only fires while in ``state``.

        Args:
            state: State in which the callback is active; it also becomes a
                valid target for ``set_state``.
            regexp: Pattern handed to the wrapped ``RegexpSar``.
            regexp_callback: Callable invoked as
                ``regexp_callback(from_pos, to_pos)``.
        """
        self._all_states.add(state)

        def inner_callback(from_pos, to_pos):
            if self._state == state:
                regexp_callback(from_pos, to_pos)
            # Whether or not the callback ran, resume scanning at the end of
            # this match.
            self._sar.continue_from(to_pos)

        self._sar.add_regexp(regexp, inner_callback)

    def match(self, match_str):
        """Run the wrapped matcher over ``match_str``."""
        self._sar.match(match_str)

    def set_state(self, new_state):
        """Switch the current state.

        Raises:
            ValueError: If ``new_state`` is neither the initial state nor a
                state used in an ``add_regexp`` call.
        """
        if new_state not in self._all_states:
            # ValueError subclasses Exception, so existing
            # ``except Exception`` handlers keep working.
            raise ValueError("Unknown state: " + str(new_state))
        self._state = new_state

    def get_state(self):
        """Return the current state."""
        return self._state
Ejemplo n.º 4
0
def test14():
    """Extract the sixth ``:``-separated field of a passwd-style line.

    The callback counts ``:`` matches.  The end of the fifth colon marks the
    field start and the start of the sixth colon marks the field end, after
    which matching is stopped.
    """
    line = "backup:x:34:34:backup:/var/backups:/usr/sbin/nologin"
    field_number = 6
    sar = RegexpSar()
    colons_seen = VarPointer()
    field_start = VarPointer()
    field_end = VarPointer()

    def on_colon(from_pos, to_pos):
        colons_seen.append(1)
        sar.continue_from(to_pos)
        seen = colons_seen.val
        if seen == field_number:
            field_end.set(from_pos)
            sar.stop_match()
        elif seen == field_number - 1:
            field_start.set(to_pos)

    sar.add_regexp(":", on_colon)
    sar.match(line)

    if not field_end.val > 0:
        # Fail on purpose: the closing colon was never reached.
        validate(False, "no match found")
        return

    validate("/var/backups", line[field_start.val:field_end.val])
Ejemplo n.º 5
0
def test18():
    """Register titled patterns and check the labelled matches in order.

    Every callback records ``"<title>: <matched text>"`` and resumes scanning
    at the end of its match.
    """
    text = "first abbbbc second 123 end"
    found = []
    sar = RegexpSar()

    def make_recorder(title):
        # Factory so each callback is bound to its own title.
        def record(from_pos, to_pos):
            matched = text[from_pos:to_pos]
            found.append(f"{title}: {matched}")
            sar.continue_from(to_pos)

        return record

    titled_patterns = (
        ("ab+c", "First Match"),
        ("\\d+", "Second Match"),
    )
    for pattern, title in titled_patterns:
        sar.add_regexp(pattern, make_recorder(title))

    sar.match(text)
    validate(2, len(found))
    validate("First Match: abbbbc", found[0])
    validate("Second Match: 123", found[1])
Ejemplo n.º 6
0
def test5():
    """A literal pattern occurring once is expected to be counted once."""
    hits = VarPointer()
    matcher = RegexpSar()
    pattern, text = "abcd", "qabcdef"

    def count_hit(from_pos, to_pos):
        return hits.append(1)

    matcher.add_regexp(pattern, count_hit)
    matcher.match(text)
    validate(1, hits.val)
Ejemplo n.º 7
0
def test6():
    """``match_at`` at offset 3 is expected to capture the digits from there."""
    captured = VarPointer()
    matcher = RegexpSar()
    text = "0123456 789"

    def capture(from_pos, to_pos):
        return captured.set(text[from_pos:to_pos])

    matcher.add_regexp("\\d+", capture)
    matcher.match_at(text, 3)
    validate("3456", captured.val)
Ejemplo n.º 8
0
def test22():
    """Find a digit that directly follows the last ``\\a`` match position.

    The ``\\a`` callback stores the start of each of its matches.  The digit
    callback stores the digit position when it is exactly one past that
    stored position.  The expected anchor is index 5.
    """
    sar = RegexpSar()
    last_alpha = VarPointer()
    last_alpha.val = -1  # sentinel: no ``\a`` match seen yet
    anchor = VarPointer()

    def remember_alpha(from_pos, to_pos):
        return last_alpha.set(from_pos)

    def on_digit(digit_pos, to_pos):
        if last_alpha.val == -1:
            return
        if digit_pos - last_alpha.val == 1:
            anchor.set(digit_pos)

    sar.add_regexp("\\a", remember_alpha)
    sar.add_regexp("\\d", on_digit)
    sar.match("aa bb2cc dd")
    validate(5, anchor.val)
Ejemplo n.º 9
0
def test10():
    """Without ``continue_from`` every suffix is expected to be reported."""
    text = "abc"
    collected = []
    matcher = RegexpSar()

    def collect(from_pos, to_pos):
        return collected.append(text[from_pos:to_pos])

    matcher.add_regexp("\\w+", collect)
    matcher.match(text)

    for index, expected in enumerate(("abc", "bc", "c")):
        validate(expected, collected[index])
    validate(3, len(collected))
Ejemplo n.º 10
0
def test15():
    """Two patterns sharing one callback: the flag must end up set to 1."""
    text = "123abc"
    flag = VarPointer()
    matcher = RegexpSar()

    def mark(from_pos, to_pos):
        flag.set(1)

    for pattern in ("\\d+", "\\w+"):
        matcher.add_regexp(pattern, mark)
    matcher.match(text)

    validate(1, flag.val)
Ejemplo n.º 11
0
def test11():
    """``\\a+`` on ``"123abc456"`` is expected to report only ``"abc"``.

    The callback resumes scanning one position past the end of its match.
    """
    text = "123abc456"
    collected = []
    matcher = RegexpSar()

    def collect_and_skip(from_pos, to_pos):
        collected.append(text[from_pos:to_pos])
        matcher.continue_from(to_pos + 1)

    matcher.add_regexp("\\a+", collect_and_skip)
    matcher.match(text)

    validate("abc", collected[0])
    validate(1, len(collected))
Ejemplo n.º 12
0
def test20():
    """Get a match only at one position: ``match_at`` offset 5 gives "bb22"."""
    text = "aa11 bb22 cc33 dd44"
    word_at_offset = VarPointer()
    matcher = RegexpSar()

    def remember_word(from_pos, to_pos):
        matched = text[from_pos:to_pos]
        word_at_offset.val = matched

    matcher.add_regexp("\\w+", remember_word)
    matcher.match_at(text, 5)

    validate("bb22", word_at_offset.val)
Ejemplo n.º 13
0
def test17():
    """At least one of ``\\d+`` / ``\\a+`` must fire on ``"mm abc nn"``."""
    text = "mm abc nn"
    flag = VarPointer()
    matcher = RegexpSar()

    def mark(from_pos, to_pos):
        flag.set(1)

    for pattern in ("\\d+", "\\a+"):
        matcher.add_regexp(pattern, mark)
    matcher.match(text)

    if flag.val != 1:
        # Force a validation failure: no callback was ever invoked.
        validate(False, "regexp should match")
Ejemplo n.º 14
0
def test8():
    """Check the reported match span and reuse the matcher on a second text.

    Expected spans are (1, 5) in ``"qabcdef"`` and (4, 8) in
    ``"qqqqabcdttt"``.
    """
    span_start = VarPointer()
    span_end = VarPointer()
    matcher = RegexpSar()
    first_text = "qabcdef"
    pattern = "abcd"

    def remember_span(from_pos, to_pos):
        span_start.set(from_pos)
        span_end.set(to_pos)

    matcher.add_regexp(pattern, remember_span)

    matcher.match(first_text)
    validate(1, span_start.val)
    validate(5, span_end.val)
    validate(pattern, first_text[span_start.val:span_end.val])

    # Clear the recorded span, then scan another string with the same matcher.
    span_start.set(0)
    span_end.set(0)
    matcher.match("qqqqabcdttt")
    validate(4, span_start.val)
    validate(8, span_end.val)
Ejemplo n.º 15
0
def test9():
    """Stop after the first ``\\d+`` match; it must cover the whole text."""
    span_start = VarPointer()
    span_end = VarPointer()
    matcher = RegexpSar()
    text = "1234"
    pattern = "\\d+"

    def remember_and_stop(from_pos, to_pos):
        span_start.set(from_pos)
        span_end.set(to_pos)
        matcher.stop_match()

    matcher.add_regexp(pattern, remember_and_stop)
    matcher.match(text)

    matched = text[span_start.val:span_end.val]
    validate(text, matched)
Ejemplo n.º 16
0
def test16():
    """Two patterns, each resuming one position past the end of its match.

    Expected reports, in order: ``"123"`` and ``"bc"``.
    """
    text = "123abc"
    collected = []
    matcher = RegexpSar()

    def collect_and_skip(from_pos, to_pos):
        collected.append(text[from_pos:to_pos])
        matcher.continue_from(to_pos + 1)

    for pattern in ("\\d+", "\\a+"):
        matcher.add_regexp(pattern, collect_and_skip)
    matcher.match(text)

    for index, expected in enumerate(("123", "bc")):
        validate(expected, collected[index])
    validate(2, len(collected))
Ejemplo n.º 17
0
def test13():
    """Overlapping ``\\d+`` and ``\\w+`` with ``continue_from(to_pos)``.

    Expected reports, in order: ``"123"`` and ``"123abc"``.
    """
    text = "123abc"
    collected = []
    matcher = RegexpSar()

    def collect_and_continue(from_pos, to_pos):
        collected.append(text[from_pos:to_pos])
        matcher.continue_from(to_pos)

    for pattern in ("\\d+", "\\w+"):
        matcher.add_regexp(pattern, collect_and_continue)
    matcher.match(text)

    validate("123", collected[0])
    # TODO: check whether this second expectation reflects a real error.
    validate("123abc", collected[1])
    validate(2, len(collected))
Ejemplo n.º 18
0
def match_test(all_regexp, match_string, match_expected):
    """Assert which of ``all_regexp`` get reported for ``match_string``.

    The regexp at index ``i`` adds ``2**i`` to a running sum each time its
    callback fires.  After matching, the sum must equal ``match_expected``.

    Raises:
        AssertionError: If the accumulated sum differs from
            ``match_expected``.
    """
    sar = RegexpSar()
    path_res = 0

    def gen_callback(cur_re_num):
        # Factory: closures bind late, so each callback must capture its own
        # weight instead of sharing the loop variable.
        def match_callback(from_pos, to_pos):
            nonlocal path_res
            path_res += cur_re_num

        return match_callback

    for index, regexp in enumerate(all_regexp):
        sar.add_regexp(regexp, gen_callback(1 << index))

    sar.match(match_string)
    assert match_expected == path_res, f"Match fail for [{', '.join(all_regexp)}] in >>{match_string}<<"
Ejemplo n.º 19
0
def test19():
    """Get the third ``\\w+`` match and stop; it is expected to be "cc33"."""
    text = "aa11 bb22 cc33 dd44"
    seen = VarPointer()
    third_word = VarPointer()
    matcher = RegexpSar()

    def on_word(from_pos, to_pos):
        seen.append(1)
        if seen.val != 3:
            # Not there yet: skip past this word and keep scanning.
            matcher.continue_from(to_pos)
            return
        third_word.val = text[from_pos:to_pos]
        matcher.stop_match()

    matcher.add_regexp("\\w+", on_word)
    matcher.match(text)

    validate(3, seen.val)
    validate("cc33", third_word.val)
Ejemplo n.º 20
0
 def __init__(self, init_state):
     """Start in ``init_state`` with a fresh matcher and an empty state set."""
     super().__init__()
     # Current state, and the (initially empty) set of registered states.
     self._state = init_state
     self._all_states = set()
     # Wrapped matcher instance.
     self._sar = RegexpSar()
Ejemplo n.º 21
0
from regexp_sar import RegexpSar

# SAR matcher instance used by this script.
sar = RegexpSar()

# Sentence positions collected for alice together with cat / with rabbit.
alice_and_cat_position, alice_and_rabbit_position = [], []

# Start position of the sentence currently being scanned.
sentence_start_position = 0

# Per-sentence flags: has alice / cat / rabbit occurred in the current
# sentence?
is_alice_in_sentence = is_cat_in_sentence = is_rabbit_in_sentence = False

# mark that the word alice is found in the current sentence


def find_alice(f, t):
    """Flag that the word alice occurs in the current sentence.

    Sets the module-level ``is_alice_in_sentence`` to ``True``.  ``f`` and
    ``t`` are accepted but not used (presumably the match start/end passed
    by the matcher -- confirm against the registration call).
    """
    global is_alice_in_sentence
    is_alice_in_sentence = True


# mark that the word cat is found in the current sentence
def find_cat(f, t):
    """Flag that the word cat occurs in the current sentence.

    Sets the module-level ``is_cat_in_sentence`` to ``True``.  ``f`` and
    ``t`` are accepted but not used (presumably the match start/end passed
    by the matcher -- confirm against the registration call).
    """
    global is_cat_in_sentence
    is_cat_in_sentence = True

Ejemplo n.º 22
0
from regexp_sar import RegexpSar

# SAR matcher instance used by this script.
sar = RegexpSar()

# Text that is scanned.
match_str = "hello world 123 abc 456 789"

# Module-level bookkeeping for word and number matches.
match_word_count, match_num_count = 0, 0
match_word, match_num = None, None

# Each entry pairs a pattern with a human-readable description.
# Raw strings keep the backslashes literal: '\w' and '\d' in a normal string
# literal are invalid escape sequences that newer Python versions warn about.
regexps = [
    [r'\w+', 'word'],
    [r'\d+', 'number'],
]

for cur_regexp in regexps:

    # NOTE(review): nothing in the visible part of this loop calls
    # find_second_match or registers its callback on ``sar``; the snippet
    # looks truncated -- confirm against the full example.
    def find_second_match(description):
        """Build a callback that prints the second match it is invoked for.

        Args:
            description: Label printed in front of the matched text.

        Returns:
            A ``callback(from_pos, to_pos)`` that counts its own invocations,
            prints on the second one, and always resumes scanning at
            ``to_pos``.
        """
        match_count = 0

        def callback(from_pos, to_pos):
            nonlocal match_count
            match_count += 1
            if match_count == 2:
                print("Match: " + str(description) + ": " +
                      match_str[from_pos:to_pos])
            sar.continue_from(to_pos)

        return callback
from regexp_sar import RegexpSar

# Read the whole C source file into memory (text mode is open()'s default).
with open("./c_code.c") as c_fh:
    c_content = c_fh.read()

# Name of the function this script is interested in.
required_method = "malloc"

# Most recently recorded word; stays None until one is recorded.
last_found_word = None

# SAR matcher instance used by this script.
sar = RegexpSar()


def found_name(from_pos, to_pos):
    """Store the matched text in ``last_found_word`` and resume after it."""
    global last_found_word
    word_span = slice(from_pos, to_pos)
    last_found_word = c_content[word_span]
    # Resume scanning at the end of this match.
    sar.continue_from(to_pos)


# Run found_name for every "\w+" match so the latest word is recorded.
sar.add_regexp("\\w+", found_name)

# Name of the last encountered function; starts out as None.
last_found_function = None


def found_function(from_pos, to_pos):
    """Remember the most recently found word as the last function name.

    Copies the module-level ``last_found_word`` into
    ``last_found_function``.  ``from_pos`` and ``to_pos`` are accepted but
    not used.
    """
    global last_found_function
    last_found_function = last_found_word


# Run found_function for every match of "(".
# NOTE(review): "(" is passed unescaped -- confirm that RegexpSar treats it
# as a literal character rather than as grouping syntax.
sar.add_regexp("(", found_function)
Ejemplo n.º 24
0
def test7():
    """Count every occurrence, then stop early with ``stop_match``.

    The text contains ``"abcd"`` three times: an unrestricted scan is
    expected to count 3, a scan stopped on the second hit is expected to
    count 2.
    """
    text = "qabcdeabcdkabcdf"

    all_hits = VarPointer()
    full_scan = RegexpSar()

    def count_hit(from_pos, to_pos):
        return all_hits.append(1)

    full_scan.add_regexp("abcd", count_hit)
    full_scan.match(text)
    validate(3, all_hits.val)

    limited_hits = VarPointer()
    limited_scan = RegexpSar()

    def count_until_second(from_pos, to_pos):
        limited_hits.append(1)
        if limited_hits.val == 2:
            limited_scan.stop_match()

    limited_scan.add_regexp("abcd", count_until_second)
    limited_scan.match(text)
    validate(2, limited_hits.val)
Ejemplo n.º 25
0
def test3():
    """A pattern absent from the text must never trigger its callback."""
    hits = VarPointer()
    matcher = RegexpSar()

    def count_hit(from_pos, to_pos):
        return hits.append(1)

    matcher.add_regexp("abcd", count_hit)
    matcher.match("qqqqqqqqqqqq")
    validate(0, hits.val)
Ejemplo n.º 26
0
from regexp_sar import RegexpSar

# SAR matcher instance used by this script.
sar = RegexpSar()

# Text that is scanned.
match_str = "hello world abc"

# Print every word match.  The pattern is a raw string because '\w' in a
# normal string literal is an invalid escape sequence.
sar.add_regexp(
    r'\w+',
    lambda from_pos, to_pos: print(
        "Match Word: " + match_str[from_pos:to_pos]),
)
# Print the position of the literal "world".
sar.add_regexp(
    'world',
    lambda from_pos, to_pos: print(
        "Found world from: " + str(from_pos) + " to: " + str(to_pos)),
)
'''
Output:
    Match Word: hello
    Match Word: ello
    Match Word: llo
    Match Word: lo
    Match Word: o
    Match Word: world
    Found world from: 6 to: 11
    Match Word: orld
    Match Word: rld
    Match Word: ld
    Match Word: d
    Match Word: abc
    Match Word: bc
    Match Word: c
'''
sar.match(match_str)
Ejemplo n.º 27
0
# Running total of callback invocations.
match_count = 0


def count_matches(from_pos, to_pos):
    """Increase the global match counter by one.

    Intended to be called every time any regexp matches in the text; the
    two positions are accepted but not used.
    """
    global match_count
    match_count = match_count + 1


# Benchmark the native Python regexp engine.
print("Start python match")
# Build one Python regexp: all patterns joined by alternation in one group.
python_native_re = f"({'|'.join(regexp_list)})"
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; wall-clock time.time() can jump if the system clock is adjusted.
start = time.perf_counter()

# Count the matches found by the native Python regexp engine.
python_re_match_count = len(re.findall(python_native_re, match_str))
print(f"Done in {time.perf_counter() - start} seconds {python_re_match_count} matches")


# Create a SAR instance.
sar = RegexpSar()

# Register every regexp on the SAR instance with the counting callback.
for cur_re in regexp_list:
    sar.add_regexp(cur_re, count_matches)

# Benchmark SAR performance.
print("Start SAR Match")
start = time.perf_counter()
sar.match(match_str)
print(f"Done Match in: {time.perf_counter() - start} seconds. {match_count} matches!")