コード例 #1
0
    def enterR3(self, ctx):
        """Push the parts of an R3 node onto ``self.stack``.

        A symbol, when present, is pushed as a ``SYMBOL_SIMPLE`` RegEx.  Each
        postfix operator that is present is pushed as its marker string
        ("*", "+", "?").  A range is pushed as the marker "{" followed by a
        ``(min, max)`` tuple; the fixed form uses the same number twice and
        the one-sided forms put -1 in the other slot.
        """
        symbol = ctx.SYMBOL()
        if symbol is not None:
            self.stack.append(RegEx(rx.SYMBOL_SIMPLE, toString(symbol)))

        # Same order as the grammar alternatives were originally checked.
        for accessor, marker in ((ctx.star, "*"), (ctx.plus, "+"), (ctx.maybe, "?")):
            if accessor() is not None:
                self.stack.append(marker)

        range_ctx = ctx.range_0()
        if range_ctx is None:
            return

        self.stack.append("{")

        fixed = range_ctx.fixed()
        if fixed is not None:
            num = getNumber(fixed.R_FIXED())
            self.stack.append((num, num))

        lower_only = range_ctx.min_0()
        if lower_only is not None:
            num = getFirstNumber(lower_only.R_MIN())
            self.stack.append((num, -1))

        upper_only = range_ctx.max_0()
        if upper_only is not None:
            num = getLastNumber(upper_only.R_MAX())
            self.stack.append((-1, num))

        interval = range_ctx.interval()
        if interval is not None:
            token = interval.R_INTERVAL()
            self.stack.append((getFirstNumber(token), getLastNumber(token)))
コード例 #2
0
ファイル: test.py プロジェクト: wocoburguesa/Compiladores
def test1():
    """Run ``RegEx.process`` on every line of ``example.js``.

    Each stripped input line is appended to ``tokens.txt`` together with its
    1-based line number and whatever ``process`` returned for it.  Both files
    are handled with context managers so they are closed even when
    ``process`` raises.
    """
    a = RegEx()
    with open('example.js') as program:
        lines = [line.strip() for line in program]

    with open('tokens.txt', 'a') as output:
        for line_number, test in enumerate(lines, start=1):
            output.write('Line #%d: %s\t%s\n' % (
                line_number,
                test,
                a.process(test, line_number)))
コード例 #3
0
ファイル: main.py プロジェクト: catalinmares/Regex-Engine
def apply_transformations(regex_string, pos, symbol_regex):
    """Wrap *symbol_regex* in every postfix operator found at *pos*.

    Consumes any run of ``*``, ``+``, ``?`` and ``{...}`` starting at
    ``regex_string[pos]`` and nests the expression inside the matching RegEx
    node for each one, innermost first.

    Range forms: ``{n}`` -> ``(n, n)``, ``{n,}`` -> ``(n, -1)``,
    ``{,m}`` -> ``(-1, m)``, ``{n,m}`` -> ``(n, m)``.  Bounds may have any
    number of digits.

    Returns:
        A ``(regex, pos)`` tuple where ``pos`` is the index of the first
        character that is not part of a postfix operator.

    Raises:
        ValueError: if a ``{`` has no closing ``}`` or its bounds are not
            numbers.
    """
    while pos < len(regex_string):
        operator = regex_string[pos]
        if operator == '*':
            symbol_regex = RegEx(RX_STAR, symbol_regex)
            pos += 1
        elif operator == '+':
            symbol_regex = RegEx(RX_PLUS, symbol_regex)
            pos += 1
        elif operator == '?':
            symbol_regex = RegEx(RX_MAYBE, symbol_regex)
            pos += 1
        elif operator == '{':
            closing = regex_string.find('}', pos)
            if closing == -1:
                raise ValueError("unterminated range in %r" % (regex_string,))

            # Split on the comma instead of indexing fixed offsets so that
            # multi-digit bounds are read in full.
            low, comma, high = regex_string[pos + 1:closing].partition(',')
            if not comma:
                count = int(low)
                bounds = (count, count)
            elif not low and not high:
                raise ValueError("empty range in %r" % (regex_string,))
            else:
                bounds = (int(low) if low else -1, int(high) if high else -1)

            symbol_regex = RegEx(RX_RANGE, symbol_regex, bounds)
            pos = closing + 1
        else:
            break

    return (symbol_regex, pos)
コード例 #4
0
    def clean(self, ex):
        """Fold *ex* into the operators waiting on ``self.stack``.

        While the stack shows a pending postfix marker ("*", "+", "?"), a
        range marker "{" (with its bounds on top), or a binary marker ("|" or
        ".") under a non-string operand, the marker is consumed and the
        expression is wrapped in the corresponding RegEx node.  Once nothing
        applies, the resulting expression is pushed.

        NOTE(review): ``instr(self.stack, n)`` is assumed to peek at the
        n-th entry from the top without modifying the stack - confirm.
        """
        node = ex
        while True:
            if instr(self.stack, 1) == "*":
                self.stack.pop()
                node = RegEx(rx.STAR, node)
            elif instr(self.stack, 1) == "+":
                self.stack.pop()
                node = RegEx(rx.PLUS, node)
            elif instr(self.stack, 1) == "?":
                self.stack.pop()
                node = RegEx(rx.MAYBE, node)
            elif instr(self.stack, 2) == "{":
                bounds = self.stack.pop()
                self.stack.pop()
                node = RegEx(rx.RANGE, node, bounds)
            elif (instr(self.stack, 2) == "|"
                  and not isinstance(instr(self.stack, 1), str)):
                left = self.stack.pop()
                self.stack.pop()
                node = RegEx(rx.ALTERNATION, left, node)
            elif (instr(self.stack, 2) == "."
                  and not isinstance(instr(self.stack, 1), str)):
                left = self.stack.pop()
                self.stack.pop()
                node = RegEx(rx.CONCATENATION, left, node)
            else:
                # Nothing left to fold: the expression becomes the new top.
                self.stack.append(node)
                return
コード例 #5
0
ファイル: main.py プロジェクト: catalinmares/Regex-Engine
def build_set(regex_string, pos):
    """Parse the body of a ``[...]`` set starting at *pos*.

    *pos* is the index of the first character after ``[``.  Single
    characters are stored as-is; ``a-z`` style ranges are stored as a
    ``(first, last)`` tuple.

    Returns:
        A ``(regex, pos)`` tuple: the ``RX_SYMBOL_SET`` RegEx and the index
        of the closing ``]``.

    Raises:
        ValueError: if the set is not closed or contains a character that is
            not in ``CHARSET`` (previously these cases ran off the string or
            looped forever).
    """
    closing = regex_string.find(']', pos)
    if closing == -1:
        raise ValueError("unterminated character set in %r" % (regex_string,))

    symbol_set = set()
    while pos < closing:
        first = regex_string[pos]
        if first not in CHARSET:
            raise ValueError(
                "unexpected character %r in set of %r" % (first, regex_string))

        # Only treat '-' as a range when an end character exists before ']'.
        if pos + 2 < closing and regex_string[pos + 1] == '-':
            symbol_set.add((first, regex_string[pos + 2]))
            pos += 3
        else:
            symbol_set.add(first)
            pos += 1

    return (RegEx(RX_SYMBOL_SET, symbol_set), closing)
コード例 #6
0
ファイル: automaton.py プロジェクト: povelly/Librarian
 def dfa(regex):
     """Build an automaton for *regex*, printing a progress line per stage.

     The result of ``RegEx(regex).parsex()`` is handed to
     ``Automaton.parse_tree``, then to ``reduce_automaton`` and
     ``reduce_automaton2``; the final automaton is cleaned up and returned.
     """
     print("build dfa")
     syntax_tree = RegEx(regex).parsex()
     print("parsex ok")
     raw_automaton = Automaton.parse_tree(syntax_tree)
     print("parse_tree ok")
     first_pass = Automaton.reduce_automaton(raw_automaton)
     print("reduce ok")
     result = Automaton.reduce_automaton2(first_pass)
     print("reduce2 ok")
     result.cleanUp()
     print("cleanup ok")
     return result
コード例 #7
0
ファイル: regex_test.py プロジェクト: vottie/lang
 def test_regex(self):
     """Construct a RegEx for a literal word and run ``search`` on it."""
     matcher = RegEx('hello')
     matcher.search()
     # NOTE(review): this checks the truthiness of the RegEx object itself,
     # not the value returned by search() - confirm that is intended.
     self.assertTrue(matcher)
コード例 #8
0
ファイル: main.py プロジェクト: alexandrapetre/Tema-LFA
        
        for i in range (len(regex_string)):
            last = None
            if modify[i] == '(':
                ceva = 1
                ok = 2
                start = i
            if modify[i] == ')':
                ok = 0
                end = i 
                ceva = 0
            if ok == 2:
                end = i + 1

            if len(regex_string) == 1 and regex_string[i] != '.':
                regex = RegEx(1, regex_string[i],None)
            elif regex_string[i] == '.':
                regex = RegEx(2, None, None)
            
            if regex_string[i] == '[':
                square = 1
                start1 = i
                ók1 = 2
            if regex_string[i] == ']':
                square = 2
                end1 = i
                ok1 = 0

            if ok1 == 2:
                end1 = start1 + 1
コード例 #9
0
from regex import RegEx

# Fixture: a single RegEx instance created at import time and shared by the
# test functions below.
regex_obj = RegEx()


def test_pass():
    """``transduce`` must report 'ACCEPT' for each of the 11 input characters."""
    test_string = 'ABRACADABRA'
    expected = ['ACCEPT'] * 11
    rv = regex_obj.transduce(test_string)
    assert rv == expected
    print(f'The return value is {rv}.')


def test_fail_first():
    test_string = '12345'
    rv = regex_obj.transduce(test_string)
    assert rv == [
        'REJECT',
        'REJECT',
コード例 #10
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitRrange4(self, ctx: MyRegExParser.Rrange4Context):
     """Replace the top expression with a RANGE node using two popped counts.

     The number popped first becomes the second element of the bounds tuple,
     the number popped second becomes the first element.
     """
     operand = self.RegExstack.pop()
     upper = self.Numstack.pop()
     lower = self.Numstack.pop()
     self.RegExstack.append(RegEx(rgxRANGE, operand, (lower, upper)))
コード例 #11
0
ファイル: main.py プロジェクト: catalinmares/Regex-Engine
def _matching_paren(regex_string, open_pos):
    """Return the index of the ')' that closes the '(' at *open_pos*."""
    depth = 0
    for idx in range(open_pos, len(regex_string)):
        if regex_string[idx] == '(':
            depth += 1
        elif regex_string[idx] == ')':
            depth -= 1
            if depth == 0:
                return idx
    raise ValueError("unbalanced parenthesis in %r" % (regex_string,))


def _next_top_level_bar(regex_string, start, endpoint):
    """Return the first '|' in [start, endpoint) outside parentheses, else *endpoint*."""
    depth = 0
    for idx in range(start, endpoint):
        char = regex_string[idx]
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        elif char == '|' and depth == 0:
            return idx
    return endpoint


def get_parsed_regex(regex_string, startpoint, endpoint):
    """Parse ``regex_string[startpoint:endpoint]`` into a RegEx tree.

    Atoms (a ``CHARSET`` character, a ``[...]`` set, ``.`` or a parenthesised
    group) are read left to right, each followed by its postfix operators,
    and joined with ``RX_CONCATENATION``.  A ``|`` combines everything parsed
    so far with the next alternative via ``RX_ALTERNATION``.

    Fixes over the previous version:
      * an alternative that is followed by another ``|`` is parsed in full
        (its last character used to be dropped);
      * the search for the next ``|`` skips over parenthesised groups;
      * postfix operators after ``.`` are applied (``.*`` used to hang);
      * an unrecognised character raises instead of looping forever.

    Returns:
        The RegEx tree, or ``None`` when the slice is empty.

    Raises:
        ValueError: on an unbalanced parenthesis or an unexpected character.
    """
    regex = None
    i = startpoint

    while i < endpoint:
        current = regex_string[i]

        if current in CHARSET:
            symbol_regex = RegEx(RX_SYMBOL_SIMPLE, current)
            (atom, i) = apply_transformations(regex_string, i + 1, symbol_regex)
        elif current == '[':
            (symbol_set, i) = build_set(regex_string, i + 1)
            # build_set leaves i on the closing ']'; operators start after it.
            (atom, i) = apply_transformations(regex_string, i + 1, symbol_set)
        elif current == '.':
            (atom, i) = apply_transformations(
                regex_string, i + 1, RegEx(RX_SYMBOL_ANY))
        elif current == '(':
            closing = _matching_paren(regex_string, i)
            group = get_parsed_regex(regex_string, i + 1, closing)
            (atom, i) = apply_transformations(regex_string, closing + 1, group)
        elif current == '|':
            bar = _next_top_level_bar(regex_string, i + 1, endpoint)
            alternation_expr = get_parsed_regex(regex_string, i + 1, bar)
            regex = RegEx(RX_ALTERNATION, regex, alternation_expr)
            # Either the next '|' (handled on the next pass) or the end.
            i = bar
            continue
        else:
            raise ValueError(
                "unexpected character %r at index %d of %r"
                % (current, i, regex_string))

        if regex is None:
            regex = atom
        else:
            regex = RegEx(RX_CONCATENATION, regex, atom)

    return regex
コード例 #12
0
 def exitSet_0(self, ctx):
     """Swap the top stack entry for a SYMBOL_SET built from ``self.set``.

     A copy of the collected symbols goes into the RegEx so that clearing
     ``self.set`` afterwards does not empty the node's contents.
     """
     self.stack.pop()
     collected = self.set.copy()
     self.stack.append(RegEx(rx.SYMBOL_SET, collected))
     self.set.clear()
コード例 #13
0
    def exitExp(self, ctx:ReGexParser.ExpContext):
        """Build and return the RegEx tree for the expression node *ctx*.

        The node is classified by the first token accessor that returns a
        truthy value, checked in this order: CHAR, NUMBER, ANY, MAYBE, STAR,
        PLUS, SET, RANGE, ALTERNATION.  Anything else is treated as a
        parenthesised expression and/or a concatenation.  Sub-expressions are
        built by calling this method recursively on child nodes.

        Range bounds are parsed with ``int`` so they may have any number of
        digits in all four forms (``{n}``, ``{,m}``, ``{n,}``, ``{n,m}``);
        three of those forms used to read a single digit only.

        Raises:
            ValueError: if a range bound is not a valid integer.
        """
        # Simple symbol: a character or a number, stored as its text.
        if ctx.CHAR():
            return RegEx(SYMBOL_SIMPLE, ctx.getText())

        if ctx.NUMBER():
            return RegEx(SYMBOL_SIMPLE, ctx.getText())

        # Any-symbol wildcard.
        if ctx.ANY():
            return RegEx(SYMBOL_ANY)

        # Postfix operators wrap the tree built from the first child.
        if ctx.MAYBE():
            expression = self.exitExp(ctx.getChild(0))
            return RegEx(MAYBE, expression)

        if ctx.STAR():
            expression = self.exitExp(ctx.getChild(0))
            return RegEx(STAR, expression)

        if ctx.PLUS():
            expression = self.exitExp(ctx.getChild(0))
            return RegEx(PLUS, expression)

        # Symbol set: walk the text between the brackets collecting single
        # characters and (first, last) tuples for "a-z" style ranges.
        if ctx.SET():
            text = ctx.getText()
            set_symbols = set()
            last = len(text) - 1
            j = 1
            while j < last:
                if text[j] == '-':
                    # A '-' where a symbol is expected ends the scan.
                    break
                if text[j + 1] == '-':
                    set_symbols.add((text[j], text[j + 2]))
                    j += 3
                else:
                    set_symbols.add(text[j])
                    j += 1
            return RegEx(SYMBOL_SET, set_symbols)

        # Range applied to the single symbol at the start of the text.
        if ctx.RANGE():
            text = ctx.getText()
            operand = RegEx(SYMBOL_SIMPLE, text[0])
            bounds_text = text[text.find('{') + 1:text.find('}')]
            low, comma, high = bounds_text.partition(',')

            if not comma:
                # {n}: exactly n repetitions.
                number = int(low)
                bounds = (number, number)
            elif not low:
                # {,m}: at most m repetitions.
                bounds = (-1, int(high))
            elif not high:
                # {n,}: at least n repetitions.
                bounds = (int(low), -1)
            else:
                # {n,m}: between n and m repetitions.
                bounds = (int(low), int(high))
            return RegEx(RANGE, operand, bounds)

        # Alternation between the first and third children.
        if ctx.ALTERNATION():
            e1 = self.exitExp(ctx.getChild(0))
            e2 = self.exitExp(ctx.getChild(2))
            return RegEx(ALTERNATION, e1, e2)

        text = ctx.getText()

        if text[0] == '(':
            # Fully parenthesised: just unwrap the inner expression.
            if text[len(text) - 1] == ')':
                return self.exitExp(ctx.getChild(1))
            # Parenthesised prefix followed by more input: concatenate.
            e1 = self.exitExp(ctx.getChild(1))
            e2 = self.exitExp(ctx.getChild(3))
            return RegEx(CONCATENATION, e1, e2)

        # Plain concatenation of the first two children.
        e1 = self.exitExp(ctx.getChild(0))
        e2 = self.exitExp(ctx.getChild(1))
        return RegEx(CONCATENATION, e1, e2)
コード例 #14
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitSetsymb(self, ctx: MyRegExParser.SetsymbContext):
     """Push a SYMBOL_SET node built from ``self.setsymb``."""
     # NOTE(review): the collection is passed by reference, not copied.
     set_node = RegEx(rgxSYMBOL_SET, self.setsymb)
     self.RegExstack.append(set_node)
コード例 #15
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitAnysymb(self, ctx: MyRegExParser.AnysymbContext):
     """Push a SYMBOL_ANY node onto the expression stack."""
     any_node = RegEx(rgxSYMBOL_ANY)
     self.RegExstack.append(any_node)
コード例 #16
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitStar(self, ctx: MyRegExParser.StarContext):
     """Replace the top expression with a STAR node wrapping it."""
     operand = self.RegExstack.pop()
     star_node = RegEx(rgxSTAR, operand)
     self.RegExstack.append(star_node)
コード例 #17
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitAltern(self, ctx: MyRegExParser.AlternContext):
     """Combine the two topmost expressions into an ALTERNATION node.

     The expression popped first is the right-hand side.
     """
     right = self.RegExstack.pop()
     left = self.RegExstack.pop()
     alternation = RegEx(rgxALTERNATION, left, right)
     self.RegExstack.append(alternation)
コード例 #18
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitConcat(self, ctx: MyRegExParser.ConcatContext):
     """Combine the two topmost expressions into a CONCATENATION node.

     The expression popped first is the right-hand side.
     """
     right = self.RegExstack.pop()
     left = self.RegExstack.pop()
     concatenation = RegEx(rgxCONCATENATION, left, right)
     self.RegExstack.append(concatenation)
コード例 #19
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitAtom(self, ctx: MyRegExParser.AtomContext):
     """Push a SYMBOL_SIMPLE node when the atom is a plain symbol.

     The symbol itself is taken from the top of ``self.Symbstack``.  Atoms
     without a ``symbol`` child leave both stacks untouched.
     """
     if ctx.symbol() is None:
         return
     self.RegExstack.append(RegEx(rgxSYMBOL_SIMPLE, self.Symbstack.pop()))
コード例 #20
0
ファイル: main.py プロジェクト: likemaq/REGEX-Parser
def _fold_concatenation(parts):
    """Left-fold *parts* into CONCATENATION nodes; EMPTY_STRING when there are none."""
    folded = RegEx(EMPTY_STRING)
    for part in parts:
        if folded.type == EMPTY_STRING:
            folded = part
        else:
            folded = RegEx(CONCATENATION, folded, part)
    return folded


def parse(regex_string):
    """Parse *regex_string* into a RegEx tree.

    The string is scanned once.  Already parsed pieces are collected in a
    list; postfix operators and ranges replace the last collected piece, a
    parenthesised group is parsed recursively once its closing parenthesis is
    reached, and ``|`` turns everything collected so far into the left side
    of an ALTERNATION whose right side is the parse of the rest of the
    string.  At the end the collected pieces are concatenated left to right.

    Returns:
        The RegEx tree; an EMPTY_STRING node for an empty input.

    Raises:
        IndexError: if an operator or range has no preceding expression.
    """
    # Pieces parsed so far, in order.
    acc = []
    open_par = 0
    closed_par = 0
    first_open_par = -1
    open_curly_pos = -1
    open_square_pos = -1

    postfix_ops = {"?": MAYBE, "*": STAR, "+": PLUS}

    for i, symbol in enumerate(regex_string):
        # open_par > 0 => we are inside round brackets and only track them;
        # open_par == 0 => normal parsing.
        if open_par == 0:

            # Symbol set: remember where the content starts, and build the
            # set from the slice once the closing bracket is reached.
            if symbol == "[":
                open_square_pos = i + 1

            if symbol == "]":
                square_expr_set = get_sym_for_square(
                    regex_string[open_square_pos:i])
                acc.append(RegEx(SYMBOL_SET, square_expr_set))
                open_square_pos = -1

            # Everything between '[' and ']' is handled by the slice above.
            if open_square_pos != -1:
                continue

            # Range: same approach as for the symbol set.
            if symbol == "{":
                open_curly_pos = i + 1

            if symbol == "}":
                x, y = get_from_curly(regex_string[open_curly_pos:i])
                sym = acc.pop()
                acc.append(RegEx(RANGE, sym, (x, y)))
                open_curly_pos = -1

            # A plain symbol, unless it is a digit inside "{}".
            if symbol in ALFANUM and open_curly_pos == -1:
                acc.append(RegEx(SYMBOL_SIMPLE, symbol))

            # "." is stored like any other single symbol.
            if symbol == ".":
                acc.append(RegEx(SYMBOL_ANY))

            # ?, * and + wrap the most recently parsed piece.
            if symbol in postfix_ops:
                anterior = acc.pop()
                acc.append(RegEx(postfix_ops[symbol], anterior))

            # "|": concatenate everything collected so far as the left side
            # and parse the rest of the string as the right side.
            if symbol == "|":
                lhs = _fold_concatenation(acc)
                rhs = parse(regex_string[i + 1:])
                acc = [RegEx(ALTERNATION, lhs, rhs)]
                break

        # Count opened and closed round brackets; when they balance, parse
        # what lies between the outermost pair.
        if symbol == "(":
            open_par += 1

            if first_open_par == -1:
                first_open_par = i + 1

        if symbol == ")":
            closed_par += 1
            if open_par == closed_par:
                acc.append(parse(regex_string[first_open_par:i]))
                open_par = 0
                closed_par = 0
                first_open_par = -1

    # Finally concatenate all collected pieces.
    return _fold_concatenation(acc)
コード例 #21
0
 def enterAny_0(self, ctx):
     """Push a SYMBOL_ANY node onto the stack."""
     wildcard = RegEx(rx.SYMBOL_ANY)
     self.stack.append(wildcard)
コード例 #22
0
ファイル: main.py プロジェクト: mariajianu/RegEx-Engine
def convertRegEx(parsed_regex):
    """Convert a parsed ``RegEx`` tree into a ``RegularExpression`` tree.

    The target only has empty-string, symbol, star, concatenation and
    alternation nodes, so the richer RegEx operators are expanded:

      * ``.``      -> alternation of every symbol in ``alphabet``
      * ``e?``     -> empty string | e
      * ``e+``     -> e e*
      * ``e{n}``   -> e repeated n times (empty string for n == 0)
      * ``e{,m}``  -> empty string | e | ee | ... (up to m copies)
      * ``e{n,}``  -> e{n} e*
      * ``e{n,m}`` -> e{n} | e{n+1} | ... | e{m}
      * ``[...]``  -> alternation of every listed symbol and range member

    Fixes over the previous version: ``.`` no longer matches the empty
    string; a lower bound of 0 yields zero occurrences instead of one; a
    plain set symbol is no longer dropped when the set also contains ranges;
    a range such as ``a-a`` no longer includes the following character.

    Returns:
        The converted expression, or ``None`` for an empty symbol set, an
        empty alphabet, or an unknown node type.
    """
    # RegularExpression node kinds, as used consistently below.
    re_empty_string, re_symbol, re_star, re_concat, re_alt = 1, 2, 3, 4, 5

    def alternate(left, right):
        """Return left | right, or just right when nothing is accumulated yet."""
        if left is None:
            return right
        return RegularExpression(re_alt, left, right)

    def repeat(count):
        """Return the operand concatenated with itself *count* times."""
        if count == 0:
            return RegularExpression(re_empty_string)
        repeated = convertRegEx(parsed_regex.lhs)
        for _ in range(count - 1):
            repeated = RegularExpression(
                re_concat, repeated, convertRegEx(parsed_regex.lhs))
        return repeated

    kind = parsed_regex.type

    if kind == EMPTY_STRING:
        return RegularExpression(re_empty_string)

    if kind == SYMBOL_SIMPLE:
        return RegularExpression(re_symbol, str(parsed_regex))

    # CONCATENATION = 8
    if kind == 8:
        return RegularExpression(re_concat,
                                 convertRegEx(parsed_regex.lhs),
                                 convertRegEx(parsed_regex.rhs))

    # ALTERNATION = 9
    if kind == 9:
        return RegularExpression(re_alt,
                                 convertRegEx(parsed_regex.lhs),
                                 convertRegEx(parsed_regex.rhs))

    # SYMBOL_ANY = 2
    if kind == 2:
        # Start from nothing rather than the empty string: "." must consume
        # exactly one symbol.
        regular_expression = None
        for letter in alphabet:
            regular_expression = alternate(
                regular_expression,
                convertRegEx(RegEx(SYMBOL_SIMPLE, letter)))
        return regular_expression

    # MAYBE = 4
    if kind == 4:
        return RegularExpression(re_alt,
                                 RegularExpression(re_empty_string),
                                 convertRegEx(parsed_regex.lhs))

    # STAR = 5
    if kind == 5:
        return RegularExpression(re_star, convertRegEx(parsed_regex.lhs))

    # PLUS = 6
    if kind == 6:
        operand = convertRegEx(parsed_regex.lhs)
        return RegularExpression(re_concat, operand,
                                 RegularExpression(re_star, operand))

    # RANGE = 7
    if kind == 7:
        x, y = parsed_regex.range
        if x == y:
            return repeat(x)
        if x == -1:
            # At most y occurrences, zero included.
            regular_expression = RegularExpression(re_empty_string)
            for count in range(1, y + 1):
                regular_expression = RegularExpression(
                    re_alt, regular_expression, repeat(count))
            return regular_expression
        if y == -1:
            # At least x occurrences.
            star_exp = RegularExpression(re_star,
                                         convertRegEx(parsed_regex.lhs))
            return RegularExpression(re_concat, repeat(x), star_exp)
        # Between x and y occurrences.
        regular_expression = repeat(x)
        for count in range(x + 1, y + 1):
            regular_expression = RegularExpression(
                re_alt, regular_expression, repeat(count))
        return regular_expression

    # SYMBOL_SET = 3
    if kind == 3:
        regular_expression = None
        # Ranges first: one symbol node per character from first to last.
        for item in parsed_regex.symbol_set:
            if isinstance(item, tuple):
                for code in range(ord(item[0]), ord(item[1]) + 1):
                    regular_expression = alternate(
                        regular_expression,
                        RegularExpression(re_symbol, chr(code)))
        # Then the individually listed symbols.
        for item in parsed_regex.symbol_set:
            if not isinstance(item, tuple):
                regular_expression = alternate(
                    regular_expression,
                    convertRegEx(RegEx(SYMBOL_SIMPLE, item)))
        return regular_expression
コード例 #23
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitMaybe(self, ctx: MyRegExParser.MaybeContext):
     """Replace the top expression with a MAYBE node wrapping it."""
     operand = self.RegExstack.pop()
     maybe_node = RegEx(rgxMAYBE, operand)
     self.RegExstack.append(maybe_node)
コード例 #24
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitRrange3(self, ctx: MyRegExParser.Rrange3Context):
     """Replace the top expression with a RANGE node bounded only from below.

     The popped count is the first element of the bounds tuple; the second
     element is -1.
     """
     operand = self.RegExstack.pop()
     lower = self.Numstack.pop()
     range_node = RegEx(rgxRANGE, operand, (lower, -1))
     self.RegExstack.append(range_node)
コード例 #25
0
ファイル: main.py プロジェクト: outOfBoun/projects
 def exitPlus(self, ctx: MyRegExParser.PlusContext):
     """Replace the top expression with a PLUS node wrapping it."""
     operand = self.RegExstack.pop()
     plus_node = RegEx(rgxPLUS, operand)
     self.RegExstack.append(plus_node)
コード例 #26
0
ファイル: main.py プロジェクト: mariajianu/RegEx-Engine
def parseRegEx(regex_string):
    """Parse a regular-expression string into a ``RegEx`` tree.

    This is a pattern-matching parser rather than a grammar-driven one: it
    recognises a fixed list of input shapes, tried in this order, and returns
    as soon as one matches:

    1. the empty string, a lone ``.``, a lone alphabet symbol;
    2. two leading symbols, optionally followed by ``*``, ``?``, ``+`` or
       ``|`` (the alternative's right-hand side is parsed recursively);
    3. one symbol followed by a quantifier, ``|`` or ``?`` plus a remainder;
    4. expressions containing more than one ``(``;
    5. expressions with a single parenthesised group;
    6. bracket sets such as ``[a-z]``, ``[abc0-9]`` or ``[abc]``;
    7. counted repetitions such as ``a{2}``, ``a{2,}``, ``a{,2}``, ``a{2,4}``.

    Args:
        regex_string: The regular expression to parse.

    Returns:
        The ``RegEx`` node built for the first matching shape. Inputs that
        match none of the shapes handled here fall through every branch
        without an explicit ``return``.

    Raises:
        IndexError: For inputs shorter than a branch expects; the branches
            index ``regex_string`` at fixed positions without length checks.
    """
    # Count the opening parentheses, i.e. the number of groups.
    count = regex_string.count('(')
    if regex_string == "":
        # The empty string.
        parsed_regex = RegEx(EMPTY_STRING)
        return parsed_regex

    if regex_string[0] == "." and len(regex_string) == 1:
        parsed_regex = RegEx(SYMBOL_ANY)
        return parsed_regex

    if regex_string[0] in alphabet and len(regex_string) == 1:
        # Expressions made of a single symbol: a, b, ...
        parsed_regex = RegEx(SYMBOL_SIMPLE, regex_string[0])
        return parsed_regex

    if regex_string[0] in alphabet and regex_string[1] in alphabet:
        if len(regex_string) == 2:
            # Expressions of the form ab, aa, bb, ...
            parsed_regex = RegEx(CONCATENATION,
                                 RegEx(SYMBOL_SIMPLE, regex_string[0]),
                                 RegEx(SYMBOL_SIMPLE, regex_string[1]))
            return parsed_regex
        else:
            # Expressions of the form ab*, ab+, ab?, ...
            # The quantifier binds to the second symbol only.
            if regex_string[2] == "*":
                parsed_regex = RegEx(
                    CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    RegEx(STAR, RegEx(SYMBOL_SIMPLE, regex_string[1])))
                return parsed_regex
            if regex_string[2] == "?":
                parsed_regex = RegEx(
                    CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    RegEx(MAYBE, RegEx(SYMBOL_SIMPLE, regex_string[1])))
                return parsed_regex
            if regex_string[2] == "+":
                parsed_regex = RegEx(
                    CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    RegEx(PLUS, RegEx(SYMBOL_SIMPLE, regex_string[1])))
                return parsed_regex
            # Expressions of the form aa|bb: the right-hand side is parsed
            # recursively.
            if regex_string[2] == "|":
                parsed_regex = RegEx(
                    ALTERNATION,
                    RegEx(CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                          RegEx(SYMBOL_SIMPLE, regex_string[1])),
                    parseRegEx(regex_string[3:]))
                return parsed_regex

    if regex_string[0] in alphabet and regex_string[
            1] in special_characters and len(regex_string) == 2:
        # Expressions of the form a*, b+, c?
        if regex_string[1] == '?':
            parsed_regex = RegEx(MAYBE, RegEx(SYMBOL_SIMPLE, regex_string[0]))
            return parsed_regex
        if regex_string[1] == '*':
            parsed_regex = RegEx(STAR, RegEx(SYMBOL_SIMPLE, regex_string[0]))
            return parsed_regex
        if regex_string[1] == '+':
            parsed_regex = RegEx(PLUS, RegEx(SYMBOL_SIMPLE, regex_string[0]))
            return parsed_regex

    if regex_string[0] in alphabet and regex_string[1] in special_characters:
        # Expressions of the form a|<rest> or a?<rest>
        first = regex_string[0]
        if regex_string[1] == '|':
            regex_string = regex_string[2:]
            parsed_regex = RegEx(ALTERNATION, RegEx(SYMBOL_SIMPLE, first),
                                 parseRegEx(regex_string))
            return parsed_regex
        if regex_string[1] == '?':
            regex_string = regex_string[2:]
            parsed_regex = RegEx(CONCATENATION,
                                 (RegEx(MAYBE, RegEx(SYMBOL_SIMPLE, first))),
                                 parseRegEx(regex_string))
            return parsed_regex

    if count > 1:
        # Expressions with more than one pair of parentheses.
        # Three passes collect the text found after the count-th,
        # (count-1)-th and (count-2)-th opening parenthesis respectively,
        # each pass stopping at the first ")" seen at that depth counter.
        paranthesis_nr = 0
        exp1 = ""
        exp2 = ""
        exp3 = ""
        for i in regex_string:
            if i == "(":
                paranthesis_nr = paranthesis_nr + 1
            if paranthesis_nr == count and i != ")" and i != "(":
                exp1 = exp1 + i
            if i == ")" and paranthesis_nr == count:
                break
        paranthesis_nr = 0
        for i in regex_string:
            if i == "(":
                paranthesis_nr = paranthesis_nr + 1
            if paranthesis_nr == count - 1 and i != ")" and i != "(":
                exp2 = exp2 + i
            if i == ")" and paranthesis_nr == count - 1:
                break
        paranthesis_nr = 0
        for i in regex_string:
            if i == "(":
                paranthesis_nr = paranthesis_nr + 1
            if paranthesis_nr == count - 2 and i != ")" and i != "(":
                exp3 = exp3 + i
            if i == ")" and paranthesis_nr == count - 2:
                break
        parsed_regex1 = RegEx(CONCATENATION, parseRegEx(exp2),
                              parseRegEx(exp1))
        # A trailing "+" on the whole expression is applied to the two
        # innermost pieces only.
        if regex_string[-1] == "+":
            parsed_regex1 = RegEx(PLUS, parsed_regex1)
        parsed_regex = RegEx(CONCATENATION, parseRegEx(exp3), parsed_regex1)
        return parsed_regex

    if regex_string[0] == '(' or regex_string[1] == '(':
        # Expressions with a single pair of parentheses, optionally preceded
        # by one symbol. newString gathers everything up to ")" with the
        # parentheses themselves removed.
        newString = ""
        for i in regex_string:
            if i != '(' and i != ')':
                newString = newString + i
            if i == ')':
                break
        if regex_string[0] in alphabet:
            # Leading symbol before the group: strip it from the group text
            # and concatenate it in front of the parsed group.
            newString = newString[1:]
            parsed_regex = parseRegEx(newString)
            parsed_regex = RegEx(CONCATENATION,
                                 RegEx(SYMBOL_SIMPLE, regex_string[0]),
                                 parsed_regex)
        else:
            parsed_regex = parseRegEx(newString)

        if len(newString) + 2 == len(regex_string):
            # Nothing follows the closing parenthesis.
            return parsed_regex
        else:
            # Inspect the character at offset len(newString) + 2 for a
            # quantifier or a trailing symbol.
            if regex_string[len(newString) + 2] == '+':
                parsed_regex = RegEx(PLUS, parsed_regex)
            if regex_string[len(newString) + 2] == '*':
                parsed_regex = RegEx(STAR, parsed_regex)
            if regex_string[len(newString) + 2] == '?':
                parsed_regex = RegEx(MAYBE, parsed_regex)
            if regex_string[len(newString) + 2] in alphabet:
                parsed_regex = RegEx(
                    CONCATENATION, parsed_regex,
                    RegEx(SYMBOL_SIMPLE, regex_string[len(newString) + 2]))
            if regex_string[0] in alphabet:
                if len(regex_string) > len(newString) + 3:
                    if regex_string[len(newString) + 3] in alphabet:
                        parsed_regex = RegEx(
                            CONCATENATION, parsed_regex,
                            RegEx(SYMBOL_SIMPLE,
                                  regex_string[len(newString) + 3]))
            return parsed_regex

    if regex_string[0] == '[':
        # Expressions of the form [a-z], [abc0-9], ...
        # The length test is parenthesised via `in`: previously
        # `A and len == 5 or len == 6` grouped as `(A and len == 5) or
        # len == 6`, so dash-free sets such as "[abcd]" were misread as the
        # range (s[1], s[3]).
        if '-' in regex_string and len(regex_string) in (5, 6):
            parsed_regex = RegEx(SYMBOL_SET,
                                 {(regex_string[1], regex_string[3])})
            if len(regex_string) == 6:
                if regex_string[5] == "*":
                    parsed_regex = RegEx(STAR, parsed_regex)
            return parsed_regex
        if '-' in regex_string and len(regex_string) > 6:
            # From here on `count` is the number of dashes (ranges).
            count = regex_string.count('-')
            if count == 1:
                # Plain symbols plus one range: the symbol collected just
                # before the dash becomes the range start and is removed
                # from the plain symbols.
                symbols = ""
                _range_ = ""
                for i in range(len(regex_string)):
                    if regex_string[i] in digits or regex_string[i] in letters:
                        if regex_string[i] not in _range_:
                            symbols += regex_string[i]
                    if regex_string[i] == '-':
                        _range_ += symbols[-1]
                        _range_ += regex_string[i + 1]
                        symbols = symbols[:-1]
                # NOTE(review): assumes exactly three plain symbols remain.
                parsed_regex = RegEx(SYMBOL_SET, {
                    symbols[0], symbols[1], symbols[2],
                    (_range_[0], _range_[1])
                })
                return parsed_regex
            if count == 2:
                if len(regex_string) == 8:
                    # Exactly two back-to-back ranges, e.g. [a-z0-9].
                    parsed_regex = RegEx(
                        SYMBOL_SET, {(regex_string[1], regex_string[3]),
                                     (regex_string[4], regex_string[6])})
                    return parsed_regex
                else:
                    # Plain symbols mixed with two ranges.
                    symbols = ""
                    _range1_ = ""
                    _range2_ = ""
                    for i in range(len(regex_string)):
                        if regex_string[i] in digits or regex_string[
                                i] in letters:
                            if regex_string[i] not in _range1_ and regex_string[
                                    i] not in _range2_:
                                symbols += regex_string[i]
                        if regex_string[i] == '-':
                            if _range1_ == "":
                                _range1_ += symbols[-1]
                                _range1_ += regex_string[i + 1]
                                symbols = symbols[:-1]
                            else:
                                _range2_ += symbols[-1]
                                _range2_ += regex_string[i + 1]
                                symbols = symbols[:-1]
                    # NOTE(review): assumes exactly three plain symbols remain.
                    parsed_regex = RegEx(
                        SYMBOL_SET, {
                            symbols[0], symbols[1], symbols[2],
                            (_range1_[0], _range1_[1]),
                            (_range2_[0], _range2_[1])
                        })
                    return parsed_regex
        if '-' not in regex_string:
            # Dash-free set such as [abc]: keep the characters between the
            # brackets as the symbol set.
            newString = ""
            for i in regex_string:
                if i != '[' and i != ']':
                    newString = newString + i
                if i == ']':
                    break
            parsed_regex = RegEx(SYMBOL_SET, newString)
            # Honour a "*" right after the closing bracket, mirroring the
            # range branch above; without this "[abc]*" would lose its star.
            closing = regex_string.find(']')
            if closing != -1 and regex_string[closing + 1:closing + 2] == "*":
                parsed_regex = RegEx(STAR, parsed_regex)
            return parsed_regex

    if regex_string[0] in alphabet and regex_string[1] == "{":
        # Expressions of the form a{2,4}, a{2}, a{2,}, a{,2}
        # Only single-digit bounds are recognised (fixed offsets 2..4).
        if regex_string[2] in digits and len(regex_string) > 4:
            if regex_string[4] in digits:
                # a{m,n}
                parsed_regex = RegEx(
                    RANGE, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    (int(regex_string[2]), int(regex_string[4])))
                return parsed_regex
        if regex_string[2] in string.digits:
            if (regex_string[3] == "}"):
                # a{n}: exactly n repetitions.
                parsed_regex = RegEx(
                    RANGE, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    (int(regex_string[2]), int(regex_string[2])))
                return parsed_regex
            else:
                # a{n,}: -1 stands in for the missing upper bound.
                parsed_regex = RegEx(RANGE,
                                     RegEx(SYMBOL_SIMPLE, regex_string[0]),
                                     (int(regex_string[2]), -1))
                return parsed_regex
        if regex_string[2] == ',' and regex_string[3] in digits:
            # a{,n}: -1 stands in for the missing lower bound.
            parsed_regex = RegEx(RANGE, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                                 (-1, int(regex_string[3])))
            return parsed_regex