def enterR3(self, ctx):
    """Push the pieces found on an R3 parse-tree node onto ``self.stack``.

    A simple symbol is pushed as a ``RegEx`` node; the postfix operators are
    pushed as the marker strings ``"*"``, ``"+"`` and ``"?"``.  A range is
    pushed as the marker ``"{"`` followed by a ``(min, max)`` tuple in which
    ``-1`` stands for a missing bound.

    Args:
        ctx: parse-tree context exposing ``SYMBOL``, ``star``, ``plus``,
            ``maybe`` and ``range_0`` accessors.
    """
    symbol = ctx.SYMBOL()
    if symbol is not None:
        self.stack.append(RegEx(rx.SYMBOL_SIMPLE, toString(symbol)))
    if ctx.star() is not None:
        self.stack.append("*")
    if ctx.plus() is not None:
        self.stack.append("+")
    if ctx.maybe() is not None:
        self.stack.append("?")

    range_ctx = ctx.range_0()
    if range_ctx is None:
        return

    # The "{" marker always precedes the bounds tuple on the stack.
    self.stack.append("{")

    fixed = range_ctx.fixed()
    if fixed is not None:
        num = getNumber(fixed.R_FIXED())
        self.stack.append((num, num))

    minimum = range_ctx.min_0()
    if minimum is not None:
        num = getFirstNumber(minimum.R_MIN())
        self.stack.append((num, -1))

    maximum = range_ctx.max_0()
    if maximum is not None:
        num = getLastNumber(maximum.R_MAX())
        self.stack.append((-1, num))

    interval = range_ctx.interval()
    if interval is not None:
        num1 = getFirstNumber(interval.R_INTERVAL())
        num2 = getLastNumber(interval.R_INTERVAL())
        self.stack.append((num1, num2))
def test1():
    """Run ``RegEx.process`` over every line of ``example.js``.

    Each stripped input line is passed to ``process`` together with its
    1-based line number, and one report line per input line is appended to
    ``tokens.txt``.
    """
    a = RegEx()

    # Context managers close both files even if reading or processing raises.
    with open('example.js') as program:
        b = [word.strip() for word in program]

    with open('tokens.txt', 'a') as output:
        for line_number, test in enumerate(b, start=1):
            output.write('Line #%d: %s\t%s\n' % (
                line_number, test, a.process(test, line_number)))
def _range_bound(text):
    """Convert one side of a ``{min,max}`` quantifier to an int (-1 if absent)."""
    if text == '':
        return -1
    if not text.isdigit():
        raise ValueError("invalid range bound: %r" % (text,))
    return int(text)


def apply_transformations(regex_string, pos, symbol_regex):
    """Wrap ``symbol_regex`` in every postfix operator starting at ``pos``.

    Recognised operators are ``*``, ``+``, ``?`` and the range forms ``{n}``,
    ``{n,}``, ``{,m}`` and ``{n,m}``.  Bounds may have any number of digits;
    a missing bound is reported as ``-1``.

    Args:
        regex_string: the full pattern text.
        pos: index of the first character that may be a postfix operator.
        symbol_regex: the expression the operators apply to.

    Returns:
        A ``(regex, pos)`` tuple: the wrapped expression and the index of the
        first character that is not part of a postfix operator.

    Raises:
        ValueError: if a ``{`` has no closing ``}`` or its bounds are not
            decimal numbers.
    """
    unary_operators = {'*': RX_STAR, '+': RX_PLUS, '?': RX_MAYBE}

    while pos < len(regex_string):
        symbol = regex_string[pos]
        if symbol in unary_operators:
            symbol_regex = RegEx(unary_operators[symbol], symbol_regex)
            pos = pos + 1
        elif symbol == '{':
            closing = regex_string.find('}', pos + 1)
            if closing == -1:
                raise ValueError(
                    "unterminated range quantifier at position %d" % pos)
            lower, comma, upper = regex_string[pos + 1:closing].partition(',')
            if comma:
                bounds = (_range_bound(lower), _range_bound(upper))
                if bounds == (-1, -1):
                    raise ValueError(
                        "range quantifier without bounds at position %d" % pos)
            else:
                count = _range_bound(lower)
                if count == -1:
                    raise ValueError(
                        "empty range quantifier at position %d" % pos)
                bounds = (count, count)
            symbol_regex = RegEx(RX_RANGE, symbol_regex, bounds)
            # Resume right after the closing brace.
            pos = closing + 1
        else:
            break
    return (symbol_regex, pos)
def clean(self, ex):
    """Fold ``ex`` with the pending operators on ``self.stack``.

    While the stack ends with a postfix marker (``"*"``, ``"+"``, ``"?"``), a
    ``"{"`` marker followed by its bounds, or an operand preceded by ``"|"``
    or ``"."``, those entries are popped and combined with the current
    expression.  The resulting expression is pushed once nothing matches.

    Args:
        ex: the expression that has just been completed.
    """
    node = ex
    while True:
        # NOTE(review): assumes instr() only inspects the stack, so reading
        # it once per pass is equivalent to the repeated calls - confirm.
        top = instr(self.stack, 1)
        if top == "*":
            self.stack.pop()
            node = RegEx(rx.STAR, node)
            continue
        if top == "+":
            self.stack.pop()
            node = RegEx(rx.PLUS, node)
            continue
        if top == "?":
            self.stack.pop()
            node = RegEx(rx.MAYBE, node)
            continue

        below = instr(self.stack, 2)
        if below == "{":
            bounds = self.stack.pop()
            self.stack.pop()
            node = RegEx(rx.RANGE, node, bounds)
            continue

        top_is_operand = not isinstance(top, str)
        if below == "|" and top_is_operand:
            left = self.stack.pop()
            self.stack.pop()
            node = RegEx(rx.ALTERNATION, left, node)
            continue
        if below == "." and top_is_operand:
            left = self.stack.pop()
            self.stack.pop()
            node = RegEx(rx.CONCATENATION, left, node)
            continue

        self.stack.append(node)
        return
def build_set(regex_string, pos):
    """Parse the body of a ``[...]`` symbol set.

    Single symbols are added to the set as characters and ``a-z`` style
    ranges as ``(first, last)`` tuples.

    Args:
        regex_string: the full pattern text.
        pos: index of the first character after the opening ``[``.

    Returns:
        A ``(regex, pos)`` tuple: the ``RX_SYMBOL_SET`` expression and the
        index of the closing ``]``.

    Raises:
        ValueError: if the set is not closed or contains a character that is
            not in ``CHARSET`` (the latter used to loop forever because the
            position never advanced).
    """
    symbol_set = set()
    length = len(regex_string)
    while True:
        if pos >= length:
            raise ValueError("unterminated symbol set")
        symbol = regex_string[pos]
        if symbol == ']':
            break
        if symbol not in CHARSET:
            raise ValueError(
                "unexpected character %r in symbol set at position %d"
                % (symbol, pos))
        if pos + 2 < length and regex_string[pos + 1] == '-':
            symbol_set.add((symbol, regex_string[pos + 2]))
            pos = pos + 3
        else:
            symbol_set.add(symbol)
            pos = pos + 1
    return (RegEx(RX_SYMBOL_SET, symbol_set), pos)
def dfa(regex):
    """Build an automaton from ``regex``, printing a progress line per stage.

    The stages are: ``RegEx(regex).parsex()``, ``Automaton.parse_tree``,
    ``Automaton.reduce_automaton``, ``Automaton.reduce_automaton2`` and
    finally ``cleanUp()`` on the result, which is then returned.
    """
    print("build dfa")
    tree = RegEx(regex).parsex()
    print("parsex ok")
    parsed = Automaton.parse_tree(tree)
    print("parse_tree ok")
    reduced = Automaton.reduce_automaton(parsed)
    print("reduce ok")
    result = Automaton.reduce_automaton2(reduced)
    print("reduce2 ok")
    result.cleanUp()
    print("cleanup ok")
    return result
def test_regex(self):
    """Construct a ``RegEx`` from ``'hello'``, call ``search`` and assert the object is truthy."""
    pattern = RegEx('hello')
    pattern.search()
    # NOTE(review): this asserts on the RegEx object itself, not on what
    # search() returned - confirm that this is what the test should check.
    self.assertTrue(pattern)
for i in range (len(regex_string)): last = None if modify[i] == '(': ceva = 1 ok = 2 start = i if modify[i] == ')': ok = 0 end = i ceva = 0 if ok == 2: end = i + 1 if len(regex_string) == 1 and regex_string[i] != '.': regex = RegEx(1, regex_string[i],None) elif regex_string[i] == '.': regex = RegEx(2, None, None) if regex_string[i] == '[': square = 1 start1 = i ók1 = 2 if regex_string[i] == ']': square = 2 end1 = i ok1 = 0 if ok1 == 2: end1 = start1 + 1
from regex import RegEx # fixture regex_obj = RegEx() def test_pass(): test_string = 'ABRACADABRA' rv = regex_obj.transduce(test_string) assert rv == [ 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', 'ACCEPT', ] print(f'The return value is {rv}.') def test_fail_first(): test_string = '12345' rv = regex_obj.transduce(test_string) assert rv == [ 'REJECT', 'REJECT',
def exitRrange4(self, ctx: MyRegExParser.Rrange4Context):
    """Replace the top expression with a range node bounded by the two most recent numbers.

    The operand is popped from ``self.RegExstack``; the upper bound and then
    the lower bound are popped from ``self.Numstack``.
    """
    operand = self.RegExstack.pop()
    upper = self.Numstack.pop()
    lower = self.Numstack.pop()
    self.RegExstack.append(RegEx(rgxRANGE, operand, (lower, upper)))
def _append_term(regex, term):
    """Concatenate ``term`` onto ``regex``; the first term stands alone."""
    if regex is None:
        return term
    return RegEx(RX_CONCATENATION, regex, term)


def _matching_paren(regex_string, opening, endpoint):
    """Return the index of the ``)`` matching the ``(`` at ``opening``."""
    depth = 0
    for index in range(opening, endpoint):
        if regex_string[index] == '(':
            depth += 1
        elif regex_string[index] == ')':
            depth -= 1
            if depth == 0:
                return index
    raise ValueError("unbalanced '(' at position %d" % opening)


def _alternative_end(regex_string, start, endpoint):
    """Return the index of the next top-level ``|`` in ``[start, endpoint)``.

    ``endpoint`` is returned when there is none.  Bars nested inside
    parentheses are skipped because they belong to the group.
    """
    depth = 0
    for index in range(start, endpoint):
        symbol = regex_string[index]
        if symbol == '(':
            depth += 1
        elif symbol == ')':
            depth -= 1
        elif symbol == '|' and depth == 0:
            return index
    return endpoint


def get_parsed_regex(regex_string, startpoint, endpoint):
    """Parse ``regex_string[startpoint:endpoint]`` into a ``RegEx`` tree.

    Handles plain symbols from ``CHARSET``, ``[...]`` sets, ``.``,
    parenthesised groups, postfix operators (delegated to
    ``apply_transformations``) and ``|`` alternation, which is folded
    left to right.

    Args:
        regex_string: the full pattern text.
        startpoint: index of the first character to parse.
        endpoint: index one past the last character to parse.

    Returns:
        The parsed expression, or ``None`` when the slice is empty.

    Raises:
        ValueError: on an unbalanced ``(`` or a character that starts no
            known construct (both used to hang or fail with ``IndexError``).
    """
    regex = None
    i = startpoint
    while i < endpoint:
        symbol = regex_string[i]
        if symbol in CHARSET:
            term = RegEx(RX_SYMBOL_SIMPLE, symbol)
            i = i + 1
        elif symbol == '[':
            # build_set returns the index of the closing bracket.
            (term, i) = build_set(regex_string, i + 1)
            i = i + 1
        elif symbol == '.':
            term = RegEx(RX_SYMBOL_ANY)
            i = i + 1
        elif symbol == '(':
            closing = _matching_paren(regex_string, i, endpoint)
            term = get_parsed_regex(regex_string, i + 1, closing)
            i = closing + 1
        elif symbol == '|':
            # The right-hand side runs up to (not including) the next
            # top-level bar, so no character of the alternative is lost.
            j = _alternative_end(regex_string, i + 1, endpoint)
            alternation_expr = get_parsed_regex(regex_string, i + 1, j)
            regex = RegEx(RX_ALTERNATION, regex, alternation_expr)
            i = j
            continue
        else:
            raise ValueError(
                "unexpected character %r at position %d" % (symbol, i))

        # Every atom, including '.', may be followed by postfix operators.
        (term, i) = apply_transformations(regex_string, i, term)
        regex = _append_term(regex, term)
    return regex
def exitSet_0(self, ctx):
    """Replace the top stack entry with a symbol-set node and reset ``self.set``.

    The node receives a copy of ``self.set``, so clearing the working set
    afterwards does not affect it.
    """
    self.stack.pop()
    members = self.set.copy()
    node = RegEx(rx.SYMBOL_SET, members)
    self.stack.append(node)
    self.set.clear()
def exitExp(self, ctx:ReGexParser.ExpContext):
    """Build the ``RegEx`` tree for an expression node and return it.

    The method recurses into child contexts itself (by calling ``exitExp`` on
    them) instead of relying on listener state.

    Args:
        ctx: the expression context to convert.

    Returns:
        The ``RegEx`` node for ``ctx``.
    """
    # Single character or number: a simple symbol.
    if ctx.CHAR() or ctx.NUMBER():
        return RegEx(SYMBOL_SIMPLE, ctx.getText())

    if ctx.ANY():
        return RegEx(SYMBOL_ANY)

    # Postfix operators apply to the first child.
    if ctx.MAYBE():
        return RegEx(MAYBE, self.exitExp(ctx.getChild(0)))
    if ctx.STAR():
        return RegEx(STAR, self.exitExp(ctx.getChild(0)))
    if ctx.PLUS():
        return RegEx(PLUS, self.exitExp(ctx.getChild(0)))

    if ctx.SET():
        # Walk the text between the brackets, collecting single characters
        # and (first, last) tuples for "a-z" style ranges.
        text = ctx.getText()
        last = len(text) - 1
        set_symbols = set()
        j = 1
        # Scanning stops at a '-' that does not follow a symbol.
        while j < last and text[j] != '-':
            if text[j + 1] == '-':
                set_symbols.add((text[j], text[j + 2]))
                j = j + 3
            else:
                set_symbols.add(text[j])
                j = j + 1
        return RegEx(SYMBOL_SET, set_symbols)

    if ctx.RANGE():
        # Text looks like "a{n}", "a{,m}", "a{n,}" or "a{n,m}"; every bound
        # may have several digits and a missing bound becomes -1.
        text = ctx.getText()
        operand = RegEx(SYMBOL_SIMPLE, text[0])
        bounds_text = text[text.find('{') + 1:text.find('}')]
        lower, comma, upper = bounds_text.partition(',')
        if comma:
            bounds = (int(lower) if lower else -1,
                      int(upper) if upper else -1)
        else:
            number = int(lower)
            bounds = (number, number)
        return RegEx(RANGE, operand, bounds)

    if ctx.ALTERNATION():
        e1 = self.exitExp(ctx.getChild(0))
        e2 = self.exitExp(ctx.getChild(2))
        return RegEx(ALTERNATION, e1, e2)

    text = ctx.getText()
    if text[0] == '(':
        if text[len(text) - 1] == ')':
            # The whole expression is one group: unwrap it.
            return self.exitExp(ctx.getChild(1))
        # A group followed by another expression.
        e1 = self.exitExp(ctx.getChild(1))
        e2 = self.exitExp(ctx.getChild(3))
        return RegEx(CONCATENATION, e1, e2)

    # Plain concatenation of the first two children.
    e1 = self.exitExp(ctx.getChild(0))
    e2 = self.exitExp(ctx.getChild(1))
    return RegEx(CONCATENATION, e1, e2)
def exitSetsymb(self, ctx: MyRegExParser.SetsymbContext):
    """Push a symbol-set node built from ``self.setsymb``."""
    # NOTE(review): the node receives self.setsymb itself, not a copy -
    # confirm nothing mutates it afterwards.
    node = RegEx(rgxSYMBOL_SET, self.setsymb)
    self.RegExstack.append(node)
def exitAnysymb(self, ctx: MyRegExParser.AnysymbContext):
    """Push an any-symbol node onto ``self.RegExstack``."""
    node = RegEx(rgxSYMBOL_ANY)
    self.RegExstack.append(node)
def exitStar(self, ctx: MyRegExParser.StarContext):
    """Wrap the expression on top of ``self.RegExstack`` in a star node."""
    operand = self.RegExstack.pop()
    node = RegEx(rgxSTAR, operand)
    self.RegExstack.append(node)
def exitAltern(self, ctx: MyRegExParser.AlternContext):
    """Combine the two topmost expressions into an alternation node.

    The top of ``self.RegExstack`` is the right operand and the entry below
    it is the left operand.
    """
    right = self.RegExstack.pop()
    left = self.RegExstack.pop()
    node = RegEx(rgxALTERNATION, left, right)
    self.RegExstack.append(node)
def exitConcat(self, ctx: MyRegExParser.ConcatContext):
    """Combine the two topmost expressions into a concatenation node.

    The top of ``self.RegExstack`` is the right operand and the entry below
    it is the left operand.
    """
    right = self.RegExstack.pop()
    left = self.RegExstack.pop()
    node = RegEx(rgxCONCATENATION, left, right)
    self.RegExstack.append(node)
def exitAtom(self, ctx: MyRegExParser.AtomContext):
    """Push a simple-symbol node when the atom is a plain symbol.

    The symbol text is taken from the top of ``self.Symbstack``.  Atoms
    without a ``symbol`` child leave both stacks untouched.
    """
    # Identity comparison is the idiomatic (and __eq__-proof) None check.
    if ctx.symbol() is not None:
        self.RegExstack.append(
            RegEx(rgxSYMBOL_SIMPLE, self.Symbstack.pop()))
def _concatenate_terms(terms):
    """Left-fold ``terms`` into CONCATENATION nodes.

    Returns an EMPTY_STRING expression when ``terms`` is empty.
    """
    result = RegEx(EMPTY_STRING)
    for term in terms:
        # An EMPTY_STRING accumulator is simply replaced by the next term.
        if result.type == EMPTY_STRING:
            result = term
        else:
            result = RegEx(CONCATENATION, result, term)
    return result


def parse(regex_string):
    """Parse ``regex_string`` into a ``RegEx`` tree.

    The string is scanned once.  Completed terms are collected in a list and
    concatenated at the end; ``|`` concatenates everything seen so far as the
    left operand and parses the remainder of the string recursively as the
    right operand.  Text inside a balanced pair of parentheses is parsed
    recursively as one term.

    Args:
        regex_string: the pattern text.

    Returns:
        The parsed expression (an EMPTY_STRING expression for ``""``).
    """
    # Terms parsed so far, in order.
    acc = []
    open_par = 0
    closed_par = 0
    first_open_par = -1
    open_curly_pos = -1
    open_square_pos = -1

    for i, symbol in enumerate(regex_string):
        # open_par > 0: we are inside parentheses and only look for the
        # matching ')'.  open_par == 0: normal parsing.
        if open_par == 0:
            # Symbol set: remember where the body starts and convert it
            # with get_sym_for_square when the closing bracket is reached.
            if symbol == "[":
                open_square_pos = i + 1
            if symbol == "]":
                square_expr_set = get_sym_for_square(
                    regex_string[open_square_pos:i])
                acc.append(RegEx(SYMBOL_SET, square_expr_set))
                open_square_pos = -1
            if open_square_pos != -1:
                # Still inside the brackets.
                continue

            # Range: same approach; it applies to the previous term.
            if symbol == "{":
                open_curly_pos = i + 1
            if symbol == "}":
                x, y = get_from_curly(regex_string[open_curly_pos:i])
                acc.append(RegEx(RANGE, acc.pop(), (x, y)))
                open_curly_pos = -1

            # A plain symbol (but not a digit inside "{}").
            if symbol in ALFANUM and open_curly_pos == -1:
                acc.append(RegEx(SYMBOL_SIMPLE, symbol))

            if symbol == ".":
                acc.append(RegEx(SYMBOL_ANY))

            # Postfix operators wrap the previously parsed term.
            if symbol == "?":
                acc.append(RegEx(MAYBE, acc.pop()))
            if symbol == "*":
                acc.append(RegEx(STAR, acc.pop()))
            if symbol == "+":
                acc.append(RegEx(PLUS, acc.pop()))

            # Alternation: everything so far is the left operand, the rest
            # of the string is the right operand.
            if symbol == "|":
                lhs = _concatenate_terms(acc)
                rhs = parse(regex_string[i + 1:])
                acc = [RegEx(ALTERNATION, lhs, rhs)]
                break

        # Track parentheses; once as many have closed as opened, parse the
        # enclosed text as a single term.
        if symbol == "(":
            open_par += 1
            if first_open_par == -1:
                first_open_par = i + 1
        if symbol == ")":
            closed_par += 1
            if open_par == closed_par:
                acc.append(parse(regex_string[first_open_par:i]))
                open_par = 0
                closed_par = 0
                first_open_par = -1

    # Concatenate all collected terms.
    return _concatenate_terms(acc)
def enterAny_0(self, ctx):
    """Push an any-symbol node onto ``self.stack``."""
    node = RegEx(rx.SYMBOL_ANY)
    self.stack.append(node)
# RegularExpression node kinds, as the conversion below uses them.
_RE_EMPTY_STRING = 1
_RE_SYMBOL = 2
_RE_STAR = 3
_RE_CONCATENATION = 4
_RE_ALTERNATION = 5

# Numeric RegEx type codes this function compares against.
_RX_SYMBOL_ANY = 2
_RX_SYMBOL_SET = 3
_RX_MAYBE = 4
_RX_STAR = 5
_RX_PLUS = 6
_RX_RANGE = 7
_RX_CONCATENATION = 8
_RX_ALTERNATION = 9


def _alternate_all(options):
    """Left-fold ``options`` into alternation nodes (``None`` if empty)."""
    result = None
    for option in options:
        if result is None:
            result = option
        else:
            result = RegularExpression(_RE_ALTERNATION, result, option)
    return result


def _repeat_exactly(operand, count):
    """Convert ``operand`` repeated exactly ``count`` times.

    Zero repetitions yield the empty-string expression.
    """
    if count <= 0:
        return RegularExpression(_RE_EMPTY_STRING)
    result = convertRegEx(operand)
    for _ in range(count - 1):
        result = RegularExpression(_RE_CONCATENATION, result,
                                   convertRegEx(operand))
    return result


def _symbol_set_options(symbol_set):
    """Return one symbol expression per character matched by ``symbol_set``.

    Tuples are inclusive ``(first, last)`` character ranges; any other
    member is a single symbol.
    """
    options = []
    for member in symbol_set:
        if isinstance(member, tuple):
            for code in range(ord(member[0]), ord(member[1]) + 1):
                options.append(RegularExpression(_RE_SYMBOL, chr(code)))
    for member in symbol_set:
        if not isinstance(member, tuple):
            options.append(convertRegEx(RegEx(SYMBOL_SIMPLE, member)))
    return options


def convertRegEx(parsed_regex):
    """Convert a parsed ``RegEx`` tree into a ``RegularExpression`` tree.

    The target only has empty string, symbol, star, concatenation and
    alternation nodes, so ``.``, ``?``, ``+``, ranges and symbol sets are
    expanded into those.

    Args:
        parsed_regex: the ``RegEx`` node to convert.

    Returns:
        The equivalent ``RegularExpression``; ``None`` for an empty symbol
        set or an unrecognised node type.
    """
    kind = parsed_regex.type

    if kind == EMPTY_STRING:
        return RegularExpression(_RE_EMPTY_STRING)

    if kind == SYMBOL_SIMPLE:
        return RegularExpression(_RE_SYMBOL, str(parsed_regex))

    if kind == _RX_CONCATENATION:
        return RegularExpression(_RE_CONCATENATION,
                                 convertRegEx(parsed_regex.lhs),
                                 convertRegEx(parsed_regex.rhs))

    if kind == _RX_ALTERNATION:
        return RegularExpression(_RE_ALTERNATION,
                                 convertRegEx(parsed_regex.lhs),
                                 convertRegEx(parsed_regex.rhs))

    if kind == _RX_SYMBOL_ANY:
        # '.' is exactly one symbol of the alphabet; it must not also
        # accept the empty string.
        any_symbol = _alternate_all(
            convertRegEx(RegEx(SYMBOL_SIMPLE, symbol)) for symbol in alphabet)
        if any_symbol is None:
            # Degenerate empty alphabet: keep the historical result.
            return RegularExpression(_RE_EMPTY_STRING)
        return any_symbol

    if kind == _RX_MAYBE:
        return RegularExpression(_RE_ALTERNATION,
                                 RegularExpression(_RE_EMPTY_STRING),
                                 convertRegEx(parsed_regex.lhs))

    if kind == _RX_STAR:
        return RegularExpression(_RE_STAR, convertRegEx(parsed_regex.lhs))

    if kind == _RX_PLUS:
        # x+ == x x*
        operand = convertRegEx(parsed_regex.lhs)
        return RegularExpression(_RE_CONCATENATION, operand,
                                 RegularExpression(_RE_STAR, operand))

    if kind == _RX_RANGE:
        x, y = parsed_regex.range
        operand = parsed_regex.lhs
        if x == y:
            # Exactly x occurrences.
            return _repeat_exactly(operand, x)
        if x == -1:
            # At most y occurrences: empty | x{1} | ... | x{y}.
            options = [RegularExpression(_RE_EMPTY_STRING)]
            options.extend(
                _repeat_exactly(operand, k) for k in range(1, y + 1))
            return _alternate_all(options)
        if y == -1:
            # At least x occurrences: x{x} followed by x*.
            return RegularExpression(
                _RE_CONCATENATION, _repeat_exactly(operand, x),
                RegularExpression(_RE_STAR, convertRegEx(operand)))
        # Between x and y occurrences.
        return _alternate_all(
            _repeat_exactly(operand, k) for k in range(x, y + 1))

    if kind == _RX_SYMBOL_SET:
        return _alternate_all(_symbol_set_options(parsed_regex.symbol_set))

    return None
def exitMaybe(self, ctx: MyRegExParser.MaybeContext):
    """Wrap the expression on top of ``self.RegExstack`` in a maybe node."""
    operand = self.RegExstack.pop()
    node = RegEx(rgxMAYBE, operand)
    self.RegExstack.append(node)
def exitRrange3(self, ctx: MyRegExParser.Rrange3Context):
    """Replace the top expression with a range node that has only a lower bound.

    The operand comes from ``self.RegExstack`` and the lower bound from
    ``self.Numstack``; the upper bound is recorded as ``-1``.
    """
    operand = self.RegExstack.pop()
    lower = self.Numstack.pop()
    node = RegEx(rgxRANGE, operand, (lower, -1))
    self.RegExstack.append(node)
def exitPlus(self, ctx: MyRegExParser.PlusContext):
    """Wrap the expression on top of ``self.RegExstack`` in a plus node."""
    operand = self.RegExstack.pop()
    node = RegEx(rgxPLUS, operand)
    self.RegExstack.append(node)
def parseRegEx(regex_string):
    """Parse ``regex_string`` into a ``RegEx`` tree by matching known shapes.

    The function recognises a fixed catalogue of pattern shapes (single
    symbols, two-symbol concatenations with an optional operator, simple
    alternations, parenthesised groups, bracketed symbol sets and
    single-digit ranges) and recurses on the remaining text where needed.

    Args:
        regex_string: the pattern text.

    Returns:
        The parsed expression, or ``None`` when no shape matches.
    """

    def text_at_depth(level):
        """Collect the characters seen while ``level`` '(' have been opened.

        Collection stops at the first ')' met at that level.
        """
        opened = 0
        collected = ""
        for ch in regex_string:
            if ch == "(":
                opened = opened + 1
            if opened == level and ch != ")" and ch != "(":
                collected = collected + ch
            if ch == ")" and opened == level:
                break
        return collected

    # Number of opening parentheses in the pattern.
    count = regex_string.count('(')

    # The empty string.
    if regex_string == "":
        return RegEx(EMPTY_STRING)

    if regex_string[0] == "." and len(regex_string) == 1:
        return RegEx(SYMBOL_ANY)

    # A single symbol: a, b, ...
    if regex_string[0] in alphabet and len(regex_string) == 1:
        return RegEx(SYMBOL_SIMPLE, regex_string[0])

    if regex_string[0] in alphabet and regex_string[1] in alphabet:
        if len(regex_string) == 2:
            # Two symbols: ab, aa, bb, ...
            return RegEx(CONCATENATION,
                         RegEx(SYMBOL_SIMPLE, regex_string[0]),
                         RegEx(SYMBOL_SIMPLE, regex_string[1]))
        else:
            # Two symbols with an operator on the second: ab*, ab+, ab?
            if regex_string[2] == "*":
                return RegEx(
                    CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    RegEx(STAR, RegEx(SYMBOL_SIMPLE, regex_string[1])))
            if regex_string[2] == "?":
                return RegEx(
                    CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    RegEx(MAYBE, RegEx(SYMBOL_SIMPLE, regex_string[1])))
            if regex_string[2] == "+":
                return RegEx(
                    CONCATENATION, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    RegEx(PLUS, RegEx(SYMBOL_SIMPLE, regex_string[1])))
            # Two symbols followed by an alternation: aa|bb
            if regex_string[2] == "|":
                return RegEx(
                    ALTERNATION,
                    RegEx(CONCATENATION,
                          RegEx(SYMBOL_SIMPLE, regex_string[0]),
                          RegEx(SYMBOL_SIMPLE, regex_string[1])),
                    parseRegEx(regex_string[3:]))

    if regex_string[0] in alphabet and regex_string[
            1] in special_characters and len(regex_string) == 2:
        # One symbol with an operator: a*, b+, c?
        if regex_string[1] == '?':
            return RegEx(MAYBE, RegEx(SYMBOL_SIMPLE, regex_string[0]))
        if regex_string[1] == '*':
            return RegEx(STAR, RegEx(SYMBOL_SIMPLE, regex_string[0]))
        if regex_string[1] == '+':
            return RegEx(PLUS, RegEx(SYMBOL_SIMPLE, regex_string[0]))

    if regex_string[0] in alphabet and regex_string[1] in special_characters:
        # One symbol, an operator, then more text: a|..., a?...
        first = regex_string[0]
        if regex_string[1] == '|':
            regex_string = regex_string[2:]
            return RegEx(ALTERNATION, RegEx(SYMBOL_SIMPLE, first),
                         parseRegEx(regex_string))
        if regex_string[1] == '?':
            regex_string = regex_string[2:]
            return RegEx(CONCATENATION,
                         (RegEx(MAYBE, RegEx(SYMBOL_SIMPLE, first))),
                         parseRegEx(regex_string))

    if count > 1:
        # More than one pair of parentheses: split by nesting depth.
        exp1 = text_at_depth(count)
        exp2 = text_at_depth(count - 1)
        exp3 = text_at_depth(count - 2)
        parsed_regex1 = RegEx(CONCATENATION, parseRegEx(exp2),
                              parseRegEx(exp1))
        if regex_string[len(regex_string) - 1] == "+":
            parsed_regex1 = RegEx(PLUS, parsed_regex1)
        return RegEx(CONCATENATION, parseRegEx(exp3), parsed_regex1)

    if regex_string[0] == '(' or regex_string[1] == '(':
        # Exactly one pair of parentheses, optionally preceded by a symbol.
        newString = ""
        for i in regex_string:
            if i != '(' and i != ')':
                newString = newString + i
            if i == ')':
                break
        if regex_string[0] in alphabet:
            newString = newString[1:]
            parsed_regex = parseRegEx(newString)
            parsed_regex = RegEx(CONCATENATION,
                                 RegEx(SYMBOL_SIMPLE, regex_string[0]),
                                 parsed_regex)
        else:
            parsed_regex = parseRegEx(newString)
        if len(newString) + 2 == len(regex_string):
            return parsed_regex
        else:
            # Something follows the closing parenthesis.
            if regex_string[len(newString) + 2] == '+':
                parsed_regex = RegEx(PLUS, parsed_regex)
            if regex_string[len(newString) + 2] == '*':
                parsed_regex = RegEx(STAR, parsed_regex)
            if regex_string[len(newString) + 2] == '?':
                parsed_regex = RegEx(MAYBE, parsed_regex)
            if regex_string[len(newString) + 2] in alphabet:
                parsed_regex = RegEx(
                    CONCATENATION, parsed_regex,
                    RegEx(SYMBOL_SIMPLE, regex_string[len(newString) + 2]))
            if regex_string[0] in alphabet:
                if len(regex_string) > len(newString) + 3:
                    if regex_string[len(newString) + 3] in alphabet:
                        parsed_regex = RegEx(
                            CONCATENATION, parsed_regex,
                            RegEx(SYMBOL_SIMPLE,
                                  regex_string[len(newString) + 3]))
            return parsed_regex

    if regex_string[0] == '[':
        # Symbol sets: [a-z], [abc0-9], ...
        # The length test is grouped explicitly: without the parentheses a
        # six-character set with no '-' was treated as a range.
        if '-' in regex_string and (len(regex_string) == 5
                                    or len(regex_string) == 6):
            parsed_regex = RegEx(SYMBOL_SET,
                                 {(regex_string[1], regex_string[3])})
            if len(regex_string) == 6:
                if regex_string[5] == "*":
                    parsed_regex = RegEx(STAR, parsed_regex)
            return parsed_regex
        if '-' in regex_string and len(regex_string) > 6:
            # From here on, count is the number of ranges in the set.
            count = 0
            for i in regex_string:
                if i == '-':
                    count = count + 1
            if count == 1:
                symbols = ""
                _range_ = ""
                for i in range(len(regex_string)):
                    if regex_string[i] in digits or regex_string[i] in letters:
                        if regex_string[i] not in _range_:
                            symbols += regex_string[i]
                    if regex_string[i] == '-':
                        # The symbol just collected starts the range.
                        _range_ += symbols[len(symbols) - 1]
                        _range_ += regex_string[i + 1]
                        symbols = symbols[:-1]
                return RegEx(SYMBOL_SET, {
                    symbols[0], symbols[1], symbols[2],
                    (_range_[0], _range_[1])
                })
            if count == 2:
                if len(regex_string) == 8:
                    return RegEx(
                        SYMBOL_SET, {(regex_string[1], regex_string[3]),
                                     (regex_string[4], regex_string[6])})
                else:
                    symbols = ""
                    _range1_ = ""
                    _range2_ = ""
                    for i in range(len(regex_string)):
                        if regex_string[i] in digits or regex_string[
                                i] in letters:
                            if regex_string[i] not in _range1_ and regex_string[
                                    i] not in _range2_:
                                symbols += regex_string[i]
                        if regex_string[i] == '-':
                            if _range1_ == "":
                                _range1_ += symbols[len(symbols) - 1]
                                _range1_ += regex_string[i + 1]
                                symbols = symbols[:-1]
                            else:
                                _range2_ += symbols[len(symbols) - 1]
                                _range2_ += regex_string[i + 1]
                                symbols = symbols[:-1]
                    return RegEx(
                        SYMBOL_SET, {
                            symbols[0], symbols[1], symbols[2],
                            (_range1_[0], _range1_[1]),
                            (_range2_[0], _range2_[1])
                        })
        if '-' not in regex_string:
            newString = ""
            for i in regex_string:
                if i != '[' and i != ']':
                    newString = newString + i
                if i == ']':
                    break
            # NOTE(review): the set is passed as a string of its symbols
            # rather than a set object - confirm consumers accept that.
            return RegEx(SYMBOL_SET, newString)

    if regex_string[0] in alphabet and regex_string[1] == "{":
        # Single-digit ranges on one symbol: a{2}, a{2,}, a{,2}, a{2,3}
        if regex_string[2] in digits and len(regex_string) > 4:
            if regex_string[4] in digits:
                return RegEx(
                    RANGE, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    (int(regex_string[2]), int(regex_string[4])))
        if regex_string[2] in string.digits:
            if (regex_string[3] == "}"):
                return RegEx(
                    RANGE, RegEx(SYMBOL_SIMPLE, regex_string[0]),
                    (int(regex_string[2]), int(regex_string[2])))
            else:
                return RegEx(RANGE,
                             RegEx(SYMBOL_SIMPLE, regex_string[0]),
                             (int(regex_string[2]), -1))
        if regex_string[2] == ',' and regex_string[3] in digits:
            return RegEx(RANGE,
                         RegEx(SYMBOL_SIMPLE, regex_string[0]),
                         (-1, int(regex_string[3])))