Ejemplo n.º 1
0
def alphabet():
    """Build the expression equivalent to the "." wildcard.

    Returns:
        A left-nested ALTERNATION holding one SYMBOL expression per
        supported character: lowercase letters, uppercase letters, digits.
    """
    symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    head, tail = symbols[0], symbols[1:]

    # Seed with the first character, then fold the remaining ones in.
    union = RegularExpression(SYMBOL, head)
    for char in tail:
        union = RegularExpression(ALTERNATION, union,
                                  RegularExpression(SYMBOL, char))
    return union
Ejemplo n.º 2
0
def get_set(tuple, temp):
    """Extend an alternation with every character of an inclusive range.

    Args:
        tuple: ``(begin, end)`` pair of characters. Both are looked up with
            ``rx.CHARSET.index``, so they must be present in ``rx.CHARSET``.
        temp: expression accumulated so far, or ``None`` when nothing has
            been accumulated yet.

    Returns:
        ``temp`` alternated with one SYMBOL expression per character from
        ``begin`` to ``end`` (in ``rx.CHARSET`` order). ``temp`` is returned
        unchanged when the range is empty.
    """
    (begin, end) = tuple
    charset = rx.CHARSET
    for position in range(charset.index(begin), charset.index(end) + 1):
        symbol = RegularExpression(re.SYMBOL, charset[position])
        # Identity test: ``== None`` would be routed through any custom
        # __eq__ defined on the expression class.
        if temp is None:
            temp = symbol
        else:
            temp = RegularExpression(re.ALTERNATION, temp, symbol)

    return temp
Ejemplo n.º 3
0
def setAlph(interval):
    """Build the alternation of every character described by a symbol set.

    Args:
        interval: iterable whose items are either a single character or a
            ``(first, last)`` tuple naming an inclusive range inside the
            supported alphabet (lowercase, uppercase, digits).

    Returns:
        A left-nested ALTERNATION of SYMBOL expressions (a single SYMBOL
        when only one character is described).

    Raises:
        ValueError: if ``interval`` is empty. The previous implementation
            failed with an UnboundLocalError in that case.
    """
    pool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    e = None
    for elem in interval:
        if isinstance(elem, tuple):
            start = pool.find(elem[0])
            stop = pool.find(elem[1]) + 1
            if e is None:
                # The first character of the first item seeds the
                # alternation, so the range walk starts one step later.
                e = RegularExpression(SYMBOL, elem[0])
                start += 1
            for c in range(start, stop):
                e = RegularExpression(ALTERNATION, e,
                                      RegularExpression(SYMBOL, pool[c]))
        elif e is None:
            e = RegularExpression(SYMBOL, elem)
        else:
            e = RegularExpression(ALTERNATION, e,
                                  RegularExpression(SYMBOL, elem))

    if e is None:
        raise ValueError("setAlph() needs at least one symbol or range")
    return e
Ejemplo n.º 4
0
    def er_fa_btn_clicked(self, table):
        """Turn the typed regular expression into a DFA and display it.

        The expression is read from the ``er_text`` widget; a built-in
        sample expression is used when the field is empty. The resulting
        automaton is handed to ``add_fa_on_list`` under the 'ER => FA'
        label and to ``set_fa_on_table`` together with ``table``.
        """
        typed = self.ui.er_text.toPlainText()
        expression = typed if typed != '' else "(0*(1(01*0)*1)*0*)*"

        automaton = RegularExpression(
            expression).to_deterministic_finite_automaton()
        self.add_fa_on_list('ER => FA', automaton)
        self.set_fa_on_table(automaton, table)
Ejemplo n.º 5
0
    def er_search_btn_clicked(self):
        """Show a TextHighlighter built from the typed regular expression.

        The expression comes from the ``er_text`` widget (a built-in sample
        is used when the field is empty) and is converted to a DFA that is
        passed to ``TextHighlighter`` along with this object.
        """
        typed = self.ui.er_text.toPlainText()
        expression = "(0*(1(01*0)*1)*0*)*" if typed == '' else typed

        automaton = RegularExpression(
            expression).to_deterministic_finite_automaton()

        highlighter_window = TextHighlighter(automaton, self)
        highlighter_window.show()
Ejemplo n.º 6
0
def converRegToEr(expression):
    """Translate a parsed regex node into an equivalent RegularExpression.

    The node kind is read from ``expression.type`` as a numeric code; the
    name of each code is given in the comment above its branch.

    Args:
        expression: parsed node exposing ``type`` and, depending on the
            kind, ``symbol``, ``symbol_set``, ``range``, ``lhs`` and ``rhs``.

    Returns:
        The converted RegularExpression, or ``None`` when ``type`` is not
        one of the handled codes.
    """
    kind = expression.type

    # EMPTY_STRING
    if kind == 0:
        return RegularExpression(EMPTY_STRING)

    # SYMBOL_SIMPLE
    if kind == 1:
        return RegularExpression(SYMBOL, expression.symbol)

    # SYMBOL_ANY
    if kind == 2:
        return alphabet()

    # SYMBOL_SET
    if kind == 3:
        return setAlph(expression.symbol_set)

    # MAYBE: empty string or one occurrence
    if kind == 4:
        return RegularExpression(ALTERNATION, RegularExpression(EMPTY_STRING),
                                 converRegToEr(expression.lhs))

    # STAR
    if kind == 5:
        return RegularExpression(STAR, converRegToEr(expression.lhs))

    # PLUS: e+ == e . e*
    if kind == 6:
        # Convert the operand once and reuse it; converting it twice doubles
        # the work at every nested "+".
        operand = converRegToEr(expression.lhs)
        return RegularExpression(CONCATENATION, operand,
                                 RegularExpression(STAR, operand))

    # RANGE
    if kind == 7:
        return rangeAlphabet(converRegToEr(expression.lhs), expression.range)

    # CONCATENATION
    if kind == 8:
        return RegularExpression(CONCATENATION, converRegToEr(expression.lhs),
                                 converRegToEr(expression.rhs))

    # ALTERNATION
    if kind == 9:
        return RegularExpression(ALTERNATION, converRegToEr(expression.lhs),
                                 converRegToEr(expression.rhs))

    # Unknown node kind: same result as the former implicit fall-through.
    return None
Ejemplo n.º 7
0
    def er_equals_gr_btn_clicked(self):
        """Report whether the typed grammar and regular expression agree.

        Both texts are read from their widgets (``gr_text`` and ``er_text``),
        each falling back to a built-in sample when empty. Each one is turned
        into a finite automaton, and the result of ``is_equal`` is shown in a
        message box.
        """
        grammar_text = self.ui.gr_text.toPlainText()
        if grammar_text == '':
            grammar_text = "S -> aS | a | bS | b"
        fa_gr = Grammar.text_to_grammar(grammar_text).to_finite_automaton()

        regex_text = self.ui.er_text.toPlainText()
        if regex_text == '':
            regex_text = "(0*(1(01*0)*1)*0*)*"
        fa_er = RegularExpression(
            regex_text).to_deterministic_finite_automaton()

        # Pick the message first so the dialog is opened from one place.
        if fa_gr.is_equal(fa_er):
            verdict = "A Expressão Regular é equivalente à Gramática Regular"
        else:
            verdict = (
                "A Expressão Regular NÃO é equivalente à Gramática Regular")
        QMessageBox.about(self, "Equivalência entre GR e ER", verdict)
Ejemplo n.º 8
0
def getREfromREGEX(rgx):
    """Convert a parsed regex tree into an equivalent RegularExpression.

    The result is assembled with the RegularExpression operators used
    throughout this function: ``|`` for alternation, ``*`` for concatenation
    and ``star()`` for the Kleene star.

    Args:
        rgx: parsed node exposing ``type`` and, depending on the kind,
            ``symbol``, ``symbol_set``, ``range``, ``lhs`` and ``rhs``.
            In ``range`` a bound of ``-1`` means "unbounded on that side".

    Returns:
        The converted expression; ``None`` for an empty symbol set or an
        unrecognised node type.
    """

    def repeated(base, count):
        # Zero repetitions is the empty string, not one copy of ``base``.
        if count == 0:
            return RegularExpression(reEMPTY_STRING)
        chain = base
        for _ in range(count - 1):
            chain = chain * base
        return chain

    if rgx.type == rgxEMPTY_STRING:
        return RegularExpression(reEMPTY_STRING)

    elif rgx.type == rgxSYMBOL_SIMPLE:
        return RegularExpression(reSYMBOL, rgx.symbol)

    elif rgx.type == rgxSYMBOL_ANY:
        result = RegularExpression(reSYMBOL, alphabet[0])
        for symbol in alphabet[1:]:
            result = result | symbol
        return result

    elif rgx.type == rgxSYMBOL_SET:
        result = None
        for el in rgx.symbol_set:
            if isinstance(el, tuple):
                first, last = ord(el[0]), ord(el[1])
                if result is None:
                    # The range's first character seeds the alternation.
                    result = RegularExpression(reSYMBOL, el[0])
                    first += 1
                for code in range(first, last + 1):
                    result = result | chr(code)
            elif result is None:
                result = RegularExpression(reSYMBOL, el)
            else:
                result = result | el
        return result

    elif rgx.type == rgxMAYBE:
        return RegularExpression(reEMPTY_STRING) | getREfromREGEX(rgx.lhs)

    elif rgx.type == rgxSTAR:
        return getREfromREGEX(rgx.lhs).star()

    elif rgx.type == rgxPLUS:
        operand = getREfromREGEX(rgx.lhs)
        return operand * operand.star()

    elif rgx.type == rgxRANGE:
        low, high = rgx.range[0], rgx.range[1]
        lhsre = getREfromREGEX(rgx.lhs)

        # Exactly ``low`` repetitions.
        if low == high:
            return repeated(lhsre, low)

        # At most ``high`` repetitions: "" | e | e.e | ...
        if low == -1:
            result = RegularExpression(reEMPTY_STRING)
            chain = None
            for _ in range(high):
                chain = lhsre if chain is None else chain * lhsre
                result = result | chain
            return result

        # At least ``low`` repetitions: e^low . e*  (plain e* when low is 0).
        if high == -1:
            if low == 0:
                return lhsre.star()
            return repeated(lhsre, low) * lhsre.star()

        # Between ``low`` and ``high`` repetitions.
        chain = repeated(lhsre, low)
        result = chain
        for count in range(low + 1, high + 1):
            # When low is 0 the chain starts as "", so the first real
            # repetition is the operand itself.
            chain = lhsre if count == 1 else chain * lhsre
            result = result | chain
        return result

    elif rgx.type == rgxCONCATENATION:
        return getREfromREGEX(rgx.lhs) * getREfromREGEX(rgx.rhs)

    elif rgx.type == rgxALTERNATION:
        return getREfromREGEX(rgx.lhs) | getREfromREGEX(rgx.rhs)
    def test_normalize(self):
        """Check the string stored in ``_string`` for a set of sample inputs."""
        # (source text, expected content of ``_string``), checked in order.
        expectations = (
            ('(ab)*|ab', '(a.b)*|a.b'),
            ('(a)|(b).(c)', '(a)|(b).(c)'),
            ('a|bc', 'a|b.c'),
            ('(ab)*(ba)*', '(a.b)*.(b.a)*'),
            ('a(ba)*b', 'a.(b.a)*.b'),
            ('(ba|a(ba)*a)*(ab)*', '(b.a|a.(b.a)*.a)*.(a.b)*'),
            ('abab', 'a.b.a.b'),
            ('(a*)', 'a*'),
            ('(a)*', '(a)*'),
            ('a', 'a'),
            ('', ''),
        )
        for source, normalized in expectations:
            regex = RegularExpression(source)
            self.assertEqual(normalized, regex._string)
Ejemplo n.º 10
0
        tuplet = None
        par = 0
        set1 = {}
        set1 = set()
        punct = 0
        stack_punct = []
        var = 0


        for i in range (len(regex_string)):

            if regex_string[i] == '.':
                punct = 1
                for j in range (len(alfabet)):
                    element = alfabet[j]
                    reg_expr1 = RegularExpression(2, element, None)
                    stack_punct.append(reg_expr1)

            if punct == 1:
                ok = 0
                for j in range (len(stack_punct)) :
                    if j + 1 != (len(stack_punct)): 
                        if j == 0:
                            reg_expr =  RegularExpression(5, stack_punct[j], stack_punct[j +1])
                        elif j > 0 and j+1 < (len(stack_punct)):
                            reg_expr2 = RegularExpression(5, reg_expr, stack_punct[j + 1])
                            reg_expr = reg_expr2
                stack_elems.append(reg_expr)
       
           
            if modify[i] == '(':
Ejemplo n.º 11
0
def rangeAlphabet(expression, tuple):
    """Build the expression matching a bounded repetition of ``expression``.

    Args:
        expression: the already converted operand.
        tuple: ``(low, high)`` repetition bounds; ``-1`` on either side
            means that side is unbounded.

    Returns:
        * ``low == high``: ``expression`` concatenated ``low`` times;
        * ``low == -1``: "" | e | e.e | ... up to ``high`` copies;
        * ``high == -1``: ``low`` copies followed by ``e*``;
        * otherwise: the alternation of ``low`` .. ``high`` copies.

        A bound of 0 contributes the empty string. The previous version
        treated 0 like 1, so ``{0,n}`` required at least one occurrence.
    """
    low, high = tuple[0], tuple[1]

    def repeated(count):
        # Zero copies is the empty string rather than one copy.
        if count == 0:
            return RegularExpression(EMPTY_STRING)
        chain = expression
        for _ in range(1, count):
            chain = RegularExpression(CONCATENATION, chain, expression)
        return chain

    # exact interval: low == high
    if low == high:
        return repeated(low)

    # upper bound only: low == -1
    if low == -1:
        rez = RegularExpression(EMPTY_STRING)
        chain = None
        for _ in range(high):
            if chain is None:
                chain = expression
            else:
                chain = RegularExpression(CONCATENATION, chain, expression)
            rez = RegularExpression(ALTERNATION, rez, chain)
        return rez

    # lower bound only: high == -1
    if high == -1:
        star = RegularExpression(STAR, expression)
        if low == 0:
            return star
        return RegularExpression(CONCATENATION, repeated(low), star)

    # both bounds given
    chain = repeated(low)
    rez = chain
    for count in range(low + 1, high + 1):
        # When low is 0 the chain starts as "", so the first real
        # repetition is the operand itself.
        if count == 1:
            chain = expression
        else:
            chain = RegularExpression(CONCATENATION, chain, expression)
        rez = RegularExpression(ALTERNATION, rez, chain)
    return rez
	def test_get_de_simone_tree(self):
		"""Compare the textual form of the De Simone tree for sample inputs."""
		# (source text, expected string form of the tree), checked in order.
		expectations = (
			('(ab)*|ab', '{{{{None[a]None}[.]{None[b]None}}[*]None}[|]{{None[a]None}[.]{None[b]None}}}'),
			('(a)|(b).(c)', '{{None[a]None}[|]{{None[b]None}[.]{None[c]None}}}'),
			('a|bc', '{{None[a]None}[|]{{None[b]None}[.]{None[c]None}}}'),
			('(ab)*(ba)*', '{{{{None[a]None}[.]{None[b]None}}[*]None}[.]{{{None[b]None}[.]{None[a]None}}[*]None}}'),
			('a(ba)*b', '{{None[a]None}[.]{{{{None[b]None}[.]{None[a]None}}[*]None}[.]{None[b]None}}}'),
			('(ba|a(ba)*a)*(ab)*', '{{{{{None[b]None}[.]{None[a]None}}[|]{{None[a]None}[.]{{{{None[b]None}[.]{None[a]None}}[*]None}[.]{None[a]None}}}}[*]None}[.]{{{None[a]None}[.]{None[b]None}}[*]None}}'),
			('abab', '{{None[a]None}[.]{{None[b]None}[.]{{None[a]None}[.]{None[b]None}}}}'),
			('(a*)', '{{None[a]None}[*]None}'),
			('(a)*', '{{None[a]None}[*]None}'),
			('a', '{None[a]None}'),
			('', '{None[&]None}'),
		)
		for source, rendered in expectations:
			tree = RegularExpression(source)._get_de_simone_tree()
			self.assertEqual(rendered, tree.__str__())
	def test_get_less_significant(self):
		"""Check the tuple returned by ``_get_less_significant`` for sample inputs."""
		# (source text, expected return value), checked in order.
		expectations = (
			('(ab)*|ab', ('|', 6)),
			('(a)|(b).(c)', ('|', 3)),
			('a|bc', ('|', 1)),
			('(ab)*(ba)*', ('.', 6)),
			('a(ba)*b', ('.', 1)),
			('(ba|a(ba)*a)*(ab)*', ('.', 17)),
			('abab', ('.', 1)),
			('(a*)', ('*', 1)),
			('(a)*', ('*', 3)),
			('a', ('a', 0)),
			('', ('&', -1)),
		)
		for source, outcome in expectations:
			regex = RegularExpression(source)
			self.assertEqual(outcome, regex._get_less_significant())
Ejemplo n.º 14
0
def regex_to_regular_expr(regex):
    """Convert a parsed regex node into an equivalent RegularExpression.

    Args:
        regex: parsed node exposing ``type`` and, depending on the kind,
            ``symbol``, ``symbol_set``, ``range``, ``lhs`` and ``rhs``;
            ``None`` is accepted and converted to the empty set. In
            ``range`` a lower bound of ``-1`` is treated as 0 and an upper
            bound of ``-1`` as "unbounded".

    Returns:
        The converted expression; ``None`` for a symbol set with no usable
        items or for an unrecognised node type.
    """

    def join(prefix, piece):
        # A prefix of EMPTY_STRING type is dropped instead of being wrapped
        # in a CONCATENATION node.
        if prefix.type == re.EMPTY_STRING:
            return piece
        return RegularExpression(re.CONCATENATION, prefix, piece)

    if regex is None:
        return RegularExpression(re.EMPTY_SET)

    elif regex.type == rx.EMPTY_STRING:
        return RegularExpression(re.EMPTY_STRING)

    elif regex.type == rx.SYMBOL_SIMPLE:
        return RegularExpression(re.SYMBOL, regex.symbol)

    elif regex.type == rx.SYMBOL_ANY:
        return any_symbol_reg_expr

    elif regex.type == rx.SYMBOL_SET:
        union = None
        for item in regex.symbol_set:
            if isinstance(item, str):
                symbol = RegularExpression(re.SYMBOL, item)
                # Identity test instead of ``== None``.
                if union is None:
                    union = symbol
                else:
                    union = RegularExpression(re.ALTERNATION, union, symbol)
            elif isinstance(item, tuple):
                union = get_set(item, union)
        return union

    elif regex.type == rx.MAYBE:
        e = regex_to_regular_expr(regex.lhs)
        empty = RegularExpression(re.EMPTY_STRING)
        return RegularExpression(re.ALTERNATION, e, empty)

    elif regex.type == rx.STAR:
        return RegularExpression(re.STAR, regex_to_regular_expr(regex.lhs))

    elif regex.type == rx.PLUS:
        e = regex_to_regular_expr(regex.lhs)
        return RegularExpression(re.CONCATENATION, e,
                                 RegularExpression(re.STAR, e))

    elif regex.type == rx.RANGE:
        e = regex_to_regular_expr(regex.lhs)
        (begin, end) = regex.range

        if begin == -1:
            begin = 0

        # Mandatory part: ``begin`` copies of the operand ("" when 0).
        prefix = RegularExpression(re.EMPTY_STRING)
        for _ in range(begin):
            prefix = join(prefix, e)

        # Open upper bound: mandatory part followed by e*.
        if end == -1:
            return join(prefix, RegularExpression(re.STAR, e))

        # Closed upper bound: alternate every length from begin to end.
        final = prefix
        repetition = prefix
        for _ in range(begin, end):
            repetition = join(repetition, e)
            final = RegularExpression(re.ALTERNATION, final, repetition)
        return final

    elif regex.type == rx.CONCATENATION:
        e0 = regex_to_regular_expr(regex.lhs)
        e1 = regex_to_regular_expr(regex.rhs)
        return RegularExpression(re.CONCATENATION, e0, e1)

    elif regex.type == rx.ALTERNATION:
        e0 = regex_to_regular_expr(regex.lhs)
        e1 = regex_to_regular_expr(regex.rhs)
        return RegularExpression(re.ALTERNATION, e0, e1)
Ejemplo n.º 15
0
def re_any():
    """Build the alternation of every supported character.

    The type codes follow their use in this function: 2 wraps a single
    symbol and 5 combines two expressions.

    Returns:
        A left-nested type-5 expression over lowercase letters, uppercase
        letters and digits, in that order.
    """
    alf = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    union = RegularExpression(2, alf[0])
    for char in alf[1:]:
        union = RegularExpression(5, union, RegularExpression(2, char))
    return union
Ejemplo n.º 16
0
def regex_to_re(reg):
    """Convert a parsed regex node into an equivalent RegularExpression.

    RegularExpression type codes, as used in this function: 1 = empty
    string, 2 = symbol, 3 = star, 4 = concatenation, 5 = alternation.

    Args:
        reg: parsed node exposing ``type`` and, depending on the kind,
            ``symbol``, ``symbol_set``, ``range``, ``lhs`` and ``rhs``.
            In ``range`` a bound of ``-1`` means "unbounded on that side".

    Returns:
        The converted expression; ``None`` for an empty symbol set or a
        node type that is not handled here.
    """
    if reg.type == SYMBOL_SIMPLE:
        return RegularExpression(2, reg.symbol)
    if reg.type == CONCATENATION:
        return RegularExpression(4, regex_to_re(reg.lhs), regex_to_re(reg.rhs))
    if reg.type == ALTERNATION:
        return RegularExpression(5, regex_to_re(reg.lhs), regex_to_re(reg.rhs))
    if reg.type == SYMBOL_ANY:
        return e_any
    if reg.type == STAR:
        return RegularExpression(3, regex_to_re(reg.lhs))
    if reg.type == PLUS:
        # e+ == e . e*; the operand is converted once and reused.
        operand = regex_to_re(reg.lhs)
        return RegularExpression(4, operand, RegularExpression(3, operand))
    if reg.type == MAYBE:
        # zero or one occurrence
        return RegularExpression(5, RegularExpression(1), regex_to_re(reg.lhs))

    e = None
    if reg.type == RANGE:
        x, y = reg.range
        aux = regex_to_re(reg.lhs)

        def repeated(count):
            # Zero copies is the empty string rather than one copy.
            if count == 0:
                return RegularExpression(1)
            chain = aux
            for _ in range(count - 1):
                chain = RegularExpression(4, chain, aux)
            return chain

        if x == y:
            # exactly x occurrences
            e = repeated(x)
        elif x != -1 and y != -1:
            # between x and y occurrences
            chain = repeated(x)
            e = chain
            for count in range(x + 1, y + 1):
                # When x is 0 the chain starts as "", so the first real
                # repetition is the operand itself.
                chain = aux if count == 1 else RegularExpression(4, chain, aux)
                e = RegularExpression(5, e, chain)
        elif x == -1:
            # at most y occurrences: "" | e | e.e | ...
            e = RegularExpression(1)
            chain = None
            for _ in range(y):
                chain = aux if chain is None else RegularExpression(4, chain, aux)
                e = RegularExpression(5, e, chain)
        else:
            # at least x occurrences: e^x . e*  (plain e* when x is 0)
            star = RegularExpression(3, aux)
            e = star if x == 0 else RegularExpression(4, repeated(x), star)
        return e

    if reg.type == SYMBOL_SET:
        for item in reg.symbol_set:
            if isinstance(item, str):
                symbol = RegularExpression(2, item)
                e = symbol if e is None else RegularExpression(5, e, symbol)
            elif isinstance(item, tuple):
                first, last = ord(item[0]), ord(item[1])
                if e is None:
                    # The range's first character seeds the alternation.
                    e = RegularExpression(2, item[0])
                    first += 1
                for code in range(first, last + 1):
                    e = RegularExpression(5, e, RegularExpression(2, chr(code)))

    return e
Ejemplo n.º 17
0
def regex_to_re(regex):
    """Convert a parsed regex node into an equivalent RegularExpression.

    Args:
        regex: parsed node exposing ``type`` and, depending on the kind,
            ``symbol``, ``symbol_set``, ``range``, ``lhs`` and ``rhs``.
            In ``range`` a bound of ``-1`` means "unbounded on that side".

    Returns:
        The converted expression. ``None`` is returned for an empty
        CHARSET or symbol set, for a range whose upper bound is below its
        lower bound, and for an unrecognised node type.
    """

    def union(accumulated, addition):
        # ``None`` marks "nothing accumulated yet".
        if accumulated is None:
            return addition
        return RegularExpression(RE_ALTERNATION, accumulated, addition)

    if regex.type == RX_EMPTY_STRING:
        return RegularExpression(RE_EMPTY_STRING)

    if regex.type == RX_SYMBOL_SIMPLE:
        return RegularExpression(RE_SYMBOL, regex.symbol)

    if regex.type == RX_SYMBOL_ANY:
        regular_expr = None
        for char in CHARSET:
            regular_expr = union(regular_expr,
                                 RegularExpression(RE_SYMBOL, char))
        return regular_expr

    if regex.type == RX_SYMBOL_SET:
        regular_expr = None
        for elem in regex.symbol_set:
            if isinstance(elem, tuple):
                # Inclusive character range.
                for num in range(ord(elem[0]), ord(elem[1]) + 1):
                    regular_expr = union(
                        regular_expr, RegularExpression(RE_SYMBOL, chr(num)))
            else:
                regular_expr = union(regular_expr,
                                     RegularExpression(RE_SYMBOL, elem))
        return regular_expr

    if regex.type == RX_MAYBE:
        return RegularExpression(RE_ALTERNATION,
                                 RegularExpression(RE_EMPTY_STRING),
                                 regex_to_re(regex.lhs))

    if regex.type == RX_STAR:
        return RegularExpression(RE_STAR, regex_to_re(regex.lhs))

    if regex.type == RX_PLUS:
        regular_expr = regex_to_re(regex.lhs)
        return RegularExpression(RE_CONCATENATION, regular_expr,
                                 RegularExpression(RE_STAR, regular_expr))

    if regex.type == RX_RANGE:
        body_expr = regex_to_re(regex.lhs)
        low, high = regex.range[0], regex.range[1]

        def extend(chain):
            # One more copy of the body; "" (None or count 0) is skipped.
            if chain is None:
                return body_expr
            return RegularExpression(RE_CONCATENATION, chain, body_expr)

        def repeated(count):
            # Zero copies is the empty string rather than one copy.
            if count == 0:
                return RegularExpression(RE_EMPTY_STRING)
            chain = body_expr
            for _ in range(count - 1):
                chain = extend(chain)
            return chain

        # Exactly ``low`` occurrences.
        if low == high:
            return repeated(low)

        # At most ``high`` occurrences: "" | e | e.e | ...
        if low == -1:
            regular_expr = RegularExpression(RE_EMPTY_STRING)
            chain = None
            for _ in range(high):
                chain = extend(chain)
                regular_expr = RegularExpression(RE_ALTERNATION,
                                                 regular_expr, chain)
            return regular_expr

        # At least ``low`` occurrences: e^low . e*  (plain e* when low is 0).
        if high == -1:
            star = RegularExpression(RE_STAR, body_expr)
            if low == 0:
                return star
            return RegularExpression(RE_CONCATENATION, repeated(low), star)

        # Between ``low`` and ``high`` occurrences. The chain is grown
        # incrementally instead of being rebuilt for every length.
        regular_expr = None
        chain = None
        for count in range(low, high + 1):
            if regular_expr is None:
                chain = repeated(count)
            elif count == 1:
                # The previous length was 0 (""), so restart from the body.
                chain = body_expr
            else:
                chain = extend(chain)
            regular_expr = union(regular_expr, chain)
        return regular_expr

    if regex.type == RX_CONCATENATION:
        return RegularExpression(RE_CONCATENATION, regex_to_re(regex.lhs),
                                 regex_to_re(regex.rhs))

    if regex.type == RX_ALTERNATION:
        return RegularExpression(RE_ALTERNATION, regex_to_re(regex.lhs),
                                 regex_to_re(regex.rhs))
    def test_get_less_significant(self):
        """Check the tuple returned by ``_get_less_significant`` for sample inputs."""
        # (source text, expected return value), checked in order.
        expectations = (
            ('(ab)*|ab', ('|', 6)),
            ('(a)|(b).(c)', ('|', 3)),
            ('a|bc', ('|', 1)),
            ('(ab)*(ba)*', ('.', 6)),
            ('a(ba)*b', ('.', 1)),
            ('(ba|a(ba)*a)*(ab)*', ('.', 17)),
            ('abab', ('.', 1)),
            ('(a*)', ('*', 1)),
            ('(a)*', ('*', 3)),
            ('a', ('a', 0)),
            ('', ('&', -1)),
        )
        for source, outcome in expectations:
            regex = RegularExpression(source)
            self.assertEqual(outcome, regex._get_less_significant())
Ejemplo n.º 19
0
def re_to_nfa(re):
    """Recursively build an NFA for a RegularExpression.

    Each case returns an automaton created with states ``{0, 1}``, start
    state ``0`` and the single final state ``1``; composite cases wire the
    sub-automata into that shell using transitions labelled ``""``, the
    same label the EMPTY_STRING case uses.

    NOTE(review): the NFA constructor arguments look like
    (alphabet, states, start_state, final_states, delta), judging by the
    attributes used below -- confirm against the NFA class.

    Returns the automaton, or ``None`` implicitly when ``re.type`` matches
    none of the handled kinds.
    """
    if re.type == reEMPTY_SET:
        # No transitions at all.
        return NFA("", {0, 1}, 0, {1}, {})
    elif re.type == reEMPTY_STRING:
        # A single ""-labelled transition from start to final.
        return NFA("", {0, 1}, 0, {1}, {(0, ""): {1}})
    elif re.type == reSYMBOL:
        # A single transition labelled with the symbol.
        return NFA(re.symbol, {0, 1}, 0, {1}, {(0, re.symbol): {1}})
    elif re.type == reCONCATENATION:
        l = re_to_nfa(re.lhs)
        r = re_to_nfa(re.rhs)
        # The transition-free EMPTY_SET automaton serves as the outer shell.
        ea = re_to_nfa(RegularExpression(reEMPTY_SET))

        # presumably renumbers l's states so they do not clash with ea's --
        # TODO confirm against rename_states
        rename_states(l, ea)
        ea.delta.update(l.delta)
        # shell start --""--> l start
        ea.delta.update({(ea.start_state, ""): {l.start_state}})
        ea.states = set(list(l.states) + list(ea.states))

        # r is renamed only after l's states were merged into ea.
        rename_states(r, ea)
        ea.alphabet = ''.join(list(set(ea.alphabet + r.alphabet + l.alphabet)))
        ea.delta.update(r.delta)
        # l final --""--> r start (takes an arbitrary element of final_states)
        ea.delta.update({(list(l.final_states)[0], ""): {r.start_state}})
        # r final --""--> shell final
        ea.delta.update({
            (list(r.final_states)[0], ""): {list(ea.final_states)[0]}
        })
        ea.states = set(list(r.states) + list(ea.states))
        return ea

    elif re.type == reSTAR:
        l = re_to_nfa(re.lhs)
        ea = re_to_nfa(RegularExpression(reEMPTY_SET))

        rename_states(l, ea)
        ea.delta.update(l.delta)
        # shell start --""--> l start, or directly to the shell final
        ea.delta.update({
            (ea.start_state, ""): {l.start_state,
                                   list(ea.final_states)[0]}
        })

        # l final --""--> shell final, or back to l start
        ea.delta.update({
            (list(l.final_states)[0], ""):
            {list(ea.final_states)[0], l.start_state}
        })
        ea.alphabet = ''.join(list(set(ea.alphabet + l.alphabet)))
        ea.states = set(list(l.states) + list(ea.states))
        return ea

    elif re.type == reALTERNATION:
        l = re_to_nfa(re.lhs)
        r = re_to_nfa(re.rhs)
        ea = re_to_nfa(RegularExpression(reEMPTY_SET))

        rename_states(l, ea)
        ea.delta.update(l.delta)
        # l final --""--> shell final
        ea.delta.update({
            (list(l.final_states)[0], ""): {list(ea.final_states)[0]}
        })
        ea.states = set(list(l.states) + list(ea.states))

        # r is renamed only after l's states were merged into ea.
        rename_states(r, ea)
        ea.alphabet = ''.join(list(set(ea.alphabet + r.alphabet + l.alphabet)))
        ea.delta.update(r.delta)
        # shell start --""--> both branch starts
        ea.delta.update({(ea.start_state, ""): {l.start_state, r.start_state}})
        # r final --""--> shell final
        ea.delta.update({
            (list(r.final_states)[0], ""): {list(ea.final_states)[0]}
        })
        ea.states = set(list(r.states) + list(ea.states))
        return ea
    def test_get_de_simone_tree(self):
        """Compare the textual form of the De Simone tree for sample inputs."""
        # (source text, expected string form of the tree), checked in order.
        expectations = (
            ('(ab)*|ab',
             '{{{{None[a]None}[.]{None[b]None}}[*]None}[|]{{None[a]None}[.]{None[b]None}}}'),
            ('(a)|(b).(c)',
             '{{None[a]None}[|]{{None[b]None}[.]{None[c]None}}}'),
            ('a|bc',
             '{{None[a]None}[|]{{None[b]None}[.]{None[c]None}}}'),
            ('(ab)*(ba)*',
             '{{{{None[a]None}[.]{None[b]None}}[*]None}[.]{{{None[b]None}[.]{None[a]None}}[*]None}}'),
            ('a(ba)*b',
             '{{None[a]None}[.]{{{{None[b]None}[.]{None[a]None}}[*]None}[.]{None[b]None}}}'),
            ('(ba|a(ba)*a)*(ab)*',
             '{{{{{None[b]None}[.]{None[a]None}}[|]{{None[a]None}[.]{{{{None[b]None}[.]{None[a]None}}[*]None}[.]{None[a]None}}}}[*]None}[.]{{{None[a]None}[.]{None[b]None}}[*]None}}'),
            ('abab',
             '{{None[a]None}[.]{{None[b]None}[.]{{None[a]None}[.]{None[b]None}}}}'),
            ('(a*)', '{{None[a]None}[*]None}'),
            ('(a)*', '{{None[a]None}[*]None}'),
            ('a', '{None[a]None}'),
            ('', '{None[&]None}'),
        )
        for source, rendered in expectations:
            tree = RegularExpression(source)._get_de_simone_tree()
            self.assertEqual(rendered, tree.__str__())
Ejemplo n.º 21
0
def convertRegEx(parsed_regex):
    """Convert a parsed RegEx node into an equivalent RegularExpression.

    RegularExpression type codes, as used in this function: 1 = empty
    string, 2 = symbol, 3 = star, 4 = concatenation, 5 = alternation.
    The RegEx node codes are given in the comment above each branch.

    Fixes over the previous version:
      * "." no longer accepts the empty string (the alternation used to be
        seeded with an empty-string expression);
      * a symbol set that mixes ranges and single symbols no longer drops
        the single symbol stored first;
      * a range such as ``(c, c)`` no longer adds the character after ``c``;
      * an exact repetition count of 0 yields the empty string.

    Args:
        parsed_regex: node exposing ``type`` and, depending on the kind,
            ``symbol_set``, ``range``, ``lhs`` and ``rhs``. In ``range`` a
            bound of ``-1`` means "unbounded on that side".

    Returns:
        The converted expression; ``None`` for an empty symbol set or an
        unrecognised node type.
    """

    def union(accumulated, addition):
        # ``None`` marks "nothing accumulated yet".
        if accumulated is None:
            return addition
        return RegularExpression(5, accumulated, addition)

    if parsed_regex.type == EMPTY_STRING:
        return RegularExpression(1)
    if parsed_regex.type == SYMBOL_SIMPLE:
        # The symbol text is taken from the node's string form.
        return RegularExpression(2, str(parsed_regex))
    # CONCATENATION = 8
    if parsed_regex.type == 8:
        return RegularExpression(4,
                                 convertRegEx(parsed_regex.lhs),
                                 convertRegEx(parsed_regex.rhs))
    # ALTERNATION = 9
    if parsed_regex.type == 9:
        return RegularExpression(5,
                                 convertRegEx(parsed_regex.lhs),
                                 convertRegEx(parsed_regex.rhs))
    # SYMBOL_ANY = 2
    if parsed_regex.type == 2:
        regular_expression = None
        for i in alphabet:
            symbol = RegEx(SYMBOL_SIMPLE, i)
            regular_expression = union(regular_expression,
                                       convertRegEx(symbol))
        if regular_expression is None:
            # Empty alphabet: keep the result the old code produced.
            return RegularExpression(1)
        return regular_expression
    # MAYBE = 4
    if parsed_regex.type == 4:
        aux = RegularExpression(1)
        return RegularExpression(5, aux, convertRegEx(parsed_regex.lhs))
    # STAR = 5
    if parsed_regex.type == 5:
        return RegularExpression(3, convertRegEx(parsed_regex.lhs))
    # PLUS = 6
    if parsed_regex.type == 6:
        aux = convertRegEx(parsed_regex.lhs)
        return RegularExpression(4, aux, RegularExpression(3, aux))
    # RANGE = 7
    if parsed_regex.type == 7:
        x, y = parsed_regex.range
        if x == y:
            # exactly x occurrences; zero occurrences is the empty string
            if x == 0:
                return RegularExpression(1)
            regular_expression = convertRegEx(parsed_regex.lhs)
            for _ in range(x - 1):
                regular_expression = RegularExpression(
                    4, regular_expression, convertRegEx(parsed_regex.lhs))
            return regular_expression
        if x == -1:
            # at most y occurrences: "" | e | e.e | ...
            regular_expression = RegularExpression(1)
            for i in range(1, y + 1):
                exp = RegEx(RANGE, parsed_regex.lhs, (i, i))
                regular_expression = RegularExpression(
                    5, regular_expression, convertRegEx(exp))
            return regular_expression
        if y == -1:
            # at least x occurrences: e^x . e*  (plain e* when x is 0)
            star_exp = RegularExpression(3, convertRegEx(parsed_regex.lhs))
            if x == 0:
                return star_exp
            exp = RegEx(RANGE, parsed_regex.lhs, (x, x))
            return RegularExpression(4, convertRegEx(exp), star_exp)
        # between x and y occurrences
        exp = RegEx(RANGE, parsed_regex.lhs, (x, x))
        regular_expression = convertRegEx(exp)
        for i in range(x + 1, y + 1):
            exp = RegEx(RANGE, parsed_regex.lhs, (i, i))
            regular_expression = RegularExpression(5, regular_expression,
                                                   convertRegEx(exp))
        return regular_expression
    # SYMBOL_SET = 3
    if parsed_regex.type == 3:
        regular_expression = None
        # Ranges first (as before): one symbol per character, both ends
        # included. Digits and letters are handled uniformly by code point.
        for i in parsed_regex.symbol_set:
            if isinstance(i, tuple):
                for code in range(ord(i[0]), ord(i[1]) + 1):
                    regular_expression = union(
                        regular_expression, RegularExpression(2, chr(code)))
        # Then every single symbol, whatever its position in the set.
        for i in parsed_regex.symbol_set:
            if not isinstance(i, tuple):
                symbol = RegEx(SYMBOL_SIMPLE, i)
                regular_expression = union(regular_expression,
                                           convertRegEx(symbol))

        return regular_expression