Beispiel #1
0
def build_regex_tree(regex: str) -> "RegexTree":
    """
    Build and return the RegexTree that represents the valid regex string
    ``regex``.

    >>> build_regex_tree('0')
    Leaf('0')
    >>> build_regex_tree('0*')
    StarTree(Leaf('0'))
    >>> build_regex_tree('(0.1)')
    DotTree(Leaf('0'), Leaf('1'))
    >>> build_regex_tree('(1|0)')
    BarTree(Leaf('1'), Leaf('0'))
    >>> build_regex_tree('(0*|1*)')
    BarTree(StarTree(Leaf('0')), StarTree(Leaf('1')))
    >>> build_regex_tree('((0.1).0)')
    DotTree(DotTree(Leaf('0'), Leaf('1')), Leaf('0'))
    """
    # A one-character regex is a leaf.
    if len(regex) == 1:
        return Leaf(regex)

    # A trailing star wraps the tree of everything that precedes it.
    if regex[-1] == "*":
        return StarTree(build_regex_tree(regex[:-1]))

    # Anything else is a binary expression. regex_split (defined elsewhere in
    # this file) breaks it apart; index 0 is used as the left operand,
    # index 1 as the operator and index 2 as the right operand.
    pieces = regex_split(regex)
    # Any operator other than "|" is treated as a dot.
    node_type = BarTree if pieces[1] == "|" else DotTree
    return node_type(build_regex_tree(pieces[0]),
                     build_regex_tree(pieces[2]))
def build_regex_tree(regex: str) -> "RegexTree":
    """Given regex, a valid regular expression, build the corresponding
    regular expression tree and return its root.

    Leaves are the single characters '0', '1', '2' and 'e'. A trailing '*'
    produces a StarTree, and a bracketed "(r1.r2)" / "(r1|r2)" produces a
    DotTree / BarTree. Input that matches none of these shapes yields None.

    >>> regex = "(0.1)*"
    >>> build_regex_tree(regex)
    StarTree(DotTree(Leaf('0'), Leaf('1')))
    >>> regex = "(0*|(1.2)*)"
    >>> build_regex_tree(regex)
    BarTree(StarTree(Leaf('0')), StarTree(DotTree(Leaf('1'), Leaf('2'))))
    >>> regex = "((0.1)**|1)"
    >>> build_regex_tree(regex)
    BarTree(StarTree(StarTree(DotTree(Leaf('0'), Leaf('1')))), Leaf('1'))
    """
    # All comparisons below use == rather than `is`: identity of equal
    # str/int objects is an interpreter implementation detail.
    if regex in ('0', '1', '2', 'e'):
        return Leaf(regex)
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))
    if not (regex[0] == '(' and regex[-1] == ')'
            and ('.' in regex or '|' in regex)):
        # Not a recognised shape.
        return None

    # Shortest bracketed form "(a.b)" / "(a|b)": operands sit at fixed spots.
    if len(regex) == 5:
        node_type = DotTree if '.' in regex else BarTree
        return node_type(build_regex_tree(regex[1:2]),
                         build_regex_tree(regex[3:4]))

    inner = regex[1:-1]
    if inner[0] == '(':
        # Left operand is a bracket group. index_of_left_closing_bracket is
        # defined elsewhere; presumably it returns the index of the ')' that
        # closes the leading '(' -- TODO confirm.
        index_of_separator = index_of_left_closing_bracket(inner) + 1
        # Skip every star attached to the group, not just the first one.
        while inner[index_of_separator] == '*':
            index_of_separator += 1
    elif inner[-1] == ')' or '(' in inner:
        # Left operand has no brackets, so the operator is the character
        # immediately before the first '('.
        index_of_separator = inner.find('(') - 1
    else:
        # No brackets at all: exactly one of the two operators is present and
        # the other find() returns -1, so max() picks the real position.
        index_of_separator = max(inner.find('.'), inner.find('|'))

    separator = inner[index_of_separator]
    if separator == '.':
        return DotTree(build_regex_tree(inner[:index_of_separator]),
                       build_regex_tree(inner[index_of_separator + 1:]))
    if separator == '|':
        return BarTree(build_regex_tree(inner[:index_of_separator]),
                       build_regex_tree(inner[index_of_separator + 1:]))
    return None
Beispiel #3
0
def build_regex_tree(regex):
    """Build the regular expression tree for the valid regex string
    ``regex`` and return its root.

    The leaves are '0', '1', '2' and 'e'. A trailing '*' becomes a StarTree;
    a bracketed "(r1.r2)" becomes a DotTree and "(r1|r2)" a BarTree. A string
    that fits none of these shapes results in None.

    >>> build_regex_tree("(0.1)*")
    StarTree(DotTree(Leaf('0'), Leaf('1')))
    >>> build_regex_tree("(0*|(1.2)*)")
    BarTree(StarTree(Leaf('0')), StarTree(DotTree(Leaf('1'), Leaf('2'))))
    """
    # Value comparison (==) is used throughout; `is` on str/int literals only
    # works by accident of interning and triggers a SyntaxWarning.
    if regex in ('0', '1', '2', 'e'):
        return Leaf(regex)
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))

    bracketed = regex[0] == '(' and regex[-1] == ')'
    if not (bracketed and ('.' in regex or '|' in regex)):
        return None

    if len(regex) == 5:
        # "(a.b)" or "(a|b)": both operands are single characters.
        tree_class = DotTree if '.' in regex else BarTree
        return tree_class(build_regex_tree(regex[1:2]),
                          build_regex_tree(regex[3:4]))

    body = regex[1:-1]
    if body[0] == '(':
        # The left operand is itself bracketed. The helper is defined outside
        # this block; it is assumed to give the index of the bracket closing
        # the leading '(' -- verify against its definition.
        split_at = index_of_left_closing_bracket(body) + 1
        # Any number of stars may follow the group before the operator.
        while body[split_at] == '*':
            split_at += 1
    elif body[-1] == ')' or '(' in body:
        # Bracket-free left operand: the operator directly precedes the
        # first '('.
        split_at = body.find('(') - 1
    else:
        # No brackets, so only one operator occurs; the missing one gives -1.
        split_at = max(body.find('.'), body.find('|'))

    operator = body[split_at]
    if operator == '.':
        return DotTree(build_regex_tree(body[:split_at]),
                       build_regex_tree(body[split_at + 1:]))
    if operator == '|':
        return BarTree(build_regex_tree(body[:split_at]),
                       build_regex_tree(body[split_at + 1:]))
    return None
def tree_helper(regex_list, ternaries=('0', '1', '2', 'e')):
    ''' (list) -> RegexTree
    Takes in a list form of the regex and creates a tree recursively.

    regex_list holds the symbols of one bracket level; a nested bracket is
    represented by a nested list. ternaries is the collection of symbols
    accepted as leaves and is passed on to every recursive call.

    Raises ValueError if regex_list is empty or its first element is neither
    a nested list nor one of ternaries.

    >>> tree_helper(['e'])
    Leaf('e')
    >>> tree_helper(['1','.','0'])
    DotTree(Leaf('1'), Leaf('0'))
    >>> tree_helper(['1','|',['2','.','e'],'*'])
    BarTree(Leaf('1'), StarTree(DotTree(Leaf('2'), Leaf('e'))))
    >>> tree_helper(['1','*','*'])
    StarTree(StarTree(Leaf('1')))
    '''
    # A dot at this level splits the list into a left and a right operand.
    if '.' in regex_list:
        split_at = regex_list.index('.')
        return DotTree(tree_helper(regex_list[:split_at], ternaries),
                       tree_helper(regex_list[split_at + 1:], ternaries))
    # Same for a bar.
    if '|' in regex_list:
        split_at = regex_list.index('|')
        return BarTree(tree_helper(regex_list[:split_at], ternaries),
                       tree_helper(regex_list[split_at + 1:], ternaries))
    # Only stars are left: peel off the LAST one so that stacked stars
    # ("1**") each get their own StarTree instead of being dropped.
    if '*' in regex_list:
        last_star = len(regex_list) - 1 - regex_list[::-1].index('*')
        return StarTree(tree_helper(regex_list[:last_star], ternaries))

    if not regex_list:
        raise ValueError('cannot build a regex tree from an empty list')
    head = regex_list[0]
    # A nested list is a bracketed sub-expression.
    if isinstance(head, list):
        return tree_helper(head, ternaries)
    # A plain symbol becomes a leaf.
    if head in ternaries:
        return Leaf(head)
    raise ValueError('unrecognised regex symbol: {!r}'.format(head))
Beispiel #5
0
def build_regex_tree(regex):
    '''(str) -> RegexTree
    REQ: regex must be a valid regex
    Build the RegexTree that corresponds to the valid regex string and
    return its root.

    >>> build_regex_tree('1*')
    StarTree(Leaf('1'))
    >>> build_regex_tree('(1|0)*')
    StarTree(BarTree(Leaf('1'), Leaf('0')))
    >>> build_regex_tree('(2*.0)')
    DotTree(StarTree(Leaf('2')), Leaf('0'))
    >>> build_regex_tree('e*')
    StarTree(Leaf('e'))
    >>> build_regex_tree('((1.0)|(2.1))')
    BarTree(DotTree(Leaf('1'), Leaf('0')), DotTree(Leaf('2'), Leaf('1')))
    >>> regex = "((0|1*)|((2*.1*).(e*.0)))"
    >>> build_regex_tree(regex)  # doctest: +NORMALIZE_WHITESPACE
    BarTree(BarTree(Leaf('0'), StarTree(Leaf('1'))),
    DotTree(DotTree(StarTree(Leaf('2')), StarTree(Leaf('1'))),
    DotTree(StarTree(Leaf('e')), Leaf('0'))))
    '''
    # A single character has no children, so it is a leaf.
    if len(regex) == 1:
        return Leaf(regex)

    # A trailing star applies to everything in front of it.
    if regex[-1] == STAR:
        return StarTree(build_regex_tree(regex[:-1]))

    # operationLocator (defined elsewhere) supplies the index of the
    # operator that belongs to the outermost brackets.
    op_index = operationLocator(regex)
    # Anything that is not a bar is treated as a dot.
    node_type = BarTree if regex[op_index] == BAR else DotTree
    # Operands lie between the outer brackets, on either side of the operator.
    left_operand = regex[1:op_index]
    right_operand = regex[op_index + 1:-1]
    return node_type(build_regex_tree(left_operand),
                     build_regex_tree(right_operand))
Beispiel #6
0
def build_regex_tree(regex):
    '''(string) -> RegexTree
    Build the regex tree corresponding to regex and return its root.
    REQ regex is a valid regular expression
    '''
    # One character: a leaf.
    if len(regex) == 1:
        return Leaf(regex)

    # Trailing star: wrap the tree built from the rest.
    if regex[-1] == star:
        return StarTree(build_regex_tree(regex[:-1]))

    # Binary expression: drop the outer brackets and let split (defined
    # elsewhere) hand back the two operands and the operator between them.
    left_part, right_part, operator = split(regex[1:-1])
    left_tree = build_regex_tree(left_part)
    right_tree = build_regex_tree(right_part)
    # Any operator other than dot produces a BarTree.
    if operator == dot:
        return DotTree(left_tree, right_tree)
    return BarTree(left_tree, right_tree)
def build_regex_tree(regex):
    '''(str) -> Tree
    Given a regex inside a bracket, returns the node which represents that
    regex.

    The string is scanned left to right while a two-element list
    [operands, operator] is filled in. Recursive calls (started at every
    '(') return that raw list as soon as they reach a ')'; only a call that
    runs to the end of the string converts the collected data into a
    BarTree/DotTree/StarTree.

    NOTE(review): the tree constructors receive the collected list elements
    as they are (characters or nested lists), not recursively built trees --
    confirm this is what the tree classes expect.
    NOTE(review): for input without brackets (e.g. a single leaf) the operand
    list has one element, so the ret[1] lookups after the loop raise
    IndexError -- confirm callers always pass a bracketed regex.
    '''
    # current position in regex
    i = 0
    # ret[0] collects the operands, ret[1] remembers the last operator seen
    ret = [[], '']
    # walk through the regex; a character that matches none of the branches
    # below never advances i, so such input would loop forever
    while i < len(regex):
        # a close bracket ends this (sub)regex: hand the raw list back to
        # the caller
        if regex[i] == ')':
            return ret
        # an open bracket starts a sub-regex: parse the remainder recursively
        # and extend the operand list with the [operands, operator] result
        elif regex[i] == '(':
            ret[0] += build_regex_tree(regex[i + 1:])
            # skip ahead by the length of the first collected operand plus 3
            # (intended as two brackets and one operator)
            # NOTE(review): ret[0][0] is the child's operand list only when
            # the bracket is the first operand at this level -- confirm
            i += len(ret[0][0]) + 3
        # a leaf character becomes the next operand
        elif regex[i] in '012e':
            ret[0] += regex[i]
            i += 1
        # an operator character replaces the stored operator
        elif regex[i] in '|.*':
            ret[1] = regex[i]
            i += 1
    # keep only the operand list; from here on ret[0] / ret[1] index into
    # that list, not into the original [operands, operator] pair
    ret = ret[0]
    # convert to a tree according to the element stored at index 1
    if ret[1] == '|':
        ret = BarTree(ret[0][0], ret[0][1])
    elif ret[1] == '.':
        ret = DotTree(ret[0][0], ret[0][1])
    elif ret[1] == '*':
        ret = StarTree(ret[0][0])
    # the tree, or the unchanged list when no operator matched
    return ret
Beispiel #8
0
def build_regex_tree(regex):
    '''(str) -> RegexTree
    Take a string regex, build the matching regex tree and return the root
    of that tree. Returns None for the empty string and for input that fits
    none of the handled shapes.

    >>> build_regex_tree("(1.2)") == RegexTree('.', [Leaf('1'), Leaf('2')])
    True
    >>> build_regex_tree("(1*|e)") == RegexTree(
    ...     '|', [StarTree(Leaf('1')), Leaf('e')])
    True
    >>> build_regex_tree("(((e.1).2*)|((0***.e).(1|2*)))") == RegexTree(
    ...     '|', [
    ...         RegexTree('.', [RegexTree('.', [Leaf('e'), Leaf('1')]),
    ...                         StarTree(Leaf('2'))]),
    ...         RegexTree('.', [
    ...             RegexTree('.', [StarTree(StarTree(StarTree(Leaf('0')))),
    ...                             Leaf('e')]),
    ...             RegexTree('|', [Leaf('1'), StarTree(Leaf('2'))])])])
    True
    '''
    # A regex of length 1 has no children: it is a leaf.
    if len(regex) == 1:
        return Leaf(regex)
    # Nothing can be built from the empty string.
    if regex == "":
        return None
    # A trailing star becomes the parent of the tree for the rest.
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))
    if regex[0] == "(" and regex[-1] == ")":
        # Track bracket depth; the operator that belongs to the outermost
        # brackets is the first dot or bar seen at depth 1.
        depth = 0
        for position, symbol in enumerate(regex):
            if symbol == "(":
                depth += 1
            elif symbol == ")":
                depth -= 1
            if depth == 1 and symbol in (".", "|"):
                # Drop the outer brackets and recurse on both halves.
                left_child = build_regex_tree(regex[1:position])
                right_child = build_regex_tree(regex[position + 1:-1])
                return RegexTree(symbol, [left_child, right_child])
    return None
Beispiel #9
0
def build_regex_tree(regex):
    '''(str) -> RegexTree
    Build the RegexTree described by the valid regex parameter and return
    the root of that tree.
    REQ: regex is a valid regex
    '''
    # Length 1 regex
    if len(regex) == 1:
        return Leaf(regex)

    # '*' regex
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))

    # '.' or '|' regex: walk the characters between the outer brackets while
    # counting unmatched '(' brackets. The first '.' or '|' seen while that
    # count is zero belongs to the outer brackets; the text on either side of
    # it (outer brackets excluded) forms the two children.
    open_brackets = 0
    for position in range(1, len(regex) - 1):
        symbol = regex[position]
        if symbol == '(':
            open_brackets += 1
        elif symbol == ')':
            open_brackets -= 1
        elif open_brackets == 0 and symbol in ('.', '|'):
            node_type = DotTree if symbol == '.' else BarTree
            return node_type(build_regex_tree(regex[1:position]),
                             build_regex_tree(regex[position + 1:-1]))
    # No operator found at the outer level.
    return None
def build_regex_tree(regex):
    """
    (str) -> object
    Take a regex and return the root node of its tree. Return None if the
    regex is not valid (as decided by is_regex).

    >>> build_regex_tree("0")
    Leaf('0')

    >>> build_regex_tree("1")
    Leaf('1')

    >>> build_regex_tree("2")
    Leaf('2')

    >>> build_regex_tree("e")
    Leaf('e')

    >>> build_regex_tree("1*")
    StarTree(Leaf('1'))

    >>> build_regex_tree("(1|2)")
    BarTree(Leaf('1'), Leaf('2'))

    >>> build_regex_tree("(e.2)")
    DotTree(Leaf('e'), Leaf('2'))

    >>> build_regex_tree("(1|(0.2)*)")
    BarTree(Leaf('1'), StarTree(DotTree(Leaf('0'), Leaf('2'))))

    >>> build_regex_tree("(()") is None
    True

    >>> build_regex_tree("(1.(e*|(1*|2)))")  # doctest: +NORMALIZE_WHITESPACE
    DotTree(Leaf('1'), BarTree(StarTree(Leaf('e')),
    BarTree(StarTree(Leaf('1')), Leaf('2'))))

    """
    # Invalid input produces no tree. This check runs again on every
    # recursive call.
    if not is_regex(regex):
        return None

    # A single character is a leaf.
    if len(regex) == 1:
        return Leaf(regex)

    # A trailing star: the child is the tree for the rest of the regex.
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))

    # Bracketed regex: splicer (defined elsewhere) is asked for the left
    # expression, the operation and the right expression.
    try:
        left, operation, right = splicer(regex)
    except Exception:
        # Any failure here (in splicer or in the unpacking) is taken to mean
        # the brackets do not hold a binary expression; strip them and retry.
        return build_regex_tree(regex[1:-1])

    # BINARY (defined elsewhere) maps the operation to its tree class.
    node_type = BINARY[operation]
    return node_type(build_regex_tree(left), build_regex_tree(right))
Beispiel #11
0
def build_regex_tree(regex):
    '''(str) -> Dot/Bar/Star/Leaf Tree
    REQ: regex must be valid
    Take the given valid regex string, build the appropriate tree and return
    its root. Returns None if no operator is found directly inside the
    outermost brackets (which cannot happen for a valid regex).

    The operator belonging to the outermost brackets is located by tracking
    bracket depth, so operands may be any mix of leaves, bracket groups and
    stars.

    >>> build_regex_tree('1***')
    StarTree(StarTree(StarTree(Leaf('1'))))
    >>> build_regex_tree('(1|2)')
    BarTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree('(0.e)')
    DotTree(Leaf('0'), Leaf('e'))
    >>> build_regex_tree('((0.1).2)')
    DotTree(DotTree(Leaf('0'), Leaf('1')), Leaf('2'))
    >>> build_regex_tree('((0.1)|2*)')
    BarTree(DotTree(Leaf('0'), Leaf('1')), StarTree(Leaf('2')))
    >>> build_regex_tree('((0.1)*|2*)*')  # doctest: +NORMALIZE_WHITESPACE
    StarTree(BarTree(StarTree(DotTree(Leaf('0'), Leaf('1'))),
    StarTree(Leaf('2'))))
    >>> build_regex_tree("((0|1).(2.1))")
    DotTree(BarTree(Leaf('0'), Leaf('1')), DotTree(Leaf('2'), Leaf('1')))
    >>> build_regex_tree('(0.(1.(0|2)*))')  # doctest: +NORMALIZE_WHITESPACE
    DotTree(Leaf('0'), DotTree(Leaf('1'), StarTree(BarTree(Leaf('0'),
    Leaf('2')))))
    >>> build_regex_tree("((0.1)|(2.1)*)")  # doctest: +NORMALIZE_WHITESPACE
    BarTree(DotTree(Leaf('0'), Leaf('1')),
    StarTree(DotTree(Leaf('2'), Leaf('1'))))
    '''
    # star, left, right, dot and bar are module-level constants defined
    # outside this block (presumably '*', '(', ')', '.' and '|'). They are
    # compared with ==, not `is`, because string identity is not guaranteed.

    # A single letter has no children, hence it is a leaf.
    if len(regex) == 1:
        return Leaf(regex)
    # A trailing star wraps the tree of everything before it.
    if regex[-1] == star:
        return StarTree(build_regex_tree(regex[:-1]))

    # Remove the outer brackets, then look for the one operator that is not
    # nested inside any inner bracket group.
    inner = regex[1:-1]
    depth = 0
    for index, char in enumerate(inner):
        if char == left:
            depth += 1
        elif char == right:
            depth -= 1
        elif depth == 0 and (char == dot or char == bar):
            tree_type = DotTree if char == dot else BarTree
            return tree_type(build_regex_tree(inner[:index]),
                             build_regex_tree(inner[index + 1:]))
    # Only reachable for input that violates the REQ above.
    return None
Beispiel #12
0
def build_regex_tree(regex):
    ''' (str) -> RegexTree
    Take a valid regex string, build the regex tree for it and return the
    root of that tree.
    REQ: String must be a valid regex
    >>> build_regex_tree("((1.(0|1)*).2)")  # doctest: +NORMALIZE_WHITESPACE
    DotTree(DotTree(Leaf('1'), StarTree(BarTree(Leaf('0'), Leaf('1')))),
    Leaf('2'))
    >>> build_regex_tree("(1|2)")
    BarTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree("(1.2)")
    DotTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree("1****")
    StarTree(StarTree(StarTree(StarTree(Leaf('1')))))
    '''
    # Base case: a string of at most one character is a leaf.
    if len(regex) <= 1:
        return Leaf(regex)

    # A trailing star wraps the tree of the regex without that star.
    if regex[-1] == "*":
        return StarTree(build_regex_tree(regex[:-1]))

    # The regex has the form ( + r1 + |/. + r2 + ). bracket and duo_regex are
    # module-level constants defined elsewhere; as used here, bracket holds
    # the opening and closing bracket and duo_regex the bar and the dot.
    inner = regex[1:-1]
    depth = 0
    split_at = 0
    for offset, symbol in enumerate(inner):
        if symbol == bracket[0]:
            depth += 1
        elif symbol == bracket[1]:
            depth -= 1
        elif depth == 0 and symbol in duo_regex:
            # The +1 converts the offset inside the brackets into an index
            # of the full regex. The value is accumulated, so this relies on
            # a valid regex having exactly one operator at this level.
            split_at += offset + 1

    operator = regex[split_at]
    if operator == duo_regex[0]:
        node_type = BarTree
    elif operator == duo_regex[1]:
        node_type = DotTree
    else:
        # No operator was located.
        return None
    # r1 lies between the opening bracket and the operator, r2 between the
    # operator and the closing bracket.
    return node_type(build_regex_tree(regex[1:split_at]),
                     build_regex_tree(regex[split_at + 1:-1]))
Beispiel #13
0
def build_regex_tree(regex):
    '''
    (str) -> RegexTree

    Take a valid regex and build the tree corresponding to it. Leaves are
    the characters '0', '1', '2' and 'e'; a trailing '*' gives a StarTree and
    a bracketed "(r1.r2)" / "(r1|r2)" gives a DotTree / BarTree. Returns None
    when the string fits none of these shapes.

    REQ: regex must be string.

    Example:
    >>> build_regex_tree('(0*|1*)')
    BarTree(StarTree(Leaf('0')), StarTree(Leaf('1')))

    >>> build_regex_tree('((0.1).0)')
    DotTree(DotTree(Leaf('0'), Leaf('1')), Leaf('0'))

    >>> build_regex_tree('((1.(0|1)*).0)')  # doctest: +NORMALIZE_WHITESPACE
    DotTree(DotTree(Leaf('1'), StarTree(BarTree(Leaf('0'), Leaf('1')))),
            Leaf('0'))

    >>> build_regex_tree('(0.(1|2)**)')
    DotTree(Leaf('0'), StarTree(StarTree(BarTree(Leaf('1'), Leaf('2')))))
    '''
    if len(regex) == 1 and regex in '012e':
        return Leaf(regex)

    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))

    if regex[0] == '(' and regex[-1] == ')':
        # Find the operator of the outermost brackets: the first '.' or '|'
        # that is not enclosed by an inner bracket group. Tracking the depth
        # works no matter how many stars follow either operand.
        depth = 0
        for i in range(1, len(regex) - 1):
            char = regex[i]
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
            elif depth == 0 and char in ('.', '|'):
                node_type = DotTree if char == '.' else BarTree
                # Left side and right side, outer brackets excluded.
                return node_type(build_regex_tree(regex[1:i]),
                                 build_regex_tree(regex[i + 1:-1]))

    # Not a recognised regex shape.
    return None
Beispiel #14
0
def build_regex_tree(valid_r):
    ''' (str) -> RegexTree or subclass of RegexTree
    Return the root of the tree which has been transformed from the valid
    regex expression valid_r.

    Every trailing star wraps what precedes it in a StarTree, a
    parenthesised '(r1.r2)' becomes a DotTree, a parenthesised '(r1|r2)'
    becomes a BarTree, and a single symbol found in base_regex becomes a
    Leaf.

    REQ: valid_r must be valid regex expression.
    RAISES: ValueError if valid_r cannot be parsed as a regex.

    >>> build_regex_tree('(1|(0*.e))')
    BarTree(Leaf('1'), DotTree(StarTree(Leaf('0')), Leaf('e')))
    >>> build_regex_tree('0***')
    StarTree(StarTree(StarTree(Leaf('0'))))
    >>> build_regex_tree('((0.1)*|1)')
    BarTree(StarTree(DotTree(Leaf('0'), Leaf('1'))), Leaf('1'))
    '''
    # Peel the trailing stars off with a loop (not recursion) so that a long
    # run of stars does not deepen the call stack.
    core_end = len(valid_r)
    while core_end > 0 and valid_r[core_end - 1] == star:
        core_end -= 1
    core = valid_r[:core_end]
    star_count = len(valid_r) - core_end

    if len(core) == 1 and core in base_regex:
        # the form of 'base_regex'
        root = Leaf(core)
    elif core[:1] == '(' and core[-1:] == ')':
        # the form of '(r1 operation r2)'.  The operation that belongs to
        # this pair of parentheses is the only '.' or '|' that is not
        # enclosed by any inner parentheses, so track the nesting depth
        # while scanning between the outer '(' and ')'.
        builders = {'.': DotTree, '|': BarTree}
        root = None
        depth = 0
        for pos in range(1, len(core) - 1):
            char = core[pos]
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
            elif depth == 0 and char in builders:
                left_child = build_regex_tree(core[1:pos])
                right_child = build_regex_tree(core[pos + 1:-1])
                root = builders[char](left_child, right_child)
                break
        if root is None:
            raise ValueError('not a valid regex: ' + repr(valid_r))
    else:
        raise ValueError('not a valid regex: ' + repr(valid_r))

    # Re-apply the stars that were peeled off, innermost first.
    for _ in range(star_count):
        root = StarTree(root)
    return root
Beispiel #15
0
                root = StarTree(c)
                if len(valid_r[r_ind:]) > 2:
                    for i in range(len(valid_r[r_ind:]) - 2):
                        root = StarTree(root)
                return root


if __name__ == '__main__':
    # Ad-hoc demo: hand-build a few trees, print whether they match some
    # sample strings, then print trees produced by build_regex_tree.

    # One bare RegexTree node per symbol, built in the order '0', '1', '2',
    # 'e', each with its own empty children list.
    br1, br2, br3, br4 = (RegexTree(symbol, []) for symbol in '012e')
    dt = DotTree(br1, br2)
    bt = BarTree(dt, br2)
    dt2 = DotTree(br1, br2)
    st = StarTree(dt)

    # (tree, string) pairs, checked and printed in this order.
    match_cases = ((st, '1111'), (dt, '01'), (bt, '01'), (dt2, '01'),
                   (bt, '1'))
    for sample_tree, sample_text in match_cases:
        print('THEY MATCH -> ' + str(regex_match(sample_tree, sample_text)))

    # Disabled: print('perm' + str(all_regex_permutations('((1|0*).0)')))

    # Trees built from regex strings.
    a = build_regex_tree('1')
    print(a)
    b = build_regex_tree('((1.0)|(0*.1))')
    print(b)
    c = build_regex_tree('(1*.0)')
    print(c)
    # Walk from the root to its left child, then to that node's child, and
    # print the symbol stored there.
    starred_left = c.get_left_child()
    print(starred_left.get_child().get_symbol())
    d = build_regex_tree('(1*|0***)')
    print(d)