def build_regex_tree(regex: str) -> "RegexTree":
    """Build the RegexTree described by a valid regex in string form.

    A one-character regex becomes a Leaf. A regex ending in '*' becomes a
    StarTree over everything in front of the star. Any other regex is cut
    into three pieces by regex_split and becomes a BarTree when the middle
    piece is '|', and a DotTree otherwise.

    >>> build_regex_tree('0')
    Leaf('0')
    >>> build_regex_tree('0*')
    StarTree(Leaf('0'))
    >>> build_regex_tree('(0.1)')
    DotTree(Leaf('0'), Leaf('1'))
    >>> build_regex_tree('(1|0)')
    BarTree(Leaf('1'), Leaf('0'))
    >>> build_regex_tree('(0*|1*)')
    BarTree(StarTree(Leaf('0')), StarTree(Leaf('1')))
    >>> build_regex_tree('((0.1).0)')
    DotTree(DotTree(Leaf('0'), Leaf('1')), Leaf('0'))
    """
    # A single symbol has no children.
    if len(regex) == 1:
        return Leaf(regex)

    # A trailing star applies to the whole expression in front of it.
    if regex[-1] == "*":
        return StarTree(build_regex_tree(regex[:-1]))

    # Binary case: regex_split (per the original note, the helper that
    # is_regex also uses) hands back the pieces; index 0 and 2 are the
    # operands and index 1 is the operator.
    pieces = regex_split(regex)
    node_type = BarTree if pieces[1] == "|" else DotTree
    return node_type(build_regex_tree(pieces[0]),
                     build_regex_tree(pieces[2]))
def build_regex_tree(regex: str) -> "RegexTree":
    """Given regex, a valid regular expression, build the corresponding
    regular expression tree and return its root.

    Returns None when regex matches none of the recognised shapes (a
    single symbol, a starred regex, or a bracketed binary regex).

    >>> regex = "(0.1)*"
    >>> build_regex_tree(regex)
    StarTree(DotTree(Leaf('0'), Leaf('1')))
    >>> regex = "(0*|(1.2)*)"
    >>> build_regex_tree(regex)
    BarTree(StarTree(Leaf('0')), StarTree(DotTree(Leaf('1'), Leaf('2'))))
    >>> build_regex_tree("((0.1)**|2)")
    BarTree(StarTree(StarTree(DotTree(Leaf('0'), Leaf('1')))), Leaf('2'))
    """
    # Compare with == throughout: 'is' tests object identity and only
    # appears to work on short strings and small ints because of interning.
    if regex in ('0', '1', '2', 'e'):
        return Leaf(regex)

    # A trailing star wraps everything in front of it.
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))

    if regex[0] == '(' and regex[-1] == ')':
        inner = regex[1:-1]
        # The operator that belongs to the outer brackets is the only '.'
        # or '|' that sits outside every nested bracket pair, so track the
        # nesting depth instead of guessing from the operands' shapes.
        depth = 0
        for position, symbol in enumerate(inner):
            if symbol == '(':
                depth += 1
            elif symbol == ')':
                depth -= 1
            elif depth == 0 and symbol in ('.', '|'):
                node_type = DotTree if symbol == '.' else BarTree
                return node_type(build_regex_tree(inner[:position]),
                                 build_regex_tree(inner[position + 1:]))

    # Not a recognised regex shape.
    return None
def build_regex_tree(regex):
    """Build the regex tree for the valid regex string and return its root.

    A single symbol ('0', '1', '2' or 'e') becomes a Leaf, a regex ending
    in '*' becomes a StarTree, and a bracketed regex becomes a DotTree or
    BarTree split at the operator of the outer brackets.

    Returns None when regex matches none of those shapes.

    >>> build_regex_tree("(0.1)*")
    StarTree(DotTree(Leaf('0'), Leaf('1')))
    >>> build_regex_tree("((0.1)**|2)")
    BarTree(StarTree(StarTree(DotTree(Leaf('0'), Leaf('1')))), Leaf('2'))
    """
    # Compare with == throughout: 'is' tests object identity and only
    # appears to work on short strings and small ints because of interning.
    if regex in ('0', '1', '2', 'e'):
        return Leaf(regex)

    # A trailing star wraps everything in front of it.
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))

    if regex[0] == '(' and regex[-1] == ')':
        inner = regex[1:-1]
        # The operator that belongs to the outer brackets is the only '.'
        # or '|' that sits outside every nested bracket pair, so track the
        # nesting depth instead of guessing from the operands' shapes.
        depth = 0
        for position, symbol in enumerate(inner):
            if symbol == '(':
                depth += 1
            elif symbol == ')':
                depth -= 1
            elif depth == 0 and symbol in ('.', '|'):
                node_type = DotTree if symbol == '.' else BarTree
                return node_type(build_regex_tree(inner[:position]),
                                 build_regex_tree(inner[position + 1:]))

    # Not a recognised regex shape.
    return None
def tree_helper(regex_list, ternaries=('0', '1', '2', 'e')):
    ''' (list[, sequence of str]) -> RegexTree
    Recursively build a tree from the list form of a regex and return its
    root. Symbols and operators are items of regex_list; a bracketed
    sub-regex is a nested list. ternaries holds the symbols that are
    accepted as leaves.

    Raises ValueError if regex_list is empty or starts with an item that is
    neither a nested list nor one of ternaries.

    >>> tree_helper(['e'])
    Leaf('e')
    >>> tree_helper(['1','.','0'])
    DotTree(Leaf('1'), Leaf('0'))
    >>> tree_helper(['1','|',['2','.','e'],'*'])
    BarTree(Leaf('1'), StarTree(DotTree(Leaf('2'), Leaf('e'))))
    >>> tree_helper(['0','*','*'])
    StarTree(StarTree(Leaf('0')))
    '''
    # a dot at this nesting level: everything left of it is the left
    # child, everything right of it is the right child
    if '.' in regex_list:
        split_at = regex_list.index('.')
        return DotTree(tree_helper(regex_list[:split_at], ternaries),
                       tree_helper(regex_list[split_at + 1:], ternaries))
    # same for a bar
    if '|' in regex_list:
        split_at = regex_list.index('|')
        return BarTree(tree_helper(regex_list[:split_at], ternaries),
                       tree_helper(regex_list[split_at + 1:], ternaries))
    # no binary operator left: the list is an operand followed by stars.
    # Wrap the operand once per star so that repeated stars are not lost.
    if '*' in regex_list:
        first_star = regex_list.index('*')
        result = tree_helper(regex_list[:first_star], ternaries)
        for _ in range(regex_list[first_star:].count('*')):
            result = StarTree(result)
        return result
    if not regex_list:
        raise ValueError('cannot build a tree from an empty regex list')
    head = regex_list[0]
    # a nested list is a bracketed sub-regex: build it on its own
    if isinstance(head, list):
        return tree_helper(head, ternaries)
    # a plain symbol becomes a leaf
    if head in ternaries:
        return Leaf(head)
    raise ValueError('unrecognised regex symbol: ' + repr(head))
def build_regex_tree(regex):
    '''(str) -> RegexTree
    REQ: regex must be a valid regex
    Build the RegexTree that the valid regex string describes and return
    its root.
    >>> build_regex_tree('1*')
    StarTree(Leaf('1'))
    >>> build_regex_tree('(1|0)*')
    StarTree(BarTree(Leaf('1'), Leaf('0')))
    >>> build_regex_tree('(2*.0)')
    DotTree(StarTree(Leaf('2')), Leaf('0'))
    >>> build_regex_tree('e*')
    StarTree(Leaf('e'))
    >>> build_regex_tree('((1.0)|(2.1))')
    BarTree(DotTree(Leaf('1'), Leaf('0')), DotTree(Leaf('2'), Leaf('1')))
    >>> build_regex_tree("((0|1*)|((2*.1*).(e*.0)))")
    BarTree(BarTree(Leaf('0'), StarTree(Leaf('1'))), DotTree(DotTree(StarTree(Leaf('2')), StarTree(Leaf('1'))), DotTree(StarTree(Leaf('e')), Leaf('0'))))
    '''
    # A regex of length one is a lone symbol, which has no children
    if len(regex) == 1:
        return Leaf(regex)
    # A trailing star wraps the tree of everything in front of it
    if regex[-1] == STAR:
        return StarTree(build_regex_tree(regex[:-1]))
    # Otherwise the regex is bracketed: operationLocator gives the index of
    # its main operation, and the operands are the text on either side of
    # that index with the outer brackets dropped
    split_at = operationLocator(regex)
    # A bar at that index gives a BarTree, anything else a DotTree
    node_type = BarTree if regex[split_at] == BAR else DotTree
    return node_type(build_regex_tree(regex[1:split_at]),
                     build_regex_tree(regex[split_at + 1:-1]))
def build_regex_tree(regex):
    '''(string) -> RegexTree
    Return the root of the regex tree that corresponds to regex.
    REQ regex is a valid regular expression
    '''
    # a single symbol is a leaf
    if len(regex) == 1:
        return Leaf(regex)
    # a trailing star wraps the tree of everything before it
    if regex[-1] == star:
        return StarTree(build_regex_tree(regex[:-1]))
    # binary case: drop the outer brackets, then let split pick out the
    # two operands and the operator between them
    left_part, right_part, operator = split(regex[1:-1])
    left_tree = build_regex_tree(left_part)
    right_tree = build_regex_tree(right_part)
    if operator == dot:
        return DotTree(left_tree, right_tree)
    return BarTree(left_tree, right_tree)
def build_regex_tree(regex): '''(str) -> Tree Given a regex inside a bracket, returns the node which represents that regex ''' #while loop used for going through the regex i = 0 #returns a list of list and str ret = [[], ''] #iterates through the regex while i < len(regex): #a close bracket marks the end of the regex if regex[i] == ')': return ret #an open bracket marks the start of the left child elif regex[i] == '(': ret[0] += build_regex_tree(regex[i + 1:]) #adds the number of items in the child + the number of brackets(2) #and an operator(1) i += len(ret[0][0]) + 3 #adds to the first sublist if it is a valid regex char elif regex[i] in '012e': ret[0] += regex[i] i += 1 #sets type by looking at operator elif regex[i] in '|.*': ret[1] = regex[i] i += 1 #bug, had to remove extra brackets ret = ret[0] #converting to Tree if ret[1] == '|': ret = BarTree(ret[0][0], ret[0][1]) elif ret[1] == '.': ret = DotTree(ret[0][0], ret[0][1]) elif ret[1] == '*': ret = StarTree(ret[0][0]) #returns Tree at the end return ret
def build_regex_tree(regex):
    '''(str) -> RegexTree
    Build the regex tree for the string regex and return the root of the
    tree. Returns None for the empty string and for any regex that is
    neither a single character, starred, nor wrapped in brackets.
    >>> build_regex_tree("(1.2)") == RegexTree('.', [Leaf('1'), Leaf('2')])
    True
    >>> build_regex_tree("(1*|e)") == RegexTree(
    ...     '|', [StarTree(Leaf('1')), Leaf('e')])
    True
    >>> build_regex_tree("(((e.1).2*)|((0***.e).(1|2*)))") == RegexTree(
    ...     '|', [RegexTree('.', [RegexTree('.', [Leaf('e'), Leaf('1')]),
    ...                           StarTree(Leaf('2'))]),
    ...           RegexTree('.', [RegexTree('.', [StarTree(StarTree(StarTree(
    ...                               Leaf('0')))), Leaf('e')]),
    ...                           RegexTree('|', [Leaf('1'),
    ...                                           StarTree(Leaf('2'))])])])
    True
    '''
    # a regex of length 1 has no children, so it is a leaf
    if len(regex) == 1:
        return Leaf(regex)
    # the empty string matches none of the remaining forms
    if regex == "":
        return None
    # a trailing star becomes the parent of the tree for the rest
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))
    if regex[0] == "(" and regex[-1] == ")":
        # walk the whole regex keeping count of the open brackets; a dot
        # or bar seen while exactly one bracket is open belongs to the
        # outer brackets (the last such position is the one kept)
        depth = 0
        for position, symbol in enumerate(regex):
            if symbol == "(":
                depth += 1
            elif symbol == ")":
                depth -= 1
            if depth == 1 and symbol in (".", "|"):
                split_at = position
        # strip the outer brackets and build one child from each side of
        # the operator that was found
        return RegexTree(regex[split_at],
                         [build_regex_tree(regex[1:split_at]),
                          build_regex_tree(regex[split_at + 1:-1])])
    return None
def build_regex_tree(regex):
    '''(str) -> RegexTree
    Build the RegexTree that the valid regex parameter specifies and
    return the root of that tree.
    REQ: regex is a valid regex
    '''
    # A regex of length 1 is a leaf
    if len(regex) == 1:
        return Leaf(regex)
    # A trailing '*' wraps the tree of everything before it
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))
    # Otherwise look for the '.' or '|' that belongs to the outer brackets.
    # Scan the characters between those brackets while counting how many
    # inner '(' are still open; the first '.' or '|' met while none is
    # open is the operation, and the text on either side of it (without
    # the outer brackets) gives the two children.
    open_brackets = 0
    for position in range(1, len(regex) - 1):
        symbol = regex[position]
        if open_brackets == 0 and symbol == '.':
            return DotTree(build_regex_tree(regex[1:position]),
                           build_regex_tree(regex[position + 1:-1]))
        if open_brackets == 0 and symbol == '|':
            return BarTree(build_regex_tree(regex[1:position]),
                           build_regex_tree(regex[position + 1:-1]))
        if symbol == '(':
            open_brackets += 1
        elif symbol == ')':
            open_brackets -= 1
    # No operation was found between the outer brackets
    return None
def build_regex_tree(regex):
    """ (str) -> object
    Take a regex and return the root node of its tree, or None when
    is_regex rejects the regex.
    >>> build_regex_tree("0")
    Leaf('0')
    >>> build_regex_tree("1")
    Leaf('1')
    >>> build_regex_tree("2")
    Leaf('2')
    >>> build_regex_tree("e")
    Leaf('e')
    >>> build_regex_tree("1*")
    StarTree(Leaf('1'))
    >>> build_regex_tree("(1|2)")
    BarTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree("(e.2)")
    DotTree(Leaf('e'), Leaf('2'))
    >>> build_regex_tree("(1|(0.2)*)")
    BarTree(Leaf('1'), StarTree(DotTree(Leaf('0'), Leaf('2'))))
    >>> build_regex_tree("(()") is None
    True
    >>> build_regex_tree("(1.(e*|(1*|2)))")
    DotTree(Leaf('1'), BarTree(StarTree(Leaf('e')), BarTree(StarTree(Leaf('1')), Leaf('2'))))
    """
    # Anything is_regex rejects has no tree
    if not is_regex(regex):
        return None
    # A regex of length one is a leaf holding that single character
    if len(regex) == 1:
        return Leaf(regex)
    # A trailing star gets a StarTree whose child is the rest of the regex
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))
    # What is left is bracketed. splicer is asked for the left expression,
    # the operation and the right expression.
    try:
        left, operation, right = splicer(regex)
    except Exception:
        # splicer (or the unpacking) failed, which the original treats as
        # "no binary operation here": retry without the outer brackets
        return build_regex_tree(regex[1:-1])
    # BINARY maps the operation to the tree class to instantiate; both
    # operands are turned into trees before being handed to it
    return BINARY[operation](build_regex_tree(left), build_regex_tree(right))
def build_regex_tree(regex):
    '''(str) -> Dot/Bar/Star/Leaf Tree
    REQ: regex must be valid
    This function takes the given valid regex string, builds the
    appropriate tree and returns its root.

    Raises ValueError if a bracketed regex has no operator outside its
    nested brackets.

    >>> build_regex_tree('1***')
    StarTree(StarTree(StarTree(Leaf('1'))))
    >>> build_regex_tree('(1|2)')
    BarTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree('(0.e)')
    DotTree(Leaf('0'), Leaf('e'))
    >>> build_regex_tree('(0.1)')
    DotTree(Leaf('0'), Leaf('1'))
    >>> build_regex_tree('((0.1).2)')
    DotTree(DotTree(Leaf('0'), Leaf('1')), Leaf('2'))
    >>> build_regex_tree('((0.1)|2)')
    BarTree(DotTree(Leaf('0'), Leaf('1')), Leaf('2'))
    >>> build_regex_tree('((0.1)|2*)')
    BarTree(DotTree(Leaf('0'), Leaf('1')), StarTree(Leaf('2')))
    >>> build_regex_tree('((0.1)*|2*)')
    BarTree(StarTree(DotTree(Leaf('0'), Leaf('1'))), StarTree(Leaf('2')))
    >>> build_regex_tree('((0.1)*|2*)*')
    StarTree(BarTree(StarTree(DotTree(Leaf('0'), Leaf('1'))), StarTree(Leaf('2'))))
    >>> build_regex_tree("((0.1)|(2.1))")
    BarTree(DotTree(Leaf('0'), Leaf('1')), DotTree(Leaf('2'), Leaf('1')))
    >>> build_regex_tree("((0.1).(2.1))")
    DotTree(DotTree(Leaf('0'), Leaf('1')), DotTree(Leaf('2'), Leaf('1')))
    >>> build_regex_tree("((0|1).(2.1))")
    DotTree(BarTree(Leaf('0'), Leaf('1')), DotTree(Leaf('2'), Leaf('1')))
    >>> build_regex_tree("((0|1).(2.1))*")
    StarTree(DotTree(BarTree(Leaf('0'), Leaf('1')), DotTree(Leaf('2'), Leaf('1'))))
    >>> build_regex_tree('(0.(1.(0|2)*))')
    DotTree(Leaf('0'), DotTree(Leaf('1'), StarTree(BarTree(Leaf('0'), Leaf('2')))))
    >>> build_regex_tree('(((0***.1)|2)|2***)')
    BarTree(BarTree(DotTree(StarTree(StarTree(StarTree(Leaf('0')))), Leaf('1')), Leaf('2')), StarTree(StarTree(StarTree(Leaf('2')))))
    >>> build_regex_tree("(1***.(0|(2.(1|e)*)**)***)*")
    StarTree(DotTree(StarTree(StarTree(StarTree(Leaf('1')))), StarTree(StarTree(StarTree(BarTree(Leaf('0'), StarTree(StarTree(DotTree(Leaf('2'), StarTree(BarTree(Leaf('1'), Leaf('e'))))))))))))
    >>> build_regex_tree('((0.1)|(1.2)*)')
    BarTree(DotTree(Leaf('0'), Leaf('1')), StarTree(DotTree(Leaf('1'), Leaf('2'))))
    '''
    # If we have a single letter then it has no children, hence it will be
    # a leaf
    if len(regex) == 1:
        return Leaf(regex)
    # If the last character is a star then we have to create a star tree
    # over everything in front of it. Compare with == rather than 'is':
    # identity of equal strings is an interpreter detail.
    if regex[-1] == star:
        return StarTree(build_regex_tree(regex[:-1]))
    # Remove the brackets
    inner = regex[1:-1]
    # The operator to split on is the only dot or bar that is not inside a
    # nested pair of brackets, whatever the operands look like, so count
    # the open brackets while scanning.
    # NOTE(review): left/right/dot/bar/star are defined elsewhere in the
    # file; presumably '(' , ')' , '.' , '|' and '*' - confirm.
    depth = 0
    for position, symbol in enumerate(inner):
        if symbol == left:
            depth += 1
        elif symbol == right:
            depth -= 1
        elif depth == 0 and (symbol == dot or symbol == bar):
            node_type = DotTree if symbol == dot else BarTree
            return node_type(build_regex_tree(inner[:position]),
                             build_regex_tree(inner[position + 1:]))
    raise ValueError('no top-level operator in regex: ' + repr(regex))
def build_regex_tree(regex):
    ''' (str) -> RegexTree
    Take a valid regex string, build the regex tree for it and return the
    root of that tree.
    REQ: String must be a valid regex
    >>> build_regex_tree("((1.(0|1)*).2)")
    DotTree(DotTree(Leaf('1'), StarTree(BarTree(Leaf('0'), Leaf('1')))), Leaf('2'))
    >>> build_regex_tree("(1|2)")
    BarTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree("(1.2)")
    DotTree(Leaf('1'), Leaf('2'))
    >>> build_regex_tree("1****")
    StarTree(StarTree(StarTree(StarTree(Leaf('1')))))
    '''
    # Base case: at most one character
    if len(regex) <= 1:
        return Leaf(regex)
    # A star as the last element wraps the regex without that element
    if regex[-1] == "*":
        return StarTree(build_regex_tree(regex[:-1]))
    # Otherwise the regex has the form ( + r1 + |/. + r2 + ).
    # Scan what lies between the outer brackets, counting open inner
    # brackets, to locate where r1 ends and r2 begins.
    depth = 0
    split_at = 0
    for offset, symbol in enumerate(regex[1:-1]):
        if symbol == bracket[0]:
            # a left bracket opens a nested group
            depth += 1
        elif symbol == bracket[1]:
            # a right bracket closes one
            depth -= 1
        elif depth == 0 and symbol in duo_regex:
            # an operator outside every nested group; the + 1 converts the
            # offset back to an index into regex (outer bracket included)
            split_at += offset + 1
    operator = regex[split_at]
    # duo_regex[0] selects a BarTree, duo_regex[1] a DotTree; r1 is the
    # text before the operator and r2 the text after it
    if operator == duo_regex[0]:
        return BarTree(build_regex_tree(regex[1:split_at]),
                       build_regex_tree(regex[split_at + 1:-1]))
    if operator == duo_regex[1]:
        return DotTree(build_regex_tree(regex[1:split_at]),
                       build_regex_tree(regex[split_at + 1:-1]))
    return None
def build_regex_tree(regex):
    ''' (str) -> RegexTree
    This function takes a valid regex and builds the tree corresponding
    to it, returning the root. Returns None when regex is in none of the
    recognised forms.
    REQ: regex must be string.
    Example:
    >>> build_regex_tree('(0*|1*)')
    BarTree(StarTree(Leaf('0')), StarTree(Leaf('1')))
    >>> build_regex_tree('((0.1).0)')
    DotTree(DotTree(Leaf('0'), Leaf('1')), Leaf('0'))
    >>> build_regex_tree('((1.(0|1)*).0)')
    DotTree(DotTree(Leaf('1'), StarTree(BarTree(Leaf('0'), Leaf('1')))), Leaf('0'))
    >>> build_regex_tree('(0.(1|2)**)')
    DotTree(Leaf('0'), StarTree(StarTree(BarTree(Leaf('1'), Leaf('2')))))
    '''
    # a single valid symbol is a leaf
    if len(regex) == 1 and regex in '012e':
        return Leaf(regex)
    # a trailing star wraps everything in front of it
    if regex[-1] == '*':
        return StarTree(build_regex_tree(regex[:-1]))
    if regex[0] == '(' and regex[-1] == ')':
        # The operator of the outer brackets is the only '.' or '|' that is
        # outside every nested bracket pair. Tracking the nesting depth
        # finds it for every operand shape, including starred groups on
        # the right-hand side.
        depth = 0
        for o_index in range(1, len(regex) - 1):
            symbol = regex[o_index]
            if symbol == '(':
                depth += 1
            elif symbol == ')':
                depth -= 1
            elif depth == 0 and symbol in '.|':
                # build the left side and the right side
                left_tree = build_regex_tree(regex[1:o_index])
                right_tree = build_regex_tree(regex[o_index + 1:-1])
                if symbol == '.':
                    return DotTree(left_tree, right_tree)
                return BarTree(left_tree, right_tree)
    # not a recognised regex form
    return None
def build_regex_tree(valid_r):
    ''' (str) -> RegexTree or subclass of RegexTree
    Return the root of the tree which has been transformed from the valid
    regex expression valid_r.
    REQ: valid_r must be valid regex expression.

    Raises ValueError if valid_r is not a base regex, a starred regex or a
    bracketed regex with an operation outside its nested brackets.

    >>> build_regex_tree('(1|(0*.e))')
    BarTree(Leaf('1'), DotTree(StarTree(Leaf('0')), Leaf('e')))
    >>> build_regex_tree('0***')
    StarTree(StarTree(StarTree(Leaf('0'))))
    '''
    # base case
    # NOTE(review): base_regex is defined elsewhere in the file; presumably
    # the collection of length-one regexes - confirm.
    if valid_r in base_regex:
        return Leaf(valid_r)
    # the form of 'regex + star': the last star wraps everything before
    # it, which covers both base regexes and bracketed regexes followed by
    # any number of stars
    if valid_r[-1] == star:
        return StarTree(build_regex_tree(valid_r[:-1]))
    # the form of '(regex operation regex)'
    if valid_r[0] == '(' and valid_r[-1] == ')':
        # The operation of the outer parentheses is the only '.' or '|'
        # that is not inside a nested pair, so one scan that counts open
        # parentheses replaces the case analysis on the operands' shapes.
        depth = 0
        for ind in range(1, len(valid_r) - 1):
            symbol = valid_r[ind]
            if symbol == '(':
                depth += 1
            elif symbol == ')':
                depth -= 1
            elif depth == 0 and symbol in ('.', '|'):
                l_c = build_regex_tree(valid_r[1:ind])
                r_c = build_regex_tree(valid_r[ind + 1:-1])
                if symbol == '.':
                    return DotTree(l_c, r_c)
                return BarTree(l_c, r_c)
    raise ValueError('not a valid regex: ' + repr(valid_r))
# Manual smoke test: runs only when the file is executed as a script.
# (A stray copy of the previous function's closing statements that sat in
# front of this guard has been removed; a 'return' there is outside any
# function.)
if __name__ == '__main__':
    # hand-built trees over the four base symbols
    br1 = RegexTree('0', [])
    br2 = RegexTree('1', [])
    br3 = RegexTree('2', [])
    br4 = RegexTree('e', [])
    dt = DotTree(br1, br2)
    bt = BarTree(dt, br2)
    dt2 = DotTree(br1, br2)
    st = StarTree(dt)
    # print what regex_match reports for each tree/string pair
    print('THEY MATCH -> ' + str(regex_match(st, '1111')))
    print('THEY MATCH -> ' + str(regex_match(dt, '01')))
    print('THEY MATCH -> ' + str(regex_match(bt, '01')))
    print('THEY MATCH -> ' + str(regex_match(dt2, '01')))
    print('THEY MATCH -> ' + str(regex_match(bt, '1')))
    #print('perm'+ str(all_regex_permutations('((1|0*).0)')))
    # print the trees build_regex_tree produces for a few regexes
    a = build_regex_tree('1')
    print(a)
    b = build_regex_tree('((1.0)|(0*.1))')
    print(b)
    c = build_regex_tree('(1*.0)')
    print(c)
    print(c.get_left_child().get_child().get_symbol())
    d = build_regex_tree('(1*|0***)')
    print(d)