def snap_expression(stream, PatternDict):
    """Parse an 'expression' from the stream.

    Grammar:
        expression:  term
                     term | expression

    A term is snapped first. If 'check()' then reports a '|', the function
    recurses for the alternative on the right hand side and merges both
    sides with 'parallelize.do()'.

    RETURNS (each value is passed through '__debug_exit()'):
        None                  -- no term could be snapped.
        the term alone        -- no '|' follows, or nothing could be snapped
                                 behind the '|'. In the latter case the stream
                                 is reset to the position recorded right after
                                 the '|' check.
        beautified alternation -- both sides were snapped.
    """
    __debug_entry("expression", stream)

    # -- left hand side: term
    lhs = snap_term(stream, PatternDict)
    if lhs == None:
        return __debug_exit(None, stream)

    # -- without a '|' the term alone is the expression
    if not check(stream, '|'):
        return __debug_exit(lhs, stream)

    behind_bar = stream.tell()
    __debug_print("'|' (in expression)")

    # -- right hand side: expression
    rhs = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", rhs)
    if rhs == None:
        # nothing usable behind the '|' => fall back to the term alone
        stream.seek(behind_bar)
        return __debug_exit(lhs, stream)

    alternation = parallelize.do([lhs, rhs])
    return __debug_exit(construct.beautify(alternation), stream)
def snap_term(stream, PatternDict):
    """Parse a 'term', i.e. one primary optionally followed by another term.

    Grammar:
        term:  primary
               primary term

    RETURNS (each value is passed through '__debug_exit()'):
        None                   -- no primary could be snapped.
        the primary alone      -- no further term follows; the stream is put
                                  back to where the primary ended.
        beautified sequence    -- primary and following term, joined with
                                  'sequentialize.do()'.
    """
    __debug_entry("term", stream)

    # -- primary
    head = snap_primary(stream, PatternDict)
    __debug_print("##primary(in term):", head)
    if head == None:
        return __debug_exit(None, stream)

    # -- optional 'term'
    behind_head = stream.tell()
    tail = snap_term(stream, PatternDict)
    __debug_print("##term(in term):", tail)
    if tail == None:
        # the failed attempt may have moved the stream => restore it
        stream.seek(behind_head)
        return __debug_exit(head, stream)

    sequence = sequentialize.do([head, tail],
                                MountToFirstStateMachineF=True,
                                CloneRemainingStateMachinesF=False)
    return __debug_exit(construct.beautify(sequence), stream)
def snap_expression(stream, PatternDict):
    """Parse an 'expression' from the stream.

    Grammar:
        expression:  term
                     term | expression

    RETURNS (each value is passed through '__debug_exit()'):
        None                   -- no term could be snapped.
        the term alone         -- no '|' follows (stream is left right behind
                                  the term), or nothing could be snapped
                                  behind the '|' (stream is left right behind
                                  the '|').
        beautified alternation -- both sides snapped and merged with
                                  'parallelize.do()'.
    """
    __debug_entry("expression", stream)

    # -- term
    result = snap_term(stream, PatternDict)
    if result is None:
        return __debug_exit(None, stream)

    # -- optional '|'
    #    The position is restored absolutely. A relative 'seek(-1, 1)' would
    #    step back onto the last character of the term whenever 'read(1)'
    #    returned nothing because the end of the stream was reached.
    position_0 = stream.tell()
    if stream.read(1) != '|':
        stream.seek(position_0)
        return __debug_exit(result, stream)

    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression
    result_2 = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", result_2)
    if result_2 is None:
        stream.seek(position_1)
        return __debug_exit(result, stream)

    result = parallelize.do([result, result_2])
    return __debug_exit(__beautify(result), stream)
def snap_set_term(stream, PatternDict):
    """Parse one term of a character set expression.

    After skipping whitespace an identifier is read and interpreted as
        -- a set operation ('union', 'intersection', 'difference', 'inverse')
           which is applied to the list delivered by 'snap_set_list()',
        -- the name of an entry in 'special_character_set_db()',
        -- an unknown keyword, reported through 'verify_word_in_list()', or
        -- nothing at all, in which case the stream is rewound and
           'snap_set_expression()' is tried instead.

    RETURNS: the resulting set (or None), passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if 'union', 'intersection' or
             'difference' is given less than two sets.

    NOTE(review): the operations modify the first set of the list in place.
    If that object is handed out by 'special_character_set_db()' the database
    entry itself may get altered -- confirm that the database returns copies.
    """
    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    # 'list()' keeps the concatenation below working even if 'keys()' does
    # not return a plain list.
    character_set_list = list(special_character_set_db().keys())

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        # if an error occurs during set_list parsing, an exception is thrown
        # about syntax error
        set_list = snap_set_list(stream, word, PatternDict)

        L = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union
            # of these sets.
            for character_set in set_list[1:]:
                result.unite_with(character_set)
            result = result.inverse()
            if Setup.get_character_value_limit() != -1:
                result.intersect_with(Interval(0, Setup.get_character_value_limit()))
            return __debug_exit(result, stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # Pick the in-place operation once, then fold the remaining sets in.
        if word == "union":
            combine = result.unite_with
        elif word == "intersection":
            combine = result.intersect_with
        else:
            # only "difference" is left in 'operation_list'
            combine = result.subtract
        for character_set in set_list[1:]:
            combine(character_set)

    elif word in character_set_list:
        result = special_character_set_db()[word]

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
        # Only reached if the verification returns instead of aborting;
        # without this 'result' would be unbound at the return below.
        result = None

    else:
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
def snap_set_term(stream, PatternDict):
    """Parse one term of a character set expression.

    After skipping whitespace an identifier is read and interpreted as
        -- a set operation ('union', 'intersection', 'difference', 'inverse')
           which is applied to the list delivered by 'snap_set_list()',
        -- the name of an entry in 'special_character_set_db()',
        -- an unknown keyword, reported through 'verify_word_in_list()', or
        -- nothing at all, in which case the stream is rewound and
           'snap_set_expression()' is tried instead.

    RETURNS: the resulting set (or None), passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if 'union', 'intersection' or
             'difference' is given less than two sets.

    NOTE(review): the operations modify the first set of the list in place.
    If that object is handed out by 'special_character_set_db()' the database
    entry itself may get altered -- confirm that the database returns copies.
    """
    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    # 'list()' keeps the concatenation below working even if 'keys()' does
    # not return a plain list.
    character_set_list = list(special_character_set_db().keys())

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        # if an error occurs during set_list parsing, an exception is thrown
        # about syntax error
        set_list = snap_set_list(stream, word, PatternDict)

        L = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union
            # of these sets.
            for character_set in set_list[1:]:
                result.unite_with(character_set)
            result = result.inverse()
            if Setup.get_character_value_limit() != -1:
                result.intersect_with(Interval(0, Setup.get_character_value_limit()))
            return __debug_exit(result, stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # Pick the in-place operation once, then fold the remaining sets in.
        if word == "union":
            combine = result.unite_with
        elif word == "intersection":
            combine = result.intersect_with
        else:
            # only "difference" is left in 'operation_list'
            combine = result.subtract
        for character_set in set_list[1:]:
            combine(character_set)

    elif word in character_set_list:
        result = special_character_set_db()[word]

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
        # Only reached if the verification returns instead of aborting;
        # without this 'result' would be unbound at the return below.
        result = None

    else:
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
def snap_set_term(stream):
    """Parse one term of a character set expression.

    After skipping whitespace a word is read and interpreted as
        -- a set operation ('union', 'intersection', 'difference', 'inverse')
           which is applied to the list delivered by 'snap_set_list()',
        -- the name of an entry in 'special_character_set_db', or
        -- anything else, in which case the stream is rewound and
           'snap_set_expression()' is tried instead.

    RETURNS: the resulting set (or None), passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if 'union', 'intersection' or
             'difference' is given less than two sets.
    """
    __debug_entry("set_term", stream)

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    try:
        word = read_until_non_letter(stream)
        stream.seek(-1, 1)  # putback the non-letter
    except Exception:
        # Narrowed from a bare 'except:' so that SystemExit and
        # KeyboardInterrupt are no longer swallowed here.
        # NOTE(review): which errors 'read_until_non_letter()' raises is not
        # visible from here -- confirm that they derive from Exception.
        word = "not a valid word"

    word = word.strip()

    if word in ["union", "intersection", "difference", "inverse"]:
        # if an error occurs during set_list parsing, an exception is thrown
        # about syntax error
        set_list = snap_set_list(stream, word)

        L = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union
            # of these sets.
            for character_set in set_list[1:]:
                result.unite_with(character_set)
            result = result.inverse()
            return __debug_exit(result, stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # Pick the in-place operation once, then fold the remaining sets in.
        if word == "union":
            combine = result.unite_with
        elif word == "intersection":
            combine = result.intersect_with
        else:
            # only "difference" is left
            combine = result.subtract
        for character_set in set_list[1:]:
            combine(character_set)

    elif word in special_character_set_db.keys():
        result = special_character_set_db[word]

    else:
        # try to snap an expression out of it
        stream.seek(position)
        result = snap_set_expression(stream)

    return __debug_exit(result, stream)
def snap_set_term(stream):
    """Parse one term of a character set expression.

    After skipping whitespace a word is read and interpreted as
        -- a set operation ('union', 'intersection', 'difference', 'inverse')
           which is applied to the list delivered by 'snap_set_list()',
        -- the name of an entry in 'special_character_set_db', or
        -- anything else, in which case the stream is rewound and
           'snap_set_expression()' is tried instead.

    RETURNS: the resulting set (or None), passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if 'union', 'intersection' or
             'difference' is given less than two sets.
    """
    __debug_entry("set_term", stream)

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    try:
        word = read_until_non_letter(stream)
        stream.seek(-1, 1)  # putback the non-letter
    except Exception:
        # Narrowed from a bare 'except:' so that SystemExit and
        # KeyboardInterrupt are no longer swallowed here.
        # NOTE(review): which errors 'read_until_non_letter()' raises is not
        # visible from here -- confirm that they derive from Exception.
        word = "not a valid word"

    word = word.strip()

    if word in ["union", "intersection", "difference", "inverse"]:
        # if an error occurs during set_list parsing, an exception is thrown
        # about syntax error
        set_list = snap_set_list(stream, word)

        L = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union
            # of these sets.
            for character_set in set_list[1:]:
                result.unite_with(character_set)
            result = result.inverse()
            return __debug_exit(result, stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # Pick the in-place operation once, then fold the remaining sets in.
        if word == "union":
            combine = result.unite_with
        elif word == "intersection":
            combine = result.intersect_with
        else:
            # only "difference" is left
            combine = result.subtract
        for character_set in set_list[1:]:
            combine(character_set)

    elif word in special_character_set_db.keys():
        result = special_character_set_db[word]

    else:
        # try to snap an expression out of it
        stream.seek(position)
        result = snap_set_expression(stream)

    return __debug_exit(result, stream)
def snap_set_expression(stream, PatternDict):
    """Parse a character set expression.

    'snap_property_set()' is tried first. Otherwise the next two characters
    decide:
        '\\C'  -- handled by 'case_fold_expression.do()',
        '[:'   -- a set term that must be closed by ':]',
        '['    -- handled by 'traditional_character_set.do()',
        '{'    -- handled by 'snap_replacement()' with StateMachineF=False,
        else   -- None.

    RETURNS: the parsed set or None. The property set and the '\\C' result
             are returned directly; all other results are passed through
             '__debug_exit()'.

    RAISES:  RegularExpressionException if a '[:' is not closed by ':]'.

    NOTE(review): the two direct returns skip '__debug_exit()' although
    '__debug_entry()' has been called -- confirm that this is intended.
    """
    assert stream.__class__.__name__ == "StringIO" \
           or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None:
        return result

    x = stream.read(2)
    if x == "\\C":
        return case_fold_expression.do(stream, PatternDict,
                                       snap_set_expression=snap_set_expression)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    # 'x[:1]' instead of 'x[0]': an empty read at the end of the stream must
    # not raise an IndexError.
    elif x[:1] == "[":
        # Put back the second character only if one was actually read, so
        # that the stream always stands right behind the '['.
        if len(x) == 2:
            stream.seek(-1, 1)
        result = traditional_character_set.do(stream)

    elif x[:1] == "{":
        if len(x) == 2:
            stream.seek(-1, 1)
        result = snap_replacement(stream, PatternDict, StateMachineF=False)

    else:
        result = None

    return __debug_exit(result, stream)
def snap_set_expression(stream):
    """Parse a character set expression.

    'snap_property_set()' is tried first. Otherwise the next two characters
    decide:
        '[:'   -- a set term that must be closed by ':]',
        '['    -- handled by 'traditional_character_set.do()',
        '\\P'  -- handled by 'property.do()',
        '\\N'  -- 'property.do_shortcut()' for "na" (UCS Property: Name),
        '\\G'  -- 'property.do_shortcut()' for "gc" (UCS Property:
                  General_Category),
        else   -- None.

    RETURNS: the parsed set or None. The result of 'snap_property_set()' is
             returned directly; all other results are passed through
             '__debug_exit()'.

    RAISES:  RegularExpressionException if a '[:' is not closed by ':]'.

    NOTE(review): the direct return skips '__debug_exit()' although
    '__debug_entry()' has been called -- confirm that this is intended.
    """
    assert stream.__class__.__name__ == "StringIO" \
           or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None:
        return result

    x = stream.read(2)
    if x == "[:":
        result = snap_set_term(stream)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    # 'x[:1]' instead of 'x[0]': an empty read at the end of the stream must
    # not raise an IndexError.
    elif x[:1] == "[":
        # Put back the second character only if one was actually read, so
        # that the stream always stands right behind the '['.
        if len(x) == 2:
            stream.seek(-1, 1)
        result = traditional_character_set.do(stream)

    elif x == "\\P":
        stream.seek(-2, 1)
        result = property.do(stream)

    elif x == "\\N":
        stream.seek(-2, 1)
        result = property.do_shortcut(stream, "N", "na")  # UCS Property: Name

    elif x == "\\G":
        stream.seek(-2, 1)
        result = property.do_shortcut(stream, "G", "gc")  # UCS Property: General_Category

    else:
        result = None

    return __debug_exit(result, stream)
def snap_set_list(stream, set_operation_name):
    """Parse the bracketed, comma separated argument list of a set operation.

    Expected input:  '(' set_term { ',' set_term } ')'
    Whitespace is skipped before the bracket, before each set term and
    before each separator.

    stream             -- seekable stream positioned before the '('.
    set_operation_name -- name of the operation; only used in error messages.

    RETURNS: list of the parsed sets (at least one element), passed through
             '__debug_exit()'.

    RAISES:  RegularExpressionException if the opening bracket is missing,
             if a set term cannot be parsed, or if a set term is followed by
             something else than ',' or ')'.
    """
    __debug_entry("set_list", stream)

    skip_whitespace(stream)
    if stream.read(1) != "(":
        raise RegularExpressionException("Missing opening bracket '%s' operation." % set_operation_name)

    set_list = []
    while True:
        skip_whitespace(stream)
        result = snap_set_term(stream)
        if result is None:
            raise RegularExpressionException("Missing set expression list after '%s' operation." % set_operation_name)
        set_list.append(result)

        skip_whitespace(stream)
        # Remember the position so that the offending character can be put
        # back absolutely. A relative 'seek(-1, 1)' would un-read a character
        # that was never read if the stream ended here.
        position = stream.tell()
        tmp = stream.read(1)
        if tmp == ",":
            continue
        if tmp != ")":
            stream.seek(position)
            raise RegularExpressionException("Missing closing ')' after after '%s' operation." % set_operation_name)
        return __debug_exit(set_list, stream)
def snap_non_control_character(stream, PatternDict):
    """Snap a single character and wrap it into a state machine.

    One UTF8 code is read from the stream. The returned state machine has
    exactly one transition on that code, leading from the init state to an
    acceptance state. A code of 0xFF is reported through 'error_msg()'.

    'PatternDict' is not used here.

    RETURNS: the state machine, passed through '__debug_exit()'.
    """
    __debug_entry("non-control characters", stream)

    # (*) read the one and only character
    code = utf8.__read_one_utf8_code_from_stream(stream)
    if code == 0xFF:
        error_msg("Character could not be interpreted as UTF8 code or End of File reached prematurely.",
                  stream)

    # (*) init state --(code)--> acceptance state
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, code, AcceptanceF=True)

    return __debug_exit(sm, stream)
def snap_term(stream, PatternDict):
    """Parse a 'term', i.e. one primary optionally followed by another term.

    Grammar:
        term:  primary
               primary term

    RETURNS (each value is passed through '__debug_exit()'):
        None                   -- no primary could be snapped.
        the primary alone      -- no further term follows; the stream is put
                                  back to where the primary ended.
        beautified sequence    -- primary and following term, joined with
                                  'sequentialize.do()'.
    """
    __debug_entry("term", stream)

    # -- primary
    head = snap_primary(stream, PatternDict)
    __debug_print("##primary(in term):", head)
    if head == None:
        return __debug_exit(None, stream)

    # -- optional 'term'
    behind_head = stream.tell()
    tail = snap_term(stream, PatternDict)
    __debug_print("##term(in term):", tail)
    if tail == None:
        # the failed attempt may have moved the stream => restore it
        stream.seek(behind_head)
        return __debug_exit(head, stream)

    sequence = sequentialize.do([head, tail],
                                MountToFirstStateMachineF=True,
                                CloneRemainingStateMachinesF=False)
    return __debug_exit(__beautify(sequence), stream)
def do(stream, PatternDict):
    """Turn a character set expression into a one-transition state machine.

    The set is obtained from 'snap_set_expression()'. The state machine
    built from it has a single transition on that set, leading from the init
    state to an acceptance state.

    RETURNS: the state machine, passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if no set expression could be
             snapped or if the resulting set is empty.
    """
    character_set = snap_set_expression(stream, PatternDict)

    if character_set == None:
        raise RegularExpressionException(
            "Regular Expression: character_set_expression called for something\n"
            "that does not start with '[:', '[' or '\\P'")

    if character_set.is_empty():
        raise RegularExpressionException(
            "Regular Expression: Character set expression results in empty set.")

    # The machine triggers on the character set to SUCCESS.
    # NOTE: The default for the ELSE transition is FAIL.
    machine = StateMachine()
    machine.add_transition(machine.init_state_index, character_set, AcceptanceF=True)

    return __debug_exit(machine, stream)
def do(stream):
    """Turn a character set expression into a one-transition state machine.

    The set is obtained from 'snap_set_expression()'. The state machine
    built from it has a single transition on that set, leading from the init
    state to an acceptance state.

    RETURNS: the state machine, passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if no set expression could be
             snapped or if the resulting set is empty.
    """
    character_set = snap_set_expression(stream)

    if character_set == None:
        raise RegularExpressionException(
            "Regular Expression: character_set_expression called for something\n"
            "that does not start with '[:', '[' or '\\P'")

    if character_set.is_empty():
        raise RegularExpressionException(
            "Regular Expression: Character set expression results in empty set.")

    # The machine triggers on the character set to SUCCESS.
    # NOTE: The default for the ELSE transition is FAIL.
    machine = StateMachine()
    machine.add_transition(machine.init_state_index, character_set, AcceptanceF=True)

    return __debug_exit(machine, stream)
def snap_conditional_expression(stream, PatternDict):
    """Parse an expression together with its optional pre- and post-condition.

    Grammar:
        conditional expression:
            expression
            expression / expression               = post conditioned expression
            expression / expression /             = pre conditioned expression
            expression / expression / expression  = pre and post conditioned expression

    TODO: <- ($8592) for pre-conditions
          -> ($8594) for post-conditions

    RETURNS (each value is passed through '__debug_exit()'):
        None                        -- no expression could be snapped.
        '__construct()' result      -- for the four cases listed above.
        the first expression as is  -- a '/' was found but nothing could be
                                       snapped behind it.
                                       NOTE(review): this is the only path
                                       that does not go through
                                       '__construct()' -- confirm intended.
    """
    __debug_entry("conditional expression", stream)

    # -- expression
    pattern_0 = snap_expression(stream, PatternDict)
    if pattern_0 is None:
        return __debug_exit(None, stream)

    # -- '/'
    #    Positions are restored absolutely. A relative 'seek(-1, 1)' would
    #    step back onto the last character of the expression whenever
    #    'read(1)' returned nothing because the end of the stream was reached.
    position = stream.tell()
    if stream.read(1) != '/':
        # (1) expression without pre and post condition
        stream.seek(position)
        # pattern_0 is already beautified by 'snap_expression()'
        result = __construct(pattern_0)
        return __debug_exit(result, stream)

    # -- expression
    pattern_1 = snap_expression(stream, PatternDict)
    if pattern_1 is None:
        return __debug_exit(pattern_0, stream)

    # -- '/'
    position = stream.tell()
    if stream.read(1) != '/':
        # (2) expression with only a post condition
        stream.seek(position)
        #     NOTE: setup_post_context() marks state origins!
        result = __construct(pattern_0, post_context=pattern_1)
        return __debug_exit(result, stream)

    # -- expression
    pattern_2 = snap_expression(stream, PatternDict)
    if pattern_2 is None:
        # (3) expression with only a pre condition
        #     NOTE: setup_pre_context() marks the state origins!
        result = __construct(pattern_1, pre_context=pattern_0)
        return __debug_exit(result, stream)

    # (4) expression with post and pre-condition
    result = __construct(pattern_1, pre_context=pattern_0, post_context=pattern_2)
    return __debug_exit(result, stream)
def snap_set_list(stream, set_operation_name, PatternDict):
    """Parse the bracketed, comma separated argument list of a set operation.

    Expected input:  '(' set_term { ',' set_term } ')'
    Whitespace is skipped before the bracket, before each set term and
    before each separator.

    stream             -- seekable stream positioned before the '('.
    set_operation_name -- name of the operation; only used in error messages.
    PatternDict        -- handed on to 'snap_set_term()'.

    RETURNS: list of the parsed sets (at least one element), passed through
             '__debug_exit()'.

    RAISES:  RegularExpressionException if the opening bracket is missing,
             if a set term cannot be parsed, or if a set term is followed by
             something else than ',' or ')'.
    """
    __debug_entry("set_list", stream)

    skip_whitespace(stream)
    if stream.read(1) != "(":
        raise RegularExpressionException("Missing opening bracket '%s' operation." % set_operation_name)

    set_list = []
    while True:
        skip_whitespace(stream)
        result = snap_set_term(stream, PatternDict)
        if result is None:
            raise RegularExpressionException("Missing set expression list after '%s' operation." % set_operation_name)
        set_list.append(result)

        skip_whitespace(stream)
        # Remember the position so that the offending character can be put
        # back absolutely. A relative 'seek(-1, 1)' would un-read a character
        # that was never read if the stream ended here.
        position = stream.tell()
        tmp = stream.read(1)
        if tmp == ",":
            continue
        if tmp != ")":
            stream.seek(position)
            raise RegularExpressionException("Missing closing ')' after after '%s' operation." % set_operation_name)
        return __debug_exit(set_list, stream)
def snap_non_control_characters(stream):
    """Snap a run of non-control characters, read as UTF8 codes, from the stream.

    Note that in UTF8 a character may consist of more than one byte. The
    resulting state machine is a chain with one transition per character;
    the last state of the chain is an acceptance state.

    This function **concatenates** incoming characters, but **repetition**
    has precedence over concatenation. So, after each character it is checked
    whether a repetition ('*', '+', '?', '{..}') follows. If so, the repeated
    character is appended to what has been collected so far and the function
    returns; remaining characters are left for the next call.

    RETURNS: the state machine, passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if a backslash is followed by
             something that 'snap_backslashed_character.do()' cannot
             interpret.
    """
    __debug_entry("non-control characters", stream)

    result = StateMachine()
    state_index = result.init_state_index

    # (*) read first character
    #     (this first value of 'position' is overwritten in step (3) before
    #      it is used)
    position = stream.tell()
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    # 0xFF ends the loop -- presumably the reader's marker for 'nothing
    # could be read'; TODO confirm against the utf8 module.
    while char_code != 0xFF:
        # (1) check against occurrence of control characters
        #     this needs to come **before** the backslashed character interpretation.
        #     NOTE: A backslashed character can be a whitespace (for example '\n').
        #     (check against 0xFF to avoid overflow in function 'chr()')
        if char_code < 0xFF \
           and (chr(char_code) in CONTROL_CHARACTERS or chr(char_code).isspace()):
            # put the terminating character back, it does not belong to the run
            stream.seek(-1, 1)
            break

        # (2) treat backslashed characters
        if char_code == ord('\\'):
            # step back onto the backslash and try a property set first
            stream.seek(-1, 1)
            trigger_set = character_set_expression.snap_property_set(stream)
            if trigger_set == None:
                stream.seek(1, 1)  # snap_property_set() leaves stream right before '\\'
                char_code = snap_backslashed_character.do(stream)
                if char_code == None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")
                trigger_set = char_code
        else:
            trigger_set = char_code

        # (3) read next character
        #     'position' marks where a possible repetition operator starts
        position = stream.tell()
        next_char_code = utf8.__read_one_utf8_code_from_stream(stream)
        #    -- check for repetition (repetition has precedence over concatenation)
        if next_char_code in [ord("+"), ord("*"), ord("?"), ord("{")]:
            # (*) create state machine that consists of a single transition
            tmp = StateMachine()
            tmp.add_transition(tmp.init_state_index, trigger_set, AcceptanceF=True)
            # -- repeat the single character state machine
            #    (go back so that the operator itself can be parsed)
            stream.seek(position)
            tmp_repeated = __snap_repetition_range(tmp, stream)
            # -- append it to the result (last state must be set to acceptance for concatenation)
            result.states[state_index].set_acceptance()
            result = sequentialize.do([result, tmp_repeated], MountToFirstStateMachineF=True)
            # as soon as there is repetition there might be more than one acceptance
            # state and thus simple concatenation via 'add_transition' fails.
            # let us return and treat the remaining chars
            # at the next call to this function.
            return __debug_exit(result, stream)

        else:
            # (*) add new transition from current state to a new state triggering
            #     on the given character.
            state_index = result.add_transition(state_index, trigger_set)

        char_code = next_char_code

    # last character in the chain triggers an 'acceptance state'
    result.states[state_index].set_acceptance()

    return __debug_exit(result, stream)
def snap_primary(stream, PatternDict):
    """Parse a 'primary' and an optional repetition command behind it.

    Grammar:
        primary:  " non_double_quote *  "       = character string
                  [ non_rect_bracket_close ]    = set of characters
                  { identifier }                = pattern replacement
                  ( expression )
                  non_control_character+        = lonely characters
                  primary repetition_cmd

    RETURNS (each value is passed through '__debug_exit()'):
        None  -- the stream is at its end, or the next character is a
                 whitespace or a control character that does not start a
                 primary. The character is put back in the latter cases.
        the beautified state machine of the primary otherwise.

    RAISES:  RegularExpressionException on a missing ')', on an unparsable
             expression in brackets, or on a lonely '*', '+' or '?'.
    """
    __debug_entry("primary", stream)

    x = stream.read(1)
    if x == "":
        return __debug_exit(None, stream)

    # -- 'primary' primary
    if x == "\"":
        result = snap_character_string.do(stream)

    elif x == "[":
        stream.seek(-1, 1)
        result = character_set_expression.do(stream)

    elif x == "{":
        result = snap_replacement(stream, PatternDict)

    elif x == ".":
        result = create_ALL_BUT_NEWLINE_state_machine()

    elif x == "(":
        __start_position = stream.tell()
        result = snap_expression(stream, PatternDict)

        # Restore absolutely: if the stream ended, 'read(1)' consumed nothing
        # and a relative 'seek(-1, 1)' would drag the last character of the
        # expression into the 'found' text of the message.
        __bracket_position = stream.tell()
        if stream.read(1) != ")":
            stream.seek(__bracket_position)
            raise RegularExpressionException("missing closing ')' after expression. found '%s'" % stream.read())

        if result is None:
            __expression_length = stream.tell() - __start_position
            stream.seek(__start_position)
            raise RegularExpressionException("expression in brackets has invalid syntax '%s'" % \
                                             stream.read(__expression_length))

    elif x.isspace():
        # a lonestanding space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException("lonely operator '%s' without expression proceeding." % x)

    elif x not in CONTROL_CHARACTERS:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_characters(stream)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it is good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    result_repeated = __snap_repetition_range(result, stream)
    if result_repeated is not None:
        result = result_repeated

    return __debug_exit(__beautify(result), stream)
def snap_primary(stream, PatternDict):
    """Parse a 'primary' and an optional repetition command behind it.

    Grammar:
        primary:  " non_double_quote *  "       = character string
                  [ non_rect_bracket_close ]    = set of characters
                  { identifier }                = pattern replacement
                  ( expression )
                  non_control_character+        = lonely characters
                  primary repetition_cmd

    RETURNS (each value is passed through '__debug_exit()'):
        None  -- the stream is at its end, or the next character is a
                 whitespace or a control character that does not start a
                 primary. The character is put back in the latter cases.
        the beautified state machine of the primary otherwise.

    RAISES:  RegularExpressionException on a lonely '*', '+' or '?' and on
             a backslash that cannot be interpreted.
    """
    __debug_entry("primary", stream)

    # Read the deciding character plus one character of lookahead; the
    # lookahead is put back right away if there was one.
    first = stream.read(1)
    peek = stream.read(1)
    if first != "" and peek != "":
        stream.seek(-1, 1)

    # -- cases that end here without a primary
    if first == "":
        return __debug_exit(None, stream)
    if first.isspace():
        # a lonestanding space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)
    if first in ("*", "+", "?"):
        raise RegularExpressionException("lonely operator '%s' without expression proceeding." % first)

    # -- 'primary' primary
    if first == "\"":
        sm = snap_character_string.do(stream)

    elif first == "[":
        stream.seek(-1, 1)
        sm = character_set_expression.do(stream, PatternDict)

    elif first == "{":
        sm = snap_replacement(stream, PatternDict)

    elif first == ".":
        sm = create_ALL_BUT_NEWLINE_state_machine()

    elif first == "(":
        sm = snap_bracketed_expression(stream, PatternDict)

    elif first == "\\":
        if peek == "C":
            # consume the 'C' that was only looked at so far
            stream.read(1)
            sm = snap_case_folded_pattern(stream, PatternDict)
        else:
            # step back onto the backslash and try a property set first
            stream.seek(-1, 1)
            trigger = character_set_expression.snap_property_set(stream)
            if trigger == None:
                # snap_property_set() leaves stream right before '\\'
                stream.seek(1, 1)
                code = snap_backslashed_character.do(stream)
                if code == None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")
                trigger = code
            sm = StateMachine()
            sm.add_transition(sm.init_state_index, trigger, AcceptanceF=True)

    elif first not in CONTROL_CHARACTERS:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        sm = snap_non_control_character(stream, PatternDict)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it is good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    repeated = __snap_repetition_range(sm, stream)
    if repeated != None:
        sm = repeated

    return __debug_exit(construct.beautify(sm), stream)
def snap_non_control_characters(stream):
    """Snap a run of non-control characters, read as UTF8 codes, from the stream.

    Note that in UTF8 a character may consist of more than one byte. The
    resulting state machine is a chain with one transition per character;
    the last state of the chain is an acceptance state.

    This function **concatenates** incoming characters, but **repetition**
    has precedence over concatenation. So, after each character it is checked
    whether a repetition ('*', '+', '?', '{..}') follows. If so, the repeated
    character is appended to what has been collected so far and the function
    returns; remaining characters are left for the next call.

    RETURNS: the state machine, passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if a backslash is followed by
             something that 'snap_backslashed_character.do()' cannot
             interpret.
    """
    __debug_entry("non-control characters", stream)

    result = StateMachine()
    state_index = result.init_state_index

    # (*) read first character
    #     (this first value of 'position' is overwritten in step (3) before
    #      it is used)
    position = stream.tell()
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    # 0xFF ends the loop -- presumably the reader's marker for 'nothing
    # could be read'; TODO confirm against the utf8 module.
    while char_code != 0xFF:
        # (1) check against occurrence of control characters
        #     this needs to come **before** the backslashed character interpretation.
        #     NOTE: A backslashed character can be a whitespace (for example '\n').
        #     (check against 0xFF to avoid overflow in function 'chr()')
        if char_code < 0xFF \
           and (chr(char_code) in CONTROL_CHARACTERS or chr(char_code).isspace()):
            # put the terminating character back, it does not belong to the run
            stream.seek(-1, 1)
            break

        # (2) treat backslashed characters
        if char_code == ord('\\'):
            # step back onto the backslash and try a property set first
            stream.seek(-1, 1)
            trigger_set = character_set_expression.snap_property_set(stream)
            if trigger_set == None:
                stream.seek(1, 1)  # snap_property_set() leaves stream right before '\\'
                char_code = snap_backslashed_character.do(stream)
                if char_code == None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")
                trigger_set = char_code
        else:
            trigger_set = char_code

        # (3) read next character
        #     'position' marks where a possible repetition operator starts
        position = stream.tell()
        next_char_code = utf8.__read_one_utf8_code_from_stream(stream)
        #    -- check for repetition (repetition has precedence over concatenation)
        if next_char_code in [ord("+"), ord("*"), ord("?"), ord("{")]:
            # (*) create state machine that consists of a single transition
            tmp = StateMachine()
            tmp.add_transition(tmp.init_state_index, trigger_set, AcceptanceF=True)
            # -- repeat the single character state machine
            #    (go back so that the operator itself can be parsed)
            stream.seek(position)
            tmp_repeated = __snap_repetition_range(tmp, stream)
            # -- append it to the result (last state must be set to acceptance for concatenation)
            result.states[state_index].set_acceptance()
            result = sequentialize.do([result, tmp_repeated], MountToFirstStateMachineF=True)
            # as soon as there is repetition there might be more than one acceptance
            # state and thus simple concatenation via 'add_transition' fails.
            # let us return and treat the remaining chars
            # at the next call to this function.
            return __debug_exit(result, stream)

        else:
            # (*) add new transition from current state to a new state triggering
            #     on the given character.
            state_index = result.add_transition(state_index, trigger_set)

        char_code = next_char_code

    # last character in the chain triggers an 'acceptance state'
    result.states[state_index].set_acceptance()

    return __debug_exit(result, stream)
def snap_primary(stream, PatternDict):
    """Parse a 'primary' and an optional repetition command behind it.

    Grammar:
        primary:  " non_double_quote *  "       = character string
                  [ non_rect_bracket_close ]    = set of characters
                  { identifier }                = pattern replacement
                  ( expression )
                  non_control_character+        = lonely characters
                  primary repetition_cmd

    RETURNS (each value is passed through '__debug_exit()'):
        None  -- the stream is at its end, or the next character is a
                 whitespace or a control character that does not start a
                 primary. The character is put back in the latter cases.
        the beautified state machine of the primary otherwise.

    RAISES:  RegularExpressionException on a missing ')', on an unparsable
             expression in brackets, or on a lonely '*', '+' or '?'.
    """
    __debug_entry("primary", stream)

    x = stream.read(1)
    if x == "":
        return __debug_exit(None, stream)

    # -- 'primary' primary
    if x == "\"":
        result = snap_character_string.do(stream)

    elif x == "[":
        stream.seek(-1, 1)
        result = character_set_expression.do(stream)

    elif x == "{":
        result = snap_replacement(stream, PatternDict)

    elif x == ".":
        result = create_ALL_BUT_NEWLINE_state_machine()

    elif x == "(":
        __start_position = stream.tell()
        result = snap_expression(stream, PatternDict)

        # Restore absolutely: if the stream ended, 'read(1)' consumed nothing
        # and a relative 'seek(-1, 1)' would drag the last character of the
        # expression into the 'found' text of the message.
        __bracket_position = stream.tell()
        if stream.read(1) != ")":
            stream.seek(__bracket_position)
            raise RegularExpressionException("missing closing ')' after expression. found '%s'" % stream.read())

        if result is None:
            __expression_length = stream.tell() - __start_position
            stream.seek(__start_position)
            raise RegularExpressionException("expression in brackets has invalid syntax '%s'" % \
                                             stream.read(__expression_length))

    elif x.isspace():
        # a lonestanding space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException("lonely operator '%s' without expression proceeding." % x)

    elif x not in CONTROL_CHARACTERS:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_characters(stream)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it is good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    result_repeated = __snap_repetition_range(result, stream)
    if result_repeated is not None:
        result = result_repeated

    return __debug_exit(__beautify(result), stream)