def snap_set_expression(stream, PatternDict):
    """Snap a character set expression from 'stream'.

    The following alternatives are tried in the given order:

       property set        = whatever snap_property_set() accepts
       backslash 'C' ...   = case folded pattern (as number set)
       [: set_term :]      = set term enclosed in '[:' and ':]'
       [ ... ]             = traditional character set
       { ... }             = replacement (not as state machine)

    RETURNS: The result of snap_property_set() or of
             snap_case_folded_pattern() directly, if one of them applies.
             Otherwise, the result of the matching sub-parser handed
             through __debug_exit(). If nothing matches--this includes the
             end of the stream--the result handed to __debug_exit() is None.

    RAISES:  RegularExpressionException, if an opening '[:' is not followed
             by a closing ':]'.
    """
    assert     stream.__class__.__name__ == "StringIO" \
            or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    # At the end of the stream 'x' may contain less than two characters.
    # The first character is therefore taken by slicing: 'x[0]' raises an
    # IndexError on the empty string, whereas 'x[:1]' simply delivers "".
    first_character = x[:1]

    if x == "\\C":
        return snap_case_folded_pattern(stream, PatternDict, NumberSetF=True)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" +
                                             "found: '%s'" % x)

    elif first_character == "[":
        # Only the bracket is consumed. The second character was a mere
        # lookahead; give it back--provided that it has been read at all.
        if len(x) == 2: stream.seek(-1, 1)
        result = traditional_character_set.do(stream)

    elif first_character == "{":
        # Same as above: consume the brace, give back the lookahead.
        if len(x) == 2: stream.seek(-1, 1)
        result = snap_replacement(stream, PatternDict, StateMachineF=False)

    else:
        # Nothing recognized. NOTE: The characters which have been read are
        # not given back to the stream at this point.
        result = None

    return __debug_exit(result, stream)
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "       = character string
                 [ non_rect_bracket_close ]    = set of characters
                 { identifier }                = pattern replacement
                 ( expression )
                 non_control_character+        = lonely characters
                 primary repetition_cmd

    If the stream is at its end, or if the next character is whitespace or
    a control character, then nothing is snapped: the stream is left in
    front of that character and None is handed to __debug_exit().
    Otherwise, the snapped state machine--possibly with a repetition
    applied--is run through beautifier.do() and handed to __debug_exit().

    RAISES: RegularExpressionException on a lonely '*', '+', or '?' and on
            a backslash followed by an unrecognized character code.
    """
    __debug_entry("primary", stream)

    head = stream.read(1)
    peek = stream.read(1)
    if head and peek:
        # The second character is only peeked at; put it back.
        stream.seek(-1, 1)

    if not head:
        return __debug_exit(None, stream)

    if head.isspace():
        # A lonestanding space ends the regular expression.
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    if head in ("*", "+", "?"):
        raise RegularExpressionException("lonely operator '%s' without expression proceeding." % head)

    # -- 'primary' primary
    if head == '"':
        sm = snap_character_string.do(stream)

    elif head == "[":
        stream.seek(-1, 1)
        sm = character_set_expression.do(stream, PatternDict)

    elif head == "{":
        sm = snap_replacement(stream, PatternDict)

    elif head == ".":
        sm = create_ALL_BUT_NEWLINE_state_machine()

    elif head == "(":
        sm = snap_bracketed_expression(stream, PatternDict)

    elif head == "\\":
        if peek == "C":
            stream.read(1)   # step over the 'C'
            sm = snap_case_folded_pattern(stream, PatternDict)

        elif peek == "R":
            sm = get_expression_in_brackets(stream, PatternDict,
                                            "reverse operator", "R").get_inverse()

        elif peek == "A":
            sm = get_expression_in_brackets(stream, PatternDict,
                                            "anti-pattern operator", "A")
            sm.transform_to_anti_pattern()

        else:
            # Step back in front of the backslash and try a property set.
            stream.seek(-1, 1)
            triggers = character_set_expression.snap_property_set(stream)
            if triggers is None:
                # snap_property_set() leaves the stream right before the
                # backslash; step over it.
                stream.seek(1, 1)
                triggers = snap_backslashed_character.do(stream)
                if triggers is None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")

            sm = StateMachine()
            sm.add_transition(sm.init_state_index, triggers, AcceptanceF=True)

    elif head in CONTROL_CHARACTERS:
        # Not a valid primary.
        # NOTE (from the original author): This includes the '$' sign
        # ('end of line'). It is in CONTROL_CHARACTERS but not checked
        # against above; it is handled on the very top level.
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    else:
        # NOTE: The backslash is not among the control characters--for a
        # reason. It is used to define, for example, character codes
        # using '\x' etc.
        stream.seek(-1, 1)
        sm = snap_non_control_character(stream, PatternDict)

    # -- optional repetition command?
    repeated = __snap_repetition_range(sm, stream)
    if repeated is not None:
        sm = repeated

    return __debug_exit(beautifier.do(sm), stream)
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "       = character string
                 [ non_rect_bracket_close ]    = set of characters
                 { identifier }                = pattern replacement
                 ( expression )
                 non_control_character+        = lonely characters
                 primary repetition_cmd

    Reads one character plus one character of lookahead and dispatches on
    them. Where no primary starts (end of stream, whitespace, control
    character) None is handed to __debug_exit(); in the latter two cases
    the stream is stepped back in front of the offending character.
    Otherwise, the state machine--with an optional repetition applied--is
    passed through beautifier.do() and then through __debug_exit().

    RAISES: RegularExpressionException for a lonely '*', '+', '?' and for
            a backslash with an unrecognized character code behind it.
    """
    __debug_entry("primary", stream)

    first  = stream.read(1)
    second = stream.read(1)
    # Undo the lookahead read, if there was something to read.
    if first != "" and second != "":
        stream.seek(-1, 1)

    if first == "":
        return __debug_exit(None, stream)

    # -- 'primary' primary
    if first == "\"":
        machine = snap_character_string.do(stream)

    elif first == "[":
        stream.seek(-1, 1)
        machine = character_set_expression.do(stream, PatternDict)

    elif first == "{":
        machine = snap_replacement(stream, PatternDict)

    elif first == ".":
        machine = create_ALL_BUT_NEWLINE_state_machine()

    elif first == "(":
        machine = snap_bracketed_expression(stream, PatternDict)

    elif first.isspace():
        # a lonestanding space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif first in ("*", "+", "?"):
        message = "lonely operator '%s' without expression proceeding." % first
        raise RegularExpressionException(message)

    elif first == "\\":
        if second == "C":
            stream.read(1)
            machine = snap_case_folded_pattern(stream, PatternDict)

        elif second == "R":
            bracketed = get_expression_in_brackets(stream, PatternDict,
                                                   "reverse operator", "R")
            machine = bracketed.get_inverse()

        elif second == "A":
            machine = get_expression_in_brackets(stream, PatternDict,
                                                 "anti-pattern operator", "A")
            machine.transform_to_anti_pattern()

        else:
            stream.seek(-1, 1)
            trigger_set = character_set_expression.snap_property_set(stream)
            if trigger_set is None:
                # snap_property_set() leaves the stream right before the
                # backslash. Skip it.
                stream.seek(1, 1)
                trigger_set = snap_backslashed_character.do(stream)
                if trigger_set is None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")

            machine = StateMachine()
            machine.add_transition(machine.init_state_index, trigger_set,
                                   AcceptanceF=True)

    elif first not in CONTROL_CHARACTERS:
        # NOTE: The backslash is not inside the control characters--for a
        #       reason. It is used to define, for example, character codes
        #       using '\x' etc.
        stream.seek(-1, 1)
        machine = snap_non_control_character(stream, PatternDict)

    else:
        # This is not a valid primary.
        # NOTE (original author): This includes the '$' sign which means
        #       'end of line'. The '$' sign is in CONTROL_CHARACTERS, but is
        #       not checked against. It is good to leave here on '$',
        #       because it is handled on the very top level.
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    with_repetition = __snap_repetition_range(machine, stream)
    machine = machine if with_repetition is None else with_repetition

    return __debug_exit(beautifier.do(machine), stream)
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "       = character string
                 [ non_rect_bracket_close ]    = set of characters
                 { identifier }                = pattern replacement
                 ( expression )
                 non_control_character+        = lonely characters
                 primary repetition_cmd

    Reads a single character and dispatches on it. If no primary starts at
    the current position (end of stream, whitespace, control character, or
    SPECIAL_TERMINATOR), None is handed to __debug_exit(); except at the
    end of the stream, the character is given back first. Otherwise, the
    state machine--with an optional repetition applied--is passed through
    beautifier.do() and then through __debug_exit().

    RAISES: RegularExpressionException for a lonely '*', '+', '?' and for
            a backslash with an unrecognized character code behind it.
    """
    __debug_entry("primary", stream)

    head = stream.read(1)
    if not head:
        return __debug_exit(None, stream)

    if head.isspace():
        # A lonestanding space ends the regular expression.
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    if head in ("*", "+", "?"):
        raise RegularExpressionException("lonely operator '%s' without expression proceeding." % head)

    # -- 'primary' primary
    if head == '"':
        sm = snap_character_string.do(stream)

    elif head == "[":
        stream.seek(-1, 1)
        sm = snap_character_set_expression(stream, PatternDict)

    elif head == "{":
        sm = snap_replacement(stream, PatternDict)

    elif head == ".":
        sm = create_ALL_BUT_NEWLINE_state_machine(stream)

    elif head == "(":
        sm = snap_bracketed_expression(stream, PatternDict)

    elif head == "\\":
        sm = snap_command(stream, PatternDict)
        if sm is None:
            # Not a command: step back by one character and try a
            # property set.
            stream.seek(-1, 1)
            triggers = snap_property_set(stream)
            if triggers is None:
                # snap the '\'
                stream.read(1)
                triggers = snap_backslashed_character.do(stream)
                if triggers is None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")

            sm = StateMachine()
            sm.add_transition(sm.init_state_index, triggers, AcceptanceF=True)

    elif head in CONTROL_CHARACTERS or head == SPECIAL_TERMINATOR:
        # Not a valid primary.
        # NOTE (from the original author): This includes the '$' sign
        # ('end of line'). It is in CONTROL_CHARACTERS but not checked
        # against above; it is handled on the very top level.
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    else:
        # NOTE: The backslash is not among the control characters--for a
        # reason. It is used to define, for example, character codes
        # using '\x' etc.
        stream.seek(-1, 1)
        sm = snap_non_control_character(stream, PatternDict)

    # -- optional repetition command?
    repeated = __snap_repetition_range(sm, stream)
    if repeated is not None:
        sm = repeated

    # Clean-up is required here: something goes wrong with pseudo-ambiguous
    # post contexts otherwise. TODO: Investigate why?
    # See the tests in the generator/TEST directory.
    return __debug_exit(beautifier.do(sm), stream)