def parse(fh): """This function parses a mode description and enters it into the 'blackboard.mode_description_db'. Once all modes are parsed they can be translated into 'real' modes and are located in 'blackboard.mode_db'. """ # NOTE: Catching of EOF happens in caller: parse_section(...) skip_whitespace(fh) mode_name = read_identifier( fh, OnMissingStr="Missing identifier at beginning of mode definition.") # NOTE: constructor does register this mode in the mode_db new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh)) # (*) inherited modes / option_db skip_whitespace(fh) dummy = fh.read(1) if dummy not in [":", "{"]: error.log("missing ':' or '{' after mode '%s'" % mode_name, fh) if dummy == ":": __parse_option_list(new_mode, fh) # (*) read in pattern-action pairs and events while __parse_element(new_mode, fh): pass
def __parse_base_mode_list(fh, new_mode):
    """Reads the comma-separated list of base mode identifiers that follows a
    mode's ':' and stores it in 'new_mode.base_modes'. Parsing stops at '{'
    (mode body) or '<' (option), which are pushed back onto the stream.
    """
    new_mode.base_modes = []
    trailing_comma_f    = False
    while 1 + 1 == 2:
        # '{' or '<' terminates the list; unread the character for the caller.
        if   check(fh, "{"): fh.seek(-1, 1); break
        elif check(fh, "<"): fh.seek(-1, 1); break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.base_modes.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True

    if trailing_comma_f:
        # A dangling ',' is tolerated, but the user gets a warning.
        error_msg("Trailing ',' after base mode '%s'." % new_mode.base_modes[-1],
                  fh, DontExitF=True, WarningF=True)

    elif len(new_mode.base_modes) != 0:
        # This check is a 'service' -- for those who follow the old convention
        # (base modes used to be separable by whitespace alone).
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error_msg("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.base_modes[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
def __parse_base_mode_list(fh, new_mode):
    """Reads the comma-separated list of base mode identifiers that follows a
    mode's ':' and stores it in 'new_mode.derived_from_list'. Parsing stops
    at '{' (mode body) or '<' (option), which are pushed back onto the stream.
    """
    new_mode.derived_from_list = []
    trailing_comma_f           = False
    while 1 + 1 == 2:
        # '{' or '<' terminates the list; unread the character for the caller.
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        elif check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.derived_from_list.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True

    if trailing_comma_f:
        # A dangling ',' is tolerated, but the user gets a warning.
        error.warning("Trailing ',' after base mode '%s'."
                      % new_mode.derived_from_list[-1], fh)

    elif len(new_mode.derived_from_list) != 0:
        # This check is a 'service' -- for those who follow the old convention
        # (base modes used to be separable by whitespace alone).
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
def parse(fh, CodeFragmentName, ErrorOnFailureF=True, AllowBriefTokenSenderF=True, ContinueF=True):
    """Parse a code fragment that either appears as a '{ ... }' block or as a
    brief token sender introduced by '=>'.

    RETURNS: An object of class CodeUser containing line number, filename,
             and the code fragment.

             None in case of failure (only if ErrorOnFailureF is False).
    """
    assert type(ErrorOnFailureF) == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    # Peek two characters: "{" opens a normal code block, "=>" a brief sender.
    introducer = fh.read(2)

    if introducer.startswith("{"):
        if len(introducer) > 1:
            fh.seek(-1, 1)  # unput the second character
        return __parse_normal(fh, CodeFragmentName)

    if AllowBriefTokenSenderF and introducer == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    if not ErrorOnFailureF:
        fh.seek(-2, 1)  # unput both characters; caller handles the miss
        return None

    error.log("Missing code fragment after %s definition." % CodeFragmentName, fh)
def __parse_section(fh, descriptor, already_defined_list):
    """Parses one subsection of a 'token_type' section and enters the result
    into 'descriptor'.

    RETURNS: True  -- a subsection was parsed; more may follow.
             False -- the closing '}' of the token_type section was reached.
    """
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                     + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        # No identifier: either the section is closed by '}' or it is an error.
        fh.seek(position)
        if check(fh, "}"):
            fh.seek(position)
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList,
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        # 'name = <namespaced class name>;'
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False;
        check_or_die(fh, ";")

    elif word == "file_name":
        # 'file_name = <path>;'
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        # Member sub-blocks; each parser consumes up to its closing brace.
        if   word == "standard": parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh);

    elif word in token_type_code_fragment_db.keys():
        # A named code fragment subsection (e.g. 'constructor', 'destructor').
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
def parse(fh): """This function parses a mode description and enters it into the 'blackboard.mode_description_db'. Once all modes are parsed they can be translated into 'real' modes and are located in 'blackboard.mode_db'. """ # NOTE: Catching of EOF happens in caller: parse_section(...) skip_whitespace(fh) mode_name = read_identifier(fh, OnMissingStr="Missing identifier at beginning of mode definition.") # NOTE: constructor does register this mode in the mode_db new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh)) # (*) inherited modes / option_db skip_whitespace(fh) dummy = fh.read(1) if dummy not in [":", "{"]: error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh) if dummy == ":": __parse_option_list(new_mode, fh) # (*) read in pattern-action pairs and events while __parse_element(new_mode, fh): pass
def __parse_property_expression(stream, PropertyLetter, EqualConditionPossibleF=True):
    """Parses an expression of the form '\\? { X [ = Y] }' where
    ? = PropertyLetter. If the '=' operator is present then two fields are
    returned: first = left hand side, second = right hand side. Otherwise a
    single-element list is returned.

    RAISES: RegularExpressionException on any syntax violation.
    """
    assert len(PropertyLetter) == 1
    assert type(PropertyLetter) == str
    assert type(EqualConditionPossibleF) == bool

    # verify '\?'
    x = stream.read(2)
    if x != "\\" + PropertyLetter:
        # BUG FIX: the format string carries two '%s' placeholders, but '% x'
        # supplied only a single string -- raising the intended exception
        # crashed with a TypeError. Supply both arguments.
        raise RegularExpressionException("Unicode property letter '\\%s' expected, received '%s'."
                                         % (PropertyLetter, x))

    skip_whitespace(stream)

    x = stream.read(1)
    if x != "{":
        raise RegularExpressionException("Unicode property '\\%s' not followed by '{'." % PropertyLetter)

    content = __snap_until(stream, "}")

    fields = content.split("=")

    # BUG FIX (next three messages): the '%s' placeholders had no format
    # arguments applied, so the messages displayed a literal '%s'.
    if len(fields) == 0:
        raise RegularExpressionException("Unicode property expression '\\%s{}' cannot have no content."
                                         % PropertyLetter)
    if len(fields) > 2:
        raise RegularExpressionException("Unicode property expression '\\%s' can have at maximum one '='."
                                         % PropertyLetter)
    if not EqualConditionPossibleF and len(fields) == 2:
        raise RegularExpressionException("Unicode property expression '\\%s' does not allow '=' conditions"
                                         % PropertyLetter)

    # Python 2 'map' returns a list of the stripped fields.
    return map(lambda x: x.strip(), fields)
def _parse_pattern(fh):
    """Parses one entry of a 'define { ... }' section: a pattern name followed
    by its regular expression.

    RETURNS: (name, PatternShorthand) pair to be entered into the shorthand db.
    """
    name = read_identifier(fh, OnMissingStr="Missing identifier for pattern definition.")

    if blackboard.shorthand_db.has_key(name):
        error.log("Second definition of pattern '%s'.\n" % name + \
                  "Pattern names must be unique.", fh)

    skip_whitespace(fh)

    # A '}' directly after the name means the regular expression is missing.
    if check(fh, "}"):
        error.log("Missing regular expression for pattern definition '%s'." % \
                  name, fh)

    # No encoding transformation, here. Transformation happens after
    # expansion in a mode.
    pattern = regular_expression.parse(fh, AllowNothingIsFineF = True)

    if pattern.has_pre_or_post_context():
        error.log("Pattern definition with pre- and/or post-context.\n" + \
                  "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
    state_machine = pattern.extract_sm()

    value = PatternShorthand(name, state_machine, SourceRef.from_FileHandle(fh),
                             pattern.pattern_string())

    return name, value
def __parse(fh, result, IndentationSetupF=False): """Parses pattern definitions of the form: [ \t] => grid 4; [:intersection([:alpha:], [\X064-\X066]):] => space 1; In other words the right hand side *must* be a character set. """ # NOTE: Catching of EOF happens in caller: parse_section(...) # while 1 + 1 == 2: skip_whitespace(fh) if check(fh, ">"): break # A regular expression state machine pattern, identifier, sr = __parse_definition_head(fh, result) if pattern is None and IndentationSetupF: error.log("Keyword '\\else' cannot be used in indentation setup.", fh) # '__parse_definition_head()' ensures that only identifiers mentioned in # 'result' are accepted. if not IndentationSetupF: value = read_value_specifier(fh, identifier, 1) result.specify(identifier, pattern, value, sr) else: result.specify(identifier, pattern, sr) if not check(fh, ";"): error.log("Missing ';' after '%s' specification." % identifier, fh) return result
def __parse_element(new_mode, fh):
    """Parses one element of a mode body: an event handler ('on_entry', ...)
    or a pattern-action pair.

    Returns: False, if a closing '}' has been found.
             True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word):
            return True

        # Not an event handler: rewind and parse as a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        # Report EOF relative to where the current element started.
        fh.seek(position)
        error.error_eof(description, fh)

    return True
def __parse_element(new_mode, fh):
    """Parses one element of a mode body: an event handler ('on_entry', ...)
    or a pattern-action pair.

    Returns: False, if a closing '}' has been found.
             True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word):
            return True

        # Not an event handler: rewind and parse as a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        # Report EOF relative to where the current element started.
        fh.seek(position)
        error_eof(description, fh)

    return True
def snap_set_expression(stream, PatternDict):
    """Snaps a character set expression from 'stream'. Recognized forms:
       \\P{...}/\\N{...}/\\G{...}/\\E{...}  -- Unicode property sets,
       \\C{...}                             -- case folded set,
       [: ... :]                            -- set algebra expression,
       [...]                                -- traditional character set,
       {name}                               -- replacement from PatternDict.

    RETURNS: the resulting character set, or None if nothing matched.
    """
    assert stream.__class__.__name__ == "StringIO" \
           or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    # First try the Unicode property forms; they reset the stream on failure.
    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    if x == "\\C":
        return snap_case_folded_pattern(stream, PatternDict, NumberSetF=True)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)
    elif x[0] == "[":
        stream.seek(-1, 1)  # unput the second character
        result = traditional_character_set.do(stream)

    elif x[0] == "{":
        stream.seek(-1, 1)  # unput the second character
        result = snap_replacement(stream, PatternDict, StateMachineF=False)

    else:
        result = None

    return __debug_exit(result, stream)
def parse(fh, CodeFragmentName, ErrorOnFailureF=True, AllowBriefTokenSenderF=True, ContinueF=True):
    """Parses a code fragment: either a '{ ... }' block or a brief token
    sender introduced by '=>'.

    RETURNS: An object of class CodeUser containing line number, filename,
             and the code fragment.

             None in case of failure (only if ErrorOnFailureF is False).
    """
    assert type(ErrorOnFailureF) == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    # Peek two characters: '{' opens a normal block, '=>' a brief sender.
    word = fh.read(2)
    if len(word) >= 1 and word[0] == "{":
        if len(word) > 1:
            fh.seek(-1, 1)  # unput the second character
        return __parse_normal(fh, CodeFragmentName)

    elif AllowBriefTokenSenderF and word == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    elif not ErrorOnFailureF:
        fh.seek(-2, 1)  # unput both characters; caller handles the miss
        return None
    else:
        error_msg("Missing code fragment after %s definition."
                  % CodeFragmentName, fh)
def parse(fh, mode_prep_prep_db): """This function parses a mode description and enters it into the 'blackboard.mode_prep_prep_db'. Modes are represented by Mode_PrepPrep objects. """ # NOTE: Catching of EOF happens in caller: parse_section(...) skip_whitespace(fh) mode_name = read_identifier( fh, OnMissingStr="Missing identifier at beginning of mode definition.") error.insight("Mode '%s'" % mode_name) # NOTE: constructor does register this mode in the mode_db new_mode = Mode_PrepPrep(mode_name, SourceRef.from_FileHandle(fh)) if new_mode.name in mode_prep_prep_db: error.log("Mode '%s' has been defined twice.\n" % new_mode.name, new_mode.sr, DontExitF=True) error.log("Earlier definition here.", mode_prep_prep_db[new_mode.name].sr) mode_prep_prep_db[new_mode.name] = new_mode # (*) inherited modes / option_db skip_whitespace(fh) dummy = fh.read(1) if dummy not in [":", "{"]: error.log("missing ':' or '{' after mode '%s'" % mode_name, fh) if dummy == ":": __parse_option_list(new_mode, fh) # (*) read in pattern-action pairs and events while __parse_element(new_mode, fh): pass
def _base_parse(self, fh, IndentationSetupF=False): """Parses pattern definitions of the form: [ \t] => grid 4; [:intersection([:alpha:], [\X064-\X066]):] => space 1; In other words the right hand side *must* be a character set. ADAPTS: result to contain parsing information. """ # NOTE: Catching of EOF happens in caller: parse_section(...) # while 1 + 1 == 2: skip_whitespace(fh) if check(fh, ">"): break # A regular expression state machine pattern, identifier, sr = _parse_definition_head(fh, self.identifier_list) if pattern is None and IndentationSetupF: error.log("Keyword '\\else' cannot be used in indentation setup.", fh) # '_parse_definition_head()' ensures that only identifiers mentioned in # 'result' are accepted. if self.requires_count(): count = _read_value_specifier(fh, identifier, 1) self.specify(identifier, pattern, count, sr) else: self.specify(identifier, pattern, sr) if not check(fh, ";"): error.log("Missing ';' after '%s' specification." % identifier, fh) return self.finalize()
def parse(fh): """This function parses a mode description and enters it into the 'mode_description_db'. Once all modes are parsed they can be translated into 'real' modes and are located in 'blackboard.mode_db'. """ # NOTE: Catching of EOF happens in caller: parse_section(...) skip_whitespace(fh) mode_name = read_identifier(fh) if mode_name == "": error_msg("missing identifier at beginning of mode definition.", fh) # NOTE: constructor does register this mode in the mode_db new_mode = ModeDescription(mode_name, fh.name, get_current_line_info_number(fh)) # (*) inherited modes / options skip_whitespace(fh) dummy = fh.read(1) if dummy not in [":", "{"]: error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh) if dummy == ":": __parse_option_list(new_mode, fh) # (*) read in pattern-action pairs and events while __parse_element(new_mode, fh): pass # (*) check for modes w/o pattern definitions if not new_mode.has_event_handler() and not new_mode.has_own_matches(): if new_mode.options["inheritable"] != "only": new_mode.options["inheritable"] = "only" error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \ "<inheritable: only> has been added automatically.", fh, DontExitF=True)
def snap_property_set(stream):
    """Try to snap a Unicode-property based character set from 'stream'.

    Recognized introducers: '\\P' (general property expression), '\\N'
    (Name shortcut), '\\G' (General_Category shortcut), '\\E' (encoding's
    character set). On no match the stream is reset and None is returned.
    """
    start_position = stream.tell()
    introducer     = stream.read(2)

    if introducer == "\\P":
        stream.seek(start_position)
        return property.do(stream)

    if introducer == "\\N":
        stream.seek(start_position)
        return property.do_shortcut(stream, "N", "na")  # UCS Property: Name

    if introducer == "\\G":
        stream.seek(start_position)
        return property.do_shortcut(stream, "G", "gc")  # UCS Property: General_Category

    if introducer == "\\E":
        # '\E{ encoding-name }' -- the set of characters of an encoding.
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error.log("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        character_set = codec_db.get_supported_unicode_character_set(encoding_name)
        if character_set is None:
            error.log("Error occured at this place.", stream)
        return character_set

    # No property introducer: rewind so the caller can try other forms.
    stream.seek(start_position)
    return None
def snap_curly_bracketed_expression(stream, PatternDict, Name, TriggerChar, MinN=1, MaxN=1):
    """Snaps a list of RE's in '{' and '}'. The separator between the patterns
    is whitespace. 'MinN' and 'MaxN' determine the number of expected
    patterns. Set 'MaxN=INTEGER_MAX' for an arbitrary number of patterns.

    RETURNS: result = list of patterns, if MinN <= len(result) <= MaxN
             else, the function sys.exit()-s.
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character
    if not check(stream, "{"):
        error.log("Missing opening '{' after %s %s." % (Name, TriggerChar), stream)

    result = []
    while 1 + 1 == 2:
        pattern = snap_expression(stream, PatternDict)
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        elif check_whitespace(stream):
            continue
        elif check(stream, "/") or check(stream, "$"):
            error.log("Pre- or post contexts are not allowed in %s \\%s{...} expressions."
                      % (Name, TriggerChar), stream)
        else:
            error.log("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar),
                      stream)

    # Enforce the expected number of patterns.
    if MinN != MaxN:
        if len(result) < MinN:
            error.log("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if len(result) > MaxN:
            error.log("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)
    else:
        if len(result) != MinN:
            error.log("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)

    def ensure_dfa(sm):
        # Deliver only DFA-compliant state machines to the caller.
        if not sm.is_DFA_compliant():
            return nfa_to_dfa.do(sm)
        else:
            return sm

    return [ensure_dfa(sm) for sm in result]
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the
    resulting state machine.

    StateMachineF = True  -- the replacement must be a state machine (DFA).
                    False -- the replacement must be a character set.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException(
            "Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    error.verify_word_in_list(
        pattern_name, PatternDict.keys(),
        "Specifier '%s' not found in any preceeding 'define { ... }' section."
        % pattern_name, stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__ == PatternShorthand

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, DFA)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert not state_machine.has_specific_acceptance_id()

        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error.log("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            error.log("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)

        if character_set.is_empty():
            error.log("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)

        return character_set
def _parse_function(fh):
    """Parses a function definition of the form

          <signature> : <body up to end of line>

    RETURNS: (name, value) where
             name  -- the function name taken from the parsed signature.
             value -- a FunctionCall wrapping signature, body, and source
                      reference.
    """
    signature_str = read_until_character(fh, ":")
    signature     = Signature.from_string(signature_str)

    skip_whitespace(fh)

    # The function body remains a string until it is parsed at expansion time.
    # BUG FIX: the stream argument 'fh' was missing; the original called
    # read_until_character("\n"), passing the delimiter where the stream
    # belongs (compare the ":"-call above).
    function_body = read_until_character(fh, "\n").strip()

    name  = signature.function_name
    value = FunctionCall(signature, function_body,
                         Sr=SourceRef.from_FileHandle(fh))
    return name, value
def snap_set_term(stream, PatternDict):
    """Snaps a set term inside a '[: ... :]' expression: either a set
    operation ('union', 'intersection', 'difference', 'inverse'), a named
    special character set, or a nested set expression.

    RETURNS: the resulting character set.
    """
    global special_character_set_db

    __debug_entry("set_term", stream)

    operation_list     = ["union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        L      = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    result.unite_with(character_set)
            return __debug_exit(
                result.get_complement(Setup.buffer_codec.source_set), stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # Fold the remaining sets into 'result' with the requested operation.
        if word == "union":
            for set in set_list[1:]:
                result.unite_with(set)
        elif word == "intersection":
            for set in set_list[1:]:
                result.intersect_with(set)
        elif word == "difference":
            for set in set_list[1:]:
                result.subtract(set)

    elif word in character_set_list:
        # A named special character set, defined as a regular expression string.
        reg_expr = special_character_set_db[word]
        result   = traditional_character_set.do_string(reg_expr)

    elif word != "":
        error.verify_word_in_list(word, character_set_list + operation_list,
                                  "Unknown keyword '%s'." % word, stream)
    else:
        # No keyword: rewind and parse as a nested set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
def _read_value_specifier(fh, Keyword, Default=None):
    """Reads the value that follows 'Keyword': an integer, an identifier
    (variable name), or -- if neither is present -- 'Default'.

    If nothing is found and no Default is given, an error is reported.
    """
    skip_whitespace(fh)

    # An explicit integer value has highest priority.
    number = read_integer(fh)
    if number is not None:
        return number

    # not a number received, is it an identifier?
    name = read_identifier(fh)
    if name != "":
        return name

    if Default is not None:
        return Default

    error.log("Missing integer or variable name after keyword '%s'." % Keyword, fh)
def read_character_code(fh):
    """Reads a character code specification: a quoted character ('+'), a
    Unicode name (UC NAME), or a plain integer.

    RETURNS: the character code as integer, or -1 if none could be read
             (stream position is restored in that case).
    """
    # NOTE: This function is tested with the regeression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos = fh.tell()

    start = fh.read(1)
    if start == "":
        fh.seek(pos); return -1

    elif start == "'":
        # read an utf-8 char an get the token-id
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if 'backslashed char' is nonsense.
            character_code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error.log("Missing utf8-character for definition of character code by character.", fh)

        elif fh.read(1) != '\'':
            error.log("Missing closing ' for definition of character code by character.", fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C": fh.seek(pos); return -1
        # read Unicode Name
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "": fh.seek(pos); return -1
        # Get the character set related to the given name. Note, the size of the set
        # is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            # A string return value signals an unknown name.
            error.verify_word_in_list(ucs_name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % ucs_name,
                                      fh)
        elif type(character_code) not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % ucs_name, fh)
        return character_code

    # Neither quote nor 'UC': try a plain integer.
    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None: return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1
def snap_set_term(stream, PatternDict):
    """Snaps a set term inside a '[: ... :]' expression: either a set
    operation ('union', 'intersection', 'difference', 'inverse'), a named
    special character set, or a nested set expression.

    RETURNS: the resulting character set.
    """
    global special_character_set_db

    __debug_entry("set_term", stream)

    operation_list     = [ "union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        L      = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    result.unite_with(character_set)
            return __debug_exit(result.get_complement(Setup.buffer_codec.source_set), stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # Fold the remaining sets into 'result' with the requested operation.
        if word == "union":
            for set in set_list[1:]:
                result.unite_with(set)
        elif word == "intersection":
            for set in set_list[1:]:
                result.intersect_with(set)
        elif word == "difference":
            for set in set_list[1:]:
                result.subtract(set)

    elif word in character_set_list:
        # A named special character set, defined as a regular expression string.
        reg_expr = special_character_set_db[word]
        result   = traditional_character_set.do_string(reg_expr)

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
    else:
        # No keyword: rewind and parse as a nested set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
def __parse_element(new_mode, fh):
    """Parses one element of a mode body: a 'keyword_list' or 'brief' section,
    an event handler ('on_entry', ...), or a pattern-action pair.

    Returns: False, if a closing '}' has been found.
             True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        identifier = read_identifier(fh)
        if identifier == "keyword_list":
            return __parse_keyword_list(new_mode, fh)
        elif similarity.get(identifier, ["keyword_list", "key words"]) != -1:
            # Near-misses of the keyword get a warning; parsing continues.
            error.warning("'%s' is similar to keyword 'keyword_list'.\n"
                          "For clarity, use quotes." % identifier, fh)
        elif identifier == "brief":
            return __parse_brief(new_mode, fh)
        elif similarity.get(identifier, ["brief", "briefing", "briefly"]) != -1:
            error.warning("'%s' is similar to keyword 'brief'.\n"
                          "For clarity, use quotes." % identifier, fh)

        # Not a section keyword: rewind and look at the raw word.
        fh.seek(position)
        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- check for 'on_entry', 'on_exit', ...
        elif __parse_event(new_mode, fh, word):
            return True

        # Not an event handler: rewind and parse as a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(
            SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        # Report EOF relative to where the current element started.
        fh.seek(position)
        error.error_eof(description, fh)

    return True
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the
    resulting state machine.

    StateMachineF = True  -- the replacement must be a state machine.
                    False -- the replacement must be a character set.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    verify_word_in_list(pattern_name, PatternDict.keys(),
                        "Specifier '%s' not found in any preceeding 'define { ... }' section."
                        % pattern_name, stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__.__name__ == "PatternShorthand"

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, StateMachine)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert state_machine.has_origins() == False

        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error_msg("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            error_msg("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)

        if character_set.is_empty():
            error_msg("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)

        return character_set
def snap_curly_bracketed_expression(stream, PatternDict, Name, TriggerChar, MinN=1, MaxN=1):
    """Snaps a list of RE's in '{' and '}'. The separator between the patterns
    is whitespace. 'MinN' and 'MaxN' determine the number of expected
    patterns. Set 'MaxN=sys.maxint' for an arbitrary number of patterns.

    RETURNS: result = list of patterns.

             it holds: len(result) >= MinN
                       len(result) <= MaxN

             if not, the function sys.exit()-s.
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character
    if not check(stream, "{"):
        error_msg("Missing opening '{' after %s %s." % (Name, TriggerChar), stream)

    result = []
    while 1 + 1 == 2:
        pattern = snap_expression(stream, PatternDict)
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        elif check_whitespace(stream):
            continue
        elif check(stream, "/") or check(stream, "$"):
            error_msg("Pre- or post contexts are not allowed in %s \\%s{...} expressions."
                      % (Name, TriggerChar), stream)
        else:
            error_msg("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar), stream)

    # Enforce the expected number of patterns.
    if MinN != MaxN:
        if len(result) < MinN:
            error_msg("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if len(result) > MaxN:
            error_msg("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)
    else:
        if len(result) != MinN:
            error_msg("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)

    return result
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """Parse a 'token { ... }' section: a list of token identifiers, each
    optionally followed by '= <number>' and terminated by ';'.

    PARAMETERS:
      fh         -- file handle positioned after the 'token' section keyword.
      NamesOnlyF -- True  => collect names only and return the sorted list of
                             prefixed token names.
                    False => register TokenInfo objects in
                             'blackboard.token_id_db'; return None.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    token_prefix       = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF:
        db = {}
    else:
        db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' for after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            # BUG FIX: original applied '% candidate' to a format string with
            # no placeholder, which raised TypeError instead of reporting.
            error_msg("Missing valid token identifier.", fh)

        # -- check the name, if it starts with the token prefix paste a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            # BUG FIX: this branch matched 'token_prefix_plain' but reported
            # 'token_prefix' -- report the prefix that actually matched.
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix_plain) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg("Missing number after '=' for token identifier '%s'." % candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate, numeric_value,
                                  Filename=fh.name,
                                  LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result
def __parse_option_list(new_mode, fh):
    """Parse what follows a mode's ':': the inherited (base) mode list and
    then an arbitrary number of mode options. On premature end-of-file the
    stream is rewound and an error is reported."""
    start_pos = fh.tell()
    try:
        skip_whitespace(fh)
        # base modes are listed first ...
        __parse_base_mode_list(fh, new_mode)
        # ... followed by zero or more options
        keep_going = True
        while keep_going:
            keep_going = __parse_option(fh, new_mode)
    except EndOfStreamException:
        fh.seek(start_pos)
        error_msg("End of file reached while parsing options of mode '%s'." % new_mode.name, fh)
def __parse_option_list(new_mode, fh):
    """Parse the option section after a mode's ':': first the inherited
    (base) mode list, then mode options via 'mode_option.parse' until it
    reports no further option. Rewinds and reports on end-of-file."""
    start = fh.tell()
    try:
        skip_whitespace(fh)
        __parse_base_mode_list(fh, new_mode)
        while True:
            if not mode_option.parse(fh, new_mode):
                break
    except EndOfStreamException:
        fh.seek(start)
        error_eof("mode '%s'." % new_mode.name, fh)
def __parse_option_list(new_mode, fh):
    """Parse base-mode list and mode options that follow a mode's ':'.
    End-of-file during parsing rewinds the stream and reports via
    'error.error_eof'."""
    begin_pos = fh.tell()
    try:
        skip_whitespace(fh)
        __parse_base_mode_list(fh, new_mode)
        more_f = mode_option.parse(fh, new_mode)
        while more_f:
            more_f = mode_option.parse(fh, new_mode)
    except EndOfStreamException:
        fh.seek(begin_pos)
        error.error_eof("mode '%s'." % new_mode.name, fh)
def __parse_skip_option(fh, new_mode, identifier):
    """A skipper 'eats' characters at the beginning of a pattern that belong to
    a specified set of characters. A useful application is most probably the
    whitespace skipper '[ \t\n]'. The skipper definition allows quex to
    implement a very effective way to skip these regions.

    RETURNS: (pattern, trigger_set) as produced by
             'regular_expression.parse_character_set'.
    """
    pattern, trigger_set = regular_expression.parse_character_set(fh, ">")

    skip_whitespace(fh)

    if fh.read(1) != ">":
        error.log("missing closing '>' for mode option '%s'." % identifier, fh)
    elif trigger_set.is_empty():
        # BUG FIX: original applied '% identifier' to a format string without
        # a placeholder, raising TypeError instead of reporting the error.
        error.log("Empty trigger set for skipper '%s'." % identifier, fh)

    return pattern, trigger_set
def __parse_option_list(new_mode, fh):
    """Parse the inherited-mode list and subsequent options of a mode
    definition. A premature end-of-file rewinds the stream and reports."""
    entry_pos = fh.tell()
    try:
        skip_whitespace(fh)
        __parse_base_mode_list(fh, new_mode)
        parsing_f = True
        while parsing_f:
            parsing_f = __parse_option(fh, new_mode)
    except EndOfStreamException:
        fh.seek(entry_pos)
        error_msg(
            "End of file reached while parsing options of mode '%s'."
            % new_mode.name, fh)
def __parse_action(new_mode, fh, pattern_str, pattern):
    """Parse the action that follows a pattern inside a mode: either a code
    fragment, or one of the keywords 'PRIORITY-MARK' / 'DELETION'.

    PARAMETERS:
      new_mode    -- mode description to which the match is added.
      fh          -- file handle positioned right after the pattern.
      pattern_str -- the pattern's string representation.
      pattern     -- the parsed pattern object (carries '.sm').
    """
    position = fh.tell()
    try:
        skip_whitespace(fh)
        # remember the position after whitespace so we can re-parse the
        # keyword case when no code fragment is found
        position = fh.tell()

        code_obj = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False)
        if code_obj is not None:
            new_mode.add_match(pattern_str, code_obj, pattern)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, that have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since, this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern_str, pattern, pattern.sm.get_id(),
                                        fh.name, get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern_str, pattern, fh.name,
                                        get_current_line_info_number(fh))

        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.", fh)
def __create_mode_transition_and_token_sender(fh, Command):
    """Parse the argument list of a brief mode transition
    'GOTO(...)', 'GOSUB(...)' or 'GOUP(...)' and generate the target code:
    mode-change statement plus an optional token sender.

    RETURNS: generated code as a string.
    """
    assert Command in ["GOTO", "GOSUB", "GOUP"]

    position = fh.tell()
    LanguageDB = Setup.language_db

    target_mode = ""
    token_sender = ""
    if check(fh, "("):
        skip_whitespace(fh)
        # GOUP takes no target mode; GOTO/GOSUB read it as first argument
        if Command != "GOUP":
            target_mode = __read_token_identifier(fh)
            skip_whitespace(fh)

        if check(fh, ")"):
            # no token sender specified
            token_sender = ""
        elif Command == "GOUP" or check(fh, ","):
            # optional second argument: 'TOKEN_NAME(args...)'
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)
            if check(fh, ","):
                error_msg(
                    "Missing opening '(' after token name specification.\n"
                    "Note, that since version 0.50.1 the syntax for token senders\n"
                    "inside brief mode transitions is like:\n\n"
                    "  => GOTO(MYMODE, QUEX_TKN_MINE(Argument0, Argument1, ...));\n", fh)
            token_sender = __create_token_sender_by_token_name(fh, token_name)
            if check(fh, ")") == False:
                error_msg("Missing closing ')' or ',' after '%s'." % Command, fh)
        else:
            # neither ')' nor ',' -- rewind so the error points at the start
            fh.seek(position)
            error_msg("Missing closing ')' or ',' after '%s'." % Command, fh)

    if check(fh, ";") == False:
        error_msg("Missing ')' or ';' after '%s'." % Command, fh)

    if Command in ["GOTO", "GOSUB"] and target_mode == "":
        error_msg(
            "Command %s requires at least one argument: The target mode." % Command, fh)

    # Code for mode change
    if Command == "GOTO":
        txt = LanguageDB.MODE_GOTO(target_mode)
    elif Command == "GOSUB":
        txt = LanguageDB.MODE_GOSUB(target_mode)
    else:
        txt = LanguageDB.MODE_GOUP()

    # Code for token sending
    txt += token_sender

    return txt
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:

          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.

       One regular expression can have more than one name, but one name can
       only have one regular expression.

       Registers each definition as a PatternShorthand in
       'blackboard.shorthand_db'.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(
            fh, OnMissingStr="Missing identifier for pattern definition.")

        # pattern names must be unique
        if blackboard.shorthand_db.has_key(pattern_name):
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine,
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:

          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.

       One regular expression can have more than one name, but one name can
       only have one regular expression.

       Registers each definition in 'blackboard.shorthand_db'.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True,
                                         AllowStateMachineTrafoF = False)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine,
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
def __create_mode_transition_and_token_sender(fh, Op):
    """Parse the argument list of a brief mode transition
    'GOTO(...)', 'GOSUB(...)' or 'GOUP(...)' and generate the target code:
    mode-change statement plus an optional token sender.

    RETURNS: generated code as a string.
    """
    assert Op in ["GOTO", "GOSUB", "GOUP"]

    position = fh.tell()

    target_mode = ""
    token_sender = ""
    if check(fh, "("):
        skip_whitespace(fh)
        # GOUP takes no target mode; GOTO/GOSUB read it as first argument
        if Op != "GOUP":
            target_mode = __read_token_identifier(fh)
            skip_whitespace(fh)

        if check(fh, ")"):
            # no token sender specified
            token_sender = ""
        elif Op == "GOUP" or check(fh, ","):
            # optional second argument: 'TOKEN_NAME(args...)'
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)
            if check(fh, ","):
                error.log("Missing opening '(' after token name specification.\n"
                          "Note, that since version 0.50.1 the syntax for token senders\n"
                          "inside brief mode transitions is like:\n\n"
                          "  => GOTO(MYMODE, QUEX_TKN_MINE(Argument0, Argument1, ...));\n", fh)
            token_sender = __create_token_sender_by_token_name(fh, token_name)
            if check(fh, ")") == False:
                error.log("Missing closing ')' or ',' after '%s'." % Op, fh)
        else:
            # neither ')' nor ',' -- rewind so the error points at the start
            fh.seek(position)
            error.log("Missing closing ')' or ',' after '%s'." % Op, fh)

    if check(fh, ";") == False:
        error.log("Missing ')' or ';' after '%s'." % Op, fh)

    if Op in ["GOTO", "GOSUB"] and target_mode == "":
        error.log("Op %s requires at least one argument: The target mode." % Op, fh)

    # Code for mode change
    if Op == "GOTO":
        txt = Lng.MODE_GOTO(target_mode)
    elif Op == "GOSUB":
        txt = Lng.MODE_GOSUB(target_mode)
    else:
        txt = Lng.MODE_GOUP()

    # Code for token sending
    txt += token_sender

    return txt
def parse_pattern_name_definitions(fh):
    """Parse a 'define { ... }' region consisting of entries

          NAME   regular-expression

    one per line ('//' comments only, at line start). Each entry is
    registered as a PatternShorthand in 'blackboard.shorthand_db'.
    Names must be unique; a name maps to exactly one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)
        if check(fh, "}"):
            return

        # name of the entry
        skip_whitespace(fh)
        name = read_identifier(fh,
                               OnMissingStr="Missing identifier for pattern definition.")

        if blackboard.shorthand_db.has_key(name):
            error.log("Second definition of pattern '%s'.\n" % name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)
        if check(fh, "}"):
            error.log("Missing regular expression for pattern definition '%s'." % \
                      name, fh)

        # Store the state machine untransformed; codec transformation happens
        # later, once the shorthand is expanded inside a mode.
        pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)
        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)

        blackboard.shorthand_db[name] = \
            PatternShorthand(name, pattern.sm,
                             SourceRef.from_FileHandle(fh), pattern.pattern_string())
def __parse_element(new_mode, fh):
    """Parse one element of a mode body: either the closing '}', an event
    handler ('on_entry', 'on_exit', ...), or a pattern-action pair.

    Returns: False, if a closing '}' has been found.
             True, else.
    """
    position = fh.tell()
    try:
        # 'description' tracks what is currently being parsed, so the EOF
        # handler below can report a precise message.
        description = "Pattern or event handler name.\n" + \
                      "Missing closing '}' for end of mode"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word):
            return True

        # not an event handler: rewind and parse as a regular expression
        fh.seek(position)
        description = "Start of mode element: regular expression"
        pattern_str, pattern = regular_expression.parse(fh)

        if new_mode.has_pattern(pattern_str):
            previous = new_mode.get_pattern_action_pair(pattern_str)
            error_msg("Pattern has been defined twice.", fh, DontExitF=True)
            error_msg("First defined here.",
                      previous.action().filename, previous.action().line_n)

        position = fh.tell()
        description = "Start of mode element: code fragment for '%s'" % pattern_str

        __parse_action(new_mode, fh, pattern_str, pattern)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing %s." % description, fh)

    return True
def snap_property_set(stream):
    """Try to snap a property-based character set expression from 'stream':

       \\P{...} -- general UCS property expression,
       \\N{...} -- shortcut for the 'Name' property,
       \\G{...} -- shortcut for the 'General_Category' property,
       \\E{...} -- character set of a supported encoding.

    Returns the resulting character set, or None (stream rewound) when no
    such expression starts here.
    """
    start = stream.tell()
    marker = stream.read(2)

    if marker == "\\E":
        # encoding shortcut: name in '{...}'; the two marker chars stay consumed
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        return codec_db.get_supported_unicode_character_set(encoding_name, FH=stream)

    # all remaining cases rewind, so the handler sees the full expression
    stream.seek(start)
    if marker == "\\P":
        return property.do(stream)
    elif marker == "\\N":
        return property.do_shortcut(stream, "N", "na")  # UCS Property: Name
    elif marker == "\\G":
        return property.do_shortcut(stream, "G", "gc")  # UCS Property: General_Category
    return None
def __parse_range_skipper_option(fh, identifier, new_mode): """A non-nesting skipper can contain a full fledged regular expression as opener, since it only effects the trigger. Not so the nested range skipper-see below. """ # Range state machines only accept 'strings' not state machines # Pattern: opener 'white space' closer 'white space' '>' skip_whitespace(fh) opener_pattern = regular_expression.parse_non_precontexted_pattern( fh, identifier, ">", AllowNothingIsFineF=False) _assert_pattern_constaints(opener_pattern, "Skip range opener", fh) skip_whitespace(fh) closer_pattern = regular_expression.parse_non_precontexted_pattern( fh, identifier, ">", AllowNothingIsFineF=True) _assert_pattern_constaints(closer_pattern, "Skip range closer", fh) opener_pattern.set_pattern_string("<%s open>" % identifier) closer_pattern.set_pattern_string("<%s close>" % identifier) # -- closer skip_whitespace(fh) if fh.read(1) != ">": error.log("missing closing '>' for mode option '%s'" % identifier, fh) return SkipRangeData(opener_pattern, closer_pattern)
def parse(fh):
    """Parse a 'define { ... }' region of entries

          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          \\function SOMETHING(sm = X, set = Y, number = N):

    i.e. 'name' whitespace 'regular expression' whitespace newline, or a
    '\\function' definition. '//' comments only, at line start. One regular
    expression may carry several names, but each name maps to exactly one
    regular expression. Results go into 'blackboard.shorthand_db'.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)
        if check(fh, "}"):
            return

        # each entry is either a '\function' or a plain pattern definition
        skip_whitespace(fh)
        if check(fh, "\\function"):
            entry_name, entry = _parse_function(fh)
        else:
            entry_name, entry = _parse_pattern(fh)

        blackboard.shorthand_db[entry_name] = entry
def __parse_range_skipper_option(fh, identifier, new_mode): """A non-nesting skipper can contain a full fledged regular expression as opener, since it only effects the trigger. Not so the nested range skipper-see below. """ # Range state machines only accept 'strings' not state machines # Pattern: opener 'white space' closer 'white space' '>' skip_whitespace(fh) opener_pattern, opener_sequence = regular_expression.parse_character_string( fh, ">") skip_whitespace(fh) closer_pattern, closer_sequence = regular_expression.parse_character_string( fh, ">") # -- closer skip_whitespace(fh) if fh.read(1) != ">": error.log("missing closing '>' for mode option '%s'" % identifier, fh) elif len(opener_sequence) == 0: error.log("Empty sequence for opening delimiter.", fh) elif len(closer_sequence) == 0: error.log("Empty sequence for closing delimiter.", fh) return SkipRangeData(opener_pattern, opener_sequence, \ closer_pattern, closer_sequence)
def snap_set_list(stream, set_operation_name, PatternDict):
    """Parse the parenthesized, comma-separated argument list of a set
    operation, e.g. '(A, B, C)'.

    RETURNS: list of parsed set terms (via __debug_exit).
    RAISES:  RegularExpressionException on missing '(', empty term,
             or missing closing ')'.
    """
    __debug_entry("set_list", stream)

    skip_whitespace(stream)
    if stream.read(1) != "(":
        raise RegularExpressionException(
            "Missing opening bracket '%s' operation." % set_operation_name)

    set_list = []
    while 1 + 1 == 2:
        skip_whitespace(stream)
        result = snap_set_term(stream, PatternDict)
        if result is None:
            raise RegularExpressionException(
                "Missing set expression list after '%s' operation." % set_operation_name)
        set_list.append(result)

        skip_whitespace(stream)
        tmp = stream.read(1)
        if tmp != ",":
            if tmp != ")":
                # neither separator nor terminator: rewind the read character
                stream.seek(-1, 1)
                raise RegularExpressionException(
                    "Missing closing ')' after after '%s' operation." % set_operation_name)
            # ')' terminates the list
            return __debug_exit(set_list, stream)
def __parse_brief_token_sender(fh, ContinueF):
    """Parse a brief token sender such as '=> QUEX_TKN_X(...);' or a brief
    mode transition 'GOTO/GOSUB/GOUP(...)'.

    Returns a CodeUser with the generated code, or None if no code resulted.
    """
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        # numeric token id spec first; returns -1 when absent
        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if len(code) != 0:
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return CodeUser(code, SourceRef.from_FileHandle(fh))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error_eof("token", fh)
def __parse_brief_token_sender(fh, ContinueF):
    """Parse a brief token sender such as '=> QUEX_TKN_X(...);' or a brief
    mode transition 'GOTO/GOSUB/GOUP(...)'.

    Returns a UserCodeFragment with the generated code, or None if no code
    resulted.
    """
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    LanguageDB = Setup.language_db

    position = fh.tell()
    line_n = get_current_line_info_number(fh) + 1
    try:
        skip_whitespace(fh)
        position = fh.tell()

        # numeric token id spec first; returns -1 when absent
        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if code != "":
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return UserCodeFragment(code, fh.name, line_n, LanguageDB)
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing token shortcut.", fh)
def __parse_brief_token_sender(fh, ContinueF):
    """Parse a brief token sender such as '=> QUEX_TKN_X(...);' or a brief
    mode transition 'GOTO/GOSUB/GOUP(...)'.

    Returns a CodeUser with the generated code, or None if no code resulted.
    """
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        # numeric token id spec first; returns -1 when absent
        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if len(code) != 0:
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return CodeUser(code, SourceRef.from_FileHandle(fh))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
def read_option_start(fh):
    """Check whether a mode option '<name:' begins at the current position.

    Returns the option name, or None when the next character is not '<'
    (in which case the consumed character is NOT rewound, matching the
    original behavior)."""
    skip_whitespace(fh)

    if fh.read(1) != "<":
        return None

    skip_whitespace(fh)
    name = read_identifier(
        fh, OnMissingStr="Missing identifer after start of mode option '<'").strip()
    skip_whitespace(fh)

    if fh.read(1) != ":":
        error.log("missing ':' after option name '%s'" % name, fh)
    skip_whitespace(fh)

    return name
def get_codec_transformation_info(Codec=None, FileName=None, FH=-1, LineN=None):
    """Provides the information about the relation of character codes in a
    particular coding to unicode character codes. It is provided in the
    following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin),
         ...
       ]

       Arguments FH and LineN correspond to the arguments of error_msg.
    """
    assert Codec is not None or FileName is not None

    if FileName is not None:
        file_name = FileName
    else:
        # resolve codec aliases to the canonical database file name
        distinct_codec = __get_distinct_codec_name_for_alias(Codec)
        file_name = __codec_db_path + "/%s.dat" % distinct_codec

    fh = open_file_or_die(file_name, "rb")

    # Read coding into data structure
    transformation_list = []
    try:
        # the loop terminates via EndOfStreamException from read_integer
        while 1 + 1 == 2:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_msg("Missing integer (source interval begin) in codec file.", fh)

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_msg("Missing integer (source interval size) in codec file.", fh)

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_msg("Missing integer (target interval begin) in codec file.", fh)

            source_end = source_begin + source_size
            transformation_list.append([source_begin, source_end, target_begin])

    except EndOfStreamException:
        pass
    finally:
        # FIX: the original leaked the file handle; close it on every path
        fh.close()

    return transformation_list
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    Each parsed record '(begin, size, target_begin)' is appended to
    'section_list' as [source_begin, source_end, target_begin].

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
             [2] Error string, or None when the whole file parsed cleanly.
    """
    source_set = NumberSet()
    drain_set = NumberSet()

    error_str = None

    try:
        # loop ends on the first malformed record (error_str set) or on
        # EndOfStreamException from read_integer
        while error_str is None:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                continue

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file."
                continue

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                continue

            source_end = source_begin + source_size
            # FIX: idiomatic method call; original used the unbound form
            # 'list.append(section_list, ...)'
            section_list.append([source_begin, source_end, target_begin])

            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        pass

    return source_set, drain_set, error_str