def get_test_application(counter_db, ReferenceP, CT): # Setup.buffer_element_specification_prepare() if codec == "utf_32_le" or codec == "ascii": Setup.buffer_codec_set(bc_factory.do("unicode"), LexatomSizeInBytes=4) elif codec == "utf_8": Setup.buffer_codec_set(bc_factory.do("utf8"), LexatomSizeInBytes=1) elif codec == "utf_16_le": Setup.buffer_codec_set(bc_factory.do("utf16"), LexatomSizeInBytes=2) else: Setup.buffer_codec_set(bc_factory.do(codec), LexatomSizeInBytes=1) # (*) Generate Code ccfactory = CountInfoMap.from_LineColumnCount(counter_db, Setup.buffer_codec.source_set, Lng.INPUT_P()) counter_function_name, \ counter_str = counter.get(ccfactory, "TEST_MODE") counter_str = counter_str.replace("static void", "void") # Make sure that the counter is implemented using reference pointer found_n = 0 for i, line in enumerate(counter_str.splitlines()): if line.find("reference_p") != -1: found_n += 1 if found_n == 3: break # [RP] Verify that a reference pointer has been used or not used according # to what was specified. # 1. place: definition, 2. place: reference pointer set, 3. place: add. if ReferenceP: assert found_n >= 3, "Counter has not been setup using a reference pointer." else: assert found_n == 0, "Counter has been setup using a reference pointer." open("./data/test.c", "wb").write("#include <data/check.h>\n\n" + counter_str) # (*) Compile os.system("rm -f test") compile_str = "gcc -Wall -Werror -I. -ggdb ./data/check.c ./data/test.c " \ + " -D__QUEX_OPTION_COUNTER" \ + " -DDEF_COUNTER_FUNCTION='%s' " % counter_function_name \ + " -DDEF_FILE_NAME='\"data/input.txt\"' " \ + " -DDEF_CHARACTER_TYPE=%s " % CT \ + " -o test" # + " -DDEF_DEBUG_TRACE " print "## %s" % compile_str os.system(compile_str)
def get_transition_function(iid_map, Codec):
    """Render C code text for a transition function derived from 'iid_map'.

    The incidence-id map is turned into a state machine, transformed into
    the configured buffer codec, analyzed, and printed.  Every incidence id
    gets a labeled 'return (int)<iid>;' statement; a final drop-out label
    returns -1.
    """
    if Codec == "UTF8":
        Setup.buffer_codec_set(bc_factory.do("utf8"), 1)
    else:
        Setup.buffer_codec_set(bc_factory.do("unicode"), -1)

    state_machine        = StateMachine.from_IncidenceIdMap(iid_map)
    dummy, state_machine = Setup.buffer_codec.do_state_machine(state_machine,
                                                               beautifier)
    analyzer_obj = analyzer_generator.do(state_machine, engine.CHARACTER_COUNTER)

    code = Lng.GET_PLAIN_STRINGS(do_analyzer(analyzer_obj))
    code.append("\n")
    for character_set, iid in iid_map:
        code.append("%s return (int)%s;\n" % (Lng.LABEL(DoorID.incidence(iid)), iid))
    code.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1)))
    return "".join(code)
def test(TestString):
    """Parse 'TestString' as a regular expression, prepare its count info
    against the module-global 'counter_db', and print the result.

    Command line flags (sys.argv):
      'BeginOfLine' -- prefix the expression with '^'.
      'UTF8'        -- use the utf8 buffer codec; otherwise utf16.
    """
    # Escape newlines/tabs so the expression prints on a single line.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    print ("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")
    pattern = core.do(TestString, {})

    # Prepare transformation info according to choice.
    # Setup.buffer_element_specification_prepare()
    if "UTF8" in sys.argv:
        Setup.buffer_codec_set(bc_factory.do("utf8"), 1)
    else:
        Setup.buffer_codec_set(bc_factory.do("utf16"), 2)

    # Count
    pattern.prepare_count_info(counter_db, Setup.buffer_codec)
    print ("info = {\n %s\n}\n" % str(pattern.count_info()).replace("\n", "\n "))
def do(PatternActionPairList, TestStr, PatternDictionary={},
       Language="ANSI-C-PlainMemory",
       QuexBufferSize=15, # DO NOT CHANGE!
       SecondPatternActionPairList=[],
       QuexBufferFallbackN=-1,
       ShowBufferLoadsF=False,
       AssertsActionvation_str="-DQUEX_OPTION_ASSERTS"):
    """Configure the global Setup for the requested 'Language' flavor and
    compile the shorthand pattern dictionary.

    NOTE(review): 'PatternDictionary={}' and 'SecondPatternActionPairList=[]'
    are mutable default arguments shared across calls -- confirm that no
    caller mutates them.
    """
    BufferLimitCode = 0
    Setup.buffer_limit_code = BufferLimitCode
    Setup.buffer_codec_set(bc_factory.do("unicode", None), LexatomSizeInBytes=1)

    __Setup_init_language_database(Language)

    CompileOptionStr = ""
    computed_goto_f  = False
    FullLanguage     = Language
    if Language.find("StrangeStream") != -1:
        CompileOptionStr += " -DQUEX_OPTION_STRANGE_ISTREAM_IMPLEMENTATION "

    # "-CG" suffix selects the computed-goto code generator.
    if Language.find("-CG") != -1:
        Language = Language.replace("-CG", "")
        CompileOptionStr += " -DQUEX_OPTION_COMPUTED_GOTOS "
        computed_goto_f  = True

    # Language suffixes select a state compression strategy.
    if Language == "Cpp-Template":
        Language = "Cpp"
        # Shall template compression be used?
        Setup.compression_type_list = [ E_Compression.TEMPLATE ]
        Setup.compression_template_min_gain = 0
    elif Language == "Cpp-Path":
        Language = "Cpp"
        Setup.compression_type_list = [ E_Compression.PATH ]
    elif Language == "Cpp-PathUniform":
        Language = "Cpp"
        Setup.compression_type_list = [ E_Compression.PATH_UNIFORM ]
    elif Language == "ANSI-C-PathTemplate":
        Language = "ANSI-C"
        Setup.compression_type_list = [ E_Compression.PATH, E_Compression.TEMPLATE ]
        Setup.compression_template_min_gain = 0

    try:
        adapted_dict = {}
        for key, regular_expression in PatternDictionary.items():
            string_stream = StringIO(regular_expression)
            pattern       = regex.do(string_stream, adapted_dict)
            # It is ESSENTIAL that the state machines of defined patterns do not
            # have origins! Actually, there are not more than patterns waiting
            # to be applied in regular expressions. The regular expressions
            # can later be origins.
            assert pattern.sm.has_origins() == False
            adapted_dict[key] = PatternShorthand(key, pattern.sm)

    except RegularExpressionException, x:
        print "Dictionary Creation:\n" + repr(x)
def buffer_setup(self, LexatomTypeName, LexatomSizeInByte, BufferEncoding,
                 BufferEncodingFileName=""):
    """Install a buffer encoding and the matching lexatom description.

    Builds the encoding via the transformation factory, registers a new
    Lexatom object, clips the encoding's ranges to the lexatom type's
    value range, and stores the encoding on the instance.
    """
    import quex.engine.state_machine.transformation.core as bc_factory

    new_encoding = bc_factory.do(BufferEncoding, BufferEncodingFileName)
    self.lexatom_set(
        Lexatom(self.language_db, new_encoding, LexatomTypeName,
                LexatomSizeInByte))
    # The lexatom type bounds what code units the encoding may produce.
    new_encoding.adapt_ranges_to_lexatom_type_range(self.lexatom.type_range)
    self.__buffer_encoding = new_encoding
def setup_buffer(BufferLimitCode):
    """Configure the global Setup with a plain 'unicode' buffer codec.

    NOTE(review): the incoming 'BufferLimitCode' is discarded and the limit
    code is forced to 0 -- confirm that ignoring the argument is intended.
    """
    Setup.buffer_limit_code = 0
    Setup.buffer_element_specification_prepare()
    Setup.buffer_codec_set(bc_factory.do("unicode", None))
# Command line: a buffer size in {5,6,7,8} is required.
if len(sys.argv) < 2:
    print "Argument not acceptable, use --hwut-info"
    sys.exit(0)

BS = int(sys.argv[1])
if BS not in [5, 6, 7, 8]:
    print "Argument not acceptable, use --hwut-info"
    sys.exit(0)

Language = "Cpp"
__Setup_init_language_database(Language)
# Skip characters in the Arabic block [0x600, 0x700).
trigger_set = NumberSet([Interval(0x600, 0x700)])
Setup.buffer_codec_set(bc_factory.do("utf8"), 1)

def make(TriggerSet, BufferSize):
    """Generate and compile a character-set skipper for 'TriggerSet'.

    Returns (executable name, temporary source file name).
    """
    Language = "ANSI-C-from-file"
    code = create_character_set_skipper_code(Language, "", TriggerSet,
                                             QuexBufferSize=BufferSize,
                                             InitialSkipF=False,
                                             OnePassOnlyF=True)
    exe_name, tmp_file_name = compile(Language, code)
    return exe_name, tmp_file_name

def core(Executable, BufferSize, TestStr):
#! /usr/bin/env python # vim: set fileencoding=utf8 : import sys import os sys.path.insert(0, os.environ["QUEX_PATH"]) import quex.input.regular_expression.engine as core import quex.engine.state_machine.transformation.core as bc_factory from quex.blackboard import setup as Setup Setup.buffer_limit_code = -1 Setup.path_limit_code = -1 # Setup.buffer_element_specification_prepare() Setup.bad_lexatom_detection_f = False Setup.buffer_codec_set(bc_factory.do("utf8"), 1) if "--hwut-info" in sys.argv: print "Transformations" sys.exit(0) def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.transform(Setup.buffer_codec) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() print "pattern\n", pattern.get_string(NormalizeF=True, Option="hex") test('µ/µ+/µ')
import quex.output.core.skipper.nested_range as nested_range_skipper
import quex.output.core.skipper.indentation_counter as indentation_counter
from quex.output.core.TEST.generator_test import *
from quex.output.core.variable_db import variable_db
from quex.output.core.TEST.generator_test import __Setup_init_language_database
from quex.input.code.base import CodeFragment
from quex.output.core.base import do_state_router
from quex.engine.state_machine.core import StateMachine
from quex.engine.analyzer.door_id_address_label import get_plain_strings
from quex.input.files.counter import LineColumnCount_Default
from quex.input.regular_expression.construct import Pattern
import quex.engine.analyzer.engine_supply_factory as engine
import quex.engine.state_machine.transformation.core as bc_factory

# Setup.buffer_element_specification_prepare()
Setup.buffer_codec_set(bc_factory.do("unicode", None), 1)

class MiniAnalyzer:
    """Minimal stand-in for a full analyzer: carries only the members
    that the tests below access."""
    def __init__(self):
        # No reload state; forward engine type.
        self.reload_state = None
        self.engine_type = engine.FORWARD

Analyzer = MiniAnalyzer()

def __prepare(Language, TokenQueueF=False):
    # Epilog code appended to generated actions; without a token queue
    # the action must return control explicitly.
    end_str = ' printf("end\\n");\n'
    if not TokenQueueF:
        end_str += ' return false;\n'
def do( PatternActionPairList, TestStr, PatternDictionary={},
        Language="ANSI-C-PlainMemory",
        QuexBufferSize=15, # DO NOT CHANGE!
        SecondPatternActionPairList=[],
        QuexBufferFallbackN=-1,
        ShowBufferLoadsF=False,
        AssertsActionvation_str="-DQUEX_OPTION_ASSERTS"):
    """Configure the global Setup for the requested 'Language' flavor and
    compile the shorthand pattern dictionary.

    NOTE(review): 'PatternDictionary={}' and 'SecondPatternActionPairList=[]'
    are mutable default arguments shared across calls -- confirm that no
    caller mutates them.
    """
    BufferLimitCode = 0
    Setup.buffer_limit_code = BufferLimitCode
    Setup.buffer_codec_set(bc_factory.do("unicode", None), LexatomSizeInBytes=1)

    __Setup_init_language_database(Language)

    CompileOptionStr = ""
    computed_goto_f  = False
    FullLanguage     = Language
    if Language.find("StrangeStream") != -1:
        CompileOptionStr += " -DQUEX_OPTION_STRANGE_ISTREAM_IMPLEMENTATION "

    # "-CG" suffix selects the computed-goto code generator.
    if Language.find("-CG") != -1:
        Language = Language.replace("-CG", "")
        CompileOptionStr += " -DQUEX_OPTION_COMPUTED_GOTOS "
        computed_goto_f  = True

    # Language suffixes select a state compression strategy.
    if Language == "Cpp-Template":
        Language = "Cpp"
        # Shall template compression be used?
        Setup.compression_type_list = [E_Compression.TEMPLATE]
        Setup.compression_template_min_gain = 0
    elif Language == "Cpp-Path":
        Language = "Cpp"
        Setup.compression_type_list = [E_Compression.PATH]
    elif Language == "Cpp-PathUniform":
        Language = "Cpp"
        Setup.compression_type_list = [E_Compression.PATH_UNIFORM]
    elif Language == "ANSI-C-PathTemplate":
        Language = "ANSI-C"
        Setup.compression_type_list = [ E_Compression.PATH, E_Compression.TEMPLATE ]
        Setup.compression_template_min_gain = 0

    try:
        adapted_dict = {}
        for key, regular_expression in PatternDictionary.items():
            string_stream = StringIO(regular_expression)
            pattern       = regex.do(string_stream, adapted_dict)
            # It is ESSENTIAL that the state machines of defined patterns do not
            # have origins! Actually, there are not more than patterns waiting
            # to be applied in regular expressions. The regular expressions
            # can later be origins.
            assert pattern.sm.has_origins() == False
            adapted_dict[key] = PatternShorthand(key, pattern.sm)

    except RegularExpressionException, x:
        print "Dictionary Creation:\n" + repr(x)
#! /usr/bin/env python import sys import os sys.path.append(os.environ["QUEX_PATH"]) from StringIO import StringIO import quex.input.files.counter as counter import quex.engine.misc.error as error from quex.engine.misc.file_in import EndOfStreamException from quex.blackboard import setup as Setup import quex.engine.state_machine.transformation.core as bc_factory # Setup.buffer_element_specification_prepare() Setup.buffer_codec_set(bc_factory.do("unicode"), 1) if "--hwut-info" in sys.argv: print "Parse Counter Setup;" print "CHOICES: basic, twice, intersection, intersection-2, non-numeric;" sys.exit() # choice = sys.argv[1] count_n = 0 def test(Text): global count_n count_n += 1 if Text.find("\n") == -1: print "(%i) |%s|\n" % (count_n, Text) else: print "(%i)\n::\n%s\n::\n" % (count_n, Text)
def prepare(command_line, argv):
    """RETURN: True, if process needs to be started.
               False, if job is done.

    Validates and completes the global 'Setup' from the parsed command
    line: output language, byte order, buffer/lexatom specification,
    converters, foreign token-id files, and compression types.
    """
    global Setup

    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)
    __setup_lexeme_null(Setup)       # Requires 'token_class_name_space'

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(Setup.language, output_language_db.keys(),
                              "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db  = output_language_db[Setup.language]
    Setup.extension_db = global_extension_db[Setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if Setup.extension_db.has_key(Setup.output_file_naming_scheme) == False:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(Setup.extension_db.keys())[1:-1])

    if Setup.buffer_byte_order == "<system>":
        Setup.buffer_byte_order = sys.byteorder
        Setup.byte_order_is_that_of_current_system_f = True
    else:
        Setup.byte_order_is_that_of_current_system_f = False

    lexatom_size_in_byte = __prepare_buffer_element_specification(Setup)
    buffer_codec = bc_factory.do(Setup.buffer_codec_name,
                                 Setup.buffer_codec_file)
    Setup.buffer_codec_set(buffer_codec, lexatom_size_in_byte)

    # AFTER: Setup.buffer_codec_prepare() !!!
    if Setup.language not in ["DOT"]:
        prepare_file_names(Setup)

    # Warn when the declared lexatom type's well-known size disagrees with
    # the explicitly specified buffer element size.
    type_info = global_character_type_db.get(Setup.buffer_lexatom_type)
    if type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.buffer_lexatom_size_in_byte != -1 \
       and type_info[3] != Setup.buffer_lexatom_size_in_byte:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.buffer_lexatom_type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.buffer_lexatom_size_in_byte \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    Setup.converter_f = False
    if Setup.converter_iconv_f or Setup.converter_icu_f or len(Setup.converter_user_new_func) != 0:
        Setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF] is used.
    Setup.converter_helper_required_f = True
    if Setup.converter_f == False and Setup.buffer_lexatom_size_in_byte == 1 and Setup.buffer_codec.name == "unicode":
        Setup.converter_helper_required_f = False

    validation.do(Setup, command_line, argv)

    if Setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(Setup.buffer_lexatom_type):
            # Index 1/2 select the little/big endian UCS coding name.
            if Setup.buffer_byte_order == "little": index = 1
            else:                                   index = 2
            Setup.converter_ucs_coding_name = \
                global_character_type_db[Setup.buffer_lexatom_type][index]

    if len(Setup.token_id_foreign_definition) != 0:
        if len(Setup.token_id_foreign_definition) > 3:
            error.log("Option '--foreign-token-id-file' received > 3 followers.\n"
                      "Found: %s" % str(Setup.token_id_foreign_definition)[1:-1])
        if len(Setup.token_id_foreign_definition) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[1],
                                             "token id region begin")
        if len(Setup.token_id_foreign_definition) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[2],
                                             "token id region end")
        Setup.token_id_foreign_definition_file = \
            Setup.token_id_foreign_definition[0]

        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        token_id_file_parse(Setup.token_id_foreign_definition_file,
                            CommentDelimiterList)

    # (*) Compression Types
    # Keep the order in which the flags appeared on the command line.
    compression_type_list = []
    for name, ctype in [("compression_template_f",         E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f",             E_Compression.PATH),
                        ("compression_path_uniform_f",     E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    # (*) return Setup ___________________________________________________________________
    return True
# Count actions used by the test: line, grid, and two whitespace actions.
CA_1 = CountAction(E_CharacterCountType.LINE,       1)
CA_2 = CountAction(E_CharacterCountType.GRID,       2)
CA_3 = CountAction(E_CharacterCountType.WHITESPACE, 3)
CA_4 = CountAction(E_CharacterCountType.WHITESPACE, 4)

# Mini Appendix Sm-s are generated during the test.
appendix_sm_id_0 = 815L
appendix_sm_id_1 = 4711L
appendix_sm_id_2 = 33L

#______________________________________________________________________________
#
# CHOICE: --> encoding
#         --> column_n_per_code_unit
#
Setup.buffer_codec_set(bc_factory.do(encoding), LexatomSizeInBytes=1)

event_handler = LoopEventHandlers(column_n_per_code_unit,
                                  MaintainLexemeF   = False,
                                  LexemeEndCheckF   = False,
                                  EngineType        = engine.FORWARD,
                                  ReloadStateExtern = None,
                                  UserOnLoopExit    = [])

loop_map = [
    LoopMapEntry(NS_A, CA_0, CA_0.get_incidence_id(), None),
    LoopMapEntry(NS_B, CA_1, CA_1.get_incidence_id(), appendix_sm_id_0,
                 HasTransitionsF=True),
    LoopMapEntry(NS_C, CA_2, CA_2.get_incidence_id(), appendix_sm_id_0,
                 HasTransitionsF=True),
    # NOTE(review): this entry pairs CA_3 with CA_0's incidence id, unlike
    # the other entries which use their own action's id -- confirm intended.
    LoopMapEntry(NS_D, CA_3, CA_0.get_incidence_id(), appendix_sm_id_1),
    LoopMapEntry(NS_E, CA_4, CA_4.get_incidence_id(), appendix_sm_id_2,
                 HasTransitionsF=False)
]
sys.path.insert(0, os.environ["QUEX_PATH"])

import quex.engine.state_machine.transformation.core as bc_factory
from quex.engine.state_machine.transformation.TEST.helper import test_on_UCS_range
from quex.engine.misc.interval_handling import Interval
from quex.blackboard import setup as Setup, E_IncidenceIDs

if "--hwut-info" in sys.argv:
    print "Table-Based Transformations (fixed size): Examplary Unicode Ranges"
    print "CHOICES: ascii, EBCDIC-CP-BE, arabic, cp037, cp1140, hebrew, iso8859_10, macgreek;"
    sys.exit()

encoding_name = sys.argv[1]
trafo = bc_factory.do(encoding_name)
# Both source and drain ranges cover one byte's worth of code points.
SourceInterval = Interval(0, 0x100)
DrainInterval = Interval(0, 0x100)

def transform_forward(X):
    """Map the single code point 'X' through 'trafo'.

    Returns the transformed code point, or None if the table cannot
    transform it.
    """
    global trafo
    interval = Interval(X, X+1)
    verdict_f, result = interval.transform_by_table(trafo)
    if not verdict_f:
        return None
    # A single code point must map to a single-interval result.
    assert len(result) == 1
    return result[0].begin

# Inverse mapping: transformed code point -> original code point.
backward_db = dict(
    (transform_forward(x), x)
    for x in range(SourceInterval.begin, SourceInterval.end)
# Count actions used by the test: line, grid, and two whitespace actions.
CA_1 = CountAction(E_CharacterCountType.LINE,       1)
CA_2 = CountAction(E_CharacterCountType.GRID,       2)
CA_3 = CountAction(E_CharacterCountType.WHITESPACE, 3)
CA_4 = CountAction(E_CharacterCountType.WHITESPACE, 4)

# Mini Appendix Sm-s are generated during the test.
appendix_sm_id_0 = 815L
appendix_sm_id_1 = 4711L
appendix_sm_id_2 = 33L

#______________________________________________________________________________
#
# CHOICE: --> encoding
#         --> column_n_per_code_unit
#
Setup.buffer_codec_set(bc_factory.do(encoding), LexatomSizeInBytes=1)

event_handler = LoopEventHandlers(column_n_per_code_unit,
                                  MaintainLexemeF=False,
                                  LexemeEndCheckF=False,
                                  EngineType=engine.FORWARD,
                                  ReloadStateExtern=None,
                                  UserOnLoopExit=[])

loop_map = [
    LoopMapEntry(NS_A, CA_0, CA_0.get_incidence_id(), None),
    LoopMapEntry(NS_B, CA_1, CA_1.get_incidence_id(), appendix_sm_id_0,
                 HasTransitionsF=True),
from quex.engine.state_machine.core import StateMachine
from quex.engine.analyzer.door_id_address_label import DoorID
import quex.engine.analyzer.core as analyzer_generator
import quex.engine.state_machine.algorithm.beautifier as beautifier
from quex.engine.analyzer.door_id_address_label import dial_db
from quex.engine.analyzer.transition_map import TransitionMap
import quex.engine.state_machine.transformation.core as bc_factory
from quex.blackboard import setup as Setup, \
                            E_MapImplementationType, \
                            E_IncidenceIDs, \
                            Lng
from collections import defaultdict

# Configure the global setup for C++ code generation on 32-bit lexatoms.
Setup.language_db = languages.db["C++"]
Setup.buffer_lexatom_type = "uint32_t"
Setup.buffer_codec_set(bc_factory.do("unicode", None), -1)

dial_db.clear()

if "--hwut-info" in sys.argv:
    print "Single State: Transition Code Generation;"
    print "CHOICES: A, B, C, A-UTF8, B-UTF8, C-UTF8;"
    sys.exit(0)

# Map the command-line choice to (test case, codec) pairs.
choice, codec = {
    "A":      ("A", ""),
    "B":      ("B", ""),
    "C":      ("C", ""),
    "A-UTF8": ("A", "UTF8"),
    "B-UTF8": ("B", "UTF8"),
    "C-UTF8": ("C", "UTF8"),