def test(TestString):
    """Parse the regular expression 'TestString' and print its
    line/column-count information ('lcci').

    Reads 'sys.argv' for the choices "BeginOfLine" and "UTF8"; relies on
    the module-level 'core', 'Setup' and 'ca_map' (presumably a
    count-action map defined elsewhere in this file -- TODO confirm).
    """
    # Make embedded whitespace printable for the test output.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    # NOTE: Python-2 print STATEMENT -- '.replace(...)' applies to the
    # parenthesized expression, not to a return value of 'print'.
    print("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")
    pattern = core.do(TestString, {})
    # Prepare transformation info according to choice.
    # Setup.buffer_element_specification_prepare()
    if "UTF8" in sys.argv:
        Setup.buffer_setup("", 1, "utf8")
    else:
        Setup.buffer_setup("", 2, "utf16")
    # Count
    pattern = pattern.finalize(ca_map)
    # Indent the nested 'lcci' string representation for readability.
    print("info = {\n %s\n}\n" % str(pattern.lcci).replace("\n", "\n "))
def get_test_application(encoding, ca_map): # (*) Setup the buffer encoding ___________________________________________ # if encoding == "utf_32_le": byte_n_per_code_unit = 4 elif encoding == "ascii": byte_n_per_code_unit = 1 elif encoding == "utf_8": byte_n_per_code_unit = 1 elif encoding == "utf_16_le": byte_n_per_code_unit = 2 elif encoding == "cp737": byte_n_per_code_unit = 1 else: assert False Setup.buffer_setup("", byte_n_per_code_unit, encoding.replace("_le", "").replace("_", "")) Setup.analyzer_class_name = "Lexer" # (*) Generate Code _______________________________________________________ # counter_str = run_time_counter.get(ca_map, "TEST_MODE") counter_str = counter_str.replace("static void", "void") # Double check if reference delta counting has been implemented as expected. expect_reference_p_f = ca_map.get_column_number_per_code_unit() is not None assert_reference_delta_count_implementation(counter_str, expect_reference_p_f) counter_str = adapt.do(counter_str, "data", "") open("./data/test.c", "wb").write("#include <data/check.h>\n\n" + counter_str) # (*) Compile _____________________________________________________________ # counter_function_name = Lng.DEFAULT_COUNTER_FUNCTION_NAME("TEST_MODE") os.system("rm -f test") compile_str = "gcc -Wall -Werror -I. -ggdb ./data/check.c ./data/test.c " \ + " -DQUEX_OPTION_COUNTER" \ + " -DDEF_COUNTER_FUNCTION='%s' " % counter_function_name \ + " -DDEF_FILE_NAME='\"data/input.txt\"' " \ + " -DDEF_CHARACTER_TYPE=%s" % Setup.lexatom.type \ + " -o test" # + " -DDEF_DEBUG_TRACE " print "## %s" % compile_str os.system(compile_str)
def get_transition_function(iid_map, Codec):
    """Produce C code text implementing a transition function for 'iid_map'.

    Returns the generated code as one string; one 'return' label per
    incidence id, plus a final drop-out 'return -1'.
    """
    global dial_db

    # Configure the lexatom buffer for the requested codec.
    if Codec == "UTF8":
        Setup.buffer_setup("uint8_t", 1, "utf8")
    else:
        Setup.buffer_setup("uint32_t", 4, "none")
    Setup.bad_lexatom_detection_f = False

    dfa      = DFA.from_IncidenceIdMap(iid_map)
    analyzer = analyzer_generator.do(dfa, engine.CHARACTER_COUNTER,
                                     dial_db=dial_db, CutF=False)

    code = Lng.GET_PLAIN_STRINGS(do_analyzer(analyzer), dial_db=dial_db)
    code.append("\n")
    #label = dial_db.get_label_by_door_id(DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE))

    # Emit one labelled 'return' per incidence id ...
    code.extend(
        "%s return (int)%s;\n" % (Lng.LABEL(DoorID.incidence(iid, dial_db)), iid)
        for _character_set, iid in iid_map
    )
    # ... and the drop-out label returning -1.
    code.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1, dial_db)))
    return "".join(code)
def test_plug_sequence(ByteSequenceDB):
    """Plug a set of interval sequences into a fresh DFA and display it.

    ByteSequenceDB -- list of equally long sequences of 'Interval' objects.

    Prints the byte sequences, builds the DFA via the configured buffer
    encoding's 'plug_interval_sequences()', beautifies it, checks sanity
    (no orphans, no self-predecessor cycles), and shows the result with
    graphviz.
    """
    # All sequences must have the same length and consist of Intervals only.
    L = len(ByteSequenceDB[0])
    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    # Find the first byte index where the sequences differ (display only).
    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "# > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences: "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print " " + x.get_string(Option="hex"),
        print
    print "# L = %i" % L
    print "# DIdx = %i" % first_different_byte_index

    # Fresh DFA with a single additional end state.
    sm = DFA()
    end_index = state_machine.index.get()
    sm.states[end_index] = DFA_State()
    Setup.buffer_setup("", 1, "utf8")

    # Optional extra state that accepts on bad lexatoms.
    if Setup.bad_lexatom_detection_f:
        bad_lexatom_si = index.get()
    else:
        bad_lexatom_si = None

    trafo = Setup.buffer_encoding
    new_first_tm, \
    new_state_db = trafo.plug_interval_sequences(sm.init_state_index, end_index,
                                                 ByteSequenceDB,
                                                 BadLexatomSi=bad_lexatom_si)
    if bad_lexatom_si is not None:
        new_first_tm[bad_lexatom_si] = trafo._error_range_by_code_unit_db[0]
        # Generate the 'bad lexatom accepter'.
        bad_lexatom_state = DFA_State(AcceptanceF=True)
        bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
        sm.states[bad_lexatom_si] = bad_lexatom_state

    # Merge the new transitions into the init state's transition map;
    # the direct transition to 'end_index' is superseded by the plug.
    first_tm = sm.get_init_state().target_map.get_map()
    if end_index in first_tm:
        del first_tm[end_index]
    first_tm.update(new_first_tm)
    sm.states.update(new_state_db)

    sm = beautifier.do(sm)
    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    # Double check, that there are no 'circles'
    predecessor_db = sm.get_predecessor_db()
    assert not any(si in predecessor_db[si] for si in sm.states)
    show_graphviz(sm)
def prepare(command_line, argv):
    """RETURN: True, if process needs to be started.
               False, if job is done.

    Applies the parsed command line 'command_line'/'argv' to the global
    'Setup': classes/namespaces, counting flags, output language, buffer
    lexatom setup, external token-id files, output directories, and
    compression types; finally runs validation.
    """
    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)

    # (*) Line and Column number counting
    if Setup.__no_count_line_and_column_f:
        Setup.count_line_number_f = False
        Setup.count_column_number_f = False

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(Setup.language,
                              output_language_db.keys(),
                              "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db = output_language_db[Setup.language]()

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    Setup.language_db.extension_db = Setup.language_db.all_extension_db.get(Setup.output_file_naming_scheme)
    if Setup.language_db.extension_db is None:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(sorted(Setup.language_db.all_extension_db.keys()))[1:-1])

    # 'wchar_t' must be requested via '--buffer-element-type', not by size.
    if Setup.__buffer_lexatom_size_in_byte == "wchar_t":
        error.log("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    Setup.buffer_setup(Setup.__buffer_lexatom_type,
                       Setup.__buffer_lexatom_size_in_byte,
                       Setup.buffer_encoding_name,
                       Setup.buffer_encoding_file)

    # Warn (without exiting) if the declared lexatom size contradicts the
    # well-known size of the chosen character type.
    type_info = global_character_type_db.get(Setup.lexatom.type)
    if     type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.lexatom.size_in_byte != -1 \
       and type_info[3] != Setup.lexatom.size_in_byte:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.lexatom.type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.lexatom.size_in_byte \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    # (*) External token-id file: [file, region-begin-re, region-end-re]
    if Setup.extern_token_id_specification:
        if len(Setup.extern_token_id_specification) > 3:
            error.log("Option '--foreign-token-id-file' received > 3 followers.\n"
                      "Found: %s" % str(Setup.extern_token_id_specification)[1:-1])
        if len(Setup.extern_token_id_specification) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.extern_token_id_specification[1], "token id region begin")
        if len(Setup.extern_token_id_specification) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.extern_token_id_specification[2], "token id region end")
        Setup.extern_token_id_file = \
            Setup.extern_token_id_specification[0]
        token_id_file_parse(Setup.extern_token_id_file)  # AFTER: Setup.extern_token_id_file !!!

    Setup.prepare_output_directory()
    if Setup.language not in ["DOT"]:
        Setup.prepare_all_file_names()

    # (*) Compression Types -- ordered by their position on the command line.
    compression_type_list = []
    for name, ctype in [("compression_template_f", E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f", E_Compression.PATH),
                        ("compression_path_uniform_f", E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))
    compression_type_list.sort(key=itemgetter(0))
    # NOTE(review): Python-2 'map' returns a list; under Python 3 this
    # would become a lazy iterator -- confirm before porting.
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    validation.do(Setup, command_line, argv)

    # (*) return Setup ___________________________________________________________________
    return True
from operator import attrgetter
from collections import namedtuple

# Test Loop Map Entry: (character set, incidence-id couple terminal,
# appendix state-machine id).
TestLME = namedtuple("TestLME", ("character_set", "iid_couple_terminal", "appendix_sm_id"))

if "--hwut-info" in sys.argv:
    print "Loop: Get All Analyzers."
    print "CHOICES: loop, appendix, appendix-wot, non-const;"
    sys.exit()

dial_db = DialDB()

Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("none", 1, "utf8")

def test(LoopMap, ColumnNPerCodeUnit):
    """Build loop events/config for 'LoopMap'.

    NOTE(review): this definition is truncated in this chunk; the
    'loop.LoopConfig(...)' call continues beyond the visible text.
    """
    global dial_db
    Setup.buffer_encoding.source_set = NumberSet_All()
    # Generate sample state machines from what the loop map tells.
    appendix_sm_list = _get_appendix_sm_list(LoopMap)
    UserOnLoopExitDoorId = dial_db.new_door_id()
    events = loop.LoopEvents(ColumnNPerCodeUnit, None, UserOnLoopExitDoorId)
    config = loop.LoopConfig(ColumnNPerCodeUnit = ColumnNPerCodeUnit,
                             LexemeEndCheckF = False,
                             EngineType = engine.FORWARD,
                             ReloadStateExtern = None,
def do( PatternActionPairList, TestStr, PatternDictionary={}, Language="ANSI-C-PlainMemory",
        QuexBufferSize=15, # DO NOT CHANGE!
        SecondPatternActionPairList=[], QuexBufferFallbackN=0, ShowBufferLoadsF=False,
        AssertsActionvation_str="-DQUEX_OPTION_ASSERTS",
        Encoding="unicode", CodeUnitSize_inByte=1):
    """Set up the code-generation environment for a pattern-match test.

    Configures the global 'Setup' from 'Language' (including the
    compression-type variants encoded in the language name) and compiles
    the named patterns of 'PatternDictionary' into shorthands.

    NOTE(review): 'PatternDictionary' and 'SecondPatternActionPairList'
    are mutable default arguments -- shared across calls; harmless only
    if never mutated. The function appears to continue beyond this chunk.
    """
    assert type(TestStr) == list or isinstance(TestStr, (str, unicode))
    assert QuexBufferFallbackN >= 0
    __Setup_init_language_database(Language)
    BufferLimitCode = 0
    Setup.buffer_limit_code = BufferLimitCode
    Setup.buffer_setup("", CodeUnitSize_inByte, Encoding)
    CompileOptionStr = ""
    Setup.computed_gotos_f = False
    FullLanguage = Language
    if Language.find("StrangeStream") != -1:
        CompileOptionStr += " -DQUEX_OPTION_STRANGE_ISTREAM_IMPLEMENTATION_EXT "
    # '-CG' suffix requests computed gotos.
    if Language.find("-CG") != -1:
        Language = Language.replace("-CG", "")
        Setup.computed_gotos_f = True
    if Language == "Cpp-Template":
        Language = "Cpp"
        # Shall template compression be used?
        Setup.compression_type_list = [E_Compression.TEMPLATE]
        Setup.compression_template_min_gain = 0
    elif Language == "Cpp-Path":
        Language = "Cpp"
        Setup.compression_type_list = [E_Compression.PATH]
    elif Language == "Cpp-PathUniform":
        Language = "Cpp"
        Setup.compression_type_list = [E_Compression.PATH_UNIFORM]
    elif Language == "ANSI-C-PathTemplate":
        Language = "ANSI-C"
        Setup.compression_type_list = [ E_Compression.PATH, E_Compression.TEMPLATE ]
        Setup.compression_template_min_gain = 0
    try:
        adapted_dict = {}
        for key, regular_expression in PatternDictionary.items():
            string_stream = StringIO(regular_expression)
            pattern = regex.do(string_stream, adapted_dict)
            # It is ESSENTIAL that the state machines of defined patterns do not
            # have origins! Actually, there are not more than patterns waiting
            # to be applied in regular expressions. The regular expressions
            # can later be origins.
            ## assert not pattern.sm.has_specific_acceptance_id()
            adapted_dict[key] = PatternShorthand(key, pattern.extract_sm())
    except RegularExpressionException, x:
        print "Dictionary Creation:\n" + repr(x)
import sys
import os
sys.path.append(os.environ["QUEX_PATH"])

from StringIO import StringIO

import quex.input.files.specifier.counter as counter
import quex.engine.misc.error as error
from quex.engine.misc.file_in import EndOfStreamException
from quex.blackboard import setup as Setup
import quex.engine.state_machine.transformation.core as bc_factory
# Setup.buffer_element_specification_prepare()
import quex.output.languages.core as languages

Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("", 1, "unicode", "")

if "--hwut-info" in sys.argv:
    print "Parse Counter Setup;"
    print "CHOICES: basic, twice, intersection, intersection-2, non-numeric;"
    sys.exit()

# choice = sys.argv[1]
# Running number of executed test cases.
count_n = 0

def test(Text):
    """Run one counter-setup parse test on 'Text'.

    NOTE(review): this definition is truncated in this chunk; the body
    continues beyond the visible text.
    """
    global count_n
    count_n += 1
    if Text.find("\n") == -1:
# 0x010000 - 0x110000: 2 code units = 4 byte = constructed from UCS code # Range of 1st code unit: 0xD800..0xDBFF # 2nd code unit: 0xDC00..0xDFFF boarders = [0x000080, 0x00D7FF, 0x00E000, 0x00FFFF, 0x010000, 0x10FFFF] good_sequences = [unicode_to_utf16(x) for x in boarders] # Boarders of code unit ragnes which are encoding errors: bad_1st_s = [0xDC00, 0xDFFF] # boarders of disallowed CodeUnit[0] bad_2nd_s = [0x0000, 0xDBFF, 0xE000, 0x110000] # boarders of disallowed CodeUnit[1] good_sequences = [unicode_to_utf16(x) for x in boarders] trafo = EncodingTrafoUTF16() Setup.buffer_setup("none", 4, "utf16") sm = helper.generate_sm_for_boarders(boarders, EncodingTrafoUTF16()) bad_sequence_list = helper.get_bad_sequences(good_sequences, bad_1st_s, bad_2nd_s) if True: helper.test_good_and_bad_sequences(sm, good_sequences, bad_sequence_list) else: # Check on isolated sequence (debugging) sequence = [0xD800, 0x11000] si = sm.init_state_index print "#si:", si, sm.states[si].get_string(Option="hex") for lexatom in sequence: print "#tm:", sm.states[si].target_map.get_string("",
import os
sys.path.insert(0, os.environ["QUEX_PATH"])
import quex.input.regular_expression.engine as core
import quex.engine.state_machine.algorithm.beautifier as beautifier
import quex.engine.state_machine.algebra.reverse as reverse
from quex.blackboard import setup as Setup
import quex.output.languages.core as languages

# NOTE(review): 'sys' is used above but not imported in this chunk --
# presumably imported earlier in the file; confirm.
Setup.buffer_limit_code = -1
Setup.path_limit_code = -1
# Setup.buffer_element_specification_prepare()
Setup.bad_lexatom_detection_f = False
Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("<no-type>", -1, "utf8")

if "--hwut-info" in sys.argv:
    print "Transformations"
    sys.exit(0)

def test(TestString):
    """Parse 'TestString' and print the finalized (transformed) pattern.

    NOTE(review): this definition appears to continue beyond this chunk.
    """
    print "-------------------------------------------------------------------"
    print "expression = \"" + TestString + "\""
    pattern = core.do(TestString, {}).finalize(None)
    # During 'finalize()': pattern.transform(Setup.buffer_encoding)
    # During 'finalize()': pattern.mount_post_context_sm()
    # During 'finalize()': pattern.mount_pre_context_sm()
    print "pattern\n"
# Command line: first argument is the buffer size under test (5..8).
if len(sys.argv) < 2:
    print "Argument not acceptable, use --hwut-info"
    sys.exit(0)
BS = int(sys.argv[1])
if BS not in [5, 6, 7, 8]:
    print "Argument not acceptable, use --hwut-info"
    sys.exit(0)

Language = "Cpp"
__Setup_init_language_database(Language)
# Characters to be skipped: the range 0x600..0x700.
trigger_set = NumberSet([Interval(0x600, 0x700)])
Setup.buffer_setup("", 1, "utf8")

def make(TriggerSet, BufferSize):
    """Generate and compile the character-set skipper test program.

    RETURNS: (executable name, temporary file name).
    NOTE(review): 'compile' here is presumably a helper defined elsewhere
    in this file that shadows the builtin -- confirm.
    """
    Language = "ANSI-C-from-file"
    code = create_character_set_skipper_code(Language, "", TriggerSet,
                                             QuexBufferSize=BufferSize,
                                             InitialSkipF=False, OnePassOnlyF=True)
    exe_name, tmp_file_name = compile(Language, code)
    return exe_name, tmp_file_name

# NOTE(review): 'core' is truncated in this chunk; its body continues
# beyond the visible text.
def core(Executable, BufferSize, TestStr):
from quex.engine.state_machine.core import DFA
from quex.engine.analyzer.door_id_address_label import DoorID
import quex.engine.analyzer.core as analyzer_generator
from quex.engine.analyzer.door_id_address_label import DialDB
from quex.engine.analyzer.transition_map import TransitionMap
import quex.engine.state_machine.transformation.core as bc_factory
import quex.output.analyzer.adapt as adapt
from quex.blackboard import setup as Setup, \
                            E_IncidenceIDs, \
                            Lng
from collections import defaultdict

# NOTE(review): 'sys' and 'languages' are used below but not imported in
# this chunk -- presumably imported earlier in the file; confirm.
dial_db = DialDB()

Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("uint32_t", 4, "none")

if "--hwut-info" in sys.argv:
    print "Single DFA_State: Transition Code Generation;"
    print "CHOICES: A, B, C, A-UTF8, B-UTF8, C-UTF8;"
    sys.exit(0)

# Map the command-line choice to (test case, codec). A missing/unknown
# argument raises KeyError/IndexError here.
choice, codec = {
    "A":      ("A", ""),
    "B":      ("B", ""),
    "C":      ("C", ""),
    "A-UTF8": ("A", "UTF8"),
    "B-UTF8": ("B", "UTF8"),
    "C-UTF8": ("C", "UTF8"),
}[sys.argv[1]]