def test(TestString):
    """Parse 'TestString' as a regular expression, finalize it against the
    global 'ca_map' and print the resulting count information ('lcci').

    Reads 'sys.argv' for the 'BeginOfLine' and 'UTF8' choices and mutates
    the global 'Setup' buffer configuration as a side effect.
    """
    # Escape newline/tab so the expression prints on a single line.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    # NOTE(review): the original chained '.replace()' calls onto the printed
    # expression; 'TestString' is already escaped above, so they were no-ops.
    print("expr. = " + TestString)
    pattern = core.do(TestString, {})

    # Prepare transformation info according to choice.
    #  Setup.buffer_element_specification_prepare()
    if "UTF8" in sys.argv: Setup.buffer_setup("", 1, "utf8")
    else: Setup.buffer_setup("", 2, "utf16")

    # Count: 'finalize()' attaches the line/column count info ('lcci').
    pattern = pattern.finalize(ca_map)
    print("info  = {\n    %s\n}\n" % str(pattern.lcci).replace("\n", "\n    "))
Exemple #2
0
def get_test_application(encoding, ca_map):

    # (*) Setup the buffer encoding ___________________________________________
    #
    if encoding == "utf_32_le": byte_n_per_code_unit = 4
    elif encoding == "ascii": byte_n_per_code_unit = 1
    elif encoding == "utf_8": byte_n_per_code_unit = 1
    elif encoding == "utf_16_le": byte_n_per_code_unit = 2
    elif encoding == "cp737": byte_n_per_code_unit = 1
    else: assert False

    Setup.buffer_setup("", byte_n_per_code_unit,
                       encoding.replace("_le", "").replace("_", ""))

    Setup.analyzer_class_name = "Lexer"
    # (*) Generate Code _______________________________________________________
    #
    counter_str = run_time_counter.get(ca_map, "TEST_MODE")
    counter_str = counter_str.replace("static void", "void")

    # Double check if reference delta counting has been implemented as expected.
    expect_reference_p_f = ca_map.get_column_number_per_code_unit() is not None
    assert_reference_delta_count_implementation(counter_str,
                                                expect_reference_p_f)

    counter_str = adapt.do(counter_str, "data", "")
    open("./data/test.c",
         "wb").write("#include <data/check.h>\n\n" + counter_str)

    # (*) Compile _____________________________________________________________
    #
    counter_function_name = Lng.DEFAULT_COUNTER_FUNCTION_NAME("TEST_MODE")
    os.system("rm -f test")
    compile_str =   "gcc -Wall -Werror -I. -ggdb ./data/check.c ./data/test.c "     \
                  + " -DQUEX_OPTION_COUNTER"                                \
                  + " -DDEF_COUNTER_FUNCTION='%s' " % counter_function_name \
                  + " -DDEF_FILE_NAME='\"data/input.txt\"' "                \
                  + " -DDEF_CHARACTER_TYPE=%s" % Setup.lexatom.type         \
                  + " -o test"
    # + " -DDEF_DEBUG_TRACE "

    print "## %s" % compile_str
    os.system(compile_str)
def get_transition_function(iid_map, Codec):
    """Generate C code text for a transition function derived from 'iid_map'.

    'iid_map' is iterated as (character_set, incidence id) pairs; 'Codec'
    selects the buffer setup ("UTF8" => 1-byte utf8 code units, anything
    else => 4-byte 'none' encoding).  Returns the generated code as a
    single string; terminal labels return the incidence id as 'int', the
    drop-out label returns -1.
    """
    global dial_db
    # Buffer configuration depends on the chosen codec.
    if Codec == "UTF8": Setup.buffer_setup("uint8_t", 1, "utf8")
    else: Setup.buffer_setup("uint32_t", 4, "none")

    Setup.bad_lexatom_detection_f = False
    # Build the DFA from the incidence-id map and wrap it in a
    # character-counter analyzer.
    sm = DFA.from_IncidenceIdMap(iid_map)
    analyzer = analyzer_generator.do(sm,
                                     engine.CHARACTER_COUNTER,
                                     dial_db=dial_db,
                                     CutF=False)
    tm_txt = do_analyzer(analyzer)
    tm_txt = Lng.GET_PLAIN_STRINGS(tm_txt, dial_db=dial_db)
    tm_txt.append("\n")
    #label   = dial_db.get_label_by_door_id(DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE))

    # One 'return <iid>' terminal per incidence id in the map.
    for character_set, iid in iid_map:
        tm_txt.append("%s return (int)%s;\n" %
                      (Lng.LABEL(DoorID.incidence(iid, dial_db)), iid))
    # Drop-out terminal: signal failure with -1.
    tm_txt.append("%s return (int)-1;\n" %
                  Lng.LABEL(DoorID.drop_out(-1, dial_db)))

    return "".join(tm_txt)
Exemple #4
0
def test_plug_sequence(ByteSequenceDB):
    L = len(ByteSequenceDB[0])

    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "#  > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences:     "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print "    " + x.get_string(Option="hex"),
        print
    print "#    L    = %i" % L
    print "#    DIdx = %i" % first_different_byte_index

    sm = DFA()
    end_index = state_machine.index.get()
    sm.states[end_index] = DFA_State()

    Setup.buffer_setup("", 1, "utf8")

    if Setup.bad_lexatom_detection_f: bad_lexatom_si = index.get()
    else: bad_lexatom_si = None

    trafo = Setup.buffer_encoding

    new_first_tm,    \
    new_state_db = trafo.plug_interval_sequences(sm.init_state_index, end_index,
                                                 ByteSequenceDB,
                                                 BadLexatomSi=bad_lexatom_si)

    if bad_lexatom_si is not None:
        new_first_tm[bad_lexatom_si] = trafo._error_range_by_code_unit_db[0]

    # Generate the 'bad lexatom accepter'.
    bad_lexatom_state = DFA_State(AcceptanceF=True)
    bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
    sm.states[bad_lexatom_si] = bad_lexatom_state

    first_tm = sm.get_init_state().target_map.get_map()
    if end_index in first_tm: del first_tm[end_index]
    first_tm.update(new_first_tm)

    sm.states.update(new_state_db)

    sm = beautifier.do(sm)
    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    # Double check, that there are no 'circles'
    predecessor_db = sm.get_predecessor_db()
    assert not any(si in predecessor_db[si] for si in sm.states)

    show_graphviz(sm)
Exemple #5
0
def prepare(command_line, argv):
    """RETURN:  True, if process needs to be started.
                False, if job is done.

    Validates and completes the global 'Setup' object from the parsed
    'command_line': classes/namespaces, counting flags, output language,
    buffer/lexatom configuration, external token-id files, output
    directories and compression types.  Order matters: file name
    preparation happens AFTER the external token-id file is known.
    """

    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)

    # (*) Line and Column number counting
    if Setup.__no_count_line_and_column_f:
        Setup.count_line_number_f = False
        Setup.count_column_number_f = False

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(
        Setup.language, output_language_db.keys(),
        "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db = output_language_db[Setup.language]()

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    Setup.language_db.extension_db = Setup.language_db.all_extension_db.get(
        Setup.output_file_naming_scheme)
    if Setup.language_db.extension_db is None:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(sorted(Setup.language_db.all_extension_db.keys()))[1:-1])

    # 'wchar_t' must be given as a buffer element TYPE, not a size.
    if Setup.__buffer_lexatom_size_in_byte == "wchar_t":
        error.log(
            "Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
            "with option '--buffer-element-size' or '-bes'. Please, specify\n"
            "'--buffer-element-type wchar_t' or '--bet'.")

    Setup.buffer_setup(Setup.__buffer_lexatom_type,
                       Setup.__buffer_lexatom_size_in_byte,
                       Setup.buffer_encoding_name, Setup.buffer_encoding_file)

    # Warn (do not exit) if the declared lexatom size contradicts the
    # well-known size of the chosen character type.
    type_info = global_character_type_db.get(Setup.lexatom.type)
    if     type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.lexatom.size_in_byte != -1 \
       and type_info[3] != Setup.lexatom.size_in_byte:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.lexatom.type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.lexatom.size_in_byte \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    # External token-id specification: [file, region-begin-re, region-end-re]
    if Setup.extern_token_id_specification:
        if len(Setup.extern_token_id_specification) > 3:
            error.log(
                "Option '--foreign-token-id-file' received > 3 followers.\n"
                "Found: %s" % str(Setup.extern_token_id_specification)[1:-1])
        if len(Setup.extern_token_id_specification) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                    __compile_regular_expression(Setup.extern_token_id_specification[1], "token id region begin")
        if len(Setup.extern_token_id_specification) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                    __compile_regular_expression(Setup.extern_token_id_specification[2], "token id region end")
        Setup.extern_token_id_file = \
                Setup.extern_token_id_specification[0]

        token_id_file_parse(Setup.extern_token_id_file)

    # AFTER: Setup.extern_token_id_file !!!
    Setup.prepare_output_directory()
    if Setup.language not in ["DOT"]: Setup.prepare_all_file_names()

    # (*) Compression Types -- collected in command-line order.
    compression_type_list = []
    for name, ctype in [
        ("compression_template_f", E_Compression.TEMPLATE),
        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
        ("compression_path_f", E_Compression.PATH),
        ("compression_path_uniform_f", E_Compression.PATH_UNIFORM)
    ]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append(
                (command_line_arg_position(name), ctype))

    # Sort by command-line position so the user-given order is respected.
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    validation.do(Setup, command_line, argv)

    # (*) return Setup ___________________________________________________________________
    return True
Exemple #6
0
                                                         

from   operator import attrgetter
from   collections import namedtuple

# Test Loop Map Entry
# One entry of a test loop map: the triggering character set, the couple
# terminal's incidence id, and the id of the appendix state machine.
TestLME = namedtuple("TestLME", ("character_set", "iid_couple_terminal", "appendix_sm_id"))

# HWUT test-framework handshake: print title/choices and leave.
if "--hwut-info" in sys.argv:
    print "Loop: Get All Analyzers."
    print "CHOICES: loop, appendix, appendix-wot, non-const;"
    sys.exit()

# Global test fixtures: door-id database and C++ language/buffer setup.
dial_db = DialDB()
Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("none", 1, "utf8") 

def test(LoopMap, ColumnNPerCodeUnit):
    global dial_db

    Setup.buffer_encoding.source_set = NumberSet_All()

    # Generate sample state machines from what the loop map tells.
    appendix_sm_list = _get_appendix_sm_list(LoopMap)

    UserOnLoopExitDoorId  = dial_db.new_door_id()
    events = loop.LoopEvents(ColumnNPerCodeUnit, None, UserOnLoopExitDoorId)
    config = loop.LoopConfig(ColumnNPerCodeUnit    = ColumnNPerCodeUnit,
                                    LexemeEndCheckF       = False, 
                                    EngineType            = engine.FORWARD, 
                                    ReloadStateExtern     = None, 
Exemple #7
0
def do(
        PatternActionPairList,
        TestStr,
        PatternDictionary={},
        Language="ANSI-C-PlainMemory",
        QuexBufferSize=15,  # DO NOT CHANGE!
        SecondPatternActionPairList=[],
        QuexBufferFallbackN=0,
        ShowBufferLoadsF=False,
        AssertsActionvation_str="-DQUEX_OPTION_ASSERTS",
        Encoding="unicode",
        CodeUnitSize_inByte=1):
    """Run a generated-lexer test for the given pattern/action pairs against
    'TestStr'.

    Configures the global 'Setup' for 'Language'/'Encoding', interprets
    language suffixes ('-CG' => computed gotos; 'Cpp-Template', 'Cpp-Path',
    ... => compression variants) and compiles the entries of
    'PatternDictionary' into 'PatternShorthand' objects.

    NOTE(review): 'PatternDictionary' and 'SecondPatternActionPairList' are
    mutable default arguments; the visible code only reads them, but this
    is fragile -- confirm before relying on the defaults.
    """
    assert type(TestStr) == list or isinstance(TestStr, (str, unicode))

    assert QuexBufferFallbackN >= 0
    __Setup_init_language_database(Language)

    BufferLimitCode = 0
    Setup.buffer_limit_code = BufferLimitCode
    Setup.buffer_setup("", CodeUnitSize_inByte, Encoding)

    CompileOptionStr = ""
    Setup.computed_gotos_f = False
    FullLanguage = Language
    # "StrangeStream" variants exercise the non-standard istream adaption.
    if Language.find("StrangeStream") != -1:
        CompileOptionStr += " -DQUEX_OPTION_STRANGE_ISTREAM_IMPLEMENTATION_EXT "

    # Suffix "-CG" requests computed-goto code generation.
    if Language.find("-CG") != -1:
        Language = Language.replace("-CG", "")
        Setup.computed_gotos_f = True

    if Language == "Cpp-Template":
        Language = "Cpp"
        # Shall template compression be used?
        Setup.compression_type_list = [E_Compression.TEMPLATE]
        Setup.compression_template_min_gain = 0

    elif Language == "Cpp-Path":
        Language = "Cpp"
        Setup.compression_type_list = [E_Compression.PATH]

    elif Language == "Cpp-PathUniform":
        Language = "Cpp"
        Setup.compression_type_list = [E_Compression.PATH_UNIFORM]

    elif Language == "ANSI-C-PathTemplate":
        Language = "ANSI-C"
        Setup.compression_type_list = [
            E_Compression.PATH, E_Compression.TEMPLATE
        ]
        Setup.compression_template_min_gain = 0

    # Compile each named regular expression of the dictionary into a
    # shorthand usable inside later regular expressions.
    try:
        adapted_dict = {}
        for key, regular_expression in PatternDictionary.items():
            string_stream = StringIO(regular_expression)
            pattern = regex.do(string_stream, adapted_dict)
            # It is ESSENTIAL that the state machines of defined patterns do not
            # have origins! Actually, there are not more than patterns waiting
            # to be applied in regular expressions. The regular expressions
            # can later be origins.

            ## assert not pattern.sm.has_specific_acceptance_id()

            adapted_dict[key] = PatternShorthand(key, pattern.extract_sm())

    except RegularExpressionException, x:
        print "Dictionary Creation:\n" + repr(x)
Exemple #8
0
import sys
import os
sys.path.append(os.environ["QUEX_PATH"])

from StringIO import StringIO

import quex.input.files.specifier.counter as counter
import quex.engine.misc.error as error
from quex.engine.misc.file_in import EndOfStreamException
from quex.blackboard import setup as Setup
import quex.engine.state_machine.transformation.core as bc_factory

# Setup.buffer_element_specification_prepare()
# Global test fixtures: C++ language database and a 1-byte 'unicode' buffer.
import quex.output.languages.core as languages
Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("", 1, "unicode", "")

# HWUT test-framework handshake: print title/choices and leave.
if "--hwut-info" in sys.argv:
    print "Parse Counter Setup;"
    print "CHOICES: basic, twice, intersection, intersection-2, non-numeric;"
    sys.exit()

# choice = sys.argv[1]
# Number of test runs executed so far (incremented by 'test()').
count_n = 0


def test(Text):
    global count_n
    count_n += 1

    if Text.find("\n") == -1:
Exemple #9
0
#   0x010000 - 0x110000: 2 code units = 4 byte = constructed from UCS code
#                        Range of 1st code unit: 0xD800..0xDBFF
#                                 2nd code unit: 0xDC00..0xDFFF
# Code points at the boundaries of the UTF16 encoding ranges.
boarders = [0x000080, 0x00D7FF, 0x00E000, 0x00FFFF, 0x010000, 0x10FFFF]

# UTF16 code unit sequences for each boundary code point.
# NOTE(review): the original computed this list twice with the identical
# expression; the redundant second assignment has been removed.
good_sequences = [unicode_to_utf16(x) for x in boarders]

# Boarders of code unit ragnes which are encoding errors:
bad_1st_s = [0xDC00, 0xDFFF]  # boarders of disallowed CodeUnit[0]
bad_2nd_s = [0x0000, 0xDBFF, 0xE000,
             0x110000]  # boarders of disallowed CodeUnit[1]

trafo = EncodingTrafoUTF16()
Setup.buffer_setup("none", 4, "utf16")
# State machine accepting exactly the boundary code points, in UTF16.
sm = helper.generate_sm_for_boarders(boarders, EncodingTrafoUTF16())

# Sequences that violate the UTF16 code-unit ranges above.
bad_sequence_list = helper.get_bad_sequences(good_sequences, bad_1st_s,
                                             bad_2nd_s)

if True:
    helper.test_good_and_bad_sequences(sm, good_sequences, bad_sequence_list)

else:
    # Check on isolated sequence (debugging)
    sequence = [0xD800, 0x11000]
    si = sm.init_state_index
    print "#si:", si, sm.states[si].get_string(Option="hex")
    for lexatom in sequence:
        print "#tm:", sm.states[si].target_map.get_string("",
Exemple #10
0
import os

sys.path.insert(0, os.environ["QUEX_PATH"])

import quex.input.regular_expression.engine as core
import quex.engine.state_machine.algorithm.beautifier as beautifier
import quex.engine.state_machine.algebra.reverse as reverse
from quex.blackboard import setup as Setup
import quex.output.languages.core as languages

# Global test fixtures: disable limit codes and bad-lexatom detection,
# use C++ output with a utf8-transformed buffer.
Setup.buffer_limit_code = -1
Setup.path_limit_code = -1
# Setup.buffer_element_specification_prepare()
Setup.bad_lexatom_detection_f = False
Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("<no-type>", -1, "utf8")

# HWUT test-framework handshake: print title and leave.
if "--hwut-info" in sys.argv:
    print "Transformations"
    sys.exit(0)


def test(TestString):
    """Parse 'TestString' as a regular expression, finalize the pattern
    (which applies the buffer-encoding transformation, see notes below)
    and print it.
    """
    print "-------------------------------------------------------------------"
    print "expression    = \"" + TestString + "\""
    # 'finalize(None)' completes the pattern without a count-action map.
    pattern = core.do(TestString, {}).finalize(None)

    # During 'finalize()': pattern.transform(Setup.buffer_encoding)
    # During 'finalize()': pattern.mount_post_context_sm()
    # During 'finalize()': pattern.mount_pre_context_sm()
    print "pattern\n"
Exemple #11
0
# Command-line check: first argument must be a buffer size in {5,6,7,8}.
if len(sys.argv) < 2:
    print "Argument not acceptable, use --hwut-info"
    sys.exit(0)

BS = int(sys.argv[1])

if BS not in [5, 6, 7, 8]:
    print "Argument not acceptable, use --hwut-info"
    sys.exit(0)

# Global test fixtures: C++ language database and a utf8 buffer; the
# skipper is triggered by the code point range [0x600, 0x700).
Language = "Cpp"
__Setup_init_language_database(Language)

trigger_set = NumberSet([Interval(0x600, 0x700)])
Setup.buffer_setup("", 1, "utf8")


def make(TriggerSet, BufferSize):
    """Build the character-set skipper test program for 'TriggerSet'.

    Generates skipper code for the fixed 'ANSI-C-from-file' language with
    the given buffer size, compiles it, and returns the pair
    (executable name, temporary file name) produced by 'compile()'.
    """
    language = "ANSI-C-from-file"
    skipper_code = create_character_set_skipper_code(
        language,
        "",
        TriggerSet,
        QuexBufferSize=BufferSize,
        InitialSkipF=False,
        OnePassOnlyF=True)
    return compile(language, skipper_code)


def core(Executable, BufferSize, TestStr):
from quex.engine.state_machine.core import DFA
from quex.engine.analyzer.door_id_address_label import DoorID
import quex.engine.analyzer.core as analyzer_generator
from quex.engine.analyzer.door_id_address_label import DialDB
from quex.engine.analyzer.transition_map import TransitionMap
import quex.engine.state_machine.transformation.core as bc_factory
import quex.output.analyzer.adapt as adapt
from   quex.blackboard                               import setup as Setup, \
                                                            E_IncidenceIDs, \
                                                            Lng
from collections import defaultdict

# Global test fixtures: door-id database, C++ output, 4-byte plain buffer.
dial_db = DialDB()

Setup.language_db = languages.db["C++"]()
Setup.buffer_setup("uint32_t", 4, "none")

# HWUT test-framework handshake: print title/choices and leave.
if "--hwut-info" in sys.argv:
    print "Single DFA_State: Transition Code Generation;"
    print "CHOICES: A, B, C, A-UTF8, B-UTF8, C-UTF8;"
    sys.exit(0)

# Map the command-line choice to a (test case, codec) pair; an empty
# codec string selects the plain (non-UTF8) buffer setup.
choice, codec = {
    "A": ("A", ""),
    "B": ("B", ""),
    "C": ("C", ""),
    "A-UTF8": ("A", "UTF8"),
    "B-UTF8": ("B", "UTF8"),
    "C-UTF8": ("C", "UTF8"),
}[sys.argv[1]]