Ejemplo n.º 1
0
    def buffer_codec_prepare(self, BufferCodecName, BufferCodecFileName=None, Module=None):
        """Determines: self.buffer_codec

        The codec description is selected in the following order:

          "utf8", "utf16"     --> codec_db.CodecDynamicInfo; 'Module' must
                                  be given.
          BufferCodecFileName --> codec_db.CodecTransformationInfo built from
                                  the given file name.
          "unicode"           --> codec_db.CodecInfo where both number sets
                                  span from 0 to the value delivered by
                                  'self.get_character_value_limit()'.
          "unit-test"         --> codec_db.CodecInfo named "unicode" where
                                  both number sets span from -sys.maxint
                                  to sys.maxint.
          anything else       --> codec_db.CodecTransformationInfo built from
                                  'BufferCodecName'.

        Except for "unit-test", the buffer element specification must have
        been done before this function is called.
        """
        assert    BufferCodecName == "unit-test" \
               or self.__buffer_element_specification_done_f == True

        if   BufferCodecName in ("utf8", "utf16"):
            assert Module is not None
            result = codec_db.CodecDynamicInfo(BufferCodecName, Module)
        elif BufferCodecFileName:
            # Probe only: the decomposed name is not used. The probe must
            # happen solely inside the 'try' block; an unguarded copy of
            # the call in front of it would raise before the error could be
            # reported through 'error.log'.
            try:
                os.path.splitext(os.path.basename(BufferCodecFileName))
            except Exception:
                # 'Exception' instead of a bare 'except' so that
                # KeyboardInterrupt and SystemExit pass through.
                error.log("cannot interpret string following '--codec-file'")
            result = codec_db.CodecTransformationInfo(FileName=BufferCodecFileName)
        elif BufferCodecName == "unicode":
            # (Still, 'icu' or 'iconv' may provide converted content, but ...)
            # If the internal buffer is 'unicode', then the pattern's state
            # machines are not converted. The requirement for the pattern's
            # range is the same as for the 'buffer element chunks'.
            result = codec_db.CodecInfo("unicode",
                                NumberSet.from_range(0, self.get_character_value_limit()),
                                NumberSet.from_range(0, self.get_character_value_limit()))
        elif BufferCodecName == "unit-test":
            result = codec_db.CodecInfo("unicode",
                                NumberSet.from_range(-sys.maxint, sys.maxint),
                                NumberSet.from_range(-sys.maxint, sys.maxint))

        else:
            result = codec_db.CodecTransformationInfo(BufferCodecName)

        self.buffer_codec = result
Ejemplo n.º 2
0
def get_setup(L0, L1, FSM0, FSM1, FSM2):
    """Build one count info and three state machines for a loop map test.

    L0, L1           -- borders handed to 'NumberSet.from_range()' for the
                        character set of the single COLUMN count info.
    FSM0, FSM1, FSM2 -- triggers of the first transition of sm0, sm1, sm2.

    RETURNS: [0] list containing the count info
             [1] list [sm0, sm1, sm2]

    PROPERTIES: -- sm0 and sm1 both trigger on NS_A in their second
                   transition.
                -- sm1 has a further transition on NS_B which starts at its
                   acceptance state.
                -- sm2 consists of a single transition.
    """
    column_iid    = dial_db.new_incidence_id()
    column_set    = NumberSet.from_range(L0, L1)
    column_action = CountAction(E_CharacterCountType.COLUMN, 0)
    ci_list       = [CountInfo(column_iid, column_set, column_action)]

    # sm0: init --FSM0--> x --NS_A--> acceptance
    sm0        = StateMachine()
    sm0_mid    = sm0.add_transition(sm0.init_state_index, FSM0)
    sm0_accept = sm0.add_transition(sm0_mid, NS_A, AcceptanceF=True)
    sm0.states[sm0_accept].mark_acceptance_id(dial_db.new_incidence_id())

    # sm1: init --FSM1--> x --NS_A--> acceptance; then a transition on NS_B
    #      with 'x' passed as third argument to 'add_transition()'.
    sm1        = StateMachine()
    sm1_mid    = sm1.add_transition(sm1.init_state_index, FSM1)
    sm1_accept = sm1.add_transition(sm1_mid, NS_A, AcceptanceF=True)
    sm1_iid    = dial_db.new_incidence_id()
    sm1.states[sm1_accept].mark_acceptance_id(sm1_iid)
    sm1_next   = sm1.add_transition(sm1_accept, NS_B, sm1_mid)
    sm1.states[sm1_next].mark_acceptance_id(sm1_iid)

    # sm2: init --FSM2--> acceptance
    sm2        = StateMachine()
    sm2_accept = sm2.add_transition(sm2.init_state_index, FSM2, AcceptanceF=True)
    sm2.states[sm2_accept].mark_acceptance_id(dial_db.new_incidence_id())

    return ci_list, [sm0, sm1, sm2]
Ejemplo n.º 3
0
def get_ca_list(L0, L1):
    """Return a list with one pair (number set, count action).

    The number set is built by 'NumberSet.from_range(L0, L1)'; the count
    action is a COLUMN count with the value 0.
    """
    character_set = NumberSet.from_range(L0, L1)
    column_action = CountAction(E_CharacterCountType.COLUMN, 0)
    return [(character_set, column_action)]
Ejemplo n.º 4
0
def get_setup(L0, L1, FSM0, FSM1, FSM2):
    """Build one count info and three state machines for a loop map test.

    L0, L1           -- borders handed to 'NumberSet.from_range()' for the
                        character set of the single COLUMN count info.
    FSM0, FSM1, FSM2 -- triggers of the first transition of sm0, sm1, sm2.

    RETURNS: [0] list containing the count info
             [1] list [sm0, sm1, sm2]

    PROPERTIES: -- sm0 and sm1 both trigger on NS_A in their second
                   transition.
                -- sm1 has a further transition on NS_B which starts at its
                   acceptance state.
                -- sm2 consists of a single transition.
    """
    column_iid    = dial_db.new_incidence_id()
    column_set    = NumberSet.from_range(L0, L1)
    column_action = CountAction(E_CharacterCountType.COLUMN, 0)
    ci_list       = [CountInfo(column_iid, column_set, column_action)]

    # sm0: init --FSM0--> x --NS_A--> acceptance
    sm0        = StateMachine()
    sm0_mid    = sm0.add_transition(sm0.init_state_index, FSM0)
    sm0_accept = sm0.add_transition(sm0_mid, NS_A, AcceptanceF=True)
    sm0.states[sm0_accept].mark_acceptance_id(dial_db.new_incidence_id())

    # sm1: init --FSM1--> x --NS_A--> acceptance; then a transition on NS_B
    #      with 'x' passed as third argument to 'add_transition()'.
    sm1        = StateMachine()
    sm1_mid    = sm1.add_transition(sm1.init_state_index, FSM1)
    sm1_accept = sm1.add_transition(sm1_mid, NS_A, AcceptanceF=True)
    sm1_iid    = dial_db.new_incidence_id()
    sm1.states[sm1_accept].mark_acceptance_id(sm1_iid)
    sm1_next   = sm1.add_transition(sm1_accept, NS_B, sm1_mid)
    sm1.states[sm1_next].mark_acceptance_id(sm1_iid)

    # sm2: init --FSM2--> acceptance
    sm2        = StateMachine()
    sm2_accept = sm2.add_transition(sm2.init_state_index, FSM2, AcceptanceF=True)
    sm2.states[sm2_accept].mark_acceptance_id(dial_db.new_incidence_id())

    return ci_list, [sm0, sm1, sm2]
Ejemplo n.º 5
0
 def __init__(self):
     """Initialize the base class for "utf16" and set up the error range
     of the first and of the second code unit.
     """
     code_unit_range = NumberSet.from_range(0, 0x10000)
     EncodingTrafoBySplit.__init__(self, "utf16",
                                   CodeUnitRange=code_unit_range)

     # Each error range is the complement of the intervals listed for the
     # code unit, taken with respect to 'NumberSet_All()'.
     listed_for_unit0 = NumberSet([
         Interval(0x0000, 0xDC00),
         Interval(0xE000, 0x10000),
     ])
     self.error_range_code_unit0 = listed_for_unit0.get_complement(NumberSet_All())

     listed_for_unit1 = NumberSet([
         Interval(0xDC00, 0xE000),
     ])
     self.error_range_code_unit1 = listed_for_unit1.get_complement(NumberSet_All())
Ejemplo n.º 6
0
    def _do_single(self, Code):
        """Transform the single value 'Code' by means of this table.

        RETURNS: List with one element--the value which remains in the
                 number set after the transformation.
        """
        single_value_set = NumberSet.from_range(Code, Code + 1)
        single_value_set.transform_by_table(self)

        # One code element is expected to result in exactly one character.
        assert single_value_set.has_size_one()
        transformed = single_value_set.get_the_only_element()
        return [ transformed ]
Ejemplo n.º 7
0
def test(ci_list, SM_list=None):
    """Compute the loop map for 'ci_list' and 'SM_list', check and print it.

    ci_list -- list of count infos, handed to 'CountInfoMap'.
    SM_list -- list of state machines; if omitted, an empty list is used.

    The result of 'loop._get_loop_map()' is passed to 'general_checks()'
    and to 'print_this()'.
    """
    # A default of '[]' in the signature would be one list object shared
    # by all calls. Create a fresh list per call instead.
    if SM_list is None:
        SM_list = []

    Setup.buffer_codec.source_set = NumberSet_All()
    ci_map                     = CountInfoMap(ci_list, NumberSet.from_range(0, 100))
    iid_loop_exit              = dial_db.new_incidence_id()
    loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list, iid_loop_exit)

    # Three empty lines in front of the report.
    print
    print
    print
    general_checks(loop_map, appendix_sm_list)
    print_this(loop_map, appendix_sm_list)
Ejemplo n.º 8
0
    def _do_single(self, Code):
        """Determine the sequence of values which represents 'Code'.

        RETURNS: -1, if the number set built for 'Code' is empty.
                 Otherwise, the list of the 'begin' values of the one
                 interval sequence that belongs to 'Code'.
        """
        single_value_set = NumberSet.from_range(Code, Code + 1)
        if single_value_set.is_empty():
            return -1

        intervals = single_value_set.get_intervals(PromiseToTreatWellF=True)
        assert len(intervals) == 1

        sequence_list = self.get_interval_sequences(intervals[0])
        # One code element must result in exactly one interval sequence,
        # and each interval of that sequence must contain a single value.
        assert len(sequence_list) == 1
        the_sequence = sequence_list[0]
        assert all(interval.size() == 1 for interval in the_sequence)

        return [interval.begin for interval in the_sequence]
Ejemplo n.º 9
0
def test(ci_list, SM_list=None):
    """Compute the loop map for 'ci_list' and 'SM_list', check and print it.

    ci_list -- list of count infos, handed to 'CountInfoMap'.
    SM_list -- list of state machines; if omitted, an empty list is used.

    The result of 'loop._get_loop_map()' is passed to 'general_checks()'
    and to 'print_this()'.
    """
    # A default of '[]' in the signature would be one list object shared
    # by all calls. Create a fresh list per call instead.
    if SM_list is None:
        SM_list = []

    Setup.buffer_codec.source_set = NumberSet_All()
    ci_map = CountInfoMap(ci_list, NumberSet.from_range(0, 100))
    iid_loop_exit = dial_db.new_incidence_id()
    loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list,
                                                    iid_loop_exit)

    # Three empty lines in front of the report.
    print
    print
    print
    general_checks(loop_map, appendix_sm_list)
    print_this(loop_map, appendix_sm_list)
Ejemplo n.º 10
0
    def buffer_codec_prepare(self,
                             BufferCodecName,
                             BufferCodecFileName=None,
                             Module=None):
        """Determines: self.buffer_codec

        The codec description is selected in the following order:

          "utf8", "utf16"     --> codec_db.CodecDynamicInfo; 'Module' must
                                  be given.
          BufferCodecFileName --> codec_db.CodecTransformationInfo built from
                                  the given file name.
          "unicode"           --> codec_db.CodecInfo where both number sets
                                  span from 0 to the value delivered by
                                  'self.get_character_value_limit()'.
          "unit-test"         --> codec_db.CodecInfo named "unicode" where
                                  both number sets span from -sys.maxint
                                  to sys.maxint.
          anything else       --> codec_db.CodecTransformationInfo built from
                                  'BufferCodecName'.

        Except for "unit-test", the buffer element specification must have
        been done before this function is called.
        """
        assert    BufferCodecName == "unit-test" \
               or self.__buffer_element_specification_done_f == True

        if BufferCodecName in ("utf8", "utf16"):
            assert Module is not None
            result = codec_db.CodecDynamicInfo(BufferCodecName, Module)
        elif BufferCodecFileName:
            # Probe only: the decomposed name is not used. The probe must
            # happen solely inside the 'try' block; an unguarded copy of
            # the call in front of it would raise before the error could be
            # reported through 'error.log'.
            try:
                os.path.splitext(os.path.basename(BufferCodecFileName))
            except Exception:
                # 'Exception' instead of a bare 'except' so that
                # KeyboardInterrupt and SystemExit pass through.
                error.log("cannot interpret string following '--codec-file'")
            result = codec_db.CodecTransformationInfo(
                FileName=BufferCodecFileName)
        elif BufferCodecName == "unicode":
            # (Still, 'icu' or 'iconv' may provide converted content, but ...)
            # If the internal buffer is 'unicode', then the pattern's state
            # machines are not converted. The requirement for the pattern's
            # range is the same as for the 'buffer element chunks'.
            result = codec_db.CodecInfo(
                "unicode",
                NumberSet.from_range(0, self.get_character_value_limit()),
                NumberSet.from_range(0, self.get_character_value_limit()))
        elif BufferCodecName == "unit-test":
            result = codec_db.CodecInfo(
                "unicode", NumberSet.from_range(-sys.maxint, sys.maxint),
                NumberSet.from_range(-sys.maxint, sys.maxint))

        else:
            result = codec_db.CodecTransformationInfo(BufferCodecName)

        self.buffer_codec = result
Ejemplo n.º 11
0
    def __init__(self):
        """Initialize the base class for "utf8" and set up the error range
        of the first byte and of the bytes that follow.
        """
        byte_range = NumberSet.from_range(0, 0x100)
        EncodingTrafoBySplit.__init__(self, "utf8", CodeUnitRange=byte_range)
        self.UnchangedRange = 0x7F

        # (first, last) value of each bit pattern listed for byte 0.
        byte0_span_list = [
            (0b00000000, 0b01111111),   # 0xxxxxxx
            (0b11000000, 0b11011111),   # 110xxxxx
            (0b11100000, 0b11101111),   # 1110xxxx
            (0b11110000, 0b11110111),   # 11110xxx
            (0b11111000, 0b11111011),   # 111110xx
            (0b11111100, 0b11111101),   # 1111110x
        ]
        listed_for_byte0 = NumberSet([
            Interval(first, last + 1) for first, last in byte0_span_list
        ])
        # Error range: complement of what is listed, taken with respect
        # to 'NumberSet_All()'.
        self.error_range_byte0 = listed_for_byte0.get_complement(NumberSet_All())

        # Bit pattern listed for the bytes that follow: 10xxxxxx
        listed_for_byteN = NumberSet(Interval(0b10000000, 0b10111111 + 1))
        self.error_range_byteN = listed_for_byteN.get_complement(NumberSet_All())
Ejemplo n.º 12
0
#! /usr/bin/env python
import sys
import os

sys.path.insert(0, os.environ["QUEX_PATH"])

from quex.engine.misc.interval_handling import Interval, NumberSet

# Reference set against which 'test()' computes the complement.
# NOTE(review): this rebinds the builtin name 'all' at module level.
all = NumberSet.from_range(-sys.maxint, sys.maxint)

# Upon '--hwut-info': print the title and the available choices, then quit.
if "--hwut-info" in sys.argv:
    print "NumberSet: Inverse"
    print "CHOICES: 1, 2, serious;"
    sys.exit(0)


def test(NSet):
    print "# write output in temporary file: 'tmp'"
    print "# plot with gnuplot:"
    print "# > plot \"tmp\" w l"

    print NSet.gnuplot_string(1)
    result = NSet.get_complement(all)
    result.assert_consistency()
    print result.gnuplot_string(0)


if "1" in sys.argv:
    test(
        NumberSet([
            Interval(10, 20),
Ejemplo n.º 13
0
def get_unicode_range():
    """Return the number set built from the borders 0 and 0x110000."""
    begin, end = 0, 0x110000
    return NumberSet.from_range(begin, end)
Ejemplo n.º 14
0
def get_codec_element_range():
    """Return the value range of a codec element of one byte, i.e. the
    number set built from the borders 0 and 0x100.
    """
    begin, end = 0, 0x100
    return NumberSet.from_range(begin, end)
Ejemplo n.º 15
0
 def get_sm(SmId, Trigger):
     """Build a state machine from an incidence id map with one entry.

     The entry associates the number set built from the borders 'Trigger'
     and 'Trigger + 1' with 'SmId'. 'SmId' also becomes the id of the
     state machine.
     """
     trigger_set      = NumberSet.from_range(Trigger, Trigger + 1)
     incidence_id_map = [(trigger_set, SmId)]
     result           = StateMachine.from_IncidenceIdMap(incidence_id_map)
     result.set_id(SmId)
     return result
Ejemplo n.º 16
0
            if k == 0:
                self.__cursor[k] += 2
                if self.__cursor[k] < 8: 
                    break
            else:
                self.__cursor[k] += 1
                if self.__cursor[k] < 3:
                    break
            self.__cursor[k] = 1
            k += 1

        return result

# Source of the number sets that are collected below.
generator = NumberSetGenerator()

# NOTE(review): this rebinds the builtin name 'all' at module level.
all = NumberSet.from_range(-sys.maxint, sys.maxint)

# Collect 100 number sets from the generator.
# NOTE(review): 'generator.get()' is called twice per iteration. The first
# result is bound to 'result' and is not used inside this loop; only the
# second one is stored. Confirm that skipping every other set is intended.
number_set_list = []
for i in range(100):
    result = generator.get()
    number_set_list.append(generator.get())

def test(N1, Op1, N2, Op2):
    global number_set_list
    the_tester = Tester(N1, Op1, N2, Op2)

    # Permutate all existing intervals against each other
    count_n = 0
    for i, x in enumerate(number_set_list):
        for y in number_set_list[i + 1:]:
Ejemplo n.º 17
0
 def get_sm(SmId, Trigger):
     """Build a state machine from an incidence id map with one entry.

     The entry associates the number set built from the borders 'Trigger'
     and 'Trigger + 1' with 'SmId'. 'SmId' also becomes the id of the
     state machine.
     """
     trigger_set      = NumberSet.from_range(Trigger, Trigger + 1)
     incidence_id_map = [(trigger_set, SmId)]
     result           = StateMachine.from_IncidenceIdMap(incidence_id_map)
     result.set_id(SmId)
     return result
Ejemplo n.º 18
0
import sys
import os
sys.path.insert(0, os.environ["QUEX_PATH"])

from   quex.engine.counter                        import LineColumnCount, \
                                                         CountAction
from quex.engine.state_machine.core import StateMachine
from   quex.engine.misc.interval_handling         import NumberSet, \
                                                         NumberSet_All
from quex.engine.analyzer.door_id_address_label import dial_db
from   quex.engine.loop_counter                   import CountInfoMap, \
                                                         CountInfo
import quex.output.core.loop as loop
from   quex.blackboard                            import E_CharacterCountType, \
                                                         setup as Setup
# One number set per letter 'A' to 'D'; each is built from the borders
# 'ord(letter)' and 'ord(letter) + 1'.
NS_A = NumberSet.from_range(ord('A'), ord('A') + 1)
NS_B = NumberSet.from_range(ord('B'), ord('B') + 1)
NS_C = NumberSet.from_range(ord('C'), ord('C') + 1)
NS_D = NumberSet.from_range(ord('D'), ord('D') + 1)

# Upon '--hwut-info': print the title and the available choices.
# NOTE(review): no exit follows here; execution continues after the
# two lines have been printed -- confirm that this is intended.
if "--hwut-info" in sys.argv:
    print "Loop: Get Loop Map."
    print "CHOICES: Plain, AppendixNoI, AppendixI, Split;"


def test(ci_list, SM_list=[]):
    Setup.buffer_codec.source_set = NumberSet_All()
    ci_map = CountInfoMap(ci_list, NumberSet.from_range(0, 100))
    iid_loop_exit = dial_db.new_incidence_id()
    loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list,
                                                    iid_loop_exit)
Ejemplo n.º 19
0
def get_codec_element_range():
    """Return the value range of a codec element of two bytes, i.e. the
    number set built from the borders 0 and 0x10000.
    """
    begin, end = 0, 0x10000
    return NumberSet.from_range(begin, end)
Ejemplo n.º 20
0
def _enter(result, begin, end, target_state_setup):
    """Register the range from 'begin' to 'end' under 'target_state_setup'.

    result -- dictionary: target state setup --> number set. It is
              modified in place.

    If an entry for 'target_state_setup' exists, the interval is appended
    to it by 'quick_append_interval()'. Otherwise, a new number set is
    entered.
    """
    existing = result.get(target_state_setup)
    if existing is not None:
        existing.quick_append_interval(Interval(begin, end))
        return

    result[target_state_setup] = NumberSet.from_range(begin, end)
Ejemplo n.º 21
0
 def set_all_character_set_UNIT_TEST(self, Begin, End):
     """Replace 'self.buffer_codec.source_set' by the number set built
     from the borders 'Begin' and 'End'.
     """
     replacement_set = NumberSet.from_range(Begin, End)
     self.buffer_codec.source_set = replacement_set
Ejemplo n.º 22
0
def get_unicode_range():
    """Return the number set built from the borders 0 and 0x110000."""
    begin, end = 0, 0x110000
    return NumberSet.from_range(begin, end)
Ejemplo n.º 23
0
        return sm

    return [
        get_sm(lei.appendix_sm_id, trigger) for trigger, lei in enumerate(LoopMap)
        if lei.appendix_sm_has_transitions_f
    ]

def print_this(AnalyzerList):
    print "#_[ Print %i analyzer(s) ]______________________________" % len(AnalyzerList)
    print
    for i, analyzer in enumerate(AnalyzerList):
        print "--( %i: init si = %i )-------------------------\n" % (i, analyzer.init_state_index)
        print analyzer

# Number sets NS_A to NS_E depend on 'encoding':
#   "unicode"     --> one set per letter 'A' to 'E', built from the borders
#                     'ord(letter)' and 'ord(letter) + 1'.
#   anything else --> one set per value 0x600 to 0x604, built from the
#                     borders 'value' and 'value + 1'.
if encoding == "unicode":
    NS_A = NumberSet.from_range(ord('A'), ord('A') + 1)
    NS_B = NumberSet.from_range(ord('B'), ord('B') + 1)
    NS_C = NumberSet.from_range(ord('C'), ord('C') + 1)
    NS_D = NumberSet.from_range(ord('D'), ord('D') + 1)
    NS_E = NumberSet.from_range(ord('E'), ord('E') + 1)
else:
    NS_A = NumberSet.from_range(0x600, 0x601)
    NS_B = NumberSet.from_range(0x601, 0x602)
    NS_C = NumberSet.from_range(0x602, 0x603)
    NS_D = NumberSet.from_range(0x603, 0x604)
    NS_E = NumberSet.from_range(0x604, 0x605)

# One count action per character count type: COLUMN, LINE, GRID, WHITESPACE.
# Each carries a different value (5, 1, 2, 3).
CA_0 = CountAction(E_CharacterCountType.COLUMN,     5)
CA_1 = CountAction(E_CharacterCountType.LINE,       1)
CA_2 = CountAction(E_CharacterCountType.GRID,       2)
CA_3 = CountAction(E_CharacterCountType.WHITESPACE, 3)
Ejemplo n.º 24
0
import sys
import os
sys.path.insert(0, os.environ["QUEX_PATH"])

from   quex.engine.counter                        import CountAction, \
                                                         CountActionMap
from quex.engine.state_machine.core import DFA
from   quex.engine.misc.interval_handling         import NumberSet, \
                                                         NumberSet_All
import quex.engine.analyzer.door_id_address_label as dial
from quex.engine.analyzer.door_id_address_label import DialDB
import quex.engine.loop.core as loop
from quex.constants import E_CharacterCountType, E_Op
from quex.blackboard import setup as Setup

# One number set per letter 'A' to 'D'; each is built from the borders
# 'ord(letter)' and 'ord(letter) + 1'.
NS_A = NumberSet.from_range(ord('A'), ord('A') + 1)
NS_B = NumberSet.from_range(ord('B'), ord('B') + 1)
NS_C = NumberSet.from_range(ord('C'), ord('C') + 1)
NS_D = NumberSet.from_range(ord('D'), ord('D') + 1)

# Module level 'DialDB' instance; 'test()' declares it as global.
dial_db = DialDB()

# Upon '--hwut-info': print the title and the available choices.
# NOTE(review): no exit follows here; execution continues after the
# two lines have been printed -- confirm that this is intended.
if "--hwut-info" in sys.argv:
    print "Loop: Get Loop Map."
    print "CHOICES: Plain, AppendixNoI, AppendixI, Split;"


def test(NsCaList, SM_list=[]):
    global dial_db
    Setup.buffer_encoding.source_set = NumberSet_All()
    ca_map = CountActionMap.from_list(NsCaList)
Ejemplo n.º 25
0
 def __init__(self, Name, ErrorRangeByCodeUnitDb):
     """Initialize the base class 'base.EncodingTrafo' with 'Name', the
     number set built from the borders 0 and 0x110000, and
     'ErrorRangeByCodeUnitDb'.
     """
     full_range = NumberSet.from_range(0, 0x110000)
     base.EncodingTrafo.__init__(self, Name, full_range,
                                 ErrorRangeByCodeUnitDb)
Ejemplo n.º 26
0
#! /usr/bin/env python
import sys
import os
sys.path.insert(0, os.environ["QUEX_PATH"])

from quex.engine.misc.interval_handling import Interval, NumberSet
from quex.constants import INTEGER_MAX

# Reference set against which 'test()' computes the complement.
# NOTE(review): this rebinds the builtin name 'all' at module level.
all = NumberSet.from_range(-INTEGER_MAX, INTEGER_MAX)

# Upon '--hwut-info': print the title and the available choices, then quit.
if "--hwut-info" in sys.argv:
    print "NumberSet: Inverse"
    print "CHOICES: 1, 2, serious;"
    sys.exit(0)

def test(NSet):
    print "# write output in temporary file: 'tmp'"    
    print "# plot with gnuplot:"
    print "# > plot \"tmp\" w l"
    
    print NSet.gnuplot_string(1)
    result = NSet.get_complement(all)
    result.assert_consistency()
    print result.gnuplot_string(0)

if "1" in sys.argv:
    test(NumberSet([Interval(10,20),   Interval(21,30),
                    Interval(50,70),   Interval(71,80),
                    Interval(80,81),   Interval(82,90),
                    Interval(90,100),  Interval(110,130),
                    Interval(150,170), Interval(171,190),
Ejemplo n.º 27
0
import sys
import os
sys.path.insert(0, os.environ["QUEX_PATH"])

from   quex.engine.counter                        import LineColumnCount, \
                                                         CountAction
from   quex.engine.state_machine.core             import StateMachine  
from   quex.engine.misc.interval_handling         import NumberSet, \
                                                         NumberSet_All
from   quex.engine.analyzer.door_id_address_label import dial_db
from   quex.engine.loop_counter                   import CountInfoMap, \
                                                         CountInfo
import quex.output.core.loop                      as     loop
from   quex.blackboard                            import E_CharacterCountType, \
                                                         setup as Setup
# One number set per letter 'A' to 'D'; each is built from the borders
# 'ord(letter)' and 'ord(letter) + 1'.
NS_A = NumberSet.from_range(ord('A'), ord('A') + 1)
NS_B = NumberSet.from_range(ord('B'), ord('B') + 1)
NS_C = NumberSet.from_range(ord('C'), ord('C') + 1)
NS_D = NumberSet.from_range(ord('D'), ord('D') + 1)

# Upon '--hwut-info': print the title and the available choices.
# NOTE(review): no exit follows here; execution continues after the
# two lines have been printed -- confirm that this is intended.
if "--hwut-info" in sys.argv:
    print "Loop: Get Loop Map."
    print "CHOICES: Plain, AppendixNoI, AppendixI, Split;"

def test(ci_list, SM_list=[]):
    Setup.buffer_codec.source_set = NumberSet_All()
    ci_map                     = CountInfoMap(ci_list, NumberSet.from_range(0, 100))
    iid_loop_exit              = dial_db.new_incidence_id()
    loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list, iid_loop_exit) 

    print
Ejemplo n.º 28
0
        if lei.appendix_sm_has_transitions_f
    ]


def print_this(AnalyzerList):
    print "#_[ Print %i analyzer(s) ]______________________________" % len(
        AnalyzerList)
    print
    for i, analyzer in enumerate(AnalyzerList):
        print "--( %i: init si = %i )-------------------------\n" % (
            i, analyzer.init_state_index)
        print analyzer


# Number sets NS_A to NS_E depend on 'encoding':
#   "unicode"     --> one set per letter 'A' to 'E', built from the borders
#                     'ord(letter)' and 'ord(letter) + 1'.
#   anything else --> one set per value 0x600 to 0x604, built from the
#                     borders 'value' and 'value + 1'.
if encoding == "unicode":
    NS_A = NumberSet.from_range(ord('A'), ord('A') + 1)
    NS_B = NumberSet.from_range(ord('B'), ord('B') + 1)
    NS_C = NumberSet.from_range(ord('C'), ord('C') + 1)
    NS_D = NumberSet.from_range(ord('D'), ord('D') + 1)
    NS_E = NumberSet.from_range(ord('E'), ord('E') + 1)
else:
    NS_A = NumberSet.from_range(0x600, 0x601)
    NS_B = NumberSet.from_range(0x601, 0x602)
    NS_C = NumberSet.from_range(0x602, 0x603)
    NS_D = NumberSet.from_range(0x603, 0x604)
    NS_E = NumberSet.from_range(0x604, 0x605)

# One count action per character count type: COLUMN, LINE, GRID, WHITESPACE.
# Each carries a different value (5, 1, 2, 3).
CA_0 = CountAction(E_CharacterCountType.COLUMN, 5)
CA_1 = CountAction(E_CharacterCountType.LINE, 1)
CA_2 = CountAction(E_CharacterCountType.GRID, 2)
CA_3 = CountAction(E_CharacterCountType.WHITESPACE, 3)
Ejemplo n.º 29
0
        if ta.door_id in done: continue
        assert len(ta.command_list) == 1
        cmd = ta.command_list[0]
        print "%s => %s" % (ta.door_id, cmd.content.router_element)
        done.add(ta.door_id)

def print_this(AnalyzerList):
    print "#_[ Print %i analyzer(s) ]______________________________" % len(AnalyzerList)
    print
    for i, analyzer in enumerate(AnalyzerList):
        print "--( %i: init si = %i )-------------------------\n" % (i, analyzer.init_state_index)
        print analyzer
        print_drop_out(analyzer)


# Number sets for the values 0x600, 0x601, and 0x640; each is built from
# the borders 'value' and 'value + 1'. The trailing remarks give the UTF8
# byte sequence of the value in hexadecimal and decimal notation.
NS_A = NumberSet.from_range(0x600, 0x601) # UTF8: D8 80 => 216, 128
NS_B = NumberSet.from_range(0x601, 0x602) # UTF8: D8 81 => 216, 129
NS_C = NumberSet.from_range(0x640, 0x641) # UTF8: D9 80 => 217, 128

# Fixed id (a Python 2 'long') which the "appendix" choice passes to
# 'TestLME'.
appendix_sm_id = 4711L

if "loop" in sys.argv:
    loop_map = loop.LoopMap([
        TestLME(NS_A, dial.new_incidence_id(), None),
    ])
    column_n_per_code_unit = 5
elif "appendix" in sys.argv:
    loop_map = loop.LoopMap([
        TestLME(NS_A, dial.new_incidence_id(), appendix_sm_id), # appendix_sm_id
    ])
    column_n_per_code_unit = 5
Ejemplo n.º 30
0
 def __init__(self, Name, CodeUnitRange):
     """Initialize the base class 'base.EncodingTrafo' with 'Name', the
     number set built from the borders 0 and 0x110000, and 'CodeUnitRange'.
     """
     full_range = NumberSet.from_range(0, 0x110000)
     base.EncodingTrafo.__init__(self, Name, full_range, CodeUnitRange)
Ejemplo n.º 31
0
 def set_all_character_set_UNIT_TEST(self, Begin, End):
     """Replace 'self.buffer_codec.source_set' by the number set built
     from the borders 'Begin' and 'End'.
     """
     replacement_set = NumberSet.from_range(Begin, End)
     self.buffer_codec.source_set = replacement_set