def buffer_codec_prepare(self, BufferCodecName, BufferCodecFileName=None, Module=None):
    """Select the buffer codec description and store it in 'self.buffer_codec'.

    BufferCodecName     -- "utf8" or "utf16" (dynamic codec; 'Module' must
                           not be None), "unicode", "unit-test", or any other
                           name, which is handed to
                           'codec_db.CodecTransformationInfo'.
    BufferCodecFileName -- if set, and the name is neither "utf8" nor "utf16",
                           the codec is described by this file.
    Module              -- passed on to 'codec_db.CodecDynamicInfo'.

    Unless the name is "unit-test", the buffer element specification must
    already have been done (asserted).
    """
    assert    BufferCodecName == "unit-test" \
           or self.__buffer_element_specification_done_f == True

    if BufferCodecName in ("utf8", "utf16"):
        assert Module is not None
        result = codec_db.CodecDynamicInfo(BufferCodecName, Module)

    elif BufferCodecFileName:
        # Probe only: the result is discarded. The call must live INSIDE the
        # 'try', otherwise a failure bypasses the error message below.
        try:
            os.path.splitext(os.path.basename(BufferCodecFileName))
        except Exception:
            error.log("cannot interpret string following '--codec-file'")
        result = codec_db.CodecTransformationInfo(FileName=BufferCodecFileName)

    elif BufferCodecName == "unicode":
        # (Still, 'icu' or 'iconv' may provide converted content, but ...)
        # If the internal buffer is 'unicode', then the pattern's state
        # machines are not converted. The requirement for the pattern's
        # range is the same as for the 'buffer element chunks'.
        result = codec_db.CodecInfo("unicode",
                                    NumberSet.from_range(0, self.get_character_value_limit()),
                                    NumberSet.from_range(0, self.get_character_value_limit()))

    elif BufferCodecName == "unit-test":
        # Unit tests get the widest range the integer type offers.
        result = codec_db.CodecInfo("unicode",
                                    NumberSet.from_range(-sys.maxint, sys.maxint),
                                    NumberSet.from_range(-sys.maxint, sys.maxint))

    else:
        result = codec_db.CodecTransformationInfo(BufferCodecName)

    self.buffer_codec = result
def get_setup(L0, L1, FSM0, FSM1, FSM2):
    """Build one count info entry and three state machines for a test setup.

    L0, L1           -- limits handed to 'NumberSet.from_range' for the single
                        COLUMN count info entry.
    FSM0, FSM1, FSM2 -- trigger sets of the first transition of the first,
                        second, and third state machine.

    RETURNS: (list of CountInfo, [sm0, sm1, sm2])

    SPECIALITIES: -- first and second machine both continue on 'NS_A', i.e.
                     their second transitions intersect.
                  -- the second machine transits further (on 'NS_B') after
                     acceptance.
                  -- the third machine has only one transition.
    """
    column_info = CountInfo(dial_db.new_incidence_id(),
                            NumberSet.from_range(L0, L1),
                            CountAction(E_CharacterCountType.COLUMN, 0))

    # First machine: FSM0 --> NS_A --> acceptance.
    first = StateMachine()
    first_mid_si = first.add_transition(first.init_state_index, FSM0)
    first_end_si = first.add_transition(first_mid_si, NS_A, AcceptanceF=True)
    first.states[first_end_si].mark_acceptance_id(dial_db.new_incidence_id())

    # Second machine: FSM1 --> NS_A --> acceptance --> NS_B --> back to the
    # state reached by FSM1. Both marked states share one incidence id.
    second = StateMachine()
    second_mid_si = second.add_transition(second.init_state_index, FSM1)
    second_end_si = second.add_transition(second_mid_si, NS_A, AcceptanceF=True)
    shared_iid = dial_db.new_incidence_id()
    second.states[second_end_si].mark_acceptance_id(shared_iid)
    back_si = second.add_transition(second_end_si, NS_B, second_mid_si)
    second.states[back_si].mark_acceptance_id(shared_iid)

    # Third machine: a single accepting transition on FSM2.
    third = StateMachine()
    third_end_si = third.add_transition(third.init_state_index, FSM2,
                                        AcceptanceF=True)
    third.states[third_end_si].mark_acceptance_id(dial_db.new_incidence_id())

    return [column_info], [first, second, third]
def get_ca_list(L0, L1):
    """Return a one-element list of (NumberSet, CountAction) pairs.

    The number set is 'NumberSet.from_range(L0, L1)'; the associated action
    is a COLUMN count action with value 0.
    """
    character_set = NumberSet.from_range(L0, L1)
    column_action = CountAction(E_CharacterCountType.COLUMN, 0)

    pair_list = []
    pair_list.append((character_set, column_action))
    return pair_list
def __init__(self):
    """Set up the "utf16" transformation and its per-code-unit error ranges.

    The code unit range is 'NumberSet.from_range(0, 0x10000)'. An error range
    is the complement (with respect to 'NumberSet_All()') of the intervals
    listed as admissible for that code unit.
    """
    code_unit_range = NumberSet.from_range(0, 0x10000)
    EncodingTrafoBySplit.__init__(self, "utf16", CodeUnitRange=code_unit_range)

    # Code unit 0: anything outside the two listed intervals is an error.
    admissible_unit0 = NumberSet([
        Interval(0x0000, 0xDC00),
        Interval(0xE000, 0x10000),
    ])
    self.error_range_code_unit0 = admissible_unit0.get_complement(NumberSet_All())

    # Code unit 1: anything outside the single listed interval is an error.
    admissible_unit1 = NumberSet([
        Interval(0xDC00, 0xE000),
    ])
    self.error_range_code_unit1 = admissible_unit1.get_complement(NumberSet_All())
def _do_single(self, Code):
    """Transform the single value 'Code' by this table.

    RETURNS: List with exactly one element: the transformed value.
    """
    single = NumberSet.from_range(Code, Code + 1)
    single.transform_by_table(self)

    # A single code element can only produce a single character!
    assert single.has_size_one()

    transformed = single.get_the_only_element()
    return [transformed]
# test(ci_list, SM_list): Derive and print the loop map for a list of count infos.
#
#   -- Sets 'Setup.buffer_codec.source_set' to 'NumberSet_All()'.
#   -- Builds a 'CountInfoMap' from 'ci_list' and 'NumberSet.from_range(0, 100)'.
#   -- Requests a new incidence id from 'dial_db' for the loop exit.
#   -- Calls 'loop._get_loop_map()' to obtain the loop map and the list of
#      appendix state machines.
#   -- Emits blank lines, then passes the result to 'general_checks()' and
#      'print_this()'.
#
# NOTE(review): 'SM_list=[]' is a default list shared between calls; this is
#               only safe while no callee modifies it -- confirm.
def test(ci_list, SM_list=[]): Setup.buffer_codec.source_set = NumberSet_All() ci_map = CountInfoMap(ci_list, NumberSet.from_range(0, 100)) iid_loop_exit = dial_db.new_incidence_id() loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list, iid_loop_exit) print print print general_checks(loop_map, appendix_sm_list) print_this(loop_map, appendix_sm_list)
def _do_single(self, Code):
    """Determine the interval sequence that represents the value 'Code'.

    RETURNS: -1, if the number set for 'Code' is empty.
             Otherwise, the list of 'begin' values of the intervals of the
             one interval sequence delivered by 'get_interval_sequences()'.
    """
    single = NumberSet.from_range(Code, Code + 1)
    if single.is_empty():
        return -1

    intervals = single.get_intervals(PromiseToTreatWellF=True)
    assert len(intervals) == 1

    sequences = self.get_interval_sequences(intervals[0])
    # A single code element can only produce a single interval sequence!
    assert len(sequences) == 1

    sequence = sequences[0]
    # Each interval of the sequence must contain exactly one value.
    for interval in sequence:
        assert interval.size() == 1

    return [interval.begin for interval in sequence]
def buffer_codec_prepare(self, BufferCodecName, BufferCodecFileName=None, Module=None):
    """Select the buffer codec description and store it in 'self.buffer_codec'.

    BufferCodecName     -- "utf8" or "utf16" (dynamic codec; 'Module' must
                           not be None), "unicode", "unit-test", or any other
                           name, which is handed to
                           'codec_db.CodecTransformationInfo'.
    BufferCodecFileName -- if set, and the name is neither "utf8" nor "utf16",
                           the codec is described by this file.
    Module              -- passed on to 'codec_db.CodecDynamicInfo'.

    Unless the name is "unit-test", the buffer element specification must
    already have been done (asserted).
    """
    assert    BufferCodecName == "unit-test" \
           or self.__buffer_element_specification_done_f == True

    if BufferCodecName in ("utf8", "utf16"):
        assert Module is not None
        result = codec_db.CodecDynamicInfo(BufferCodecName, Module)

    elif BufferCodecFileName:
        # Probe only: the result is discarded. The call must live INSIDE the
        # 'try', otherwise a failure bypasses the error message below.
        try:
            os.path.splitext(os.path.basename(BufferCodecFileName))
        except Exception:
            error.log("cannot interpret string following '--codec-file'")
        result = codec_db.CodecTransformationInfo(
            FileName=BufferCodecFileName)

    elif BufferCodecName == "unicode":
        # (Still, 'icu' or 'iconv' may provide converted content, but ...)
        # If the internal buffer is 'unicode', then the pattern's state
        # machines are not converted. The requirement for the pattern's
        # range is the same as for the 'buffer element chunks'.
        result = codec_db.CodecInfo(
            "unicode",
            NumberSet.from_range(0, self.get_character_value_limit()),
            NumberSet.from_range(0, self.get_character_value_limit()))

    elif BufferCodecName == "unit-test":
        # Unit tests get the widest range the integer type offers.
        result = codec_db.CodecInfo(
            "unicode",
            NumberSet.from_range(-sys.maxint, sys.maxint),
            NumberSet.from_range(-sys.maxint, sys.maxint))

    else:
        result = codec_db.CodecTransformationInfo(BufferCodecName)

    self.buffer_codec = result
def __init__(self):
    """Set up the "utf8" transformation and its per-byte error ranges.

    The code unit range is 'NumberSet.from_range(0, 0x100)'. An error range
    is the complement (with respect to 'NumberSet_All()') of the byte
    patterns listed as admissible at that position.
    """
    byte_range = NumberSet.from_range(0, 0x100)
    EncodingTrafoBySplit.__init__(self, "utf8", CodeUnitRange=byte_range)
    self.UnchangedRange = 0x7F

    # (first, last) pairs of admissible values for byte 0; 'last' is
    # included, hence the '+ 1' when the interval is built.
    byte0_spans = (
        (0b00000000, 0b01111111),
        (0b11000000, 0b11011111),
        (0b11100000, 0b11101111),
        (0b11110000, 0b11110111),
        (0b11111000, 0b11111011),
        (0b11111100, 0b11111101),
    )
    admissible_byte0 = NumberSet([
        Interval(first, last + 1) for first, last in byte0_spans
    ])
    self.error_range_byte0 = admissible_byte0.get_complement(NumberSet_All())

    # Bytes after the first: only the pattern 0b10xxxxxx is admissible.
    admissible_byteN = NumberSet(Interval(0b10000000, 0b10111111 + 1))
    self.error_range_byteN = admissible_byteN.get_complement(NumberSet_All())
#! /usr/bin/env python import sys import os sys.path.insert(0, os.environ["QUEX_PATH"]) from quex.engine.misc.interval_handling import Interval, NumberSet all = NumberSet.from_range(-sys.maxint, sys.maxint) if "--hwut-info" in sys.argv: print "NumberSet: Inverse" print "CHOICES: 1, 2, serious;" sys.exit(0) def test(NSet): print "# write output in temporary file: 'tmp'" print "# plot with gnuplot:" print "# > plot \"tmp\" w l" print NSet.gnuplot_string(1) result = NSet.get_complement(all) result.assert_consistency() print result.gnuplot_string(0) if "1" in sys.argv: test( NumberSet([ Interval(10, 20),
def get_unicode_range():
    """RETURNS: The number set 'NumberSet.from_range(0, 0x110000)'."""
    unicode_range = NumberSet.from_range(0, 0x110000)
    return unicode_range
def get_codec_element_range():
    """Codec element's size is 1 byte.

    RETURNS: The number set 'NumberSet.from_range(0, 0x100)'.
    """
    one_byte_range = NumberSet.from_range(0, 0x100)
    return one_byte_range
def get_sm(SmId, Trigger):
    """Build a state machine from a one-entry incidence id map.

    The map associates the number set for the single value 'Trigger' with
    'SmId'. The resulting state machine's id is set to 'SmId'.

    RETURNS: The state machine.
    """
    trigger_set = NumberSet.from_range(Trigger, Trigger + 1)
    incidence_id_map = [(trigger_set, SmId)]

    machine = StateMachine.from_IncidenceIdMap(incidence_id_map)
    machine.set_id(SmId)
    return machine
if k == 0: self.__cursor[k] += 2 if self.__cursor[k] < 8: break else: self.__cursor[k] += 1 if self.__cursor[k] < 3: break self.__cursor[k] = 1 k += 1 return result generator = NumberSetGenerator() all = NumberSet.from_range(-sys.maxint, sys.maxint) # Generate 100 NumberSets number_set_list = [] for i in range(100): result = generator.get() number_set_list.append(generator.get()) def test(N1, Op1, N2, Op2): global number_set_list the_tester = Tester(N1, Op1, N2, Op2) # Permutate all existing intervals against each other count_n = 0 for i, x in enumerate(number_set_list): for y in number_set_list[i + 1:]:
import sys import os sys.path.insert(0, os.environ["QUEX_PATH"]) from quex.engine.counter import LineColumnCount, \ CountAction from quex.engine.state_machine.core import StateMachine from quex.engine.misc.interval_handling import NumberSet, \ NumberSet_All from quex.engine.analyzer.door_id_address_label import dial_db from quex.engine.loop_counter import CountInfoMap, \ CountInfo import quex.output.core.loop as loop from quex.blackboard import E_CharacterCountType, \ setup as Setup NS_A = NumberSet.from_range(ord('A'), ord('A') + 1) NS_B = NumberSet.from_range(ord('B'), ord('B') + 1) NS_C = NumberSet.from_range(ord('C'), ord('C') + 1) NS_D = NumberSet.from_range(ord('D'), ord('D') + 1) if "--hwut-info" in sys.argv: print "Loop: Get Loop Map." print "CHOICES: Plain, AppendixNoI, AppendixI, Split;" def test(ci_list, SM_list=[]): Setup.buffer_codec.source_set = NumberSet_All() ci_map = CountInfoMap(ci_list, NumberSet.from_range(0, 100)) iid_loop_exit = dial_db.new_incidence_id() loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list, iid_loop_exit)
def get_codec_element_range():
    """Codec element's size is 2 bytes.

    RETURNS: The number set 'NumberSet.from_range(0, 0x10000)'.
    """
    two_byte_range = NumberSet.from_range(0, 0x10000)
    return two_byte_range
def _enter(result, begin, end, target_state_setup):
    """Register the range given by 'begin' and 'end' for 'target_state_setup'.

    result -- dictionary: target state setup --> NumberSet. It is modified
              in place.

    If 'result' has an entry for the key, the interval is appended to it by
    'quick_append_interval()'. Otherwise, a new number set is entered.
    """
    known = result.get(target_state_setup)
    if known is not None:
        known.quick_append_interval(Interval(begin, end))
        return

    result[target_state_setup] = NumberSet.from_range(begin, end)
def set_all_character_set_UNIT_TEST(self, Begin, End):
    """Replace the buffer codec's source set by the range 'Begin' to 'End'.

    The new value of 'self.buffer_codec.source_set' is
    'NumberSet.from_range(Begin, End)'.
    """
    replacement = NumberSet.from_range(Begin, End)
    self.buffer_codec.source_set = replacement
return sm return [ get_sm(lei.appendix_sm_id, trigger) for trigger, lei in enumerate(LoopMap) if lei.appendix_sm_has_transitions_f ] def print_this(AnalyzerList): print "#_[ Print %i analyzer(s) ]______________________________" % len(AnalyzerList) print for i, analyzer in enumerate(AnalyzerList): print "--( %i: init si = %i )-------------------------\n" % (i, analyzer.init_state_index) print analyzer if encoding == "unicode": NS_A = NumberSet.from_range(ord('A'), ord('A') + 1) NS_B = NumberSet.from_range(ord('B'), ord('B') + 1) NS_C = NumberSet.from_range(ord('C'), ord('C') + 1) NS_D = NumberSet.from_range(ord('D'), ord('D') + 1) NS_E = NumberSet.from_range(ord('E'), ord('E') + 1) else: NS_A = NumberSet.from_range(0x600, 0x601) NS_B = NumberSet.from_range(0x601, 0x602) NS_C = NumberSet.from_range(0x602, 0x603) NS_D = NumberSet.from_range(0x603, 0x604) NS_E = NumberSet.from_range(0x604, 0x605) CA_0 = CountAction(E_CharacterCountType.COLUMN, 5) CA_1 = CountAction(E_CharacterCountType.LINE, 1) CA_2 = CountAction(E_CharacterCountType.GRID, 2) CA_3 = CountAction(E_CharacterCountType.WHITESPACE, 3)
import sys import os sys.path.insert(0, os.environ["QUEX_PATH"]) from quex.engine.counter import CountAction, \ CountActionMap from quex.engine.state_machine.core import DFA from quex.engine.misc.interval_handling import NumberSet, \ NumberSet_All import quex.engine.analyzer.door_id_address_label as dial from quex.engine.analyzer.door_id_address_label import DialDB import quex.engine.loop.core as loop from quex.constants import E_CharacterCountType, E_Op from quex.blackboard import setup as Setup NS_A = NumberSet.from_range(ord('A'), ord('A') + 1) NS_B = NumberSet.from_range(ord('B'), ord('B') + 1) NS_C = NumberSet.from_range(ord('C'), ord('C') + 1) NS_D = NumberSet.from_range(ord('D'), ord('D') + 1) dial_db = DialDB() if "--hwut-info" in sys.argv: print "Loop: Get Loop Map." print "CHOICES: Plain, AppendixNoI, AppendixI, Split;" def test(NsCaList, SM_list=[]): global dial_db Setup.buffer_encoding.source_set = NumberSet_All() ca_map = CountActionMap.from_list(NsCaList)
def __init__(self, Name, ErrorRangeByCodeUnitDb):
    """Initialize the base 'EncodingTrafo'.

    Name                   -- passed through to the base class.
    ErrorRangeByCodeUnitDb -- passed through to the base class.

    The second argument to the base class is always
    'NumberSet.from_range(0, 0x110000)'.
    """
    source_set = NumberSet.from_range(0, 0x110000)
    base.EncodingTrafo.__init__(self, Name, source_set, ErrorRangeByCodeUnitDb)
#! /usr/bin/env python import sys import os sys.path.insert(0, os.environ["QUEX_PATH"]) from quex.engine.misc.interval_handling import Interval, NumberSet from quex.constants import INTEGER_MAX all = NumberSet.from_range(-INTEGER_MAX, INTEGER_MAX) if "--hwut-info" in sys.argv: print "NumberSet: Inverse" print "CHOICES: 1, 2, serious;" sys.exit(0) def test(NSet): print "# write output in temporary file: 'tmp'" print "# plot with gnuplot:" print "# > plot \"tmp\" w l" print NSet.gnuplot_string(1) result = NSet.get_complement(all) result.assert_consistency() print result.gnuplot_string(0) if "1" in sys.argv: test(NumberSet([Interval(10,20), Interval(21,30), Interval(50,70), Interval(71,80), Interval(80,81), Interval(82,90), Interval(90,100), Interval(110,130), Interval(150,170), Interval(171,190),
import sys import os sys.path.insert(0, os.environ["QUEX_PATH"]) from quex.engine.counter import LineColumnCount, \ CountAction from quex.engine.state_machine.core import StateMachine from quex.engine.misc.interval_handling import NumberSet, \ NumberSet_All from quex.engine.analyzer.door_id_address_label import dial_db from quex.engine.loop_counter import CountInfoMap, \ CountInfo import quex.output.core.loop as loop from quex.blackboard import E_CharacterCountType, \ setup as Setup NS_A = NumberSet.from_range(ord('A'), ord('A') + 1) NS_B = NumberSet.from_range(ord('B'), ord('B') + 1) NS_C = NumberSet.from_range(ord('C'), ord('C') + 1) NS_D = NumberSet.from_range(ord('D'), ord('D') + 1) if "--hwut-info" in sys.argv: print "Loop: Get Loop Map." print "CHOICES: Plain, AppendixNoI, AppendixI, Split;" def test(ci_list, SM_list=[]): Setup.buffer_codec.source_set = NumberSet_All() ci_map = CountInfoMap(ci_list, NumberSet.from_range(0, 100)) iid_loop_exit = dial_db.new_incidence_id() loop_map, appendix_sm_list = loop._get_loop_map(ci_map, SM_list, iid_loop_exit) print
if lei.appendix_sm_has_transitions_f ] def print_this(AnalyzerList): print "#_[ Print %i analyzer(s) ]______________________________" % len( AnalyzerList) print for i, analyzer in enumerate(AnalyzerList): print "--( %i: init si = %i )-------------------------\n" % ( i, analyzer.init_state_index) print analyzer if encoding == "unicode": NS_A = NumberSet.from_range(ord('A'), ord('A') + 1) NS_B = NumberSet.from_range(ord('B'), ord('B') + 1) NS_C = NumberSet.from_range(ord('C'), ord('C') + 1) NS_D = NumberSet.from_range(ord('D'), ord('D') + 1) NS_E = NumberSet.from_range(ord('E'), ord('E') + 1) else: NS_A = NumberSet.from_range(0x600, 0x601) NS_B = NumberSet.from_range(0x601, 0x602) NS_C = NumberSet.from_range(0x602, 0x603) NS_D = NumberSet.from_range(0x603, 0x604) NS_E = NumberSet.from_range(0x604, 0x605) CA_0 = CountAction(E_CharacterCountType.COLUMN, 5) CA_1 = CountAction(E_CharacterCountType.LINE, 1) CA_2 = CountAction(E_CharacterCountType.GRID, 2) CA_3 = CountAction(E_CharacterCountType.WHITESPACE, 3)
if ta.door_id in done: continue assert len(ta.command_list) == 1 cmd = ta.command_list[0] print "%s => %s" % (ta.door_id, cmd.content.router_element) done.add(ta.door_id) def print_this(AnalyzerList): print "#_[ Print %i analyzer(s) ]______________________________" % len(AnalyzerList) print for i, analyzer in enumerate(AnalyzerList): print "--( %i: init si = %i )-------------------------\n" % (i, analyzer.init_state_index) print analyzer print_drop_out(analyzer) NS_A = NumberSet.from_range(0x600, 0x601) # UTF8: D8 80 => 216, 128 NS_B = NumberSet.from_range(0x601, 0x602) # UTF8: D8 81 => 216, 129 NS_C = NumberSet.from_range(0x640, 0x641) # UTF8: D9 80 => 217, 128 appendix_sm_id = 4711L if "loop" in sys.argv: loop_map = loop.LoopMap([ TestLME(NS_A, dial.new_incidence_id(), None), ]) column_n_per_code_unit = 5 elif "appendix" in sys.argv: loop_map = loop.LoopMap([ TestLME(NS_A, dial.new_incidence_id(), appendix_sm_id), # appendix_sm_id ]) column_n_per_code_unit = 5
def __init__(self, Name, CodeUnitRange):
    """Initialize the base 'EncodingTrafo'.

    Name          -- passed through to the base class.
    CodeUnitRange -- passed through to the base class.

    The second argument to the base class is always
    'NumberSet.from_range(0, 0x110000)'.
    """
    source_set = NumberSet.from_range(0, 0x110000)
    base.EncodingTrafo.__init__(self, Name, source_set, CodeUnitRange)