Ejemplo n.º 1
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Check a transformation against a broad sample of Unicode scripts.

    One 'X' object is created per script name. The '.sm' state machines of
    all of them are combined into a single state machine, which is passed
    through 'transform(Trafo, ...)'. Each script object then checks the
    transformed machine, and 'check_negative' is run on every interval of
    'Interval(0, 0x110000)' that is not part of any script's '.charset'.

    Trafo -- transformation object handed to 'transform()'.
    unicode_to_transformed_sequence -- forwarded unchanged to each
        'check()' call and to 'check_negative()'.
    """
    script_names = [
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    ]
    script_sets = [X(name) for name in script_names]

    combined_sm = get_combined_state_machine(
        [script_set.sm for script_set in script_sets])
    # Only the transformed machine is needed; the first element of the
    # returned pair is not used by this test.
    _, result = transform(Trafo, combined_sm)

    # Debug aid: print result.get_graphviz_string(Option="hex")

    # Positive check: every script verifies the transformed machine.
    # (No local is called 'set', so the builtin stays usable in here.)
    for script_set in script_sets:
        script_set.check(result, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)" % len(script_sets))

    # Negative check: everything in the full range that no script covers.
    covered = NumberSet()
    for script_set in script_sets:
        covered.unite_with(script_set.charset)

    uncovered = NumberSet(Interval(0, 0x110000))
    uncovered.subtract(covered)
    # Debug aid: print uncovered.get_string(Option="hex")
    check_negative(result,
                   uncovered.get_intervals(PromiseToTreatWellF=True),
                   unicode_to_transformed_sequence)
Ejemplo n.º 2
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Run positive and negative checks of a transformation on UCS scripts.

    Steps, as performed below:
      1. Build one 'X' object per script name.
      2. Combine their '.sm' state machines and apply
         'transform(Trafo, ...)' to the combination.
      3. Call 'check()' on every script object with the transformed machine.
      4. Call 'check_negative()' with the intervals of
         'Interval(0, 0x110000)' left after subtracting all '.charset's.

    Trafo -- transformation object handed to 'transform()'.
    unicode_to_transformed_sequence -- passed through to 'check()' and
        'check_negative()' without modification.
    """
    names = (
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    )
    samples = [X(name) for name in names]

    orig = get_combined_state_machine([sample.sm for sample in samples])
    # The first element returned by 'transform' is not needed here.
    _, result = transform(Trafo, orig)

    # Debug aid: print result.get_graphviz_string(Option="hex")

    # The loop variable deliberately avoids the name of the builtin 'set'.
    for sample in samples:
        sample.check(result, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)" % len(samples))

    # Collect all character sets, then take what remains of the full range.
    union = NumberSet()
    for sample in samples:
        union.unite_with(sample.charset)

    inverse_union = NumberSet(Interval(0, 0x110000))
    inverse_union.subtract(union)
    # Debug aid: print inverse_union.get_string(Option="hex")
    remaining_intervals = inverse_union.get_intervals(PromiseToTreatWellF=True)
    check_negative(result, remaining_intervals,
                   unicode_to_transformed_sequence)
Ejemplo n.º 3
0
Archivo: loop.py Proyecto: xxyzzzq/quex
 def combined(appendix_sm_db, SmList):
     """Return the id of the combined state machine built from 'SmList'.

     The combination is cached in 'appendix_sm_db' under the sorted tuple
     of the distinct state machine ids, so the same group of state
     machines is only combined once.
     """
     distinct_sms = unique(SmList)
     cache_key = tuple(sorted({sm.get_id() for sm in distinct_sms}))

     cached = appendix_sm_db.get(cache_key)
     if cached is not None:
         return cached.get_id()

     # Not seen before: build the combination and remember it.
     fresh = get_combined_state_machine(distinct_sms,
                                        AlllowInitStateAcceptF=True)
     appendix_sm_db[cache_key] = fresh
     return fresh.get_id()
Ejemplo n.º 4
0
 def combined(appendix_sm_db, SmList):
     """Look up, or create and store, the combination of 'SmList'.

     'appendix_sm_db' maps a sorted tuple of state machine ids to the
     state machine combined from them. The id of that combined state
     machine is returned.
     """
     unique_sms = unique(SmList)
     ids = set(sm.get_id() for sm in unique_sms)
     key = tuple(sorted(ids))

     combination = appendix_sm_db.get(key)
     if combination is None:
         # First request for this group of ids: combine and cache.
         combination = get_combined_state_machine(
             unique_sms, AlllowInitStateAcceptF=True)
         appendix_sm_db[key] = combination
     return combination.get_id()
Ejemplo n.º 5
0
def prepare(PatternStringList, GetPreContextSM_F=False):
    """Build one normalized state machine from a list of pattern strings.

    Each string is parsed with 'regex.do(string, {})' and its post- and
    pre-context state machines are mounted. The state machines of all
    patterns are then combined and a normalized clone is returned.

    PatternStringList -- pattern strings to parse.
    GetPreContextSM_F -- if true, combine the patterns' '.pre_context_sm'
                         instead of their '.sm'.
    """
    patterns = [regex.do(pattern_string, {})
                for pattern_string in PatternStringList]
    for pattern in patterns:
        pattern.mount_post_context_sm()
        pattern.mount_pre_context_sm()

    state_machines = [
        pattern.pre_context_sm if GetPreContextSM_F else pattern.sm
        for pattern in patterns
    ]

    # Second argument: may be 'True' later.
    combined_sm = get_combined_state_machine(state_machines, False)
    return combined_sm.normalized_clone()
Ejemplo n.º 6
0
Archivo: help.py Proyecto: xxyzzzq/quex
def prepare(PatternStringList, GetPreContextSM_F=False):
    """Parse the given patterns and return their combined state machine.

    Every entry of 'PatternStringList' goes through 'regex.do(entry, {})';
    post- and pre-context state machines are mounted on each result. With
    'GetPreContextSM_F' set, the '.pre_context_sm' of each pattern is
    used, otherwise its '.sm'. The return value is the normalized clone
    of the combination.
    """
    parsed = []
    for pattern_string in PatternStringList:
        parsed.append(regex.do(pattern_string, {}))

    # Mounting happens only after all patterns have been parsed.
    for pattern in parsed:
        pattern.mount_post_context_sm()
        pattern.mount_pre_context_sm()

    if GetPreContextSM_F:
        selected = [pattern.pre_context_sm for pattern in parsed]
    else:
        selected = [pattern.sm for pattern in parsed]

    # The 'False' below may be 'True' later.
    return get_combined_state_machine(selected, False).normalized_clone()
    "Phoenician",
    "Shavian",
    "Ugaritic",
    "Buginese",
    "Buhid",
    "Canadian_Aboriginal",
    "Cherokee",
    "Syloti_Nagri",
    "Syriac",
    "Tagalog",
    "Tagbanwa",
    "Tai_Le",
    "Yi",
])

# Merge the '.sm' state machine of every entry in 'sets' into one machine.
orig = get_combined_state_machine(map(lambda x: x.sm, sets))
# Report the state count of the combined machine and of what 'trafo.do()'
# makes of it.
print "# Number of states in state machine:"
print "#   Unicode:       %i" % len(orig.states)
result = trafo.do(orig)
print "#   UTF8-Splitted: %i" % len(result.states)

# Debug aid: print result.get_graphviz_string(Option="hex")

# Let every entry check the transformed state machine.
# NOTE(review): the loop variable shadows the builtin 'set' at module level.
for set in sets:
    set.check(result)

# Accumulate the '.charset' of all entries into a single NumberSet.
union = NumberSet()
for nset in map(lambda set: set.charset, sets):
    union.unite_with(nset)

# NumberSet over Interval(0, 0x110000).
# NOTE(review): presumably 'union' is subtracted from it next; the rest of
# the script is not visible in this excerpt -- confirm.
inverse_union = NumberSet(Interval(0, 0x110000))
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
sys.path.insert(0, os.environ["QUEX_PATH"])

import quex.input.regular_expression.engine as regex
from quex.engine.misc.interval_handling import NumberSet, Interval
import quex.engine.state_machine.transformation.utf8_state_split as trafo
from quex.engine.state_machine.transformation.utf8_state_split import unicode_to_utf8
import quex.input.regular_expression.engine as regex
from quex.engine.state_machine.engine_state_machine_set import get_combined_state_machine

if "--hwut-info" in sys.argv:
    # Title query: print the title only and stop, as the sibling test
    # scripts do. Without the exit the test body below would also run and
    # append its graph output to the title.
    print("UTF8 State Split: Larger Number Sets")
    sys.exit()

# Combine a Greek character-set pattern with a whitespace pattern and apply
# the UTF8 state split to the combination.
sm1 = regex.do("[ΆΈΉΊΌΎ-Ϋ]+", {}).sm
sm2 = regex.do("[ \\t\\n]", {}).sm
result = trafo.do(get_combined_state_machine([sm1, sm2]))

# Print the graphviz description line by line. Any line that contains
# "digraph" is replaced by a fixed header line.
graph_text = result.get_graphviz_string(NormalizeF=True, Option="hex")
for line in graph_text.splitlines():
    if "digraph" in line:
        print("digraph state_machine {")
    else:
        print(line)
Ejemplo n.º 9
0
# -*- coding: utf8 -*-
import os
import sys
sys.path.insert(0, os.environ["QUEX_PATH"])

import quex.input.regular_expression.engine as regex
from quex.engine.state_machine.engine_state_machine_set import get_combined_state_machine
import quex.engine.analyzer.engine_supply_factory as engine
from quex.blackboard import E_InputActions
import help

from operator import attrgetter

if "--hwut-info" in sys.argv:
    # Title query: print the title and leave.
    print("Track Analyzis: Backward Input Position Detection;")
    sys.exit()

# There are no 'special cases'
pattern_list = ['ax']

# One state machine per pattern, combined and normalized.
state_machine_list = [
    regex.do(pattern_string, {}).sm for pattern_string in pattern_list
]
# The 'False' below may be 'True' later.
sm = get_combined_state_machine(state_machine_list, False).normalized_clone()

# For DEBUG purposes: specify 'DRAW' on command line (in sys.argv)
help.if_DRAW_in_sys_argv(sm)
help.test(sm, engine.Class_BACKWARD_INPUT_POSITION(0))
# -*- coding: utf8 -*-
import os
import sys
sys.path.insert(0, os.environ["QUEX_PATH"])

import quex.input.regular_expression.engine  as regex
from   quex.engine.state_machine.engine_state_machine_set            import get_combined_state_machine
import quex.engine.analyzer.engine_supply_factory      as     engine
from   quex.blackboard                       import E_InputActions
import help

from   operator import attrgetter

if "--hwut-info" in sys.argv:
    # Only the title is requested: print it and stop.
    print("Track Analyzis: Backward Input Position Detection;")
    sys.exit()

# There are no 'special cases'
pattern_list = ['ax']

# Parse each pattern and keep its '.sm' state machine.
state_machine_list = []
for pattern_string in pattern_list:
    state_machine_list.append(regex.do(pattern_string, {}).sm)

# Combine (the 'False' may be 'True' later), then normalize.
combined_sm = get_combined_state_machine(state_machine_list, False)
sm = combined_sm.normalized_clone()

# For DEBUG purposes: specify 'DRAW' on command line (in sys.argv)
help.if_DRAW_in_sys_argv(sm)
help.test(sm, engine.Class_BACKWARD_INPUT_POSITION(0))

Ejemplo n.º 11
0
            # An acceptance state cannot be reached by a unicode value in ImpossibleIntervals
            for cmd in result.states[s_idx].single_entry:
                assert not cmd.is_acceptance()

    print " (OK)"

# Build one 'X' object per script name listed below.
sets = map(lambda name: X(name),
        ["Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
            "Hanunoo", "Hebrew", "Hiragana", "Inherited", "Kannada",
            "Katakana", "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B", "Malayalam",
            "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Ogham", "Old_Italic", "Old_Persian",
            "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil", "Telugu", "Thaana", "Thai",
            "Tibetan", "Tifinagh", "Ugaritic", "Yi"])

# Combine the '.sm' state machines of all entries and report the state count
# of the combination and of what 'trafo.do()' makes of it.
orig = get_combined_state_machine(map(lambda x: x.sm, sets))
print "Number of states in state machine:"
print "   Unicode:       %i" % len(orig.states)
result = trafo.do(orig)
print "   UTF8-Splitted: %i" % len(result.states)

# Let every entry check the transformed state machine.
# NOTE(review): the loop variable shadows the builtin 'set' at module level.
for set in sets:
    set.check(result)

# Accumulate the '.charset' of all entries into a single NumberSet.
union = NumberSet()
for nset in map(lambda set: set.charset, sets):
    union.unite_with(nset)

# Start from NumberSet(Interval(0, 0x110000)) and subtract everything that
# the entries cover.
# NOTE(review): the use of 'inverse_union' is not visible in this excerpt.
inverse_union = NumberSet(Interval(0, 0x110000))
inverse_union.subtract(union)
# Debug aid: print inverse_union.get_string(Option="hex")
Ejemplo n.º 12
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
sys.path.insert(0, os.environ["QUEX_PATH"])

import quex.input.regular_expression.engine        as regex
from   quex.engine.misc.interval_handling               import NumberSet, Interval
import quex.engine.state_machine.transformation.utf8_state_split  as trafo
from   quex.engine.state_machine.transformation.utf8_state_split  import unicode_to_utf8
import quex.input.regular_expression.engine        as regex
from   quex.engine.state_machine.engine_state_machine_set                  import get_combined_state_machine

if "--hwut-info" in sys.argv:
    # Answer the title query and stop here. Other test scripts exit after
    # printing their title; without the exit this one would go on to run
    # the test and print its graph output as well.
    print("UTF8 State Split: Larger Number Sets")
    sys.exit()


# State machines for a Greek character-set pattern and a whitespace pattern.
sm1 = regex.do("[ΆΈΉΊΌΎ-Ϋ]+", {}).sm
sm2 = regex.do("[ \\t\\n]", {}).sm

# Apply the UTF8 state split to the combination of both.
result = trafo.do(get_combined_state_machine([sm1, sm2]))

# Emit the graphviz text; lines containing "digraph" are replaced by a
# fixed header line.
for line in result.get_graphviz_string(NormalizeF=True, Option="hex").splitlines():
    if "digraph" in line:
        print("digraph state_machine {")
    else:
        print(line)