Example #1
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Check a transformation against a fixed sample of Unicode scripts.

    One ``X`` object is built per script name.  Their state machines
    (``.sm``) are merged with ``get_combined_state_machine`` and the merged
    machine is passed through ``transform`` together with ``Trafo``.  Every
    ``X`` object then runs its own ``check`` on the transformed machine, and
    ``check_negative`` is finally run on what remains of
    ``Interval(0, 0x110000)`` after the character sets of all sample scripts
    have been subtracted.

    Trafo -- handed to ``transform`` unchanged.
    unicode_to_transformed_sequence -- forwarded unchanged to ``check`` and
        ``check_negative``; presumably converts a code point into its
        transformed sequence (TODO confirm against the callers).
    """
    script_names = (
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    )
    script_sets = [X(script_name) for script_name in script_names]

    unicode_sm = get_combined_state_machine(
        [script_set.sm for script_set in script_sets]
    )
    # transform() returns a pair; only the second element is used here.
    _, transformed_sm = transform(Trafo, unicode_sm)

    # Debug aid: print(transformed_sm.get_graphviz_string(Option="hex"))

    # Positive checks: one per sample script.
    for script_set in script_sets:
        script_set.check(transformed_sm, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)" % len(script_sets))

    # Accumulate the character sets of all sample scripts ...
    covered = NumberSet()
    for script_set in script_sets:
        covered.unite_with(script_set.charset)

    # ... and remove them from Interval(0, 0x110000) to obtain the
    # intervals for the negative check.
    uncovered = NumberSet(Interval(0, 0x110000))
    uncovered.subtract(covered)
    # Debug aid: print(uncovered.get_string(Option="hex"))
    check_negative(
        transformed_sm,
        uncovered.get_intervals(PromiseToTreatWellF=True),
        unicode_to_transformed_sequence,
    )
Example #2
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Check a transformation against a fixed sample of Unicode scripts.

    One ``X`` object is built per script name.  Their state machines
    (``.sm``) are merged with ``combination.do`` and the merged machine is
    passed through ``transform`` together with ``Trafo``.  Every ``X``
    object then runs its own ``check`` on the transformed machine, and
    ``check_negative`` is finally run on what remains of
    ``Interval(0, 0x110000)`` after the character sets of all sample scripts
    have been subtracted.

    Trafo -- handed to ``transform`` unchanged.
    unicode_to_transformed_sequence -- forwarded unchanged to ``check`` and
        ``check_negative``; presumably converts a code point into its
        transformed sequence (TODO confirm against the callers).
    """
    script_names = (
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    )
    script_sets = [X(script_name) for script_name in script_names]

    unicode_sm = combination.do([script_set.sm for script_set in script_sets])
    # transform() returns a pair; only the second element is used here.
    _, transformed_sm = transform(Trafo, unicode_sm)

    # Debug aid: print(transformed_sm.get_graphviz_string(Option="hex"))

    # Positive checks: one per sample script.
    for script_set in script_sets:
        script_set.check(transformed_sm, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)" % len(script_sets))

    # Accumulate the character sets of all sample scripts ...
    covered = NumberSet()
    for script_set in script_sets:
        covered.unite_with(script_set.charset)

    # ... and remove them from Interval(0, 0x110000) to obtain the
    # intervals for the negative check.
    uncovered = NumberSet(Interval(0, 0x110000))
    uncovered.subtract(covered)
    # Debug aid: print(uncovered.get_string(Option="hex"))
    check_negative(
        transformed_sm,
        uncovered.get_intervals(PromiseToTreatWellF=True),
        unicode_to_transformed_sequence,
    )
    "Buhid",
    "Canadian_Aboriginal",
    "Cherokee",
    "Syloti_Nagri",
    "Syriac",
    "Tagalog",
    "Tagbanwa",
    "Tai_Le",
    "Yi",
])

# Merge the state machines (.sm) of all sample sets into one machine and
# run it through trafo.do(), reporting the state count before and after.
# A list (not a map object) is passed so the callee receives a real
# sequence on both Python 2 and Python 3.
orig = get_combined_state_machine([script_set.sm for script_set in sets])
print("# Number of states in state machine:")
print("#   Unicode:       %i" % len(orig.states))
result = trafo.do(orig)
print("#   UTF8-Splitted: %i" % len(result.states))

# Debug aid: print(result.get_graphviz_string(Option="hex"))

# Positive checks: every sample set runs its own check on the result.
# The loop variable is deliberately NOT called `set`: at module scope that
# would rebind the builtin for all code that follows.
for script_set in sets:
    script_set.check(result)

# Accumulate the character sets of all sample sets ...
union = NumberSet()
for script_set in sets:
    union.unite_with(script_set.charset)

# ... and remove them from Interval(0, 0x110000); the remaining intervals
# are handed to the negative check.
inverse_union = NumberSet(Interval(0, 0x110000))
inverse_union.subtract(union)
# Debug aid: print(inverse_union.get_string(Option="hex"))
check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True))
            for cmd in result.states[s_idx].single_entry:
                assert not cmd.is_acceptance()

    print " (OK)"

# One X object per sample script name.  `sets` is iterated several times
# below, so it is materialised as a list; a lazy map object would be empty
# after the first pass.
sets = list(map(X, [
    "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
    "Hanunoo", "Hebrew", "Hiragana", "Inherited", "Kannada",
    "Katakana", "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
    "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Ogham",
    "Old_Italic", "Old_Persian",
    "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil", "Telugu", "Thaana",
    "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
]))

# Merge the state machines (.sm) of all sample sets into one machine and
# run it through trafo.do(), reporting the state count before and after.
orig = get_combined_state_machine([script_set.sm for script_set in sets])
print("Number of states in state machine:")
print("   Unicode:       %i" % len(orig.states))
result = trafo.do(orig)
print("   UTF8-Splitted: %i" % len(result.states))

# Positive checks: every sample set runs its own check on the result.
# The loop variable is deliberately NOT called `set`: at module scope that
# would rebind the builtin for all code that follows.
for script_set in sets:
    script_set.check(result)

# Accumulate the character sets of all sample sets ...
union = NumberSet()
for script_set in sets:
    union.unite_with(script_set.charset)

# ... and remove them from Interval(0, 0x110000); the remaining intervals
# are handed to the negative check.
inverse_union = NumberSet(Interval(0, 0x110000))
inverse_union.subtract(union)
# Debug aid: print(inverse_union.get_string(Option="hex"))
check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True))