Esempio n. 1
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Check a transformed state machine against a sample of Unicode scripts.

    One `X` object is created per script name. Their state machines
    (`.sm`) are merged with `combination.do` and the merged machine is
    run through `transform(Trafo, ...)`. Every script object then checks
    the transformed machine, and finally `check_negative` is called with
    the intervals left over after subtracting the union of all script
    character sets from `Interval(0, 0x110000)`.

    Trafo -- handed unchanged to `transform`.
    unicode_to_transformed_sequence -- forwarded unchanged to each
        `check` call and to `check_negative`.
    """
    script_names = [
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    ]
    script_checkers = [X(name) for name in script_names]

    # A real list (not a lazy `map` object) is passed on, so the callee sees
    # the same kind of argument regardless of the interpreter version.
    combined = combination.do([checker.sm for checker in script_checkers])

    # transform() returns a pair; only the transformed machine is used here.
    _, transformed = transform(Trafo, combined)

    # Debug aid: print(transformed.get_graphviz_string(Option="hex"))

    for checker in script_checkers:
        checker.check(transformed, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)"
          % len(script_checkers))

    # Union of every character set covered by the sample scripts.
    covered = NumberSet()
    for checker in script_checkers:
        covered.unite_with(checker.charset)

    # Whatever remains of Interval(0, 0x110000) belongs to none of the
    # scripts and is handed to the negative check.
    uncovered = NumberSet(Interval(0, 0x110000))
    uncovered.subtract(covered)
    # Debug aid: print(uncovered.get_string(Option="hex"))
    check_negative(transformed,
                   uncovered.get_intervals(PromiseToTreatWellF=True),
                   unicode_to_transformed_sequence)
Esempio n. 2
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Check a transformed state machine against a sample of Unicode scripts.

    One `X` object is created per script name. Their state machines
    (`.sm`) are merged with `get_combined_state_machine` and the merged
    machine is run through `transform(Trafo, ...)`. Every script object
    then checks the transformed machine, and finally `check_negative` is
    called with the intervals left over after subtracting the union of
    all script character sets from `Interval(0, 0x110000)`.

    Trafo -- handed unchanged to `transform`.
    unicode_to_transformed_sequence -- forwarded unchanged to each
        `check` call and to `check_negative`.
    """
    script_names = [
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    ]
    script_checkers = [X(name) for name in script_names]

    # A real list (not a lazy `map` object) is passed on, so the callee sees
    # the same kind of argument regardless of the interpreter version.
    combined = get_combined_state_machine(
        [checker.sm for checker in script_checkers])

    # transform() returns a pair; only the transformed machine is used here.
    _, transformed = transform(Trafo, combined)

    # Debug aid: print(transformed.get_graphviz_string(Option="hex"))

    for checker in script_checkers:
        checker.check(transformed, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)"
          % len(script_checkers))

    # Union of every character set covered by the sample scripts.
    covered = NumberSet()
    for checker in script_checkers:
        covered.unite_with(checker.charset)

    # Whatever remains of Interval(0, 0x110000) belongs to none of the
    # scripts and is handed to the negative check.
    uncovered = NumberSet(Interval(0, 0x110000))
    uncovered.subtract(covered)
    # Debug aid: print(uncovered.get_string(Option="hex"))
    check_negative(transformed,
                   uncovered.get_intervals(PromiseToTreatWellF=True),
                   unicode_to_transformed_sequence)
    "Buhid",
    "Canadian_Aboriginal",
    "Cherokee",
    "Syloti_Nagri",
    "Syriac",
    "Tagalog",
    "Tagbanwa",
    "Tai_Le",
    "Yi",
])

# Merge the state machines of all script objects in `sets` and report the
# state count before and after the transformation.
orig = get_combined_state_machine([script.sm for script in sets])
print("# Number of states in state machine:")
print("#   Unicode:       %i" % len(orig.states))
result = trafo.do(orig)
print("#   UTF8-Splitted: %i" % len(result.states))

# Debug aid: print(result.get_graphviz_string(Option="hex"))

# Each script object checks the transformed machine. The loop variable is
# deliberately not called `set`: at module scope that would rebind the
# builtin for everything that follows.
for script in sets:
    script.check(result)

# Union of every character set covered by the scripts.
union = NumberSet()
for script in sets:
    union.unite_with(script.charset)

# Whatever remains of Interval(0, 0x110000) belongs to none of the scripts
# and is handed to the negative check.
inverse_union = NumberSet(Interval(0, 0x110000))
inverse_union.subtract(union)
# Debug aid: print(inverse_union.get_string(Option="hex"))
check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True))
Esempio n. 4
0
def do(Title, func, PlotF=True):
    """Feed a fixed series of NumberSet pairs to `func`; optionally plot.

    Six pairs of sets are built from hard-coded intervals and passed to
    `func(label, first, second)` one after another. The unions of all
    first sets and of all second sets are then passed to `func` as a
    seventh pair. The union of the first sets is also stored in the
    module-level global `A6`.

    Title -- selects which operation is printed in the plot section:
             "UNION", "INTERSECTION", "DIFFERENCE", "CUT_INTERVAL",
             "ADD_INTERVAL" or "CLEAN"; any other value prints only the
             two input sets.
    func  -- callable taking (label, NumberSet, NumberSet).
    PlotF -- when false, return right after the `func` calls without
             printing any gnuplot data.
    """
    global A6

    # (label, intervals of the first set, intervals of the second set).
    # Labels go to `func` verbatim, so the repeated "(d)" / "(e)" tags are
    # kept exactly as they are.
    scenarios = [
        ("(a) one interval overlaps all of the others",
         [(10, 20), (21, 30)], [(0, 40)]),
        ("(b) one interval overlaps the lower of the others",
         [(50, 70), (71, 80)], [(40, 60)]),
        # NOTE(review): the lower bound 11 may be a typo for 110 -- confirm
        # against the expected output before changing it.
        ("(c) one interval overlaps the upper of the others",
         [(90, 100), (11, 130)], [(120, 140)]),
        ("(d) one interval overlaps the middle of the others",
         [(150, 170), (171, 190)], [(160, 180)]),
        ("(e) one interval overlaps the 1st a little, the second totally",
         [(200, 230), (231, 240)], [(220, 250)]),
        ("(d) one interval overlaps the 2nd a little, the first totally",
         [(250, 260), (261, 280)], [(240, 270)]),
    ]

    first_sets = []
    second_sets = []
    for label, first_spec, second_spec in scenarios:
        first = NumberSet([Interval(begin, end) for begin, end in first_spec])
        second = NumberSet([Interval(begin, end) for begin, end in second_spec])
        func(label, first, second)
        first_sets.append(first)
        second_sets.append(second)

    # Accumulate all scenarios into one pair of sets.
    A6 = NumberSet()
    for first in first_sets:
        A6 = A6.union(first)
    B6 = NumberSet()
    for second in second_sets:
        B6 = B6.union(second)
    func("(e) all together", A6, B6)

    if not PlotF:
        return

    print("# write output in temporary file: 'tmp'")
    print("# plot with gnuplot:")
    print("# > plot \"tmp\" w l")

    print(A6.gnuplot_string(4))
    print(B6.gnuplot_string(3))

    def apply_per_interval(base, other, method_name, clean_afterwards):
        # Work on a deep copy of `base` so the originals stay untouched;
        # call the named method once for every interval of `other`.
        working = deepcopy(base)
        for interval in other.get_intervals():
            getattr(working, method_name)(interval)
        if clean_afterwards:
            working.clean()
        return working

    # Operations that take the other set as a whole.
    whole_set_ops = {
        "UNION": "union",
        "INTERSECTION": "intersection",
        "DIFFERENCE": "difference",
    }
    # Operations applied interval by interval: (method, call clean() after?).
    per_interval_ops = {
        "CUT_INTERVAL": ("cut_interval", False),
        "ADD_INTERVAL": ("add_interval", False),
        "CLEAN": ("quick_append_interval", True),
    }

    if Title in whole_set_ops:
        method_name = whole_set_ops[Title]
        print(getattr(A6, method_name)(B6).gnuplot_string(1))
        print(getattr(B6, method_name)(A6).gnuplot_string(0))
    elif Title in per_interval_ops:
        method_name, clean_afterwards = per_interval_ops[Title]
        forward = apply_per_interval(A6, B6, method_name, clean_afterwards)
        print(forward.gnuplot_string(1))
        backward = apply_per_interval(B6, A6, method_name, clean_afterwards)
        print(backward.gnuplot_string(0))
            for cmd in result.states[s_idx].single_entry:
                assert not cmd.is_acceptance()

    print " (OK)"

# One `X` object per Unicode script under test. `X` is passed to map()
# directly (no wrapping lambda) and the result is materialised as a list
# because `sets` is iterated several times below.
sets = list(map(X, [
    "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
    "Hanunoo", "Hebrew", "Hiragana", "Inherited", "Kannada",
    "Katakana", "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
    "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Ogham",
    "Old_Italic", "Old_Persian", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le",
    "Tamil", "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic",
    "Yi",
]))

# Merge the per-script state machines and report the state count before and
# after the transformation.
orig = get_combined_state_machine([script.sm for script in sets])
print("Number of states in state machine:")
print("   Unicode:       %i" % len(orig.states))
result = trafo.do(orig)
print("   UTF8-Splitted: %i" % len(result.states))

# Each script object checks the transformed machine. The loop variable is
# deliberately not called `set`: at module scope that would rebind the
# builtin for everything that follows.
for script in sets:
    script.check(result)

# Union of every character set covered by the scripts.
union = NumberSet()
for script in sets:
    union.unite_with(script.charset)

# Whatever remains of Interval(0, 0x110000) belongs to none of the scripts
# and is handed to the negative check.
inverse_union = NumberSet(Interval(0, 0x110000))
inverse_union.subtract(union)
# Debug aid: print(inverse_union.get_string(Option="hex"))
check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True))