def create_words_data(f):
    """Write the word-break test table (``TEST_WORD``) to ``f``.

    The cases come from ``load_test_data("auxiliary/WordBreakTest.txt")``.
    Each case is emitted as a pair: the case's segments flattened into one
    sequence, followed by the segments themselves.

    ``f`` must provide ``write``; it is also handed to ``unicode.emit_table``.
    """
    cases = load_test_data("auxiliary/WordBreakTest.txt")

    # The second member of every loaded pair is not needed for word data.
    rows = [
        ([item for segment in segments for item in segment], segments)
        for (segments, _) in cases
    ]

    table_type = "&'static [(&'static str, &'static [&'static str])]"
    for header in (
        "    // official Unicode test data\n",
        "    // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt\n",
    ):
        f.write(header)
    unicode.emit_table(f, "TEST_WORD", rows, table_type, True, showfun, True)
# Example #2
# 0
def create_sentence_data(f):
    """Write the sentence-break test table (``TEST_SENTENCE``) to ``f``.

    The cases come from
    ``load_test_data("auxiliary/SentenceBreakTest.txt")``. Each case is
    emitted as a pair: the case's segments flattened into one sequence,
    followed by the segments themselves. The source-URL comment written to
    ``f`` embeds ``unicode.UNICODE_VERSION_NUMBER``.
    """
    cases = load_test_data("auxiliary/SentenceBreakTest.txt")

    # The second member of every loaded pair is not needed for sentence data.
    rows = [
        ([item for segment in segments for item in segment], segments)
        for (segments, _) in cases
    ]

    table_type = "&'static [(&'static str, &'static [&'static str])]"
    source_comment = (
        "    // http://www.unicode.org/Public/%s/ucd/auxiliary/SentenceBreakTest.txt\n"
        % unicode.UNICODE_VERSION_NUMBER
    )
    f.write("    // official Unicode test data\n")
    f.write(source_comment)
    unicode.emit_table(f, "TEST_SENTENCE", rows, table_type, True, showfun, True)
# Example #3
# 0
def create_grapheme_data(f):
    """Write the grapheme-break test tables (``TEST_SAME``/``TEST_DIFF``) to ``f``.

    Cases are loaded from ``auxiliary/GraphemeBreakTest.txt``. For every case
    a second segmentation is derived by merging segments across any boundary
    whose rule label is '9.1' or '9.2'. Cases where the merged segmentation
    equals the loaded one go to ``TEST_SAME`` as ``(flattened, segments)``;
    the rest go to ``TEST_DIFF`` as ``(flattened, merged, segments)``.
    """
    # rules 9.1 and 9.2 are for extended graphemes only
    optsplits = ['9.1', '9.2']
    cases = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits)

    unchanged = []
    changed = []

    for (segments, rules) in cases:
        flattened = [item for segment in segments for item in segment]

        merged = []
        current = list(segments[0])
        for pos, rule in enumerate(rules):
            if rule not in optsplits:
                # A non-optional boundary closes the cluster being built.
                merged.append(current)
                current = []
            current.extend(segments[pos + 1])

        # these are the extended grapheme clusters
        merged.append(current)

        if merged == segments:
            unchanged.append((flattened, segments))
        else:
            changed.append((flattened, merged, segments))

    same_type = "&'static [(&'static str, &'static [&'static str])]"
    diff_type = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
    source_comment = (
        "    // http://www.unicode.org/Public/%s/ucd/auxiliary/GraphemeBreakTest.txt\n"
        % unicode.UNICODE_VERSION_NUMBER
    )
    f.write("    // official Unicode test data\n")
    f.write(source_comment)
    unicode.emit_table(f, "TEST_SAME", unchanged, same_type, True, showfun, True)
    unicode.emit_table(f, "TEST_DIFF", changed, diff_type, True, showfun, True)
def create_grapheme_data(f):
    """Write the grapheme-break test tables (``TEST_SAME``/``TEST_DIFF``) to ``f``.

    Cases are loaded from ``auxiliary/GraphemeBreakTest.txt``. For every case
    a second segmentation is derived by merging segments across any boundary
    whose rule label is '9.1' or '9.2'. Cases where the merged segmentation
    equals the loaded one go to ``TEST_SAME`` as ``(flattened, segments)``;
    the rest go to ``TEST_DIFF`` as ``(flattened, merged, segments)``.

    The function only writes to ``f`` and returns ``None``.
    """
    # rules 9.1 and 9.2 are for extended graphemes only
    optsplits = ['9.1', '9.2']
    d = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits)

    test_same = []
    test_diff = []

    for (c, rules) in d:
        allchars = [cn for s in c for cn in s]
        extgraphs = []
        extwork = list(c[0])

        # rules[n] labels the boundary between c[n] and c[n + 1].
        for n, rule in enumerate(rules):
            if rule not in optsplits:
                # A non-optional boundary closes the cluster being built.
                extgraphs.append(extwork)
                extwork = []
            extwork.extend(c[n + 1])

        # these are the extended grapheme clusters
        extgraphs.append(extwork)

        if extgraphs == c:
            test_same.append((allchars, c))
        else:
            test_diff.append((allchars, extgraphs, c))

    stype = "&'static [(&'static str, &'static [&'static str])]"
    dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
    f.write("    // official Unicode test data\n")
    f.write("    // http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt\n")
    unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
    unicode.emit_table(f, "TEST_DIFF", test_diff, dtype, True, showfun, True)


def showfun(gs):
    """Render ``gs`` as a parenthesised, comma-separated list of quoted strings.

    Each element of ``gs`` is an iterable of integers; every integer is
    written as a ``\\u{...}`` escape with lowercase hex digits, and the
    escapes of one element are wrapped together in double quotes.
    An empty ``gs`` yields ``"()"``.
    """
    quoted = []
    for group in gs:
        escapes = "".join("\\u{%x}" % ch for ch in group)
        quoted.append('"' + escapes + '"')
    return '(' + ','.join(quoted) + ')'


if __name__ == "__main__":
    # Script entry point: load the normalization test cases and write them
    # to testdata.rs as the TEST_NORM table.
    d = load_test_data("NormalizationTest.txt")
    # Table type string handed to emit_table: five string fields per entry.
    ntype = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
    with open("testdata.rs", "w") as nf:
        nf.write(unicode.preamble)
        nf.write("\n")
        nf.write("    // official Unicode test data\n")
        nf.write(
            "    // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n"
        )
        unicode.emit_table(nf, "TEST_NORM", d, ntype, True, showfun)


def showfun(gs):
    """Format ``gs`` as ``("...","...")`` with one quoted string per element.

    Every element of ``gs`` is an iterable of integers, each rendered as a
    ``\\u{...}`` escape using lowercase hexadecimal. Elements are separated
    by a bare comma; an empty ``gs`` produces ``"()"``.
    """
    literals = (
        '"%s"' % "".join("\\u{%x}" % code for code in group)
        for group in gs
    )
    return "(%s)" % ",".join(literals)


if __name__ == "__main__":
    # Script entry point: load the normalization test cases first, then
    # write them to testdata.rs as the TEST_NORM table.
    norm_cases = load_test_data("NormalizationTest.txt")
    table_type = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
    header_lines = (
        "\n",
        "    // official Unicode test data\n",
        "    // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n",
    )
    with open("testdata.rs", "w") as out:
        out.write(unicode.preamble)
        for line in header_lines:
            out.write(line)
        unicode.emit_table(out, "TEST_NORM", norm_cases, table_type, True, showfun)