def create_words_data(f):
    """Write the ``TEST_WORD`` table derived from ``WordBreakTest.txt`` to *f*.

    Every record returned by ``load_test_data`` is unpacked as a pair; only
    its first member (a sequence of segments) is used.  Each table row pairs
    the flattened concatenation of all segments with the segment sequence
    itself.

    Args:
        f: Object with a ``write`` method; it is also handed to
            ``unicode.emit_table``, which emits the table itself.
    """
    records = load_test_data("auxiliary/WordBreakTest.txt")
    rows = [
        ([cn for segment in segments for cn in segment], segments)
        for (segments, _unused) in records
    ]

    wtype = "&'static [(&'static str, &'static [&'static str])]"
    f.write(" // official Unicode test data\n")
    # Cite the pinned Unicode version, as create_sentence_data does, rather
    # than the floating UNIDATA alias, so the comment names the exact data
    # release the table was generated from.
    f.write(
        " // http://www.unicode.org/Public/%s/ucd/auxiliary/WordBreakTest.txt\n"
        % unicode.UNICODE_VERSION_NUMBER)
    unicode.emit_table(f, "TEST_WORD", rows, wtype, True, showfun, True)
def create_sentence_data(f):
    """Write the ``TEST_SENTENCE`` table derived from ``SentenceBreakTest.txt``.

    Only the first member of each record returned by ``load_test_data`` is
    used: a sequence of segments.  Each table row is
    ``(all segment items flattened in order, the segment sequence)``.

    Args:
        f: Object with a ``write`` method; it receives two comment lines and
            is then passed to ``unicode.emit_table``.
    """
    records = load_test_data("auxiliary/SentenceBreakTest.txt")

    rows = []
    for (segments, _unused) in records:
        flattened = []
        for segment in segments:
            flattened.extend(segment)
        rows.append((flattened, segments))

    table_type = "&'static [(&'static str, &'static [&'static str])]"
    source_url_comment = (
        " // http://www.unicode.org/Public/%s/ucd/auxiliary/SentenceBreakTest.txt\n"
        % unicode.UNICODE_VERSION_NUMBER
    )

    f.write(" // official Unicode test data\n")
    f.write(source_url_comment)
    unicode.emit_table(f, "TEST_SENTENCE", rows, table_type, True, showfun, True)
def create_grapheme_data(f):
    """Write the ``TEST_SAME`` and ``TEST_DIFF`` grapheme tables to *f*.

    Each record from ``load_test_data`` is a pair ``(clusters, rules)``, where
    ``rules[n]`` describes the boundary between ``clusters[n]`` and
    ``clusters[n + 1]``.  Boundaries whose rule is listed in ``optsplits`` are
    merged away to obtain the extended grapheme clusters.  Records where the
    merge changes nothing go to ``TEST_SAME``; the others go to ``TEST_DIFF``
    and carry both segmentations.

    Args:
        f: Object with a ``write`` method; it is also passed to
            ``unicode.emit_table``.
    """
    # rules 9.1 and 9.2 are for extended graphemes only
    optsplits = ['9.1', '9.2']
    records = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits)

    test_same = []
    test_diff = []
    for (clusters, rules) in records:
        allchars = [cn for cluster in clusters for cn in cluster]

        extgraphs = []
        pending = list(clusters[0])
        for idx, rule in enumerate(rules):
            if rule not in optsplits:
                # A real boundary: close the cluster built so far.
                extgraphs.append(pending)
                pending = []
            pending.extend(clusters[idx + 1])
        # these are the extended grapheme clusters
        extgraphs.append(pending)

        if extgraphs == clusters:
            test_same.append((allchars, clusters))
        else:
            test_diff.append((allchars, extgraphs, clusters))

    stype = "&'static [(&'static str, &'static [&'static str])]"
    dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
    url_comment = (
        " // http://www.unicode.org/Public/%s/ucd/auxiliary/GraphemeBreakTest.txt\n"
        % unicode.UNICODE_VERSION_NUMBER
    )

    f.write(" // official Unicode test data\n")
    f.write(url_comment)
    unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
    unicode.emit_table(f, "TEST_DIFF", test_diff, dtype, True, showfun, True)
def create_grapheme_data(f):
    """Write the ``TEST_SAME`` and ``TEST_DIFF`` grapheme tables to *f*.

    Each record from ``load_test_data`` is a pair ``(c, i)``, where ``i[n]``
    is the rule recorded for the boundary between ``c[n]`` and ``c[n + 1]``.
    Boundaries whose rule appears in ``optsplits`` are joined to form the
    extended grapheme clusters.  If joining changes nothing, the record goes
    to ``TEST_SAME`` as ``(allchars, c)``; otherwise it goes to ``TEST_DIFF``
    as ``(allchars, extgraphs, c)``.

    Args:
        f: Object with a ``write`` method; it is also passed to
            ``unicode.emit_table``.
    """
    # rules 9.1 and 9.2 are for extended graphemes only
    optsplits = ['9.1', '9.2']
    d = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits)

    test_same = []
    test_diff = []

    for (c, i) in d:
        allchars = [cn for s in c for cn in s]

        extgraphs = []
        extwork = list(c[0])
        for n, rule in enumerate(i):
            if rule in optsplits:
                # Optional split: keep growing the current extended cluster.
                extwork.extend(c[n + 1])
            else:
                extgraphs.append(extwork)
                extwork = list(c[n + 1])

        # these are the extended grapheme clusters
        extgraphs.append(extwork)

        if extgraphs == c:
            test_same.append((allchars, c))
        else:
            test_diff.append((allchars, extgraphs, c))

    stype = "&'static [(&'static str, &'static [&'static str])]"
    dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
    f.write(" // official Unicode test data\n")
    # Cite the pinned Unicode version, as create_sentence_data does, rather
    # than the floating UNIDATA alias, so the comment names the exact data
    # release the tables were generated from.
    f.write(
        " // http://www.unicode.org/Public/%s/ucd/auxiliary/GraphemeBreakTest.txt\n"
        % unicode.UNICODE_VERSION_NUMBER)
    unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
    unicode.emit_table(f, "TEST_DIFF", test_diff, dtype, True, showfun, True)
return outls


def showfun(gs):
    r"""Format the groups in *gs* as one parenthesised, comma-separated tuple.

    Every group becomes a double-quoted string in which each element is
    written as a ``\u{<hex>}`` escape (lower-case hex, via ``%x``).

    Args:
        gs: Iterable of groups; each group is an iterable of values
            accepted by ``%x`` (presumably integer code points -- confirm
            against ``load_test_data``).

    Returns:
        The assembled string, e.g. ``("\u{61}\u{62}","\u{63}")``.
    """
    outstr = '('
    gfirst = True
    for g in gs:
        # Separate groups with a comma, but emit none before the first.
        if not gfirst:
            outstr += ','
        gfirst = False

        outstr += '"'
        for ch in g:
            outstr += "\\u{%x}" % ch
        outstr += '"'
    outstr += ')'
    return outstr


# Script entry point: load the NormalizationTest.txt records and write them,
# after the shared preamble, as the TEST_NORM table in testdata.rs.
if __name__ == "__main__":
    d = load_test_data("NormalizationTest.txt")
    # Type string passed to emit_table for the table: five string slices.
    ntype = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
    with open("testdata.rs", "w") as nf:
        nf.write(unicode.preamble)
        nf.write("\n")
        nf.write(" // official Unicode test data\n")
        nf.write(
            " // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n"
        )
        unicode.emit_table(nf, "TEST_NORM", d, ntype, True, showfun)
outls.append(groups) return outls


def showfun(gs):
    r"""Format the groups in *gs* as one parenthesised, comma-separated tuple.

    Every group becomes a double-quoted string in which each element is
    written as a ``\u{<hex>}`` escape (lower-case hex, via ``%x``).

    Args:
        gs: Iterable of groups; each group is an iterable of values
            accepted by ``%x`` (presumably integer code points -- confirm
            against ``load_test_data``).

    Returns:
        The assembled string, e.g. ``("\u{61}\u{62}","\u{63}")``.
    """
    outstr = "("
    gfirst = True
    for g in gs:
        # Separate groups with a comma, but emit none before the first.
        if not gfirst:
            outstr += ","
        gfirst = False

        outstr += '"'
        for ch in g:
            outstr += "\\u{%x}" % ch
        outstr += '"'
    outstr += ")"
    return outstr


# Script entry point: load the NormalizationTest.txt records and write them,
# after the shared preamble, as the TEST_NORM table in testdata.rs.
if __name__ == "__main__":
    d = load_test_data("NormalizationTest.txt")
    # Type string passed to emit_table for the table: five string slices.
    ntype = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
    with open("testdata.rs", "w") as nf:
        nf.write(unicode.preamble)
        nf.write("\n")
        nf.write(" // official Unicode test data\n")
        nf.write(" // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n")
        unicode.emit_table(nf, "TEST_NORM", d, ntype, True, showfun)