# -*- coding: utf-8 -*-
"""Romanize a Japanese query and print the results as an XML item list.

The query is taken from the first command-line argument when one is given,
otherwise from the literal ``{query}`` placeholder below (presumably
substituted by the host application before the script runs -- confirm).
The query is NFKC-normalized, converted with ``to_hepburn`` and
``to_kunrei``, and written to UTF-8 encoded standard output as two
``<item>`` elements.
"""
import re
import sys
import codecs
import unicodedata
from xml.sax.saxutils import escape

from romanizer import to_hepburn, to_kunrei

# Extra replacements applied on top of the ``&``, ``<`` and ``>`` that
# ``escape`` always handles.  The escaped text is interpolated into a
# double-quoted XML attribute (``arg="..."``), so quote characters have to
# be turned into character references or they would end the attribute early.
alt_escape_rule = {u'"': u'&quot;', u"'": u'&#39;'}


def h(value):
    """Return *value* escaped for use in XML text and attribute values.

    Besides the characters ``escape`` handles by default, both quote
    characters are replaced according to ``alt_escape_rule``.
    """
    return escape(value, alt_escape_rule)


if __name__ == '__main__':
    # NOTE(review): wrapping stdout in a codecs writer and calling
    # ``.decode`` on ``sys.argv[1]`` both assume Python 2 byte strings.
    sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
    q = sys.argv[1].decode('UTF-8') if 1 < len(sys.argv) else u"""{query}"""
    q = unicodedata.normalize('NFKC', q)
    h_text = to_hepburn(q)
    k_text = to_kunrei(q)

    # Adjacent literals are concatenated by the parser; every piece except
    # the last ends with the single space that separates the elements.
    template = (
        u'<?xml version="1.0"?> '
        u'<items> '
        u'<item uid="h_result" arg="{h_text}" valid="yes"> '
        u'<title>{h_text}</title> '
        u'<subtitle>Hepburn system</subtitle> '
        u'</item> '
        u'<item uid="k_result" arg="{k_text}" valid="yes"> '
        u'<title>{k_text}</title> '
        u'<subtitle>Kunrei system</subtitle> '
        u'</item> '
        u'</items>'
    )
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(template.format(h_text=h(h_text), k_text=h(k_text)))
def test_kunrei(self):
    """Check ``to_kunrei`` against a table of expected romanizations.

    Each entry pairs the expected Latin-script result with the kana input.
    The table covers the empty string, hiragana and katakana input, long
    vowels written with a circumflex, doubled consonants for the small tsu,
    runs of the syllabic n, and the apostrophe in ``sin'iti``.
    """
    expectations = (
        (u"", u""),
        (u"aiueo", u"あいうえお"),
        (u"gunma", u"ぐんま"),
        (u"tinpui", u"チンプイ"),
        (u"banpuobutikin", u"バンプオブチキン"),
        (u"kanda", u"かんだ"),
        (u"bôzuga zyôzuni byôbuni bôzuno ewokaita",
         u"ボウズガ ジョウズニ ビョウブニ ボウズノ エヲカイタ"),
        (u"beppu", u"べっぷ"),
        (u"kotti", u"コッチ"),
        (u"nnnnnn", u"んんんンンン"),
        (u"ôi tôru ônisi", u"おーい とおる おおにし"),
        (u"sin'iti", u"シンイチ"),
        (u"syônin", u"しょーにん"),
        (u"zyôhô", u"じょうほう"),
    )
    # Cases run in table order, so the first mismatch raised is the same
    # one a sequence of individual assertions would report.
    for romaji, kana in expectations:
        self.assertEqual(romaji, to_kunrei(kana))