def test_sentence(self):
     """Check avro.parse on two full sentences against their utf() forms."""
     # (phonetic input, expected text) pairs, checked in order
     expectations = (
         ("ami banglay gan gai", "আমি বাংলায় গান গাই"),
         ("amader valObasa hoye gel ghas, kheye gel goru ar diye gelo ba^sh",
          "আমাদের ভালোবাসা হয়ে গেল ঘাস, খেয়ে গেল গরু আর দিয়ে গেল বাঁশ"),
     )
     for phonetic, expected in expectations:
         self.assertEqual(avro.parse(phonetic), utf(expected))
Example #2
0
 def test_sentence(self):
     """Compare avro.parse output with utf() text for two sentences."""
     short_input = "ami banglay gan gai"
     short_expected = "আমি বাংলায় গান গাই"
     long_input = (
         "amader valObasa hoye gel ghas, kheye gel goru ar diye gelo ba^sh")
     long_expected = (
         "আমাদের ভালোবাসা হয়ে গেল ঘাস, খেয়ে গেল গরু আর দিয়ে গেল বাঁশ")

     # Each pair is parsed and compared in turn, shortest sentence first
     for phonetic, expected in ((short_input, short_expected),
                                (long_input, long_expected)):
         self.assertEqual(avro.parse(phonetic), utf(expected))
Example #3
0
    def test_non_ascii(self):
        """Check how the parser handles non-ASCII input.

        Non-ASCII characters passed to avro.parse are expected to come
        back in the result, including when they are mixed with ASCII
        phonetic text.

        """
        # (expected text, parser input) pairs; the last two mix scripts
        cases = [
            ('ব', 'ব'),
            ('অভ্র', 'অভ্র'),
            ('বআবা গো', 'বaba gO'),
            ('আমি বাংলায় গান গাই', 'aমি বাংলায় gaন গাi'),
        ]
        for expected, given in cases:
            self.assertEqual(utf(expected), avro.parse(given))
Example #4
0
    def test_non_ascii(self):
        """Test parser response for non ascii characters

        Parser should return any non-ascii characters passed to it

        """
        # assertEqual is used instead of the deprecated assertEquals
        # alias, which no longer exists in Python 3.12+.
        self.assertEqual(utf('ব'), avro.parse('ব'))
        self.assertEqual(utf('অভ্র'), avro.parse('অভ্র'))
        # mixed string
        self.assertEqual(utf('বআবা গো'), avro.parse('বaba gO'))
        self.assertEqual(utf('আমি বাংলায় গান গাই'),
                         avro.parse('aমি বাংলায় gaন গাi'))
Example #5
0
    def test_patterns_without_rules_not_from_config(self):
        """Tests all patterns not from config that don't have rules

        This test is done in addition to
        test_patterns_without_rules_from_config() to ensure that text
        passed manually to avro.parse is properly parsed when it
        doesn't exactly match a pattern that has no rules specified.

        """
        # assertEqual is used instead of the deprecated assertEquals
        # alias, which no longer exists in Python 3.12+.
        # Test some conjunctions
        self.assertEqual(utf("ভ্ল"), avro.parse("bhl"))
        self.assertEqual(utf("ব্ধ"), avro.parse("bdh"))
        self.assertEqual(utf("ড্ড"), avro.parse("DD"))
        # stunned stork!
        self.assertEqual(utf("স্তব্ধ বক"), avro.parse("stbdh bk"))
Example #6
0
    def test_patterns_without_rules_not_from_config(self):
        """Tests all patterns not from config that don't have rules

        This test is done in addition to
        test_patterns_without_rules_from_config() to ensure that text
        passed manually to avro.parse is properly parsed when it
        doesn't exactly match a pattern that has no rules specified.

        """
        # assertEqual is used instead of the deprecated assertEquals
        # alias, which no longer exists in Python 3.12+.
        # Test some conjunctions
        self.assertEqual(utf("ভ্ল"), avro.parse("bhl"))
        self.assertEqual(utf("ব্ধ"), avro.parse("bdh"))
        self.assertEqual(utf("ড্ড"), avro.parse("DD"))
        # stunned stork!
        self.assertEqual(utf("স্তব্ধ বক"),
                         avro.parse("stbdh bk"))
Example #7
0
 def test_basic(self):
     """Check avro.parse against expected output for many short inputs.

     Each assertion feeds one short ASCII sequence (letters, digits,
     punctuation, and sequences containing a backtick) to avro.parse and
     compares the result with the expected string passed through utf().
     The assertions are independent of each other; the first failing one
     aborts the test.

     """
     # Sequences starting with b / v
     self.assertEqual(avro.parse("bhl"), utf("ভ্ল"))
     self.assertEqual(avro.parse("bj"), utf("ব্জ"))
     self.assertEqual(avro.parse("bd"), utf("ব্দ"))
     self.assertEqual(avro.parse("bb"), utf("ব্ব"))
     self.assertEqual(avro.parse("bl"), utf("ব্ল"))
     self.assertEqual(avro.parse("bh"), utf("ভ"))
     self.assertEqual(avro.parse("vl"), utf("ভ্ল"))
     self.assertEqual(avro.parse("b"), utf("ব"))
     self.assertEqual(avro.parse("v"), utf("ভ"))
     # Sequences starting with c
     self.assertEqual(avro.parse("cNG"), utf("চ্ঞ"))
     self.assertEqual(avro.parse("cch"), utf("চ্ছ"))
     self.assertEqual(avro.parse("cc"), utf("চ্চ"))
     self.assertEqual(avro.parse("ch"), utf("ছ"))
     self.assertEqual(avro.parse("c"), utf("চ"))
     # Sequences starting with d / D (input is case sensitive here)
     self.assertEqual(avro.parse("dhn"), utf("ধ্ন"))
     self.assertEqual(avro.parse("dhm"), utf("ধ্ম"))
     self.assertEqual(avro.parse("dgh"), utf("দ্ঘ"))
     self.assertEqual(avro.parse("ddh"), utf("দ্ধ"))
     self.assertEqual(avro.parse("dbh"), utf("দ্ভ"))
     self.assertEqual(avro.parse("dv"), utf("দ্ভ"))
     self.assertEqual(avro.parse("dm"), utf("দ্ম"))
     self.assertEqual(avro.parse("DD"), utf("ড্ড"))
     self.assertEqual(avro.parse("Dh"), utf("ঢ"))
     self.assertEqual(avro.parse("dh"), utf("ধ"))
     self.assertEqual(avro.parse("dg"), utf("দ্গ"))
     self.assertEqual(avro.parse("dd"), utf("দ্দ"))
     self.assertEqual(avro.parse("D"), utf("ড"))
     self.assertEqual(avro.parse("d"), utf("দ"))
     # Period sequences
     self.assertEqual(avro.parse("..."), utf("..."))
     self.assertEqual(avro.parse(".`"), utf("."))
     self.assertEqual(avro.parse(".."), utf("।।"))
     self.assertEqual(avro.parse("."), utf("।"))
     # Sequences starting with g / G
     self.assertEqual(avro.parse("ghn"), utf("ঘ্ন"))
     self.assertEqual(avro.parse("Ghn"), utf("ঘ্ন"))
     self.assertEqual(avro.parse("gdh"), utf("গ্ধ"))
     self.assertEqual(avro.parse("gN"), utf("গ্ণ"))
     self.assertEqual(avro.parse("GN"), utf("গ্ণ"))
     self.assertEqual(avro.parse("gn"), utf("গ্ন"))
     self.assertEqual(avro.parse("gm"), utf("গ্ম"))
     self.assertEqual(avro.parse("Gm"), utf("গ্ম"))
     self.assertEqual(avro.parse("gl"), utf("গ্ল"))
     self.assertEqual(avro.parse("Gl"), utf("গ্ল"))
     self.assertEqual(avro.parse("gg"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("GG"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("Gg"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("gG"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("gh"), utf("ঘ"))
     self.assertEqual(avro.parse("Gh"), utf("ঘ"))
     self.assertEqual(avro.parse("g"), utf("গ"))
     # Sequences starting with h
     self.assertEqual(avro.parse("hN"), utf("হ্ণ"))
     self.assertEqual(avro.parse("hn"), utf("হ্ন"))
     self.assertEqual(avro.parse("hm"), utf("হ্ম"))
     self.assertEqual(avro.parse("hl"), utf("হ্ল"))
     self.assertEqual(avro.parse("h"), utf("হ"))
     # Sequences starting with j / J
     self.assertEqual(avro.parse("jjh"), utf("জ্ঝ"))
     self.assertEqual(avro.parse("jNG"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("jh"), utf("ঝ"))
     self.assertEqual(avro.parse("jj"), utf("জ্জ"))
     self.assertEqual(avro.parse("j"), utf("জ"))
     self.assertEqual(avro.parse("J"), utf("জ"))
     # Sequences starting with k
     self.assertEqual(avro.parse("kkhN"), utf("ক্ষ্ণ"))
     self.assertEqual(avro.parse("kShN"), utf("ক্ষ্ণ"))
     self.assertEqual(avro.parse("kkhm"), utf("ক্ষ্ম"))
     self.assertEqual(avro.parse("kShm"), utf("ক্ষ্ম"))
     self.assertEqual(avro.parse("kxN"), utf("ক্ষ্ণ"))
     self.assertEqual(avro.parse("kxm"), utf("ক্ষ্ম"))
     self.assertEqual(avro.parse("kkh"), utf("ক্ষ"))
     self.assertEqual(avro.parse("kSh"), utf("ক্ষ"))
     self.assertEqual(avro.parse("ksh"), utf("কশ"))
     self.assertEqual(avro.parse("kx"), utf("ক্ষ"))
     self.assertEqual(avro.parse("kk"), utf("ক্ক"))
     self.assertEqual(avro.parse("kT"), utf("ক্ট"))
     self.assertEqual(avro.parse("kt"), utf("ক্ত"))
     self.assertEqual(avro.parse("kl"), utf("ক্ল"))
     self.assertEqual(avro.parse("ks"), utf("ক্স"))
     self.assertEqual(avro.parse("kh"), utf("খ"))
     self.assertEqual(avro.parse("k"), utf("ক"))
     # Sequences starting with l
     self.assertEqual(avro.parse("lbh"), utf("ল্ভ"))
     self.assertEqual(avro.parse("ldh"), utf("ল্ধ"))
     self.assertEqual(avro.parse("lkh"), utf("লখ"))
     self.assertEqual(avro.parse("lgh"), utf("লঘ"))
     self.assertEqual(avro.parse("lph"), utf("লফ"))
     self.assertEqual(avro.parse("lk"), utf("ল্ক"))
     self.assertEqual(avro.parse("lg"), utf("ল্গ"))
     self.assertEqual(avro.parse("lT"), utf("ল্ট"))
     self.assertEqual(avro.parse("lD"), utf("ল্ড"))
     self.assertEqual(avro.parse("lp"), utf("ল্প"))
     self.assertEqual(avro.parse("lv"), utf("ল্ভ"))
     self.assertEqual(avro.parse("lm"), utf("ল্ম"))
     self.assertEqual(avro.parse("ll"), utf("ল্ল"))
     self.assertEqual(avro.parse("lb"), utf("ল্ব"))
     self.assertEqual(avro.parse("l"), utf("ল"))
     # Sequences starting with m
     self.assertEqual(avro.parse("mth"), utf("ম্থ"))
     self.assertEqual(avro.parse("mph"), utf("ম্ফ"))
     self.assertEqual(avro.parse("mbh"), utf("ম্ভ"))
     self.assertEqual(avro.parse("mpl"), utf("মপ্ল"))
     self.assertEqual(avro.parse("mn"), utf("ম্ন"))
     self.assertEqual(avro.parse("mp"), utf("ম্প"))
     self.assertEqual(avro.parse("mv"), utf("ম্ভ"))
     self.assertEqual(avro.parse("mm"), utf("ম্ম"))
     self.assertEqual(avro.parse("ml"), utf("ম্ল"))
     self.assertEqual(avro.parse("mb"), utf("ম্ব"))
     self.assertEqual(avro.parse("mf"), utf("ম্ফ"))
     self.assertEqual(avro.parse("m"), utf("ম"))
     # Digits 0-9
     self.assertEqual(avro.parse("0"), utf("০"))
     self.assertEqual(avro.parse("1"), utf("১"))
     self.assertEqual(avro.parse("2"), utf("২"))
     self.assertEqual(avro.parse("3"), utf("৩"))
     self.assertEqual(avro.parse("4"), utf("৪"))
     self.assertEqual(avro.parse("5"), utf("৫"))
     self.assertEqual(avro.parse("6"), utf("৬"))
     self.assertEqual(avro.parse("7"), utf("৭"))
     self.assertEqual(avro.parse("8"), utf("৮"))
     self.assertEqual(avro.parse("9"), utf("৯"))
     # Sequences starting with n / N (including Ng, NG and ng variants)
     self.assertEqual(avro.parse("NgkSh"), utf("ঙ্ক্ষ"))
     self.assertEqual(avro.parse("Ngkkh"), utf("ঙ্ক্ষ"))
     self.assertEqual(avro.parse("NGch"), utf("ঞ্ছ"))
     self.assertEqual(avro.parse("Nggh"), utf("ঙ্ঘ"))
     self.assertEqual(avro.parse("Ngkh"), utf("ঙ্খ"))
     self.assertEqual(avro.parse("NGjh"), utf("ঞ্ঝ"))
     self.assertEqual(avro.parse("ngOU"), utf("ঙ্গৌ"))
     self.assertEqual(avro.parse("ngOI"), utf("ঙ্গৈ"))
     self.assertEqual(avro.parse("Ngkx"), utf("ঙ্ক্ষ"))
     self.assertEqual(avro.parse("NGc"), utf("ঞ্চ"))
     self.assertEqual(avro.parse("nch"), utf("ঞ্ছ"))
     self.assertEqual(avro.parse("njh"), utf("ঞ্ঝ"))
     self.assertEqual(avro.parse("ngh"), utf("ঙ্ঘ"))
     self.assertEqual(avro.parse("Ngk"), utf("ঙ্ক"))
     self.assertEqual(avro.parse("Ngx"), utf("ঙ্ষ"))
     self.assertEqual(avro.parse("Ngg"), utf("ঙ্গ"))
     self.assertEqual(avro.parse("Ngm"), utf("ঙ্ম"))
     self.assertEqual(avro.parse("NGj"), utf("ঞ্জ"))
     self.assertEqual(avro.parse("ndh"), utf("ন্ধ"))
     self.assertEqual(avro.parse("nTh"), utf("ন্ঠ"))
     self.assertEqual(avro.parse("NTh"), utf("ণ্ঠ"))
     self.assertEqual(avro.parse("nth"), utf("ন্থ"))
     self.assertEqual(avro.parse("nkh"), utf("ঙ্খ"))
     self.assertEqual(avro.parse("ngo"), utf("ঙ্গ"))
     self.assertEqual(avro.parse("nga"), utf("ঙ্গা"))
     self.assertEqual(avro.parse("ngi"), utf("ঙ্গি"))
     self.assertEqual(avro.parse("ngI"), utf("ঙ্গী"))
     self.assertEqual(avro.parse("ngu"), utf("ঙ্গু"))
     self.assertEqual(avro.parse("ngU"), utf("ঙ্গূ"))
     self.assertEqual(avro.parse("nge"), utf("ঙ্গে"))
     self.assertEqual(avro.parse("ngO"), utf("ঙ্গো"))
     self.assertEqual(avro.parse("NDh"), utf("ণ্ঢ"))
     self.assertEqual(avro.parse("nsh"), utf("নশ"))
     self.assertEqual(avro.parse("Ngr"), utf("ঙর"))
     self.assertEqual(avro.parse("NGr"), utf("ঞর"))
     self.assertEqual(avro.parse("ngr"), utf("ংর"))
     self.assertEqual(avro.parse("nj"), utf("ঞ্জ"))
     self.assertEqual(avro.parse("Ng"), utf("ঙ"))
     self.assertEqual(avro.parse("NG"), utf("ঞ"))
     self.assertEqual(avro.parse("nk"), utf("ঙ্ক"))
     self.assertEqual(avro.parse("ng"), utf("ং"))
     self.assertEqual(avro.parse("nn"), utf("ন্ন"))
     self.assertEqual(avro.parse("NN"), utf("ণ্ণ"))
     self.assertEqual(avro.parse("Nn"), utf("ণ্ন"))
     self.assertEqual(avro.parse("nm"), utf("ন্ম"))
     self.assertEqual(avro.parse("Nm"), utf("ণ্ম"))
     self.assertEqual(avro.parse("nd"), utf("ন্দ"))
     self.assertEqual(avro.parse("nT"), utf("ন্ট"))
     self.assertEqual(avro.parse("NT"), utf("ণ্ট"))
     self.assertEqual(avro.parse("nD"), utf("ন্ড"))
     self.assertEqual(avro.parse("ND"), utf("ণ্ড"))
     self.assertEqual(avro.parse("nt"), utf("ন্ত"))
     self.assertEqual(avro.parse("ns"), utf("ন্স"))
     self.assertEqual(avro.parse("nc"), utf("ঞ্চ"))
     self.assertEqual(avro.parse("n"), utf("ন"))
     self.assertEqual(avro.parse("N"), utf("ণ"))
     # O / OI / OU: the expected output differs with what precedes them
     self.assertEqual(avro.parse("OI`"), utf("ৈ"))
     self.assertEqual(avro.parse("OU`"), utf("ৌ"))
     self.assertEqual(avro.parse("O`"), utf("ো"))
     self.assertEqual(avro.parse("OI"), utf("ঐ"))
     self.assertEqual(avro.parse("kOI"), utf("কৈ"))
     self.assertEqual(avro.parse(" OI"), utf(" ঐ"))
     self.assertEqual(avro.parse("(OI"), utf("(ঐ"))
     self.assertEqual(avro.parse(".OI"), utf("।ঐ"))
     self.assertEqual(avro.parse("OU"), utf("ঔ"))
     self.assertEqual(avro.parse("kOU"), utf("কৌ"))
     self.assertEqual(avro.parse(" OU"), utf(" ঔ"))
     self.assertEqual(avro.parse("-OU"), utf("-ঔ"))
     self.assertEqual(avro.parse(",,OU"), utf("্‌ঔ"))
     self.assertEqual(avro.parse("O"), utf("ও"))
     self.assertEqual(avro.parse("pO"), utf("পো"))
     self.assertEqual(avro.parse(" O"), utf(" ও"))
     self.assertEqual(avro.parse("iO"), utf("ইও"))
     self.assertEqual(avro.parse("`O"), utf("ও"))
     # Sequences starting with p / f
     self.assertEqual(avro.parse("phl"), utf("ফ্ল"))
     self.assertEqual(avro.parse("pT"), utf("প্ট"))
     self.assertEqual(avro.parse("pt"), utf("প্ত"))
     self.assertEqual(avro.parse("pn"), utf("প্ন"))
     self.assertEqual(avro.parse("pp"), utf("প্প"))
     self.assertEqual(avro.parse("pl"), utf("প্ল"))
     self.assertEqual(avro.parse("ps"), utf("প্স"))
     self.assertEqual(avro.parse("ph"), utf("ফ"))
     self.assertEqual(avro.parse("fl"), utf("ফ্ল"))
     self.assertEqual(avro.parse("f"), utf("ফ"))
     self.assertEqual(avro.parse("p"), utf("প"))
     # rri, rZ, ry, rr, R and r in various contexts
     self.assertEqual(avro.parse("rri`"), utf("ৃ"))
     self.assertEqual(avro.parse("rri"), utf("ঋ"))
     self.assertEqual(avro.parse("krri"), utf("কৃ"))
     self.assertEqual(avro.parse("Irri"), utf("ঈঋ"))
     self.assertEqual(avro.parse("^rri"), utf("ঁঋ"))
     self.assertEqual(avro.parse(":rri"), utf("ঃঋ"))
     self.assertEqual(avro.parse("rZ"), utf("র‍্য"))
     self.assertEqual(avro.parse("krZ"), utf("ক্র্য"))
     self.assertEqual(avro.parse("rrZ"), utf("রর‍্য"))
     self.assertEqual(avro.parse("yrZ"), utf("ইয়র‍্য"))
     self.assertEqual(avro.parse("wrZ"), utf("ওর‍্য"))
     self.assertEqual(avro.parse("xrZ"), utf("এক্সর‍্য"))
     self.assertEqual(avro.parse("irZ"), utf("ইর‍্য"))
     self.assertEqual(avro.parse("-rZ"), utf("-র‍্য"))
     self.assertEqual(avro.parse("rrrZ"), utf("ররর‍্য"))
     self.assertEqual(avro.parse("ry"), utf("র‍্য"))
     self.assertEqual(avro.parse("qry"), utf("ক্র্য"))
     self.assertEqual(avro.parse("rry"), utf("রর‍্য"))
     self.assertEqual(avro.parse("yry"), utf("ইয়র‍্য"))
     self.assertEqual(avro.parse("wry"), utf("ওর‍্য"))
     self.assertEqual(avro.parse("xry"), utf("এক্সর‍্য"))
     self.assertEqual(avro.parse("0ry"), utf("০র‍্য"))
     self.assertEqual(avro.parse("rrrry"), utf("রররর‍্য"))
     self.assertEqual(avro.parse("Rry"), utf("ড়্র্য"))
     self.assertEqual(avro.parse("rr"), utf("রর"))
     self.assertEqual(avro.parse("arr"), utf("আরর"))
     self.assertEqual(avro.parse("arrk"), utf("আর্ক"))
     self.assertEqual(avro.parse("arra"), utf("আররা"))
     self.assertEqual(avro.parse("arr"), utf("আরর"))
     self.assertEqual(avro.parse("arr!"), utf("আরর!"))
     self.assertEqual(avro.parse("krr"), utf("ক্রর"))
     self.assertEqual(avro.parse("krra"), utf("ক্ররা"))
     self.assertEqual(avro.parse("Rg"), utf("ড়্গ"))
     self.assertEqual(avro.parse("Rh"), utf("ঢ়"))
     self.assertEqual(avro.parse("R"), utf("ড়"))
     self.assertEqual(avro.parse("r"), utf("র"))
     self.assertEqual(avro.parse("or"), utf("অর"))
     self.assertEqual(avro.parse("mr"), utf("ম্র"))
     self.assertEqual(avro.parse("1r"), utf("১র"))
     self.assertEqual(avro.parse("+r"), utf("+র"))
     self.assertEqual(avro.parse("rr"), utf("রর"))
     self.assertEqual(avro.parse("yr"), utf("ইয়র"))
     self.assertEqual(avro.parse("wr"), utf("ওর"))
     self.assertEqual(avro.parse("xr"), utf("এক্সর"))
     self.assertEqual(avro.parse("zr"), utf("য্র"))
     self.assertEqual(avro.parse("mri"), utf("ম্রি"))
     # Sequences starting with s / S / sh / Sh
     self.assertEqual(avro.parse("shch"), utf("শ্ছ"))
     self.assertEqual(avro.parse("ShTh"), utf("ষ্ঠ"))
     self.assertEqual(avro.parse("Shph"), utf("ষ্ফ"))
     self.assertEqual(avro.parse("Sch"), utf("শ্ছ"))
     self.assertEqual(avro.parse("skl"), utf("স্ক্ল"))
     self.assertEqual(avro.parse("skh"), utf("স্খ"))
     self.assertEqual(avro.parse("sth"), utf("স্থ"))
     self.assertEqual(avro.parse("sph"), utf("স্ফ"))
     self.assertEqual(avro.parse("shc"), utf("শ্চ"))
     self.assertEqual(avro.parse("sht"), utf("শ্ত"))
     self.assertEqual(avro.parse("shn"), utf("শ্ন"))
     self.assertEqual(avro.parse("shm"), utf("শ্ম"))
     self.assertEqual(avro.parse("shl"), utf("শ্ল"))
     self.assertEqual(avro.parse("Shk"), utf("ষ্ক"))
     self.assertEqual(avro.parse("ShT"), utf("ষ্ট"))
     self.assertEqual(avro.parse("ShN"), utf("ষ্ণ"))
     self.assertEqual(avro.parse("Shp"), utf("ষ্প"))
     self.assertEqual(avro.parse("Shf"), utf("ষ্ফ"))
     self.assertEqual(avro.parse("Shm"), utf("ষ্ম"))
     self.assertEqual(avro.parse("spl"), utf("স্প্ল"))
     self.assertEqual(avro.parse("sk"), utf("স্ক"))
     self.assertEqual(avro.parse("Sc"), utf("শ্চ"))
     self.assertEqual(avro.parse("sT"), utf("স্ট"))
     self.assertEqual(avro.parse("st"), utf("স্ত"))
     self.assertEqual(avro.parse("sn"), utf("স্ন"))
     self.assertEqual(avro.parse("sp"), utf("স্প"))
     self.assertEqual(avro.parse("sf"), utf("স্ফ"))
     self.assertEqual(avro.parse("sm"), utf("স্ম"))
     self.assertEqual(avro.parse("sl"), utf("স্ল"))
     self.assertEqual(avro.parse("sh"), utf("শ"))
     self.assertEqual(avro.parse("Sc"), utf("শ্চ"))
     self.assertEqual(avro.parse("St"), utf("শ্ত"))
     self.assertEqual(avro.parse("Sn"), utf("শ্ন"))
     self.assertEqual(avro.parse("Sm"), utf("শ্ম"))
     self.assertEqual(avro.parse("Sl"), utf("শ্ল"))
     self.assertEqual(avro.parse("Sh"), utf("ষ"))
     self.assertEqual(avro.parse("s"), utf("স"))
     self.assertEqual(avro.parse("S"), utf("শ"))
     # oo and o in various contexts
     self.assertEqual(avro.parse("oo"), utf("উ"))
     self.assertEqual(avro.parse("OO"), utf("ওও"))
     self.assertEqual(avro.parse("oo`"), utf("ু"))
     self.assertEqual(avro.parse("koo"), utf("কু"))
     self.assertEqual(avro.parse("ooo"), utf("উঅ"))
     self.assertEqual(avro.parse("!oo"), utf("!উ"))
     self.assertEqual(avro.parse("!ooo"), utf("!উঅ"))
     self.assertEqual(avro.parse("aoo"), utf("আউ"))
     self.assertEqual(avro.parse("oop"), utf("উপ"))
     self.assertEqual(avro.parse("ooo`"), utf("উ"))
     # Expected value (an empty string) is passed first here, unlike the
     # surrounding calls
     self.assertEqual("", avro.parse("o`"))
     self.assertEqual(avro.parse("oZ"), utf("অ্য"))
     self.assertEqual(avro.parse("oY"), utf("অয়"))
     self.assertEqual(avro.parse("o"), utf("অ"))
     self.assertEqual(avro.parse("!o"), utf("!অ"))
     self.assertEqual(avro.parse("^o"), utf("ঁঅ"))
     self.assertEqual(avro.parse("*o"), utf("*অ"))
     self.assertEqual(avro.parse("io"), utf("ইও"))
     self.assertEqual(avro.parse("yo"), utf("ইয়"))
     self.assertEqual(avro.parse("no"), utf("ন"))
     # Sequences starting with t / T
     self.assertEqual(avro.parse("tth"), utf("ত্থ"))
     self.assertEqual(avro.parse("t``"), utf("ৎ"))
     self.assertEqual(avro.parse("`t``"), utf("ৎ"))
     self.assertEqual(avro.parse("t``t``"), utf("ৎৎ"))
     self.assertEqual(avro.parse("t```"), utf("ৎ"))
     self.assertEqual(avro.parse("TT"), utf("ট্ট"))
     self.assertEqual(avro.parse("Tm"), utf("ট্ম"))
     self.assertEqual(avro.parse("Th"), utf("ঠ"))
     self.assertEqual(avro.parse("tn"), utf("ত্ন"))
     self.assertEqual(avro.parse("tm"), utf("ত্ম"))
     self.assertEqual(avro.parse("th"), utf("থ"))
     self.assertEqual(avro.parse("tt"), utf("ত্ত"))
     self.assertEqual(avro.parse("T"), utf("ট"))
     self.assertEqual(avro.parse("t"), utf("ত"))
     # a, i, I, u, U, ee and e in various contexts
     self.assertEqual(avro.parse("aZ"), utf("অ্যা"))
     self.assertEqual(avro.parse("aaZ"), utf("আঅ্যা"))
     self.assertEqual(avro.parse("AZ"), utf("অ্যা"))
     self.assertEqual(avro.parse("a`"), utf("া"))
     self.assertEqual(avro.parse("a``"), utf("া"))
     self.assertEqual(avro.parse("ka`"), utf("কা"))
     self.assertEqual(avro.parse("A`"), utf("া"))
     self.assertEqual(avro.parse("a"), utf("আ"))
     self.assertEqual(avro.parse("`a"), utf("আ"))
     self.assertEqual(avro.parse("k`a"), utf("কআ"))
     self.assertEqual(avro.parse("ia"), utf("ইয়া"))
     self.assertEqual(avro.parse("aaaa`"), utf("আআআা"))
     self.assertEqual(avro.parse("i`"), utf("ি"))
     self.assertEqual(avro.parse("i"), utf("ই"))
     self.assertEqual(avro.parse("`i"), utf("ই"))
     self.assertEqual(avro.parse("hi"), utf("হি"))
     self.assertEqual(avro.parse("ih"), utf("ইহ"))
     self.assertEqual(avro.parse("i`h"), utf("িহ"))
     self.assertEqual(avro.parse("I`"), utf("ী"))
     self.assertEqual(avro.parse("I"), utf("ঈ"))
     self.assertEqual(avro.parse("cI"), utf("চী"))
     self.assertEqual(avro.parse("Ix"), utf("ঈক্স"))
     self.assertEqual(avro.parse("II"), utf("ঈঈ"))
     self.assertEqual(avro.parse("0I"), utf("০ঈ"))
     self.assertEqual(avro.parse("oI"), utf("অঈ"))
     self.assertEqual(avro.parse("u`"), utf("ু"))
     self.assertEqual(avro.parse("u"), utf("উ"))
     self.assertEqual(avro.parse("ku"), utf("কু"))
     self.assertEqual(avro.parse("uk"), utf("উক"))
     self.assertEqual(avro.parse("uu"), utf("উউ"))
     self.assertEqual(avro.parse("iu"), utf("ইউ"))
     self.assertEqual(avro.parse("&u"), utf("&উ"))
     self.assertEqual(avro.parse("u&"), utf("উ&"))
     self.assertEqual(avro.parse("U`"), utf("ূ"))
     self.assertEqual(avro.parse("U"), utf("ঊ"))
     self.assertEqual(avro.parse("yU"), utf("ইয়ূ"))
     self.assertEqual(avro.parse("Uy"), utf("ঊয়"))
     self.assertEqual(avro.parse("^U"), utf("ঁঊ"))
     self.assertEqual(avro.parse("U^"), utf("ঊঁ"))
     self.assertEqual(avro.parse("EE"), utf("ঈ"))
     self.assertEqual(avro.parse("ee"), utf("ঈ"))
     self.assertEqual(avro.parse("Ee"), utf("ঈ"))
     self.assertEqual(avro.parse("eE"), utf("ঈ"))
     self.assertEqual(avro.parse("ee`"), utf("ী"))
     self.assertEqual(avro.parse("kee"), utf("কী"))
     self.assertEqual(avro.parse("eek"), utf("ঈক"))
     self.assertEqual(avro.parse("0ee"), utf("০ঈ"))
     self.assertEqual(avro.parse("ee8"), utf("ঈ৮"))
     self.assertEqual(avro.parse("(ee)"), utf("(ঈ)"))
     self.assertEqual(avro.parse("e`"), utf("ে"))
     self.assertEqual(avro.parse("e"), utf("এ"))
     self.assertEqual(avro.parse("ke"), utf("কে"))
     self.assertEqual(avro.parse("we"), utf("ওয়ে"))
     self.assertEqual(avro.parse("#e#"), utf("#এ#"))
     self.assertEqual(avro.parse("`e`"), utf("ে"))
     # z, Z, y, Y, q, Q, w and x in various contexts
     self.assertEqual(avro.parse("z"), utf("য"))
     self.assertEqual(avro.parse("Z"), utf("্য"))
     self.assertEqual(avro.parse("rZ"), utf("র‍্য"))
     self.assertEqual(avro.parse("kZS"), utf("ক্যশ"))
     self.assertEqual(avro.parse("y"), utf("ইয়"))
     self.assertEqual(avro.parse("oy"), utf("অয়"))
     self.assertEqual(avro.parse("ky"), utf("ক্য"))
     self.assertEqual(avro.parse("ya"), utf("ইয়া"))
     self.assertEqual(avro.parse("yaa"), utf("ইয়াআ"))
     self.assertEqual(avro.parse("Y"), utf("য়"))
     self.assertEqual(avro.parse("YY"), utf("য়য়"))
     self.assertEqual(avro.parse("iY"), utf("ইয়"))
     self.assertEqual(avro.parse("kY"), utf("কয়"))
     self.assertEqual(avro.parse("q"), utf("ক"))
     self.assertEqual(avro.parse("Q"), utf("ক"))
     self.assertEqual(avro.parse("w"), utf("ও"))
     self.assertEqual(avro.parse("wa"), utf("ওয়া"))
     self.assertEqual(avro.parse("-wa-"), utf("-ওয়া-"))
     self.assertEqual(avro.parse("woo"), utf("ওয়ু"))
     self.assertEqual(avro.parse("wre"), utf("ওরে"))
     self.assertEqual(avro.parse("kw"), utf("ক্ব"))
     self.assertEqual(avro.parse("x"), utf("এক্স"))
     self.assertEqual(avro.parse("ex"), utf("এক্স"))
     self.assertEqual(avro.parse("bx"), utf("বক্স"))
     # Symbols: colon, caret, double comma, dollar sign and lone backtick
     self.assertEqual(avro.parse(":`"), utf(":"))
     self.assertEqual(avro.parse(":"), utf("ঃ"))
     self.assertEqual(avro.parse("^`"), utf("^"))
     self.assertEqual(avro.parse("^"), utf("ঁ"))
     self.assertEqual(avro.parse("k^"), utf("কঁ"))
     self.assertEqual(avro.parse("k^i"), utf("কঁই"))
     self.assertEqual(avro.parse("ki^"), utf("কিঁ"))
     self.assertEqual(avro.parse(",,"), utf("্‌"))
     self.assertEqual(avro.parse(",,,"), utf("্‌,"))
     self.assertEqual(avro.parse(",,`,"), utf("্‌,"))
     self.assertEqual(avro.parse("`,,"), utf("্‌"))
     self.assertEqual(avro.parse(",`,"), utf(",,"))
     self.assertEqual(avro.parse("$"), utf("৳"))
     # A lone backtick is expected to produce an empty string
     self.assertEqual("", avro.parse("`"))
     self.assertEqual(avro.parse("bdh"), utf("ব্ধ"))
Example #8
0
 def test_patterns_with_rules_svaravarna(self):
     """Test patterns - with rules - svaravarna"""
     # assertEqual is used instead of the deprecated assertEquals
     # alias, which no longer exists in Python 3.12+.
     # Test some vowels (svaravarna)
     self.assertEqual(utf("অ"), avro.parse("o"))
     self.assertEqual(utf("আ"), avro.parse("a"))
     self.assertEqual(utf("ই"), avro.parse("i"))
     self.assertEqual(utf("ঈ"), avro.parse("I"))
     self.assertEqual(utf("উ"), avro.parse("u"))
     self.assertEqual(utf("উ"), avro.parse("oo"))
     self.assertEqual(utf("ঊ"), avro.parse("U"))
     self.assertEqual(utf("এ"), avro.parse("e"))
     self.assertEqual(utf("ঐ"), avro.parse("OI"))
     self.assertEqual(utf("ও"), avro.parse("O"))
     self.assertEqual(utf("ঔ"), avro.parse("OU"))
Example #9
0
 def test_patterns_punctuations(self):
     """Tests patterns - punctuations"""
     # assertEqual is used instead of the deprecated assertEquals
     # alias, which no longer exists in Python 3.12+.
     # One or two periods are expected to be replaced; three are
     # expected to pass through unchanged.
     self.assertEqual(utf("।"), avro.parse("."))
     self.assertEqual(utf("।।"), avro.parse(".."))
     self.assertEqual(utf("..."), avro.parse("..."))
Example #10
0
 def test_patterns_numbers(self):
     """Test patterns - numbers"""
     # assertEqual is used instead of the deprecated assertEquals
     # alias, which no longer exists in Python 3.12+.
     # Test each single digit
     self.assertEqual(utf("০"), avro.parse("0"))
     self.assertEqual(utf("১"), avro.parse("1"))
     self.assertEqual(utf("২"), avro.parse("2"))
     self.assertEqual(utf("৩"), avro.parse("3"))
     self.assertEqual(utf("৪"), avro.parse("4"))
     self.assertEqual(utf("৫"), avro.parse("5"))
     self.assertEqual(utf("৬"), avro.parse("6"))
     self.assertEqual(utf("৭"), avro.parse("7"))
     self.assertEqual(utf("৮"), avro.parse("8"))
     self.assertEqual(utf("৯"), avro.parse("9"))
     # Test a multi-digit number
     self.assertEqual(utf("১১২"), avro.parse("112"))
Example #11
0
 def test_words_with_punctuations(self):
     """Test parsing of words with punctuations"""
     # assertEqual is used instead of the deprecated assertEquals
     # alias, which no longer exists in Python 3.12+.
     self.assertEqual(utf('আয়রে,'), avro.parse('ayre,'))
     self.assertEqual(utf('ভোলা'), avro.parse('bhOla'))
     self.assertEqual(utf('খেয়াল'), avro.parse('kheyal'))
     self.assertEqual(utf('খোলা'), avro.parse('khOla'))
Example #12
0
 def test_sentences(self):
     """Test parsing of sentences"""
     # assertEqual is used instead of the deprecated assertEquals
     # alias, which no longer exists in Python 3.12+.
     self.assertEqual(utf('আমি বাংলায় গান গাই'),
                      avro.parse('ami banglay gan gai'))
Example #13
0
 def test_sentences(self):
     """Parse one full sentence and compare it with the expected text."""
     # Build the expected value first, then parse, matching the order in
     # which the two arguments are evaluated.
     expected = utf('আমি বাংলায় গান গাই')
     actual = avro.parse('ami banglay gan gai')
     self.assertEqual(expected, actual)
Example #14
0
def parse(text):
    """Parses input text, matches and replaces using avrodict

    The text is case-normalised and then scanned one character at a
    time. At every position that has not already been consumed by an
    earlier match, non rule patterns are tried first and rule
    patterns second. A character that matches nothing is copied to
    the output unchanged.

    If a valid replacement is found, returns the replaced string. If
    no replacement is found, returns the input text.

    Usage:

    ::
      from pyavrophonetic import avro
      avro.parse("ami banglay gan gai")

    """
    # Sanitize text case to meet phonetic comparison standards
    fixed_text = validate.fix_string_case(utf(text))
    # prepare output list
    output = []
    # cursor end point: index of the first character not yet consumed
    cur_end = 0
    # iterate through input text
    for cur, i in enumerate(fixed_text):
        # Trap characters with unicode encoding errors. UnicodeError
        # is the common base of UnicodeDecodeError (raised when a
        # byte string is implicitly decoded) and UnicodeEncodeError
        # (raised by str.encode, e.g. for a lone surrogate), so both
        # kinds of failure are passed through instead of crashing.
        try:
            i.encode('utf-8')
        except UnicodeError:
            cur_end = cur + 1
            output.append(i)
            continue

        # If cursor is in a position that has already been
        # processed/replaced, we don't process anything at all
        if cur < cur_end:
            continue

        # Try looking in non rule patterns with current string portion
        match = match_non_rule_patterns(fixed_text, cur)
        if match["matched"]:
            output.append(match["replaced"])
            cur_end = cur + len(match["found"])
            continue

        # Non rule patterns have not matched, try rule patterns
        match = match_rule_patterns(fixed_text, cur)
        if not match["matched"]:
            # If none matched, append present cursor value
            cur_end = cur + 1
            output.append(i)
            continue

        # Update cur_end as cursor + length of match found
        cur_end = cur + len(match["found"])
        # Process its rules
        replaced = process_rules(rules=match["rules"],
                                 fixed_text=fixed_text,
                                 cur=cur, cur_end=cur_end)
        # If any rule matched, output the replacement from the rule,
        # else output the pattern's top-level/default replacement
        if replaced is not None:
            output.append(replaced)
        else:
            output.append(match["replaced"])

    # End looping through input text and produce output
    return ''.join(output)
Example #15
0
 def test_words_with_punctuations(self):
     """Check parsing of single words, one of them ending in a comma"""
     # (expected output, parser input) pairs, checked in order
     cases = (
         ('আয়রে,', 'ayre,'),
         ('ভোলা', 'bhOla'),
         ('খেয়াল', 'kheyal'),
         ('খোলা', 'khOla'),
     )
     for expected, typed in cases:
         self.assertEqual(utf(expected), avro.parse(typed))
Example #16
0
 def test_other(self):
     """Check that a lone "!" parses to the same value as utf("!")"""
     actual = avro.parse("!")
     expected = utf("!")
     self.assertEqual(actual, expected)
Example #17
0
 def test_patterns_numbers(self):
     """Check that digit inputs are parsed into the expected numerals"""
     # (expected output, parser input) pairs, checked in order; the
     # last pair is a multi-digit number
     cases = (
         ("০", "0"),
         ("১", "1"),
         ("২", "2"),
         ("৩", "3"),
         ("৪", "4"),
         ("৫", "5"),
         ("৬", "6"),
         ("৭", "7"),
         ("৮", "8"),
         ("৯", "9"),
         ("১১২", "112"),
     )
     for expected, typed in cases:
         self.assertEqual(utf(expected), avro.parse(typed))
Example #18
0
 def test_other(self):
     """Check that parsing "!" gives the same value as utf("!")"""
     parsed = avro.parse("!")
     self.assertEqual(parsed, utf("!"))
Example #19
0
 def test_basic(self):
     """Test avro.parse against a long table of short inputs

     Each assertion compares the parser output for one short input
     with the expected text wrapped in utf(). The assertions are
     grouped below by the leading character(s) of the input.

     """
     # Inputs starting with b / v
     self.assertEqual(avro.parse("bhl"), utf("ভ্ল"))
     self.assertEqual(avro.parse("bj"), utf("ব্জ"))
     self.assertEqual(avro.parse("bd"), utf("ব্দ"))
     self.assertEqual(avro.parse("bb"), utf("ব্ব"))
     self.assertEqual(avro.parse("bl"), utf("ব্ল"))
     self.assertEqual(avro.parse("bh"), utf("ভ"))
     self.assertEqual(avro.parse("vl"), utf("ভ্ল"))
     self.assertEqual(avro.parse("b"), utf("ব"))
     self.assertEqual(avro.parse("v"), utf("ভ"))
     # Inputs starting with c
     self.assertEqual(avro.parse("cNG"), utf("চ্ঞ"))
     self.assertEqual(avro.parse("cch"), utf("চ্ছ"))
     self.assertEqual(avro.parse("cc"), utf("চ্চ"))
     self.assertEqual(avro.parse("ch"), utf("ছ"))
     self.assertEqual(avro.parse("c"), utf("চ"))
     # Inputs starting with d / D
     self.assertEqual(avro.parse("dhn"), utf("ধ্ন"))
     self.assertEqual(avro.parse("dhm"), utf("ধ্ম"))
     self.assertEqual(avro.parse("dgh"), utf("দ্ঘ"))
     self.assertEqual(avro.parse("ddh"), utf("দ্ধ"))
     self.assertEqual(avro.parse("dbh"), utf("দ্ভ"))
     self.assertEqual(avro.parse("dv"), utf("দ্ভ"))
     self.assertEqual(avro.parse("dm"), utf("দ্ম"))
     self.assertEqual(avro.parse("DD"), utf("ড্ড"))
     self.assertEqual(avro.parse("Dh"), utf("ঢ"))
     self.assertEqual(avro.parse("dh"), utf("ধ"))
     self.assertEqual(avro.parse("dg"), utf("দ্গ"))
     self.assertEqual(avro.parse("dd"), utf("দ্দ"))
     self.assertEqual(avro.parse("D"), utf("ড"))
     self.assertEqual(avro.parse("d"), utf("দ"))
     # Full stops, including the form followed by a backtick
     self.assertEqual(avro.parse("..."), utf("..."))
     self.assertEqual(avro.parse(".`"), utf("."))
     self.assertEqual(avro.parse(".."), utf("।।"))
     self.assertEqual(avro.parse("."), utf("।"))
     # Inputs starting with g / G
     self.assertEqual(avro.parse("ghn"), utf("ঘ্ন"))
     self.assertEqual(avro.parse("Ghn"), utf("ঘ্ন"))
     self.assertEqual(avro.parse("gdh"), utf("গ্ধ"))
     self.assertEqual(avro.parse("gN"), utf("গ্ণ"))
     self.assertEqual(avro.parse("GN"), utf("গ্ণ"))
     self.assertEqual(avro.parse("gn"), utf("গ্ন"))
     self.assertEqual(avro.parse("gm"), utf("গ্ম"))
     self.assertEqual(avro.parse("Gm"), utf("গ্ম"))
     self.assertEqual(avro.parse("gl"), utf("গ্ল"))
     self.assertEqual(avro.parse("Gl"), utf("গ্ল"))
     self.assertEqual(avro.parse("gg"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("GG"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("Gg"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("gG"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("gh"), utf("ঘ"))
     self.assertEqual(avro.parse("Gh"), utf("ঘ"))
     self.assertEqual(avro.parse("g"), utf("গ"))
     # Inputs starting with h
     self.assertEqual(avro.parse("hN"), utf("হ্ণ"))
     self.assertEqual(avro.parse("hn"), utf("হ্ন"))
     self.assertEqual(avro.parse("hm"), utf("হ্ম"))
     self.assertEqual(avro.parse("hl"), utf("হ্ল"))
     self.assertEqual(avro.parse("h"), utf("হ"))
     # Inputs starting with j / J
     self.assertEqual(avro.parse("jjh"), utf("জ্ঝ"))
     self.assertEqual(avro.parse("jNG"), utf("জ্ঞ"))
     self.assertEqual(avro.parse("jh"), utf("ঝ"))
     self.assertEqual(avro.parse("jj"), utf("জ্জ"))
     self.assertEqual(avro.parse("j"), utf("জ"))
     self.assertEqual(avro.parse("J"), utf("জ"))
     # Inputs starting with k
     self.assertEqual(avro.parse("kkhN"), utf("ক্ষ্ণ"))
     self.assertEqual(avro.parse("kShN"), utf("ক্ষ্ণ"))
     self.assertEqual(avro.parse("kkhm"), utf("ক্ষ্ম"))
     self.assertEqual(avro.parse("kShm"), utf("ক্ষ্ম"))
     self.assertEqual(avro.parse("kxN"), utf("ক্ষ্ণ"))
     self.assertEqual(avro.parse("kxm"), utf("ক্ষ্ম"))
     self.assertEqual(avro.parse("kkh"), utf("ক্ষ"))
     self.assertEqual(avro.parse("kSh"), utf("ক্ষ"))
     self.assertEqual(avro.parse("ksh"), utf("কশ"))
     self.assertEqual(avro.parse("kx"), utf("ক্ষ"))
     self.assertEqual(avro.parse("kk"), utf("ক্ক"))
     self.assertEqual(avro.parse("kT"), utf("ক্ট"))
     self.assertEqual(avro.parse("kt"), utf("ক্ত"))
     self.assertEqual(avro.parse("kl"), utf("ক্ল"))
     self.assertEqual(avro.parse("ks"), utf("ক্স"))
     self.assertEqual(avro.parse("kh"), utf("খ"))
     self.assertEqual(avro.parse("k"), utf("ক"))
     # Inputs starting with l
     self.assertEqual(avro.parse("lbh"), utf("ল্ভ"))
     self.assertEqual(avro.parse("ldh"), utf("ল্ধ"))
     self.assertEqual(avro.parse("lkh"), utf("লখ"))
     self.assertEqual(avro.parse("lgh"), utf("লঘ"))
     self.assertEqual(avro.parse("lph"), utf("লফ"))
     self.assertEqual(avro.parse("lk"), utf("ল্ক"))
     self.assertEqual(avro.parse("lg"), utf("ল্গ"))
     self.assertEqual(avro.parse("lT"), utf("ল্ট"))
     self.assertEqual(avro.parse("lD"), utf("ল্ড"))
     self.assertEqual(avro.parse("lp"), utf("ল্প"))
     self.assertEqual(avro.parse("lv"), utf("ল্ভ"))
     self.assertEqual(avro.parse("lm"), utf("ল্ম"))
     self.assertEqual(avro.parse("ll"), utf("ল্ল"))
     self.assertEqual(avro.parse("lb"), utf("ল্ব"))
     self.assertEqual(avro.parse("l"), utf("ল"))
     # Inputs starting with m
     self.assertEqual(avro.parse("mth"), utf("ম্থ"))
     self.assertEqual(avro.parse("mph"), utf("ম্ফ"))
     self.assertEqual(avro.parse("mbh"), utf("ম্ভ"))
     self.assertEqual(avro.parse("mpl"), utf("মপ্ল"))
     self.assertEqual(avro.parse("mn"), utf("ম্ন"))
     self.assertEqual(avro.parse("mp"), utf("ম্প"))
     self.assertEqual(avro.parse("mv"), utf("ম্ভ"))
     self.assertEqual(avro.parse("mm"), utf("ম্ম"))
     self.assertEqual(avro.parse("ml"), utf("ম্ল"))
     self.assertEqual(avro.parse("mb"), utf("ম্ব"))
     self.assertEqual(avro.parse("mf"), utf("ম্ফ"))
     self.assertEqual(avro.parse("m"), utf("ম"))
     # Digits
     self.assertEqual(avro.parse("0"), utf("০"))
     self.assertEqual(avro.parse("1"), utf("১"))
     self.assertEqual(avro.parse("2"), utf("২"))
     self.assertEqual(avro.parse("3"), utf("৩"))
     self.assertEqual(avro.parse("4"), utf("৪"))
     self.assertEqual(avro.parse("5"), utf("৫"))
     self.assertEqual(avro.parse("6"), utf("৬"))
     self.assertEqual(avro.parse("7"), utf("৭"))
     self.assertEqual(avro.parse("8"), utf("৮"))
     self.assertEqual(avro.parse("9"), utf("৯"))
     # Inputs starting with n / N (including ng, Ng and NG)
     self.assertEqual(avro.parse("NgkSh"), utf("ঙ্ক্ষ"))
     self.assertEqual(avro.parse("Ngkkh"), utf("ঙ্ক্ষ"))
     self.assertEqual(avro.parse("NGch"), utf("ঞ্ছ"))
     self.assertEqual(avro.parse("Nggh"), utf("ঙ্ঘ"))
     self.assertEqual(avro.parse("Ngkh"), utf("ঙ্খ"))
     self.assertEqual(avro.parse("NGjh"), utf("ঞ্ঝ"))
     self.assertEqual(avro.parse("ngOU"), utf("ঙ্গৌ"))
     self.assertEqual(avro.parse("ngOI"), utf("ঙ্গৈ"))
     self.assertEqual(avro.parse("Ngkx"), utf("ঙ্ক্ষ"))
     self.assertEqual(avro.parse("NGc"), utf("ঞ্চ"))
     self.assertEqual(avro.parse("nch"), utf("ঞ্ছ"))
     self.assertEqual(avro.parse("njh"), utf("ঞ্ঝ"))
     self.assertEqual(avro.parse("ngh"), utf("ঙ্ঘ"))
     self.assertEqual(avro.parse("Ngk"), utf("ঙ্ক"))
     self.assertEqual(avro.parse("Ngx"), utf("ঙ্ষ"))
     self.assertEqual(avro.parse("Ngg"), utf("ঙ্গ"))
     self.assertEqual(avro.parse("Ngm"), utf("ঙ্ম"))
     self.assertEqual(avro.parse("NGj"), utf("ঞ্জ"))
     self.assertEqual(avro.parse("ndh"), utf("ন্ধ"))
     self.assertEqual(avro.parse("nTh"), utf("ন্ঠ"))
     self.assertEqual(avro.parse("NTh"), utf("ণ্ঠ"))
     self.assertEqual(avro.parse("nth"), utf("ন্থ"))
     self.assertEqual(avro.parse("nkh"), utf("ঙ্খ"))
     self.assertEqual(avro.parse("ngo"), utf("ঙ্গ"))
     self.assertEqual(avro.parse("nga"), utf("ঙ্গা"))
     self.assertEqual(avro.parse("ngi"), utf("ঙ্গি"))
     self.assertEqual(avro.parse("ngI"), utf("ঙ্গী"))
     self.assertEqual(avro.parse("ngu"), utf("ঙ্গু"))
     self.assertEqual(avro.parse("ngU"), utf("ঙ্গূ"))
     self.assertEqual(avro.parse("nge"), utf("ঙ্গে"))
     self.assertEqual(avro.parse("ngO"), utf("ঙ্গো"))
     self.assertEqual(avro.parse("NDh"), utf("ণ্ঢ"))
     self.assertEqual(avro.parse("nsh"), utf("নশ"))
     self.assertEqual(avro.parse("Ngr"), utf("ঙর"))
     self.assertEqual(avro.parse("NGr"), utf("ঞর"))
     self.assertEqual(avro.parse("ngr"), utf("ংর"))
     self.assertEqual(avro.parse("nj"), utf("ঞ্জ"))
     self.assertEqual(avro.parse("Ng"), utf("ঙ"))
     self.assertEqual(avro.parse("NG"), utf("ঞ"))
     self.assertEqual(avro.parse("nk"), utf("ঙ্ক"))
     self.assertEqual(avro.parse("ng"), utf("ং"))
     self.assertEqual(avro.parse("nn"), utf("ন্ন"))
     self.assertEqual(avro.parse("NN"), utf("ণ্ণ"))
     self.assertEqual(avro.parse("Nn"), utf("ণ্ন"))
     self.assertEqual(avro.parse("nm"), utf("ন্ম"))
     self.assertEqual(avro.parse("Nm"), utf("ণ্ম"))
     self.assertEqual(avro.parse("nd"), utf("ন্দ"))
     self.assertEqual(avro.parse("nT"), utf("ন্ট"))
     self.assertEqual(avro.parse("NT"), utf("ণ্ট"))
     self.assertEqual(avro.parse("nD"), utf("ন্ড"))
     self.assertEqual(avro.parse("ND"), utf("ণ্ড"))
     self.assertEqual(avro.parse("nt"), utf("ন্ত"))
     self.assertEqual(avro.parse("ns"), utf("ন্স"))
     self.assertEqual(avro.parse("nc"), utf("ঞ্চ"))
     self.assertEqual(avro.parse("n"), utf("ন"))
     self.assertEqual(avro.parse("N"), utf("ণ"))
     # O, OI and OU on their own and after other characters
     self.assertEqual(avro.parse("OI`"), utf("ৈ"))
     self.assertEqual(avro.parse("OU`"), utf("ৌ"))
     self.assertEqual(avro.parse("O`"), utf("ো"))
     self.assertEqual(avro.parse("OI"), utf("ঐ"))
     self.assertEqual(avro.parse("kOI"), utf("কৈ"))
     self.assertEqual(avro.parse(" OI"), utf(" ঐ"))
     self.assertEqual(avro.parse("(OI"), utf("(ঐ"))
     self.assertEqual(avro.parse(".OI"), utf("।ঐ"))
     self.assertEqual(avro.parse("OU"), utf("ঔ"))
     self.assertEqual(avro.parse("kOU"), utf("কৌ"))
     self.assertEqual(avro.parse(" OU"), utf(" ঔ"))
     self.assertEqual(avro.parse("-OU"), utf("-ঔ"))
     self.assertEqual(avro.parse(",,OU"), utf("্‌ঔ"))
     self.assertEqual(avro.parse("O"), utf("ও"))
     self.assertEqual(avro.parse("pO"), utf("পো"))
     self.assertEqual(avro.parse(" O"), utf(" ও"))
     self.assertEqual(avro.parse("iO"), utf("ইও"))
     self.assertEqual(avro.parse("`O"), utf("ও"))
     # Inputs starting with p / f
     self.assertEqual(avro.parse("phl"), utf("ফ্ল"))
     self.assertEqual(avro.parse("pT"), utf("প্ট"))
     self.assertEqual(avro.parse("pt"), utf("প্ত"))
     self.assertEqual(avro.parse("pn"), utf("প্ন"))
     self.assertEqual(avro.parse("pp"), utf("প্প"))
     self.assertEqual(avro.parse("pl"), utf("প্ল"))
     self.assertEqual(avro.parse("ps"), utf("প্স"))
     self.assertEqual(avro.parse("ph"), utf("ফ"))
     self.assertEqual(avro.parse("fl"), utf("ফ্ল"))
     self.assertEqual(avro.parse("f"), utf("ফ"))
     self.assertEqual(avro.parse("p"), utf("প"))
     # r / R inputs: rri, rZ, ry, rr and single r after other characters
     self.assertEqual(avro.parse("rri`"), utf("ৃ"))
     self.assertEqual(avro.parse("rri"), utf("ঋ"))
     self.assertEqual(avro.parse("krri"), utf("কৃ"))
     self.assertEqual(avro.parse("Irri"), utf("ঈঋ"))
     self.assertEqual(avro.parse("^rri"), utf("ঁঋ"))
     self.assertEqual(avro.parse(":rri"), utf("ঃঋ"))
     self.assertEqual(avro.parse("rZ"), utf("র‍্য"))
     self.assertEqual(avro.parse("krZ"), utf("ক্র্য"))
     self.assertEqual(avro.parse("rrZ"), utf("রর‍্য"))
     self.assertEqual(avro.parse("yrZ"), utf("ইয়র‍্য"))
     self.assertEqual(avro.parse("wrZ"), utf("ওর‍্য"))
     self.assertEqual(avro.parse("xrZ"), utf("এক্সর‍্য"))
     self.assertEqual(avro.parse("irZ"), utf("ইর‍্য"))
     self.assertEqual(avro.parse("-rZ"), utf("-র‍্য"))
     self.assertEqual(avro.parse("rrrZ"), utf("ররর‍্য"))
     self.assertEqual(avro.parse("ry"), utf("র‍্য"))
     self.assertEqual(avro.parse("qry"), utf("ক্র্য"))
     self.assertEqual(avro.parse("rry"), utf("রর‍্য"))
     self.assertEqual(avro.parse("yry"), utf("ইয়র‍্য"))
     self.assertEqual(avro.parse("wry"), utf("ওর‍্য"))
     self.assertEqual(avro.parse("xry"), utf("এক্সর‍্য"))
     self.assertEqual(avro.parse("0ry"), utf("০র‍্য"))
     self.assertEqual(avro.parse("rrrry"), utf("রররর‍্য"))
     self.assertEqual(avro.parse("Rry"), utf("ড়্র্য"))
     self.assertEqual(avro.parse("rr"), utf("রর"))
     self.assertEqual(avro.parse("arr"), utf("আরর"))
     self.assertEqual(avro.parse("arrk"), utf("আর্ক"))
     self.assertEqual(avro.parse("arra"), utf("আররা"))
     self.assertEqual(avro.parse("arr"), utf("আরর"))
     self.assertEqual(avro.parse("arr!"), utf("আরর!"))
     self.assertEqual(avro.parse("krr"), utf("ক্রর"))
     self.assertEqual(avro.parse("krra"), utf("ক্ররা"))
     self.assertEqual(avro.parse("Rg"), utf("ড়্গ"))
     self.assertEqual(avro.parse("Rh"), utf("ঢ়"))
     self.assertEqual(avro.parse("R"), utf("ড়"))
     self.assertEqual(avro.parse("r"), utf("র"))
     self.assertEqual(avro.parse("or"), utf("অর"))
     self.assertEqual(avro.parse("mr"), utf("ম্র"))
     self.assertEqual(avro.parse("1r"), utf("১র"))
     self.assertEqual(avro.parse("+r"), utf("+র"))
     self.assertEqual(avro.parse("rr"), utf("রর"))
     self.assertEqual(avro.parse("yr"), utf("ইয়র"))
     self.assertEqual(avro.parse("wr"), utf("ওর"))
     self.assertEqual(avro.parse("xr"), utf("এক্সর"))
     self.assertEqual(avro.parse("zr"), utf("য্র"))
     self.assertEqual(avro.parse("mri"), utf("ম্রি"))
     # Inputs starting with s / S
     self.assertEqual(avro.parse("shch"), utf("শ্ছ"))
     self.assertEqual(avro.parse("ShTh"), utf("ষ্ঠ"))
     self.assertEqual(avro.parse("Shph"), utf("ষ্ফ"))
     self.assertEqual(avro.parse("Sch"), utf("শ্ছ"))
     self.assertEqual(avro.parse("skl"), utf("স্ক্ল"))
     self.assertEqual(avro.parse("skh"), utf("স্খ"))
     self.assertEqual(avro.parse("sth"), utf("স্থ"))
     self.assertEqual(avro.parse("sph"), utf("স্ফ"))
     self.assertEqual(avro.parse("shc"), utf("শ্চ"))
     self.assertEqual(avro.parse("sht"), utf("শ্ত"))
     self.assertEqual(avro.parse("shn"), utf("শ্ন"))
     self.assertEqual(avro.parse("shm"), utf("শ্ম"))
     self.assertEqual(avro.parse("shl"), utf("শ্ল"))
     self.assertEqual(avro.parse("Shk"), utf("ষ্ক"))
     self.assertEqual(avro.parse("ShT"), utf("ষ্ট"))
     self.assertEqual(avro.parse("ShN"), utf("ষ্ণ"))
     self.assertEqual(avro.parse("Shp"), utf("ষ্প"))
     self.assertEqual(avro.parse("Shf"), utf("ষ্ফ"))
     self.assertEqual(avro.parse("Shm"), utf("ষ্ম"))
     self.assertEqual(avro.parse("spl"), utf("স্প্ল"))
     self.assertEqual(avro.parse("sk"), utf("স্ক"))
     self.assertEqual(avro.parse("Sc"), utf("শ্চ"))
     self.assertEqual(avro.parse("sT"), utf("স্ট"))
     self.assertEqual(avro.parse("st"), utf("স্ত"))
     self.assertEqual(avro.parse("sn"), utf("স্ন"))
     self.assertEqual(avro.parse("sp"), utf("স্প"))
     self.assertEqual(avro.parse("sf"), utf("স্ফ"))
     self.assertEqual(avro.parse("sm"), utf("স্ম"))
     self.assertEqual(avro.parse("sl"), utf("স্ল"))
     self.assertEqual(avro.parse("sh"), utf("শ"))
     self.assertEqual(avro.parse("Sc"), utf("শ্চ"))
     self.assertEqual(avro.parse("St"), utf("শ্ত"))
     self.assertEqual(avro.parse("Sn"), utf("শ্ন"))
     self.assertEqual(avro.parse("Sm"), utf("শ্ম"))
     self.assertEqual(avro.parse("Sl"), utf("শ্ল"))
     self.assertEqual(avro.parse("Sh"), utf("ষ"))
     self.assertEqual(avro.parse("s"), utf("স"))
     self.assertEqual(avro.parse("S"), utf("শ"))
     # o and oo on their own and next to other characters
     self.assertEqual(avro.parse("oo"), utf("উ"))
     self.assertEqual(avro.parse("OO"), utf("ওও"))
     self.assertEqual(avro.parse("oo`"), utf("ু"))
     self.assertEqual(avro.parse("koo"), utf("কু"))
     self.assertEqual(avro.parse("ooo"), utf("উঅ"))
     self.assertEqual(avro.parse("!oo"), utf("!উ"))
     self.assertEqual(avro.parse("!ooo"), utf("!উঅ"))
     self.assertEqual(avro.parse("aoo"), utf("আউ"))
     self.assertEqual(avro.parse("oop"), utf("উপ"))
     self.assertEqual(avro.parse("ooo`"), utf("উ"))
     # "o" followed by a backtick is expected to give an empty string
     self.assertEqual("", avro.parse("o`"))
     self.assertEqual(avro.parse("oZ"), utf("অ্য"))
     self.assertEqual(avro.parse("oY"), utf("অয়"))
     self.assertEqual(avro.parse("o"), utf("অ"))
     self.assertEqual(avro.parse("!o"), utf("!অ"))
     self.assertEqual(avro.parse("^o"), utf("ঁঅ"))
     self.assertEqual(avro.parse("*o"), utf("*অ"))
     self.assertEqual(avro.parse("io"), utf("ইও"))
     self.assertEqual(avro.parse("yo"), utf("ইয়"))
     self.assertEqual(avro.parse("no"), utf("ন"))
     # Inputs starting with t / T
     self.assertEqual(avro.parse("tth"), utf("ত্থ"))
     self.assertEqual(avro.parse("t``"), utf("ৎ"))
     self.assertEqual(avro.parse("`t``"), utf("ৎ"))
     self.assertEqual(avro.parse("t``t``"), utf("ৎৎ"))
     self.assertEqual(avro.parse("t```"), utf("ৎ"))
     self.assertEqual(avro.parse("TT"), utf("ট্ট"))
     self.assertEqual(avro.parse("Tm"), utf("ট্ম"))
     self.assertEqual(avro.parse("Th"), utf("ঠ"))
     self.assertEqual(avro.parse("tn"), utf("ত্ন"))
     self.assertEqual(avro.parse("tm"), utf("ত্ম"))
     self.assertEqual(avro.parse("th"), utf("থ"))
     self.assertEqual(avro.parse("tt"), utf("ত্ত"))
     self.assertEqual(avro.parse("T"), utf("ট"))
     self.assertEqual(avro.parse("t"), utf("ত"))
     # a / A inputs
     self.assertEqual(avro.parse("aZ"), utf("অ্যা"))
     self.assertEqual(avro.parse("aaZ"), utf("আঅ্যা"))
     self.assertEqual(avro.parse("AZ"), utf("অ্যা"))
     self.assertEqual(avro.parse("a`"), utf("া"))
     self.assertEqual(avro.parse("a``"), utf("া"))
     self.assertEqual(avro.parse("ka`"), utf("কা"))
     self.assertEqual(avro.parse("A`"), utf("া"))
     self.assertEqual(avro.parse("a"), utf("আ"))
     self.assertEqual(avro.parse("`a"), utf("আ"))
     self.assertEqual(avro.parse("k`a"), utf("কআ"))
     self.assertEqual(avro.parse("ia"), utf("ইয়া"))
     self.assertEqual(avro.parse("aaaa`"), utf("আআআা"))
     # i / I inputs
     self.assertEqual(avro.parse("i`"), utf("ি"))
     self.assertEqual(avro.parse("i"), utf("ই"))
     self.assertEqual(avro.parse("`i"), utf("ই"))
     self.assertEqual(avro.parse("hi"), utf("হি"))
     self.assertEqual(avro.parse("ih"), utf("ইহ"))
     self.assertEqual(avro.parse("i`h"), utf("িহ"))
     self.assertEqual(avro.parse("I`"), utf("ী"))
     self.assertEqual(avro.parse("I"), utf("ঈ"))
     self.assertEqual(avro.parse("cI"), utf("চী"))
     self.assertEqual(avro.parse("Ix"), utf("ঈক্স"))
     self.assertEqual(avro.parse("II"), utf("ঈঈ"))
     self.assertEqual(avro.parse("0I"), utf("০ঈ"))
     self.assertEqual(avro.parse("oI"), utf("অঈ"))
     # u / U inputs
     self.assertEqual(avro.parse("u`"), utf("ু"))
     self.assertEqual(avro.parse("u"), utf("উ"))
     self.assertEqual(avro.parse("ku"), utf("কু"))
     self.assertEqual(avro.parse("uk"), utf("উক"))
     self.assertEqual(avro.parse("uu"), utf("উউ"))
     self.assertEqual(avro.parse("iu"), utf("ইউ"))
     self.assertEqual(avro.parse("&u"), utf("&উ"))
     self.assertEqual(avro.parse("u&"), utf("উ&"))
     self.assertEqual(avro.parse("U`"), utf("ূ"))
     self.assertEqual(avro.parse("U"), utf("ঊ"))
     self.assertEqual(avro.parse("yU"), utf("ইয়ূ"))
     self.assertEqual(avro.parse("Uy"), utf("ঊয়"))
     self.assertEqual(avro.parse("^U"), utf("ঁঊ"))
     self.assertEqual(avro.parse("U^"), utf("ঊঁ"))
     # e / ee inputs in every case combination
     self.assertEqual(avro.parse("EE"), utf("ঈ"))
     self.assertEqual(avro.parse("ee"), utf("ঈ"))
     self.assertEqual(avro.parse("Ee"), utf("ঈ"))
     self.assertEqual(avro.parse("eE"), utf("ঈ"))
     self.assertEqual(avro.parse("ee`"), utf("ী"))
     self.assertEqual(avro.parse("kee"), utf("কী"))
     self.assertEqual(avro.parse("eek"), utf("ঈক"))
     self.assertEqual(avro.parse("0ee"), utf("০ঈ"))
     self.assertEqual(avro.parse("ee8"), utf("ঈ৮"))
     self.assertEqual(avro.parse("(ee)"), utf("(ঈ)"))
     self.assertEqual(avro.parse("e`"), utf("ে"))
     self.assertEqual(avro.parse("e"), utf("এ"))
     self.assertEqual(avro.parse("ke"), utf("কে"))
     self.assertEqual(avro.parse("we"), utf("ওয়ে"))
     self.assertEqual(avro.parse("#e#"), utf("#এ#"))
     self.assertEqual(avro.parse("`e`"), utf("ে"))
     # z, Z, y and Y inputs
     self.assertEqual(avro.parse("z"), utf("য"))
     self.assertEqual(avro.parse("Z"), utf("্য"))
     self.assertEqual(avro.parse("rZ"), utf("র‍্য"))
     self.assertEqual(avro.parse("kZS"), utf("ক্যশ"))
     self.assertEqual(avro.parse("y"), utf("ইয়"))
     self.assertEqual(avro.parse("oy"), utf("অয়"))
     self.assertEqual(avro.parse("ky"), utf("ক্য"))
     self.assertEqual(avro.parse("ya"), utf("ইয়া"))
     self.assertEqual(avro.parse("yaa"), utf("ইয়াআ"))
     self.assertEqual(avro.parse("Y"), utf("য়"))
     self.assertEqual(avro.parse("YY"), utf("য়য়"))
     self.assertEqual(avro.parse("iY"), utf("ইয়"))
     self.assertEqual(avro.parse("kY"), utf("কয়"))
     # q, w and x inputs
     self.assertEqual(avro.parse("q"), utf("ক"))
     self.assertEqual(avro.parse("Q"), utf("ক"))
     self.assertEqual(avro.parse("w"), utf("ও"))
     self.assertEqual(avro.parse("wa"), utf("ওয়া"))
     self.assertEqual(avro.parse("-wa-"), utf("-ওয়া-"))
     self.assertEqual(avro.parse("woo"), utf("ওয়ু"))
     self.assertEqual(avro.parse("wre"), utf("ওরে"))
     self.assertEqual(avro.parse("kw"), utf("ক্ব"))
     self.assertEqual(avro.parse("x"), utf("এক্স"))
     self.assertEqual(avro.parse("ex"), utf("এক্স"))
     self.assertEqual(avro.parse("bx"), utf("বক্স"))
     # Symbols: colon, caret, double comma, dollar sign and backtick
     self.assertEqual(avro.parse(":`"), utf(":"))
     self.assertEqual(avro.parse(":"), utf("ঃ"))
     self.assertEqual(avro.parse("^`"), utf("^"))
     self.assertEqual(avro.parse("^"), utf("ঁ"))
     self.assertEqual(avro.parse("k^"), utf("কঁ"))
     self.assertEqual(avro.parse("k^i"), utf("কঁই"))
     self.assertEqual(avro.parse("ki^"), utf("কিঁ"))
     self.assertEqual(avro.parse(",,"), utf("্‌"))
     self.assertEqual(avro.parse(",,,"), utf("্‌,"))
     self.assertEqual(avro.parse(",,`,"), utf("্‌,"))
     self.assertEqual(avro.parse("`,,"), utf("্‌"))
     self.assertEqual(avro.parse(",`,"), utf(",,"))
     self.assertEqual(avro.parse("$"), utf("৳"))
     # A lone backtick is expected to give an empty string
     self.assertEqual("", avro.parse("`"))
     self.assertEqual(avro.parse("bdh"), utf("ব্ধ"))
Example #20
0
 def test_patterns_punctuations(self):
     """Check parsing of one, two and three consecutive full stops"""
     # (expected output, parser input) pairs, checked in order
     cases = (
         ("।", "."),
         ("।।", ".."),
         ("...", "..."),
     )
     for expected, typed in cases:
         self.assertEqual(utf(expected), avro.parse(typed))
Example #21
0
 def test_patterns_with_rules_svaravarna(self):
     """Check patterns with rules for svaravarna inputs"""
     # (expected output, parser input) pairs, checked in order
     cases = (
         ("অ", "o"),
         ("আ", "a"),
         ("ই", "i"),
         ("ঈ", "I"),
         ("উ", "u"),
         ("উ", "oo"),
         ("ঊ", "U"),
         ("এ", "e"),
         ("ঐ", "OI"),
         ("ও", "O"),
         ("ঔ", "OU"),
     )
     for expected, typed in cases:
         self.assertEqual(utf(expected), avro.parse(typed))