def test_parsing_text(self):
    """Expect three numbers to be picked out of a long Japanese sentence.

    Each expectation pins the parsed value, the matched substring and the
    character index of that substring within the input.
    """
    sentence = (
        'その形は3-50個ほどの銀河が集まった銀河群と呼ばれる小規模な集団に始まり、フラクタル状の階層的段階の集団を構成する。200万光年程度の狭い領域に纏まった銀河群はコンパクト銀河群と呼ばれる'
    )
    # (number, matched text, index in sentence)
    expected = (
        (3, '3', 4),
        (50, '50', 6),
        (2000000, '200万', 59),
    )

    found = to_arabic(sentence)
    eq_(len(found), 3)
    for position, (number, text, index) in enumerate(expected):
        compare_result(
            found[position],
            R(number=number, text=text, index=index),
        )
def test_kanji_only_1000(self):
    """Expect kanji-only inputs in the thousands to yield a single result.

    For each input the whole string is the matched text, starting at index 0.
    """
    for kanji, value in (('千百八十一', 1181), ('二千百八十一', 2181)):
        parsed = to_arabic(kanji)
        eq_(len(parsed), 1)
        compare_result(parsed[0], R(number=value, text=kanji, index=0))
def test_kanji_only_10(self):
    """Expect kanji-only inputs in the tens to yield a single result.

    For each input the whole string is the matched text, starting at index 0.
    """
    for kanji, value in (('十一', 11), ('八十一', 81)):
        parsed = to_arabic(kanji)
        eq_(len(parsed), 1)
        compare_result(parsed[0], R(number=value, text=kanji, index=0))
def test_kanji_only_100000000000(self):
    """Expect kanji-only inputs in the hundred-billions to yield one result.

    For each input the whole string is the matched text, starting at index 0.
    """
    cases = (
        ('千億十一', 100000000011),
        ('六千三百二十一億千五百十一万二千百八十一', 632115112181),
    )
    for kanji, value in cases:
        parsed = to_arabic(kanji)
        eq_(len(parsed), 1)
        compare_result(parsed[0], R(number=value, text=kanji, index=0))
def test_numeric_with_comma(self):
    """Expect comma-grouped digits to be reported as one number each.

    The expected matched text keeps the inner commas but excludes the
    trailing comma that follows a number in the second input.
    """
    # Each case: (input, ((number, matched text, index), ...))
    cases = (
        ('10,000円', ((10000, '10,000', 0),)),
        (
            'abc, 10,000,000,円, edf, 50,000,',
            ((10000000, '10,000,000', 5), (50000, '50,000', 24)),
        ),
    )
    for source, expected in cases:
        found = to_arabic(source)
        eq_(len(found), len(expected))
        for position, (number, text, index) in enumerate(expected):
            compare_result(
                found[position],
                R(number=number, text=text, index=index),
            )
def test_numeric_only(self):
    """Expect plain digit runs to be reported with their value and index.

    Covers a bare digit string and a sentence containing two digit runs.
    """
    # Each case: (input, ((number, matched text, index), ...))
    cases = (
        ('100000', ((100000, '100000', 0),)),
        ('1を聞いて10を知る。', ((1, '1', 0), (10, '10', 5))),
    )
    for source, expected in cases:
        found = to_arabic(source)
        eq_(len(found), len(expected))
        for position, (number, text, index) in enumerate(expected):
            compare_result(
                found[position],
                R(number=number, text=text, index=index),
            )
def test_multibyte_numeric_only(self):
    """Expect digit-only inputs to be reported with their value and index.

    NOTE(review): the name suggests multibyte (full-width) digits, but the
    literals here are ASCII digits as they appear in this file -- confirm
    the intended input encoding against the upstream source.
    """
    # Each case: (input, ((number, matched text, index), ...))
    cases = (
        ('1234567890', ((1234567890, '1234567890', 0),)),
        ('1を聞いて10を知る。', ((1, '1', 0), (10, '10', 5))),
    )
    for source, expected in cases:
        found = to_arabic(source)
        eq_(len(found), len(expected))
        for position, (number, text, index) in enumerate(expected):
            compare_result(
                found[position],
                R(number=number, text=text, index=index),
            )
def test_kanji_with_comma(self):
    """Expect commas between kanji numerals to separate distinct numbers.

    In the first input each single kanji is its own result; in the second,
    '二三四' and '十一' are reported as two separate results.
    """
    # Each case: (input, ((number, matched text, index), ...))
    cases = (
        ('二,三,四', ((2, '二', 0), (3, '三', 2), (4, '四', 4))),
        ('テスト、二三四,十一', ((234, '二三四', 4), (11, '十一', 8))),
    )
    for source, expected in cases:
        found = to_arabic(source)
        eq_(len(found), len(expected))
        for position, (number, text, index) in enumerate(expected):
            compare_result(
                found[position],
                R(number=number, text=text, index=index),
            )
def test_mixed(self):
    """Expect Arabic digits mixed with kanji units to form a single number.

    Every input is expected to yield exactly one result starting at index 0;
    in the last case the matched text stops before the trailing '光年'.
    """
    # Each case: (input, number, matched text)
    cases = (
        ('1万', 10000, '1万'),
        ('5百万', 5000000, '5百万'),
        ('1億2500万光年', 125000000, '1億2500万'),
    )
    for source, value, matched in cases:
        parsed = to_arabic(source)
        eq_(len(parsed), 1)
        compare_result(parsed[0], R(number=value, text=matched, index=0))
def test_single_kanji_word(self):
    """Expect a lone kanji numeral to be reported as a number.

    Checks three one-character inputs, then a sentence in which two single
    kanji numerals appear at indices 0 and 5.
    """
    # One-character inputs: the whole string is the match at index 0.
    for kanji, value in (('一', 1), ('十', 10), ('百', 100)):
        parsed = to_arabic(kanji)
        eq_(len(parsed), 1)
        compare_result(parsed[0], R(number=value, text=kanji, index=0))

    # Sentence containing two separate single-kanji numerals.
    in_sentence = to_arabic('一を聞いて十を知る。')
    eq_(len(in_sentence), 2)
    for position, (value, kanji, index) in enumerate(((1, '一', 0), (10, '十', 5))):
        compare_result(
            in_sentence[position],
            R(number=value, text=kanji, index=index),
        )
def test_numeric_chain2(self):
    """Expect '505' followed by non-numeric text to yield the single number 505."""
    room_label = '505号室'
    matches = to_arabic(room_label)
    eq_(len(matches), 1)
    only_match = matches[0]
    compare_result(only_match, R(number=505, text='505', index=0))