def parse_ibu(ibu_string): ''' parse ibu to float ibu might be a float (probably int) or it might be '-' or ??? ''' return (parse_number(ibu_string))
def parse_abv(abv_string): ''' parse abv to float abv might be a float with the '%' symbol or it might be '-' or ??? ''' return (parse_number(abv_string, rstrip='%'))
def _test_files(path, language): for filename in get_test_files(path, f'{language}_'): with open(filename, "r") as csv_file: csv_reader = csv.DictReader(csv_file) for row in csv_reader: try: assert parse_number(row['text'], language) == int(row['number']) except AssertionError as e: raise AssertionError(F"Failed execution of {row['text']}") from e
def test_zero(self): self.assertEqual(parse_number('一百零一'), 101) self.assertEqual(parse_number('三百零五'), 305) self.assertEqual(parse_number('一千零三十五'), 1035) self.assertEqual(parse_number('一千零六'), 1006) self.assertEqual(parse_number('三十萬零二百五十'), 300250) self.assertEqual(parse_number('八百萬零三百零一'), 8000301)
def normalize_human_input( *, human_input: str, verbose: bool, debug: bool, ): if verbose or debug: ic(human_input) human_input = normalize_whitespace( string=human_input, verbose=verbose, debug=debug, ) if verbose or debug: ic(human_input) words = [] for word in human_input.split(' '): #ic(word) converted_word = parse_number(word) #ic(converted_word) if converted_word: words.append(str(converted_word)) # numbers [/d] come back as ints else: words.append(word) #ic(words) human_input = ' '.join(words) #ic(human_input) if verbose or debug: ic(human_input) return human_input
def test_parse_number_digits(expected, test_input): assert parse_number(test_input, LANG) == expected
def test_parse_number(self, expected, test_input): assert parse_number(test_input) == expected
def test_parse_number(expected, test_input, lang): assert parse_number(test_input, language=lang) == expected
def test_kanji_only(self): self.assertEqual(parse_number('百八十一'), 181) self.assertEqual(parse_number('二百八十一'), 281) self.assertEqual(parse_number('千百八十一'), 1181) self.assertEqual(parse_number('二千百八十一'), 2181) self.assertEqual(parse_number('一万二千百八十一'), 12181) self.assertEqual(parse_number('十万二千百八十一'), 102181) self.assertEqual(parse_number('十一万二千百八十一'), 112181) self.assertEqual(parse_number('百十一万二千百八十一'), 1112181) self.assertEqual(parse_number('五百十一万二千百八十一'), 5112181) self.assertEqual(parse_number('千五百十一万二千百八十一'), 15112181) self.assertEqual(parse_number('四千五百十一万二千百八十一'), 45112181) self.assertEqual(parse_number('一億千五百十一万二千百八十一'), 115112181) self.assertEqual(parse_number('五十億十一'), 5000000011) self.assertEqual(parse_number('三百二十一億千五百十一万二千百八十一'), 32115112181) self.assertEqual(parse_number('千億十一'), 100000000011) self.assertEqual(parse_number('六千三百二十一億千五百十一万二千百八十一'), 632115112181)
def test_separator(self): self.assertEqual(parse_number('66,728千株'), 66728000) self.assertEqual(parse_number('12 566 728千株'), 12566728000)
def test_numeric_mix(self): self.assertEqual(parse_number('1億1511万2181'), 115112181) self.assertEqual(parse_number('1000億11'), 100000000011) self.assertEqual(parse_number('6千3百2十1億1511万2千百8十1'), 632115112181)
def test_decimal_mix(self): self.assertEqual(parse_number('0.6千株'), 600) self.assertEqual(parse_number('8千株'), 8000) self.assertEqual(parse_number('7.28千株'), 7280) self.assertEqual(parse_number('728千3百株'), 728300) self.assertAlmostEqual(parse_number('66,728.123千株'), 66728123.0)
def test_parse_number_lang_auto(expected, test_input): assert parse_number(test_input) == expected