def test_friendly_cardinals():
    """Check ``cardinal(n)`` with its default arguments against a fixed table."""
    expectations = (
        (0, "nul"),
        (1, "een"),
        (19, "negentien"),
        (20, "twintig"),
        (50, "vijftig"),
        (100, "honderd"),
        (101, "101"),
        (300, "driehonderd"),
        (301, "301"),
        (900, "negenhonderd"),
        (6000, "zesduizend"),
        (10000, "tienduizend"),
        (10010, "10010"),
        (11000, "elfduizend"),
        (11011, "11011"),
        (12000, "12 duizend"),
        (12001, "12001"),
        (13000, "13 duizend"),
        (100000, "100 duizend"),
        (10000000, "10 miljoen"),
        # Negative numbers
        (-12, "min twaalf"),
        (-312, "-312"),
    )
    for number, words in expectations:
        assert cardinal(number) == words
def check_word(first_word):
    """Normalise the first word of a line and return the result.

    The steps are applied in this order:

    * strip leading apostrophes (a lone ``'`` is kept as is);
    * replace every ``£`` with ``f``;
    * strip leading/trailing commas (a lone ``,`` is kept as is);
    * expand a leading ``mc`` to ``mac``;
    * rewrite ``ij`` / ``IJ`` as ``y`` / ``Y``;
    * remove hyphens (a lone ``-`` is kept as is);
    * if the word consists solely of decimal digits, replace it with
      ``cardinal(int(word), friendly=False)``;
    * if the word starts with ``0``, replace every ``0`` with ``O``;
      likewise ``1`` with ``I``;
    * strip leading/trailing full stops.

    :param first_word: the word to normalise (``str``).
    :return: the normalised word.
    """
    # lstrip() is a no-op when there is no leading apostrophe, so only the
    # lone-apostrophe case needs a guard.
    if first_word != "'":
        first_word = first_word.lstrip("'")

    first_word = first_word.replace("£", "f")

    if first_word != ",":
        first_word = first_word.strip(",")

    if first_word.startswith("mc"):
        # Expand the prefix only; a blanket replace() would also rewrite any
        # later "mc" inside the word (e.g. "mcmc" -> "macmac").
        first_word = "mac" + first_word[2:]

    first_word = first_word.replace("ij", "y").replace("IJ", "Y")

    if first_word != "-":
        first_word = first_word.replace("-", "")

    # isdecimal() rather than isdigit(): isdigit() is also true for characters
    # such as "²" that int() cannot parse, which would raise ValueError here.
    if first_word.isdecimal():
        first_word = cardinal(int(first_word), friendly=False)

    # A leading 0 / 1 in a word that is not a plain number is turned into the
    # letter O / I, together with every other 0 / 1 in the word.
    if first_word.startswith("0"):
        first_word = first_word.replace("0", "O")
    if first_word.startswith("1"):
        first_word = first_word.replace("1", "I")

    return first_word.strip(".")
def test_cardinal():
    """Check ``cardinal(n, friendly=False)`` against a table of spelled-out numbers.

    The cases are kept as a list of ``(number, words)`` pairs rather than a
    dict literal: a dict silently keeps only the last value for a repeated
    key, which would drop a case without any warning.
    """
    cases = [
        # Normal numbers
        (0, "nul"),
        (1, "een"),
        (11, "elf"),
        (19, "negentien"),
        (20, "twintig"),
        (21, "eenentwintig"),
        (23, "drieëntwintig"),
        (100, "honderd"),
        (200, "tweehonderd"),
        (999, "negenhonderdnegenennegentig"),
        (1000, "duizend"),
        (2000, "tweeduizend"),
        (6000, "zesduizend"),
        (12001, "twaalfduizend een"),
        (99900, "negenennegentigduizend negenhonderd"),
        (99999, "negenennegentigduizend negenhonderdnegenennegentig"),
        (220000, "tweehonderdtwintigduizend"),
        (1000000, "een miljoen"),
        (1000012, "een miljoen twaalf"),
        (2000000, "twee miljoen"),
        (300000000, "driehonderd miljoen"),
        (300012013, "driehonderd miljoen twaalfduizend dertien"),
        (1000000000, "een miljard"),
        (
            12345678901,
            "twaalf miljard driehonderdvijfenveertig miljoen "
            "zeshonderdachtenzeventigduizend negenhonderdeen",
        ),
        (1000000000000, "een biljoen"),
        (13000000500003, "dertien biljoen vijfhonderdduizend drie"),
        # From http://woordenlijst.org/leidraad/6/9/
        (2, "twee"),
        (20, "twintig"),
        (22, "tweeëntwintig"),
        (200, "tweehonderd"),
        (222, "tweehonderdtweeëntwintig"),
        (2200, "tweeëntwintighonderd"),
        (2220, "tweeduizend tweehonderdtwintig"),
        (
            2220222,
            "twee miljoen tweehonderdtwintigduizend "
            "tweehonderdtweeëntwintig",
        ),
        # From https://onzetaal.nl/taaladvies/advies/getallen-uitschrijven
        (108, "honderdacht"),
        (678, "zeshonderdachtenzeventig"),
        (2013, "tweeduizend dertien"),
        (2577, "tweeduizend vijfhonderdzevenenzeventig"),
        (17053980, "zeventien miljoen drieënvijftigduizend negenhonderdtachtig"),
        # Negative numbers
        (-1, "min een"),
        (-10, "min tien"),
    ]
    for n, w in cases:
        assert cardinal(n, friendly=False) == w
elements_line = line.split() length_line = len(elements_line) first_word = line.split()[0] # Check the first word of the line on multiple aspects and change it if necessary if "'" in first_word and not first_word == "'": first_word = first_word.lstrip("'") if first_word != ",": first_word = first_word.strip(",") if "ij" in first_word or "IJ" in first_word: first_word = first_word.replace("ij", "y") first_word = first_word.replace("IJ", "Y") if not first_word.startswith("-"): first_word = first_word.replace("-", "") if first_word.isdigit(): first_word = cardinal(int(first_word), friendly=False) if first_word.startswith("0") and current_letter == "O": first_word = first_word.replace("0", "O") if first_word.startswith("1") and current_letter == "I": first_word = first_word.replace("1", "I") if first_word == current_letter + ".": first_word = first_word.strip(".") # Check if a new bibliographical entry should be started or not check_result = check_line(length_line, first_word, previous_first_word, current_letter, previous_letter, string_item) # Either create a new entry or concatenate with the current one if check_result == True: previous_first_word = first_word items.append(string_item)
def test_cardinal(number, expected):
    """Assert that ``cardinal(number, friendly=False)`` equals ``expected``."""
    assert cardinal(number, friendly=False) == expected
def test_friendly_cardinals(number, expected):
    """Assert that ``cardinal(number)`` with default arguments equals ``expected``."""
    assert cardinal(number) == expected
ssssw_clean) if p != "" ] if len( num_list ) > 1: # if there are leading zeroes assert len(num_list) == 2 for zero in num_list[0]: kaldi_lex_used[ "nul"] = kaldi_lex[ "nul"] new_words.append("nul") sw_l.append("nul") si_l.append(str(sw_i)) sw_i += 1 number = cardinal( int(num_list[1]), friendly=False) else: # if there are no leading zeroes number = cardinal( int(ssssw_clean), friendly=False) number = re.sub( r'een', 'één', number) numbers = number.split( " " ) # sometimes cardinal() splits up numbers: 1001 --> duizend een for n_counter, n in enumerate( numbers, 1): sw_i += 1 if n_counter > 1 else 0 if n in kaldi_lex: kaldi_lex_used[