Exemple #1
0
def test_friendly_cardinals():
    """Check the default ("friendly") output of ``cardinal``.

    Each pair holds a number and the exact string ``cardinal(number)`` is
    expected to return for it. In this table some numbers are expected as
    words, some as plain digits and some as a digit/word mix.
    """
    expectations = (
        (0, "nul"),
        (1, "een"),
        (19, "negentien"),
        (20, "twintig"),
        (50, "vijftig"),
        (100, "honderd"),
        (101, "101"),
        (300, "driehonderd"),
        (301, "301"),
        (900, "negenhonderd"),
        (6000, "zesduizend"),
        (10000, "tienduizend"),
        (10010, "10010"),
        (11000, "elfduizend"),
        (11011, "11011"),
        (12000, "12 duizend"),
        (12001, "12001"),
        (13000, "13 duizend"),
        (100000, "100 duizend"),
        (10000000, "10 miljoen"),
        # Negative numbers
        (-12, "min twaalf"),
        (-312, "-312"),
    )

    for number, words in expectations:
        assert cardinal(number) == words
Exemple #2
0
def check_word(first_word):
    """Normalise the first word of a line and return the cleaned-up word.

    The following steps are applied in order:

    1. strip leading apostrophes (unless the word is a lone apostrophe);
    2. replace every "£" with "f";
    3. strip surrounding commas (unless the word is a lone comma);
    4. if the word starts with "mc", replace every "mc" with "mac";
    5. replace "ij" with "y" and "IJ" with "Y";
    6. remove all hyphens (unless the word is a lone hyphen);
    7. if the word consists of digits only, pass it through
       ``cardinal(..., friendly=False)``;
    8. if the word starts with "0" / "1", replace every "0" with "O" /
       every "1" with "I";
    9. strip surrounding periods.
    """
    word = first_word

    # str.replace / str.lstrip leave the word untouched when the character
    # is absent, so only the "word is exactly that character" cases need a
    # guard.
    if word != "'":
        word = word.lstrip("'")
    word = word.replace("£", "f")
    if word != ",":
        word = word.strip(",")
    if word.startswith("mc"):
        word = word.replace("mc", "mac")
    word = word.replace("ij", "y").replace("IJ", "Y")
    if word != "-":
        word = word.replace("-", "")

    if word.isdigit():
        word = cardinal(int(word), friendly=False)

    # A leading digit triggers a replacement of that digit throughout the word.
    for digit, letter in (("0", "O"), ("1", "I")):
        if word.startswith(digit):
            word = word.replace(digit, letter)

    return word.strip(".")
Exemple #3
0
def test_cardinal():
    """Check the fully spelled-out output of ``cardinal(n, friendly=False)``.

    The cases are kept as a list of ``(number, expected_words)`` pairs rather
    than a dict literal: a dict silently keeps only the last value for a
    repeated key, which previously caused the "eenentwintig" case (mistakenly
    keyed as 23 instead of 21) to be dropped without any error.
    """
    cases = [
        # Normal numbers
        (0, "nul"),
        (1, "een"),
        (11, "elf"),
        (19, "negentien"),
        (20, "twintig"),
        (21, "eenentwintig"),
        (23, "drieëntwintig"),
        (100, "honderd"),
        (200, "tweehonderd"),
        (999, "negenhonderdnegenennegentig"),
        (1000, "duizend"),
        (2000, "tweeduizend"),
        (6000, "zesduizend"),
        (12001, "twaalfduizend een"),
        (99900, "negenennegentigduizend negenhonderd"),
        (99999, "negenennegentigduizend negenhonderdnegenennegentig"),
        (220000, "tweehonderdtwintigduizend"),
        (1000000, "een miljoen"),
        (1000012, "een miljoen twaalf"),
        (2000000, "twee miljoen"),
        (300000000, "driehonderd miljoen"),
        (300012013, "driehonderd miljoen twaalfduizend dertien"),
        (1000000000, "een miljard"),
        (12345678901, "twaalf miljard driehonderdvijfenveertig miljoen "
                      "zeshonderdachtenzeventigduizend negenhonderdeen"),
        (1000000000000, "een biljoen"),
        (13000000500003, "dertien biljoen vijfhonderdduizend drie"),

        # From http://woordenlijst.org/leidraad/6/9/
        (2, "twee"),
        (20, "twintig"),
        (22, "tweeëntwintig"),
        (200, "tweehonderd"),
        (222, "tweehonderdtweeëntwintig"),
        (2200, "tweeëntwintighonderd"),
        (2220, "tweeduizend tweehonderdtwintig"),
        (2220222, "twee miljoen tweehonderdtwintigduizend "
                  "tweehonderdtweeëntwintig"),

        # From https://onzetaal.nl/taaladvies/advies/getallen-uitschrijven
        (108, "honderdacht"),
        (678, "zeshonderdachtenzeventig"),
        (2013, "tweeduizend dertien"),
        (2577, "tweeduizend vijfhonderdzevenenzeventig"),
        (17053980,
         "zeventien miljoen drieënvijftigduizend negenhonderdtachtig"),

        # Negative numbers
        (-1, "min een"),
        (-10, "min tien"),
    ]

    for n, w in cases:
        assert cardinal(n, friendly=False) == w
    # NOTE(review): this is a fragment; the enclosing definition and the
    # origin of `line`, `current_letter`, `previous_letter`,
    # `previous_first_word`, `string_item` and `items` are not visible here.

    # Split the line on whitespace, count the tokens and take the first one.
    # Indexing [0] raises IndexError when the line contains no tokens.
    elements_line = line.split()
    length_line = len(elements_line)
    first_word = line.split()[0]

    # Check the first word of the line on multiple aspects and change it if necessary
    # Strip leading apostrophes, unless the word is a lone apostrophe.
    if "'" in first_word and not first_word == "'":
        first_word = first_word.lstrip("'")
    # Strip surrounding commas, unless the word is a lone comma.
    if first_word != ",":
        first_word = first_word.strip(",")
    # Replace the digraph "ij"/"IJ" with "y"/"Y".
    if "ij" in first_word or "IJ" in first_word:
        first_word = first_word.replace("ij", "y")
        first_word = first_word.replace("IJ", "Y")
    # Remove every hyphen, but only when the word does not start with one.
    if not first_word.startswith("-"):
        first_word = first_word.replace("-", "")
    # A word made of digits only is passed through cardinal(..., friendly=False).
    if first_word.isdigit():
        first_word = cardinal(int(first_word), friendly=False)
    # A leading "0" (resp. "1") is turned into "O" (resp. "I") throughout the
    # word, but only while the current letter is "O" (resp. "I").
    # NOTE(review): presumably corrects digit/letter confusion in the input
    # text — confirm.
    if first_word.startswith("0") and current_letter == "O":
        first_word = first_word.replace("0", "O")
    if first_word.startswith("1") and current_letter == "I":
        first_word = first_word.replace("1", "I")
    # When the word is exactly the current letter followed by a period,
    # strip the period(s).
    if first_word == current_letter + ".":
        first_word = first_word.strip(".")

    # Check if a new bibliographical entry should be started or not
    check_result = check_line(length_line, first_word, previous_first_word,
                              current_letter, previous_letter, string_item)

    # Either create a new entry or concatenate with the current one
    # On a True result: remember this first word and append `string_item`
    # to `items`. The other branch is not visible in this fragment.
    if check_result == True:
        previous_first_word = first_word
        items.append(string_item)
Exemple #5
0
def test_cardinal(number, expected):
    """Assert that ``cardinal(number, friendly=False)`` equals *expected*."""
    assert cardinal(number, friendly=False) == expected
Exemple #6
0
def test_friendly_cardinals(number, expected):
    """Assert that ``cardinal(number)`` with default options equals *expected*."""
    assert cardinal(number) == expected
Exemple #7
0
         ssssw_clean) if p != ""
 ]
 if len(
         num_list
 ) > 1:  # if there are leading zeroes
     assert len(num_list) == 2
     for zero in num_list[0]:
         kaldi_lex_used[
             "nul"] = kaldi_lex[
                 "nul"]
         new_words.append("nul")
         sw_l.append("nul")
         si_l.append(str(sw_i))
         sw_i += 1
     number = cardinal(
         int(num_list[1]),
         friendly=False)
 else:  # if there are no leading zeroes
     number = cardinal(
         int(ssssw_clean),
         friendly=False)
 number = re.sub(
     r'een', 'één', number)
 numbers = number.split(
     " "
 )  # sometimes cardinal() splits up numbers: 1001 --> duizend een
 for n_counter, n in enumerate(
         numbers, 1):
     sw_i += 1 if n_counter > 1 else 0
     if n in kaldi_lex:
         kaldi_lex_used[