Exemple #1
0
 def test_text2num_zeroes(self):
     """Catalan: leading zeros are accepted, a zero after other digits is rejected."""
     accepted = (
         ("zero", 0),
         ("zero vuit", 8),
         ("zero zero cent vint-i-cinc", 125),
     )
     for phrase, value in accepted:
         self.assertEqual(text2num(phrase, "ca"), value)

     rejected = ("cinc zero", "cinquanta zero tres", "cinquanta-tres zero")
     for phrase in rejected:
         self.assertRaises(ValueError, text2num, phrase, "ca")
Exemple #2
0
 def test_text2num_zeroes(self):
     """French: leading zeros are accepted, a zero after other digits is rejected."""
     accepted = (
         ("zéro", 0),
         ("zéro huit", 8),
         ("zéro zéro cent vingt-cinq", 125),
     )
     for phrase, value in accepted:
         self.assertEqual(text2num(phrase, "fr"), value)

     rejected = ("cinq zéro", "cinquante zéro trois", "cinquante trois zéro")
     for phrase in rejected:
         self.assertRaises(ValueError, text2num, phrase, "fr")
Exemple #3
0
 def test_text2num_zeroes(self):
     """English: leading zeros are accepted, a zero after other digits is rejected."""
     accepted = (
         ("zero", 0),
         ("zero eight", 8),
         ("zero zero hundred twenty five", 125),
     )
     for phrase, value in accepted:
         self.assertEqual(text2num(phrase, "en"), value)

     rejected = ("five zero", "fifty zero three", "fifty three zero")
     for phrase in rejected:
         self.assertRaises(ValueError, text2num, phrase, "en")
 def test_text2num_zeroes(self):
     """Spanish: leading zeros are accepted, a zero after other digits is rejected."""
     accepted = (
         ("cero", 0),
         ("cero ocho", 8),
         ("cero cero ciento veinticinco", 125),
     )
     for phrase, value in accepted:
         self.assertEqual(text2num(phrase, "es"), value)

     rejected = ("cinco cero", "cincuenta cero tres", "cincuenta y tres cero")
     for phrase in rejected:
         self.assertRaises(ValueError, text2num, phrase, "es")
 def test_text2num_zeroes(self):
     """Portuguese: leading zeros are accepted, a zero after other digits is rejected."""
     accepted = (
         ("zero", 0),
         ("zero oito", 8),
         ("zero zero cento vinte e cinco", 125),
     )
     for phrase, value in accepted:
         self.assertEqual(text2num(phrase, "pt"), value)

     rejected = ("cinco zero", "cinquenta zero três", "cinquenta e três zero")
     for phrase in rejected:
         self.assertRaises(ValueError, text2num, phrase, "pt")
Exemple #6
0
 def test_text2num_variants(self):
     """Check spelling variants of French numbers against their expected values."""
     variants = (
         ("quatre-vingt dix-huit", 98),
         ("nonante-huit", 98),
         ("soixante-dix-huit", 78),
         ("septante-huit", 78),
         ("quatre-vingt-huit", 88),
         ("octante-huit", 88),
         ("huitante-huit", 88),
         ("huitante-et-un", 81),
         ("quatre-vingts", 80),
         ("mil neuf cent vingt", 1920),
     )
     for phrase, value in variants:
         self.assertEqual(text2num(phrase, "fr"), value)
Exemple #7
0
    def _preprocess_french_number_words(self):
        # Since the French model has no number entities, need to deal with number words by hand
        # for now. Could eventually train our own cardinal entity, but in the medium
        # term this should probably be made a pipeline component, although the text
        # immutability may be an issue
        french_number_words = {
            'millier': 1E3,
            'milliers': 1E3,
            'million': 1E6,
            'millions': 1E6,
            'milliard': 1E9,
            'milliards': 1E9
        }

        words = self.text.split(' ')
        for i, word in enumerate(words):
            if word in french_number_words.keys():
                prev_word = words[i - 1]
                if re.match('^\\d+$', prev_word):
                    number = int(prev_word)
                    need_to_merge = True
                else:
                    try:
                        number = text2num(str(prev_word))
                        need_to_merge = True
                    except ValueError:
                        number = 2  # Multiply 1 million or whatever by 2
                        need_to_merge = False

                number *= french_number_words[word]
                if need_to_merge:
                    search_text = '{}\\s+{}'.format(prev_word, word)
                else:
                    search_text = word
                self.text = re.sub(search_text, str(int(number)), self.text)
 def test_text2num_variants(self):
     """Check spelling variants of French numbers against their expected values."""
     variants = (
         ('quatre-vingt dix-huit', 98),
         ('nonante-huit', 98),
         ('soixante-dix-huit', 78),
         ('septante-huit', 78),
         ('quatre-vingt-huit', 88),
         ('octante-huit', 88),
         ('huitante-huit', 88),
         ('huitante-et-un', 81),
     )
     for phrase, value in variants:
         self.assertEqual(text2num(phrase), value)
Exemple #9
0
    def test_text2num(self):
        """Check a range of English number phrases against their integer values."""
        cases = [
            (
                "fifty-three billion two hundred forty-three thousand seven hundred twenty-four",
                53_000_243_724,
            ),
            (
                "fifty-one million five hundred seventy-eight thousand three hundred two",
                51_578_302,
            ),
            ("eighty-five", 85),
            ("eighty-one", 81),
            ("fifteen", 15),
            ("hundred fifteen", 115),
            ("one hundred fifteen", 115),
            ("seventy-five thousands", 75000),
            ("thousand nine hundred twenty", 1920),
        ]
        for phrase, expected in cases:
            self.assertEqual(text2num(phrase, "en"), expected)
Exemple #10
0
 def extract_number_words(span):
     """
     Detect a leading number literal such as "twenty two" in ``span``.

     Counts the leading tokens that are in ``number_words`` and are not
     flagged by ``has_to_be_ignored``. Returns ``(count, value)`` where
     ``value`` is the ``text2num`` result for those tokens, or the joined
     tokens themselves when ``text2num`` raises ``ValueError``. When no
     leading token qualifies, returns ``(1, span[0])``.
     """
     count = 0
     for position, token in enumerate(span):
         if token not in number_words or has_to_be_ignored(span, position):
             break
         count = position + 1

     if not count:
         return 1, span[0]

     try:
         return count, text2num(" ".join(span[:count]), lang="en")
     except ValueError:
         return count, " ".join(span[:count])
Exemple #11
0
    def create_questions(self, sentence, chunked):
        """Collect gap candidates from a chunked sentence and register a question.

        Args:
            sentence: Passed through unchanged to ``QuestionSentence``.
            chunked: Iterable of chunks. Plain tuples are skipped; every
                other item must be iterable (yielding items whose first
                element is the word) and provide ``label()``.

        Returns:
            ``None``. When at least one NUMBER or LOCATION gap was found and
            no gap occurs twice, a ``QuestionSentence`` is added to
            ``self.quiz``. The whole sentence is abandoned as soon as a
            NUMBER value is flagged by ``self.probably_range``.
        """
        gaps = []

        for word in chunked:
            if isinstance(word, tuple):
                continue

            orig_phrase = " ".join(y[0] for y in word)
            label = word.label()

            if label == "NUMBER":
                modified_phrase = orig_phrase

                try:
                    # convert spelled out word to numerical value
                    modified_phrase = t2n.text2num(modified_phrase,
                                                   lang=T2N_LANG)
                except Exception:
                    # Any conversion failure means "not a spelled-out
                    # number"; unlike a bare ``except`` this no longer
                    # swallows KeyboardInterrupt / SystemExit.
                    try:
                        int(modified_phrase)
                    except ValueError:
                        # if the word could not be converted and
                        # was not already an integer literal, ignore it
                        continue

                if self.probably_range(modified_phrase):
                    return

                gaps.append((label, orig_phrase, modified_phrase))
            elif label in ["LOCATION", "PROPER"]:
                gaps.append((label, orig_phrase, orig_phrase))

        # Only build a question when there are gaps and none is duplicated.
        if gaps and len(gaps) == len(set(gaps)):
            gaps_filtered = [
                gap for gap in gaps
                if gap[0] == 'NUMBER' or gap[0] == 'LOCATION'
            ]

            if gaps_filtered:
                self.quiz.add(QuestionSentence(sentence, gaps_filtered))
    def test_text2num(self):
        """Check a range of French number phrases against their integer values."""
        cases = [
            (
                "cinquante trois mille millions deux cent quarante trois mille sept cent vingt quatre",
                53_000_243_724,
            ),
            (
                "cinquante et un million cinq cent soixante dix-huit mille trois cent deux",
                51_578_302,
            ),
            ("quatre-vingt cinq", 85),
            ("quatre-vingt un", 81),
            ('quinze', 15),
            ('soixante quinze mille', 75000),
        ]
        for phrase, expected in cases:
            self.assertEqual(text2num(phrase), expected)
    def get_speech_dict(self, recognition_result, language):
        """Sort the words of a recognition result into ``self.speech_dict``.

        Keys written to ``self.speech_dict``:

        * ``"keyword"``: ``self.keyword`` (one occurrence is removed from
          the word list).
        * one key per entry of ``self.identifiers_list`` (``identifier.name``)
          holding the words shared with ``identifier.content_list``.
        * ``"numbers"`` (only when ``self.find_number`` is truthy): values
          obtained via ``text2num`` or, failing that, ``int``.
        * ``"other"``: the words left over.

        Args:
            recognition_result: Whitespace-separated recognised text.
            language: Language tag; its first two characters are passed to
                ``text2num``.

        Raises:
            ValueError: if ``self.keyword`` is not one of the words.
        """
        print("Start get_speech_dict")
        speech_list = recognition_result.split()

        self.speech_dict.update({"keyword": self.keyword})
        speech_list.remove(self.keyword)

        for identifier in self.identifiers_list:

            words_by_identifier = list(
                set(speech_list) & set(identifier.content_list))
            for word in words_by_identifier:
                speech_list.remove(word)

            self.speech_dict.update({identifier.name: words_by_identifier})

        if self.find_number:
            numbers_list = []
            remaining_words = []
            # Build a new list instead of removing from speech_list while
            # iterating over it, which skipped the word after each number.
            for word in speech_list:
                try:
                    number = text2num(word, language[0:2])
                except ValueError:
                    try:
                        number = int(word)
                    except ValueError:
                        number = None

                # Compare against None so that a recognised 0 is kept.
                if number is None:
                    remaining_words.append(word)
                else:
                    numbers_list.append(number)

            speech_list = remaining_words
            self.speech_dict.update({"numbers": numbers_list})

        self.speech_dict.update({"other": speech_list})
        print("Speech dictionary {}".format(self.speech_dict))
Exemple #14
0
 def test_accent(self):
     """Catalan "milió" must be recognised with and without the accent."""
     for spelling in ("un milió", "un milio"):
         self.assertEqual(text2num(spelling, "ca"), 1000000)
     for spelling in ("Un milió", "Un milio"):
         self.assertEqual(alpha2digit(spelling, "ca"), "1000000")
    def test_text2num(self):
        """Check Spanish number phrases, including decimals via alpha2digit."""
        long_text = "novecientos noventa y nueve mil novecientos noventa y nueve millones novecientos noventa y nueve mil novecientos noventa y nueve"
        integer_cases = [
            ("cero", 0),
            ("uno", 1),
            ("nueve", 9),
            ("diez", 10),
            ("once", 11),
            ("diecinueve", 19),
            ("veinte", 20),
            ("veintiuno", 21),
            ("treinta", 30),
            ("treinta y uno", 31),
            ("treinta y dos", 32),
            ("treinta y nueve", 39),
            ("noventa y nueve", 99),
            ("cien", 100),
            ("ciento uno", 101),
            ("doscientos", 200),
            ("doscientos uno", 201),
            ("mil", 1000),
            ("mil uno", 1001),
            ("dos mil", 2000),
            ("dos mil noventa y nueve", 2099),
            ("nueve mil novecientos noventa y nueve", 9999),
            ("novecientos noventa y nueve mil novecientos noventa y nueve",
             999999),
            (long_text, 999999999999),
        ]
        for phrase, expected in integer_cases:
            self.assertEqual(text2num(phrase, "es"), expected)

        decimal_cases = [
            ("uno coma uno", '1.1'),
            ("uno coma cuatrocientos uno", '1.401'),
        ]
        for phrase, expected in decimal_cases:
            self.assertEqual(alpha2digit(phrase, "es"), expected)

        # TODO:
        # self.assertEqual(alpha2digit("cero coma cinco", "es"), '0.5')

        large_and_misc_cases = [
            (
                "cincuenta y tres mil veinte millones doscientos cuarenta y tres mil setecientos veinticuatro",
                53_020_243_724,
            ),
            (
                "cincuenta y un millones quinientos setenta y ocho mil trescientos dos",
                51_578_302,
            ),
            ("ochenta y cinco", 85),
            ("ochenta y uno", 81),
            ("quince", 15),
            ("ciento quince", 115),
            ("setenta y cinco mil", 75000),
            ("mil novecientos veinte", 1920),
        ]
        for phrase, expected in large_and_misc_cases:
            self.assertEqual(text2num(phrase, "es"), expected)
 def test_accent(self):
     """Spanish "millón" must be recognised with and without the accent."""
     for spelling in ("un millon", "un millón"):
         self.assertEqual(text2num(spelling, "es"), 1000000)
     for spelling in ("Un millon", "Un millón"):
         self.assertEqual(alpha2digit(spelling, "es"), "1000000")
Exemple #17
0
    def test_text2num(self):
        """Check Catalan number phrases, including decimals via alpha2digit."""
        long_text = "nou-cents noranta-nou mil nou-cents noranta-nou milions nou-cents noranta-nou mil nou-cents noranta-nou"
        integer_cases = [
            ("zero", 0),
            ("un", 1),
            ("nou", 9),
            ("deu", 10),
            ("onze", 11),
            ("dinou", 19),
            ("vint", 20),
            ("vint-i-dues", 22),
            ("trenta", 30),
            ("trenta-u", 31),
            ("trenta-dos", 32),
            ("trenta-huit", 38),
            ("noranta-nou", 99),
            ("cent", 100),
            ("cent u", 101),
            ("dues-centes", 200),
            ("dues-centes una", 201),
            ("mil", 1000),
            ("mil un", 1001),
            ("dos mil", 2000),
            ("dos mil noranta-nou", 2099),
            ("nou mil nou-cents noranta-nou", 9999),
            ("nou-cents noranta-nou mil nou-cents noranta-nou", 999999),
            (long_text, 999999999999),
        ]
        for phrase, expected in integer_cases:
            self.assertEqual(text2num(phrase, "ca"), expected)

        decimal_cases = [
            ("un coma un", '1,1'),
            ("u coma quatre-cents u", '1,401'),
        ]
        for phrase, expected in decimal_cases:
            self.assertEqual(alpha2digit(phrase, "ca"), expected)

        #FIXME: self.assertEqual(alpha2digit("zero coma cinc", "ca"), '0,5')

        large_and_misc_cases = [
            (
                "cinquanta-tres mil vint milions dos-cents quaranta-tres mil set-cents vint-i-quatre",
                53020243724,
            ),
            (
                "cinquanta-un milions cinc-cents setanta-vuit mil tres-cents dos",
                51578302,
            ),
            ("huitanta-cinc", 85),
            ("vuitanta-un", 81),
            ("quinze", 15),
            ("cent quinze", 115),
            ("setanta-cinc mil", 75000),
            ("mil nou-cents vint", 1920),
        ]
        for phrase, expected in large_and_misc_cases:
            self.assertEqual(text2num(phrase, "ca"), expected)
Exemple #18
0
 def test_text2num_centuries(self):
     """A year spoken as "<n> hundred ..." must convert to the full year."""
     spoken_year = "nineteen hundred seventy-three"
     self.assertEqual(text2num(spoken_year, "en"), 1973)
 def test_accent(self):
     """Portuguese "milhão" must be recognised with and without the accent."""
     for spelling in ("um milhao", "um milhão"):
         self.assertEqual(text2num(spelling, "pt"), 1000000)
     for spelling in ("Um milhao", "Um milhão"):
         self.assertEqual(alpha2digit(spelling, "pt"), "1000000")
Exemple #20
0
def is_word_number(string):
    """Return True when ``text2num`` accepts ``string`` as an English number.

    Any exception raised by the conversion is treated as "not a number".
    """
    try:
        text2num(string, 'en')
    except Exception:
        return False
    else:
        return True
Exemple #21
0
def compareToken(captionTokens, index, cleanTitle, xValueArr,
                 yValueArr, cleanXAxis, cleanYAxis, entities):
    """Try to match one caption token against chart data, axis labels and title.

    The token at ``captionTokens[index]`` is lower-cased, stripped of commas
    and, when it is a spelled-out English number, converted to its digit
    string. It is then compared, in this order, with:

    1. every '_'-separated part of each x value and y value,
    2. the non-filler parts of the x and y axis labels,
    3. the title tokens,
    4. the special cases "united states" / "u.s.".

    Returns:
        The result of ``templateAssigner`` for a data-value match, otherwise
        a two-element list ``[flag, text]`` where ``flag`` is 1 for a match
        and 0 for no match.

    Side effects:
        Calls ``adjustDataLabel`` on a data-value match and may rewrite or
        pop entries of ``captionTokens`` in the "united states" branch.
    """
    token = captionTokens[index].replace(',', '').lower()
    if is_word_number(token):
        token = str(text2num(token, 'en'))
    # iterate through x and y values
    for xWords, yWords, i in zip(xValueArr, yValueArr, range(0, len(xValueArr))):
        # iterate through values with multiple tokens in them, delimited by '_'
        for xWord in xWords.split('_'):
            xWord = xWord.replace(',', '').lower()
            if is_word_number(xWord):
                xWord = str(text2num(xWord, 'en'))
            if token == xWord:
                adjustDataLabel(1, 'x', i)
                return templateAssigner(token, xValueArr, xWords, i, 'X')
            elif is_number(token) and are_numbers(xValueArr):
                # NOTE(review): float(xWord) presumably cannot fail because
                # are_numbers(xValueArr) passed - confirm for '_'-split parts.
                if numberComparison(float(token), captionTokens, index, float(xWord), cleanXAxis):
                    adjustDataLabel(1, 'x', i)
                    return templateAssigner(token, xValueArr, xWords, i, 'X')
        for yWord in yWords.split('_'):
            yWord = yWord.replace(',', '').lower()
            if is_word_number(yWord):
                yWord = str(text2num(yWord, 'en'))
            if token == yWord:
                adjustDataLabel(1, 'y', i)
                return templateAssigner(token, yValueArr, yWords, i, 'Y')
            elif is_number(token) and are_numbers(yValueArr):
                if numberComparison(float(token), captionTokens, index, float(yWord), cleanYAxis):
                    adjustDataLabel(1, 'y', i)
                    return templateAssigner(token, yValueArr, yWords, i, 'Y')
    # check if token in axis names
    # remove filler words from labels
    cleanXArr = [xWord for xWord in cleanXAxis.split('_') if xWord.lower() not in fillers]
    cleanYArr = [yWord for yWord in cleanYAxis.split('_') if yWord.lower() not in fillers]
    for xLabelToken, i in zip(cleanXArr, range(0, len(cleanXArr))):
        xLabelWord = xLabelToken.replace('_', ' ').lower()
        if str(token).lower() == xLabelWord:
            return [1, f'{token}']
        # The membership test below does not depend on the label token, so a
        # token found in `numbers` matches on the first label word.
        elif str(token).lower() in numbers:  # and xLabelWord.lower() in numbers:
            return [1, f'{token}']
    for yLabelToken, i in zip(cleanYArr, range(0, len(cleanYArr))):
        yLabelWord = yLabelToken.replace('_', ' ').lower()
        if str(token).lower() == yLabelWord:
            return [1, f'{token}']
        elif str(token).lower() in numbers:  # and yLabelWord.lower() in numbers:
            return [1, f'{token}']
    # check if token in title
    for titleToken, i in zip(cleanTitle, range(0, len(cleanTitle))):
        titleWord = titleToken.lower()
        if str(token).lower() == titleWord:
            # Every path below returns the same [1, token] result; the
            # subject/date checks currently do not change the outcome.
            for subject, n in zip(entities['Subject'], range(0, len(entities['Subject']))):
                if titleWord in subject.lower():
                    return [1, f'{token}']
            for date, m in zip(entities['Date'], range(0, len(entities['Date']))):
                if titleWord == str(date).lower():
                    if len(entities['Date']) > 1:
                        # cant check for parallels in title
                        if date == max(entities['Date']):
                            return [1, f'{token}']
                        elif date == min(entities['Date']):
                            return [1, f'{token}']
                    return [1, f'{token}']
            return [1, f'{token}']
    # replace unmatched united states tokens with country to reduce bias
    if index < len(captionTokens) - 1:
        nextToken = captionTokens[index + 1]
        if token.lower() == 'united' and nextToken.lower() == 'states':
            if 'U.S.' in cleanTitle:
                # usIndex is computed but not used in this function.
                usIndex = cleanTitle.index('U.S.')
                captionTokens[index] = f'{token}'
                captionTokens.pop(index + 1)
                return [1, f'{token}']
            elif 'American' in cleanTitle:
                usIndex = cleanTitle.index('American')
                captionTokens[index] = f'{token}'
                captionTokens.pop(index + 1)
                return [1, f'{token}']
            else:
                captionTokens.pop(index + 1)
                captionTokens[index] = 'country'
                return [0, 'country']
        elif token.lower() == 'u.s.' or token.lower() == 'u.s':
            if 'U.S.' in cleanTitle:
                usIndex = cleanTitle.index('U.S.')
                captionTokens[index] = f'{token}'
                return [1, f'{token}']
            elif 'United' in cleanTitle and 'States' in cleanTitle:
                usIndex = cleanTitle.index('States')
                captionTokens[index] = f'{token}'
                return [1, f'{token}']
    # nothing matched
    return [0, token]
Exemple #22
0
def compareMultiColumnToken(captionTokens, index, cleanTitle,
                            colData, cleanCols, entities):
    """Try to match one caption token against multi-column chart data.

    The token at ``captionTokens[index]`` is lower-cased, stripped of commas
    and, when it is a spelled-out English number, converted to its digit
    string. For each column (paired with its label from ``cleanCols``) the
    token is compared with the non-filler '_'-separated parts of every cell,
    then with the column label, the title tokens and the "united states" /
    "u.s." special cases.

    Returns:
        The result of ``multiColumnTemplater`` for a cell match, otherwise a
        two-element list ``[flag, text]`` where ``flag`` is 1 for a match and
        0 for no match.

    Side effects:
        Calls ``adjustMultiColumnLabel`` on a cell match and may rewrite or
        pop entries of ``captionTokens`` in the "united states" / "u.s."
        branches.
    """
    token = captionTokens[index].replace(',', '').lower()
    if is_word_number(token):
        token = str(text2num(token, 'en'))
    # iterate through x and y values
    for column, columnLabel, i in zip(colData, cleanCols, range(len(colData))):
        for cell, n in zip(column, range(len(column))):
            # iterate through values with multiple tokens in them, delimited by '_'
            cleanValues = [value for value in cell.split('_') if value.lower() not in fillers]
            for words in cleanValues:
                valueWord = words.replace(',', '').lower()
                if is_word_number(valueWord):
                    valueWord = str(text2num(valueWord, 'en'))
                if token == valueWord:
                    adjustMultiColumnLabel(1, n, i)
                    return multiColumnTemplater(token, column, valueWord, n, i)
                elif is_number(token) and are_numbers(column):
                    # NOTE(review): float(valueWord) presumably cannot fail
                    # because are_numbers(column) passed - confirm.
                    if numberComparison(float(token), captionTokens, index, float(valueWord), columnLabel):
                        adjustMultiColumnLabel(1, n, i)
                        return multiColumnTemplater(token, column, valueWord, n, i)
        # NOTE(review): everything below is still inside the per-column loop,
        # so the title and "united states" checks run once per column and can
        # return before later columns' cells are examined - confirm intended.
        # check if token in axis names
        # remove filler words from labels
        cleanLabels = [word for word in columnLabel.split('_') if word.lower() not in fillers]
        for labelToken, m in zip(cleanLabels, range(len(cleanLabels))):
            labelWord = labelToken.replace('_', ' ').lower()
            if str(token).lower() == labelWord:
                return [1, f'{token}']
            # This membership test does not depend on the label token.
            elif str(token).lower() in numbers:
                return [1, f'{token}']
        # check if token in title
        # (this loop rebinds `i`, the column index of the enclosing loop)
        for titleToken, i in zip(cleanTitle, range(0, len(cleanTitle))):
            titleWord = titleToken.lower()
            if str(token).lower() == titleWord:
                for subject, n in zip(entities['Subject'], range(0, len(entities['Subject']))):
                    if titleWord in subject.lower():
                        return [1, f'{token}']
                for date, m in zip(entities['Date'], range(0, len(entities['Date']))):
                    if titleWord == str(date).lower():
                        if len(entities['Date']) > 1:
                            # cant check for parallels in title
                            if date == max(entities['Date']):
                                return [1, f'{token}']
                            elif date == min(entities['Date']):
                                return [1, f'{token}']
                        return [1, f'{token}']
                return [1, f'{token}']
        # replace unmatched united states tokens with country to reduce bias
        if index < len(captionTokens) - 1:
            nextToken = captionTokens[index + 1]
            if token.lower() == 'united' and nextToken.lower() == 'states':
                if 'U.S.' in cleanTitle:
                    # This branch keeps the raw token, while the branches
                    # below emit a 'templateTitle[<index>]' placeholder.
                    usIndex = cleanTitle.index('U.S.')
                    captionTokens[index] = f'{token}'
                    captionTokens.pop(index + 1)
                    return [1, f'{token}']
                elif 'American' in cleanTitle:
                    usIndex = cleanTitle.index('American')
                    captionTokens[index] = f'templateTitle[{usIndex}]'
                    captionTokens.pop(index + 1)
                    return [1, f'templateTitle[{usIndex}]']
                else:
                    captionTokens.pop(index + 1)
                    captionTokens[index] = 'country'
                    return [0, 'country']
            elif token.lower() == 'u.s.' or token.lower() == 'u.s':
                if 'U.S.' in cleanTitle:
                    usIndex = cleanTitle.index('U.S.')
                    captionTokens[index] = f'templateTitle[{usIndex}]'
                    return [1, f'templateTitle[{usIndex}]']
                elif 'United' in cleanTitle and 'States' in cleanTitle:
                    usIndex = cleanTitle.index('States')
                    captionTokens[index] = f'templateTitle[{usIndex}]'
                    return [1, f'templateTitle[{usIndex}]']
    # nothing matched (also reached when colData/cleanCols is empty)
    return [0, token]
Exemple #23
0
        records = box_preproc2(entry)
        src_instance = " ".join(records)
        all_ents, players, teams, cities, total_players, total_teams, total_cities = get_ents(entry)
        home_players, vis_players = get_player_idxs(entry)
        box_score = entry["box_score"]
        player_name_map = {y: x for x, y in box_score['PLAYER_NAME'].iteritems()}
        home_line_score = entry["home_line"]
        vis_line_score = entry["vis_line"]
        summary = entry['summary']
    else:
        args = line.split("|")
        name = args[0]
        record_type = args[2].strip()
        value = args[1]
        if not value.isdigit():
            value = text2num(value)
        if record_type.startswith("PLAYER-"):
            record_type = record_type[len("PLAYER-"):]

        name = name.replace("UNK","").strip()
        if name == 'Los Angeles' and 'LA' in total_cities:
            name = 'LA'
        if name in total_players:
            pass
        elif name in total_teams:
            pass
        elif name in players:
            name = resolve_name(name, total_players)
        elif name == 'Los Angeles Clippers' and 'LA Clippers' in total_teams:
            name = 'LA Clippers'
        elif name in teams:
    def test_text2num(self):
        """Check Portuguese number phrases, including decimals via alpha2digit."""
        integer_cases = [
            ("zero", 0),
            ("um", 1),
            ("oito", 8),
            ("dez", 10),
            ("onze", 11),
            ("dezanove", 19),
            ("vinte", 20),
            ("vinte e um", 21),
            ("trinta", 30),
            ("trinta e um", 31),
            ("trinta e três", 33),
            ("trinta e nove", 39),
            ("noventa e nove", 99),
            ("cem", 100),
            ("cento e um", 101),
            ("duzentos", 200),
            ("duzentos e um", 201),
            ("mil", 1000),
            ("mil e um", 1001),
            ("dois mil", 2000),
            ("dois mil noventa e nove", 2099),
            ("nove mil novecentos noventa e nove", 9999),
            ("novecentos noventa e nove mil novecentos noventa e nove",
             999999),
        ]
        for phrase, expected in integer_cases:
            self.assertEqual(text2num(phrase, "pt"), expected)

        decimal_cases = [
            ("um vírgula um", "1,1"),
            ("um vírgula quatrocentos e um", "1,401"),
        ]
        for phrase, expected in decimal_cases:
            self.assertEqual(alpha2digit(phrase, "pt"), expected)

        # fail
        #        self.assertEqual(alpha2digit("zero vírgula cinco", "pt"), "0,5")

        #     test1 = "cincuenta y tres mil veinte millones doscientos cuarenta y tres mil setecientos veinticuatro"
        #     self.assertEqual(text2num(test1, "pt"), 53_020_243_724)

        #     test2 = (
        #         "cincuenta y un millones quinientos setenta y ocho mil trescientos dos"
        #     )
        #     self.assertEqual(text2num(test2, "pt"), 51_578_302)

        misc_cases = [
            ("oitenta e cinco", 85),
            ("oitenta e um", 81),
            ("quinze", 15),
            ("cento quinze", 115),
            ("setenta e cinco mil", 75000),
            ("mil novecentos vinte", 1920),
        ]
        for phrase, expected in misc_cases:
            self.assertEqual(text2num(phrase, "pt"), expected)
Exemple #25
0
def translate_number_string(string):
    """Probe whether ``string`` converts with ``text2num`` (English).

    Returns ``1`` when the conversion raises any exception and ``None``
    when it succeeds; the converted value itself is discarded.
    """
    try:
        text2num(string, 'en')
    except Exception:
        return 1
    return None
Exemple #26
0
def to_inch(input):
	"""Convert a free-text length description into inches.

	The text is split on single spaces. "half"/"quarter" become 0.5/0.25,
	spelled-out English numbers are converted with ``text2num`` and digit
	strings with ``int``. Recognised units are feet/foot/ft,
	meter/meters/m and inch/inches/in (also glued to a number, "3in").

	Args:
		input: The text to parse. The name shadows the builtin but is kept
			so that existing keyword callers continue to work.

	Returns:
		A list of lengths in inches (several entries for "X or Y" ranges
		or several inch values), or ``[None]`` when nothing usable was
		found or the total is zero.
	"""
	meter_to_inch = 0 # inches calculated from meters
	feet_to_inch = 0 # inches calculated from feet
	inches = [] # inches stated directly in the text (not calculated)
	string_to_number_list = [] # tokens with written numbers converted where possible
	string_to_number_final = [] # same tokens with digit strings converted to int
	feet_ind = 0.0 # feet total used when no individual inches are given

	# trim and then convert our input to list of words
	string = input.strip().split(" ")
	for item in string:
		if item in ["half", "Half"]:
			string_to_number_list.append(0.5)
		elif item in ["quarter", "Quarter"]:
			string_to_number_list.append(0.25)
		else: # Parse written digits
			try:
				string_to_number_list.append(text2num(item, "en"))
			except Exception:
				# Not a spelled-out number: keep the token unless it is an
				# indefinite article. (`item != ("an" or "a")` only ever
				# compared against "an", so "a" used to slip through.)
				if item not in ("an", "a"):
					string_to_number_list.append(item)

	# digit strings that are still <str> become <int>; other tokens stay as they are
	for item in string_to_number_list:
		if isinstance(item, str):
			try:
				item = int(item)
			except ValueError:
				pass
		string_to_number_final.append(item)

	token_count = len(string_to_number_final)
	for i in range(token_count):
		current = string_to_number_final[i]
		following = string_to_number_final[i+1] if i+1 < token_count else None
		# NOTE: for i == 0 the index i-1 wraps around to the last token.
		preceded_by_int = type(string_to_number_final[i-1]) == int
		# Only real numbers may be scaled; multiplying a stray word by 12
		# would yield a repeated string and crash the sum further down.
		is_number = isinstance(current, (int, float))

		if is_number and not preceded_by_int:
			# find feet
			if following in ["feet", "foot", "ft"]:
				feet_to_inch = current*12
			# find meters and convert to inches
			if following in ["meter", "meters", "m"]:
				meter_to_inch = current*39.3700787
			# find individual inches (case 1 - separate from number)
			if following in ["inch", "inches", "in"]:
				inches.append(current)

		# find individual inches (case 2 - stuck to the number, e.g. "3in")
		if isinstance(current, str) and current[-2:] == "in":
			try:
				inches.append(int(current[:-2])) # all except the last two chars
			except ValueError:
				pass

		# find individual inches in a range ("X or Y")
		if following == "or":
			try:
				inches.append(int(current))
			except (TypeError, ValueError):
				pass

	# total of the converted feet and meters, in inches
	feet = feet_to_inch+meter_to_inch
	if inches:
		result = [inch+feet for inch in inches]
	else:
		result = [feet]

	## Case no individual inches
	# ("inch" contains "in", so a single substring test is sufficient)
	if "in" not in input:
		if "or" in string_to_number_final: # if `OR` also exists in this case
			z = string_to_number_final.index("or")
			neighbours = [string_to_number_final[z-1]]
			if z+1 < token_count: # "or" may be the last token
				neighbours.append(string_to_number_final[z+1])
			return_list = [value*12 for value in neighbours if not isinstance(value, str)]
			if len(return_list) == 0:
				return [None]
			return return_list

		counted = 0 # tokens that could be processed without a type error
		for x in range(token_count):
			try:
				if string_to_number_final[x-1] not in ["foot", "feet"]:
					feet_ind = feet_ind + string_to_number_final[x]
				counted = counted+1
			except TypeError:
				# non-numeric token: neither added nor counted
				pass
		if counted == 2:
			feet_ind = feet_ind*12
			if feet_ind.is_integer(): # if no decimals convert to <int>
				feet_ind = int(feet_ind)
			return [feet_ind]
		if counted > 2: # Input string seems to be incorrect
			return [None]
		# any other count falls through to the generic result below

	# return none if bad input or zero result
	if len(result) == 1 and result[0] == 0:
		return [None]
	return result
 def test_text2num_centuries(self):
     """A year spoken as "<n> cent ..." must convert to the full year."""
     spoken_year = 'dix-neuf cent soixante-treize'
     self.assertEqual(text2num(spoken_year), 1973)