def test_text2num_zeroes(self):
    """Catalan: leading zeros parse, a zero after other digits raises."""
    accepted = (
        ("zero", 0),
        ("zero vuit", 8),
        ("zero zero cent vint-i-cinc", 125),
    )
    for phrase, expected in accepted:
        self.assertEqual(text2num(phrase, "ca"), expected)

    rejected = (
        "cinc zero",
        "cinquanta zero tres",
        "cinquanta-tres zero",
    )
    for phrase in rejected:
        self.assertRaises(ValueError, text2num, phrase, "ca")
def test_text2num_zeroes(self):
    """French: leading zeros parse, a zero after other digits raises."""
    accepted = (
        ("zéro", 0),
        ("zéro huit", 8),
        ("zéro zéro cent vingt-cinq", 125),
    )
    for phrase, expected in accepted:
        self.assertEqual(text2num(phrase, "fr"), expected)

    rejected = (
        "cinq zéro",
        "cinquante zéro trois",
        "cinquante trois zéro",
    )
    for phrase in rejected:
        self.assertRaises(ValueError, text2num, phrase, "fr")
def test_text2num_zeroes(self):
    """English: leading zeros parse, a zero after other digits raises."""
    accepted = (
        ("zero", 0),
        ("zero eight", 8),
        ("zero zero hundred twenty five", 125),
    )
    for phrase, expected in accepted:
        self.assertEqual(text2num(phrase, "en"), expected)

    rejected = (
        "five zero",
        "fifty zero three",
        "fifty three zero",
    )
    for phrase in rejected:
        self.assertRaises(ValueError, text2num, phrase, "en")
def test_text2num_zeroes(self):
    """Spanish: leading zeros parse, a zero after other digits raises."""
    accepted = (
        ("cero", 0),
        ("cero ocho", 8),
        ("cero cero ciento veinticinco", 125),
    )
    for phrase, expected in accepted:
        self.assertEqual(text2num(phrase, "es"), expected)

    rejected = (
        "cinco cero",
        "cincuenta cero tres",
        "cincuenta y tres cero",
    )
    for phrase in rejected:
        self.assertRaises(ValueError, text2num, phrase, "es")
def test_text2num_zeroes(self):
    """Portuguese: leading zeros parse, a zero after other digits raises."""
    accepted = (
        ("zero", 0),
        ("zero oito", 8),
        ("zero zero cento vinte e cinco", 125),
    )
    for phrase, expected in accepted:
        self.assertEqual(text2num(phrase, "pt"), expected)

    rejected = (
        "cinco zero",
        "cinquenta zero três",
        "cinquenta e três zero",
    )
    for phrase in rejected:
        self.assertRaises(ValueError, text2num, phrase, "pt")
def test_text2num_variants(self):
    """Regional French spellings must parse to the same values as the standard ones."""
    variants = (
        ("quatre-vingt dix-huit", 98),
        ("nonante-huit", 98),
        ("soixante-dix-huit", 78),
        ("septante-huit", 78),
        ("quatre-vingt-huit", 88),
        ("octante-huit", 88),
        ("huitante-huit", 88),
        ("huitante-et-un", 81),
        ("quatre-vingts", 80),
        ("mil neuf cent vingt", 1920),
    )
    for phrase, expected in variants:
        self.assertEqual(text2num(phrase, "fr"), expected)
def _preprocess_french_number_words(self):
    """Rewrite French magnitude words in ``self.text`` as plain digits.

    Each space-separated token that is one of ``millier(s)``, ``million(s)``
    or ``milliard(s)`` is multiplied by the number written just before it
    (digits, or a spelled-out number accepted by ``text2num``) and the pair
    is replaced in ``self.text`` by the resulting integer. When there is no
    usable preceding number the magnitude word alone is replaced, using a
    multiplier of 2.

    The method mutates ``self.text`` in place and returns ``None``.
    """
    # Since the French model has no number entities, number words have to be
    # dealt with by hand for now. Could eventually train our own cardinal
    # entity, but in the medium term this should probably be made a pipeline
    # component, although the text immutability may be an issue.
    french_number_words = {
        'millier': 1E3,
        'milliers': 1E3,
        'million': 1E6,
        'millions': 1E6,
        'milliard': 1E9,
        'milliards': 1E9
    }
    words = self.text.split(' ')
    for i, word in enumerate(words):
        if word not in french_number_words:
            continue

        # A magnitude word in first position has no predecessor; indexing
        # words[i - 1] there would silently wrap around to the last token.
        prev_word = words[i - 1] if i > 0 else None

        number = None
        if prev_word is not None:
            if re.match('^\\d+$', prev_word):
                number = int(prev_word)
            else:
                try:
                    number = text2num(str(prev_word))
                except ValueError:
                    number = None

        need_to_merge = number is not None
        if not need_to_merge:
            number = 2  # Multiply 1 million or whatever by 2

        number *= french_number_words[word]

        # Tokens are literal text, so escape them before building the pattern.
        # NOTE: the substitution runs over the whole text, so every occurrence
        # of the pattern (including inside longer tokens) is replaced.
        if need_to_merge:
            search_text = '{}\\s+{}'.format(re.escape(prev_word), re.escape(word))
        else:
            search_text = re.escape(word)
        self.text = re.sub(search_text, str(int(number)), self.text)
def test_text2num_variants(self):
    """Regional French spellings must parse to the same values as the standard ones."""
    variants = (
        ('quatre-vingt dix-huit', 98),
        ('nonante-huit', 98),
        ('soixante-dix-huit', 78),
        ('septante-huit', 78),
        ('quatre-vingt-huit', 88),
        ('octante-huit', 88),
        ('huitante-huit', 88),
        ('huitante-et-un', 81),
    )
    for phrase, expected in variants:
        self.assertEqual(text2num(phrase), expected)
def test_text2num(self):
    """English cardinals, from two-digit numbers up to billions."""
    cases = (
        (
            "fifty-three billion two hundred forty-three thousand seven hundred twenty-four",
            53_000_243_724,
        ),
        (
            "fifty-one million five hundred seventy-eight thousand three hundred two",
            51_578_302,
        ),
        ("eighty-five", 85),
        ("eighty-one", 81),
        ("fifteen", 15),
        ("hundred fifteen", 115),
        ("one hundred fifteen", 115),
        ("seventy-five thousands", 75000),
        ("thousand nine hundred twenty", 1920),
    )
    for phrase, expected in cases:
        self.assertEqual(text2num(phrase, "en"), expected)
def extract_number_words(span):
    """Detect a leading run of number words such as "twenty two".

    Walks ``span`` from the start while each item is in ``number_words`` and
    is not flagged by ``has_to_be_ignored``.

    Returns a ``(consumed, value)`` pair:
      * ``(1, span[0])`` when the first item is not a number word;
      * ``(count, number)`` when the run parses with ``text2num``;
      * ``(count, joined_text)`` when ``text2num`` raises ``ValueError``.
    """
    consumed = 0
    for position, word in enumerate(span):
        if word not in number_words or has_to_be_ignored(span, position):
            break
        consumed = position + 1

    if not consumed:
        return 1, span[0]

    try:
        return consumed, text2num(" ".join(span[:consumed]), lang="en")
    except ValueError:
        return consumed, " ".join(span[:consumed])
def create_questions(self, sentence, chunked):
    """Collect gap candidates from ``chunked`` and register a question.

    ``chunked`` is iterated item by item. Plain tuples are skipped; every
    other item is expected to be iterable over pairs whose first element is
    the token text, and to expose ``label()``.

    * ``NUMBER`` chunks are converted with ``t2n.text2num``; if that fails the
      phrase is kept only when it already parses as an integer literal,
      otherwise the chunk is ignored.
    * If ``self.probably_range`` accepts a number, the whole sentence is
      abandoned and nothing is added.
    * ``LOCATION`` and ``PROPER`` chunks are recorded unchanged.

    A ``QuestionSentence`` is added to ``self.quiz`` only when at least one
    gap was found, no gap is duplicated, and at least one ``NUMBER`` or
    ``LOCATION`` gap remains after filtering. Returns ``None``.
    """
    gaps = []
    for word in chunked:
        if isinstance(word, tuple):
            # Bare tuples are un-chunked tokens; only labelled chunks matter.
            continue

        orig_phrase = " ".join(leaf[0] for leaf in word)
        label = word.label()

        if label == "NUMBER":
            try:
                # convert spelled out word to numerical value
                modified_phrase = t2n.text2num(orig_phrase, lang=T2N_LANG)
            except Exception:
                try:
                    int(orig_phrase)
                except ValueError:
                    # if the word could not be converted and
                    # was not already numerical, ignore it
                    continue
                modified_phrase = orig_phrase
            if self.probably_range(modified_phrase):
                return
            gaps.append((label, orig_phrase, modified_phrase))
        elif label in ["LOCATION", "PROPER"]:
            gaps.append((label, orig_phrase, orig_phrase))

    if gaps and len(gaps) == len(set(gaps)):
        # PROPER gaps only count towards the duplicate check above.
        gaps_filtered = [gap for gap in gaps if gap[0] in ('NUMBER', 'LOCATION')]
        if gaps_filtered:
            self.quiz.add(QuestionSentence(sentence, gaps_filtered))
def test_text2num(self):
    """French cardinals, from two-digit numbers up to thousands of millions."""
    cases = (
        (
            "cinquante trois mille millions deux cent quarante trois mille sept cent vingt quatre",
            53_000_243_724,
        ),
        (
            "cinquante et un million cinq cent soixante dix-huit mille trois cent deux",
            51_578_302,
        ),
        ("quatre-vingt cinq", 85),
        ("quatre-vingt un", 81),
        ('quinze', 15),
        ('soixante quinze mille', 75000),
    )
    for phrase, expected in cases:
        self.assertEqual(text2num(phrase), expected)
def get_speech_dict(self, recognition_result, language):
    """Sort the words of a recognition result into ``self.speech_dict``.

    The whitespace-split words are distributed as follows:

    * ``"keyword"``: ``self.keyword`` (its first occurrence is removed from
      the word list; ``ValueError`` is raised if it is absent);
    * one entry per item of ``self.identifiers_list``, keyed by
      ``identifier.name``, holding the distinct spoken words found in
      ``identifier.content_list`` in spoken order (one occurrence of each is
      removed from the word list);
    * ``"numbers"`` (only when ``self.find_number`` is truthy): values parsed
      with ``text2num(word, language[0:2])`` or, failing that, ``int(word)``;
    * ``"other"``: whatever is left.

    A word that parses to 0 is falsy and therefore stays in ``"other"``.
    Returns ``None``; the result is stored on ``self.speech_dict``.
    """
    print("Start get_speech_dict")
    speech_list = recognition_result.split()
    self.speech_dict.update({"keyword": self.keyword})
    speech_list.remove(self.keyword)

    for identifier in self.identifiers_list:
        vocabulary = set(identifier.content_list)
        # dict.fromkeys de-duplicates while keeping the spoken order, so the
        # stored list no longer depends on set iteration order.
        words_by_identifier = [
            word for word in dict.fromkeys(speech_list) if word in vocabulary
        ]
        for word in words_by_identifier:
            speech_list.remove(word)
        self.speech_dict.update({identifier.name: words_by_identifier})

    if self.find_number:
        numbers_list = []
        remaining = []
        # Build a new list instead of removing from speech_list while
        # iterating over it, which skipped the word following every number.
        for word in speech_list:
            number = None
            try:
                number = text2num(word, language[0:2])
            except ValueError:
                try:
                    number = int(word)
                except ValueError:
                    pass
            if number:
                numbers_list.append(number)
            else:
                remaining.append(word)
        speech_list = remaining
        self.speech_dict.update({"numbers": numbers_list})

    self.speech_dict.update({"other": speech_list})
    print("Speech dictionary {}".format(self.speech_dict))
def test_accent(self):
    """Catalan "milió" is recognised with or without its accent."""
    for spelling in ("un milió", "un milio"):
        self.assertEqual(text2num(spelling, "ca"), 1000000)
    for spelling in ("Un milió", "Un milio"):
        self.assertEqual(alpha2digit(spelling, "ca"), "1000000")
def test_text2num(self):
    """Spanish cardinals from zero up to the 10**12 range, plus decimals."""
    long_text = "novecientos noventa y nueve mil novecientos noventa y nueve millones novecientos noventa y nueve mil novecientos noventa y nueve"
    integers = (
        ("cero", 0),
        ("uno", 1),
        ("nueve", 9),
        ("diez", 10),
        ("once", 11),
        ("diecinueve", 19),
        ("veinte", 20),
        ("veintiuno", 21),
        ("treinta", 30),
        ("treinta y uno", 31),
        ("treinta y dos", 32),
        ("treinta y nueve", 39),
        ("noventa y nueve", 99),
        ("cien", 100),
        ("ciento uno", 101),
        ("doscientos", 200),
        ("doscientos uno", 201),
        ("mil", 1000),
        ("mil uno", 1001),
        ("dos mil", 2000),
        ("dos mil noventa y nueve", 2099),
        ("nueve mil novecientos noventa y nueve", 9999),
        ("novecientos noventa y nueve mil novecientos noventa y nueve", 999999),
        (long_text, 999999999999),
    )
    for phrase, expected in integers:
        self.assertEqual(text2num(phrase, "es"), expected)

    decimals = (
        ("uno coma uno", '1.1'),
        ("uno coma cuatrocientos uno", '1.401'),
    )
    for phrase, expected in decimals:
        self.assertEqual(alpha2digit(phrase, "es"), expected)
    # TODO: alpha2digit("cero coma cinco", "es") should give '0.5'; the
    # assertion is kept disabled.

    more_integers = (
        (
            "cincuenta y tres mil veinte millones doscientos cuarenta y tres mil setecientos veinticuatro",
            53_020_243_724,
        ),
        (
            "cincuenta y un millones quinientos setenta y ocho mil trescientos dos",
            51_578_302,
        ),
        ("ochenta y cinco", 85),
        ("ochenta y uno", 81),
        ("quince", 15),
        ("ciento quince", 115),
        ("setenta y cinco mil", 75000),
        ("mil novecientos veinte", 1920),
    )
    for phrase, expected in more_integers:
        self.assertEqual(text2num(phrase, "es"), expected)
def test_accent(self):
    """Spanish "millón" is recognised with or without its accent."""
    for spelling in ("un millon", "un millón"):
        self.assertEqual(text2num(spelling, "es"), 1000000)
    for spelling in ("Un millon", "Un millón"):
        self.assertEqual(alpha2digit(spelling, "es"), "1000000")
def test_text2num(self):
    """Catalan cardinals from zero up to the 10**12 range, plus decimals."""
    long_text = "nou-cents noranta-nou mil nou-cents noranta-nou milions nou-cents noranta-nou mil nou-cents noranta-nou"
    integers = (
        ("zero", 0),
        ("un", 1),
        ("nou", 9),
        ("deu", 10),
        ("onze", 11),
        ("dinou", 19),
        ("vint", 20),
        ("vint-i-dues", 22),
        ("trenta", 30),
        ("trenta-u", 31),
        ("trenta-dos", 32),
        ("trenta-huit", 38),
        ("noranta-nou", 99),
        ("cent", 100),
        ("cent u", 101),
        ("dues-centes", 200),
        ("dues-centes una", 201),
        ("mil", 1000),
        ("mil un", 1001),
        ("dos mil", 2000),
        ("dos mil noranta-nou", 2099),
        ("nou mil nou-cents noranta-nou", 9999),
        ("nou-cents noranta-nou mil nou-cents noranta-nou", 999999),
        (long_text, 999999999999),
    )
    for phrase, expected in integers:
        self.assertEqual(text2num(phrase, "ca"), expected)

    decimals = (
        ("un coma un", '1,1'),
        ("u coma quatre-cents u", '1,401'),
    )
    for phrase, expected in decimals:
        self.assertEqual(alpha2digit(phrase, "ca"), expected)
    # FIXME: alpha2digit("zero coma cinc", "ca") should give '0,5'; the
    # assertion is kept disabled.

    more_integers = (
        (
            "cinquanta-tres mil vint milions dos-cents quaranta-tres mil set-cents vint-i-quatre",
            53020243724,
        ),
        (
            "cinquanta-un milions cinc-cents setanta-vuit mil tres-cents dos",
            51578302,
        ),
        ("huitanta-cinc", 85),
        ("vuitanta-un", 81),
        ("quinze", 15),
        ("cent quinze", 115),
        ("setanta-cinc mil", 75000),
        ("mil nou-cents vint", 1920),
    )
    for phrase, expected in more_integers:
        self.assertEqual(text2num(phrase, "ca"), expected)
def test_text2num_centuries(self):
    """English years spoken as "<tens> hundred <rest>" parse as one number."""
    spoken_year = "nineteen hundred seventy-three"
    parsed = text2num(spoken_year, "en")
    self.assertEqual(parsed, 1973)
def test_accent(self):
    """Portuguese "milhão" is recognised with or without its tilde."""
    for spelling in ("um milhao", "um milhão"):
        self.assertEqual(text2num(spelling, "pt"), 1000000)
    for spelling in ("Um milhao", "Um milhão"):
        self.assertEqual(alpha2digit(spelling, "pt"), "1000000")
def is_word_number(string):
    """Return True when ``text2num(string, 'en')`` succeeds, False if it raises."""
    try:
        text2num(string, 'en')
    except Exception:
        return False
    else:
        return True
def compareToken(captionTokens, index, cleanTitle, xValueArr, yValueArr, cleanXAxis, cleanYAxis, entities):
    """Try to match one caption token against chart values, axis labels and title.

    The token at ``captionTokens[index]`` is lower-cased, stripped of commas
    and, when it is a spelled-out English number, converted to digits. It is
    then compared, in this order, against:

    1. every ``_``-separated part of each x value and y value (exact match,
       or a numeric comparison delegated to ``numberComparison``);
    2. the non-filler words of the x and y axis labels;
    3. the title tokens (with extra look-ups in ``entities['Subject']`` and
       ``entities['Date']``);
    4. the special cases "united states" / "u.s.".

    Returns whatever ``templateAssigner`` returns for a data match, otherwise
    a two-element list ``[flag, text]`` where ``flag`` is 1 for a match and 0
    for no match.

    Side effects: calls ``adjustDataLabel`` on a data match, and may rewrite
    or pop entries of ``captionTokens`` in the "united states" branch.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed copy of this function; confirm the indentation of
    the title section and of the final ``elif`` against the original file.
    """
    token = captionTokens[index].replace(',', '').lower()
    if is_word_number(token):
        token = str(text2num(token, 'en'))
    # iterate through x and y values
    for xWords, yWords, i in zip(xValueArr, yValueArr, range(0, len(xValueArr))):
        # iterate through values with multiple tokens in them, delimited by '_'
        for xWord in xWords.split('_'):
            xWord = xWord.replace(',', '').lower()
            if is_word_number(xWord):
                xWord = str(text2num(xWord, 'en'))
            if token == xWord:
                adjustDataLabel(1, 'x', i)
                return templateAssigner(token, xValueArr, xWords, i, 'X')
            elif is_number(token) and are_numbers(xValueArr):
                if numberComparison(float(token), captionTokens, index, float(xWord), cleanXAxis):
                    adjustDataLabel(1, 'x', i)
                    return templateAssigner(token, xValueArr, xWords, i, 'X')
        for yWord in yWords.split('_'):
            yWord = yWord.replace(',', '').lower()
            if is_word_number(yWord):
                yWord = str(text2num(yWord, 'en'))
            if token == yWord:
                adjustDataLabel(1, 'y', i)
                return templateAssigner(token, yValueArr, yWords, i, 'Y')
            elif is_number(token) and are_numbers(yValueArr):
                if numberComparison(float(token), captionTokens, index, float(yWord), cleanYAxis):
                    adjustDataLabel(1, 'y', i)
                    return templateAssigner(token, yValueArr, yWords, i, 'Y')
    # check if token in axis names
    # remove filler words from labels
    cleanXArr = [xWord for xWord in cleanXAxis.split('_') if xWord.lower() not in fillers]
    cleanYArr = [yWord for yWord in cleanYAxis.split('_') if yWord.lower() not in fillers]
    for xLabelToken, i in zip(cleanXArr, range(0, len(cleanXArr))):
        xLabelWord = xLabelToken.replace('_', ' ').lower()
        if str(token).lower() == xLabelWord:
            return [1, f'{token}']
        elif str(token).lower() in numbers:  # and xLabelWord.lower() in numbers:
            return [1, f'{token}']
    for yLabelToken, i in zip(cleanYArr, range(0, len(cleanYArr))):
        yLabelWord = yLabelToken.replace('_', ' ').lower()
        if str(token).lower() == yLabelWord:
            return [1, f'{token}']
        elif str(token).lower() in numbers:  # and yLabelWord.lower() in numbers:
            return [1, f'{token}']
    # check if token in title
    for titleToken, i in zip(cleanTitle, range(0, len(cleanTitle))):
        titleWord = titleToken.lower()
        if str(token).lower() == titleWord:
            # Every path below yields the same [1, token] result; the
            # look-ups only decide which return statement is reached.
            for subject, n in zip(entities['Subject'], range(0, len(entities['Subject']))):
                if titleWord in subject.lower():
                    return [1, f'{token}']
            for date, m in zip(entities['Date'], range(0, len(entities['Date']))):
                if titleWord == str(date).lower():
                    if len(entities['Date']) > 1:
                        # cant check for parallels in title
                        if date == max(entities['Date']):
                            return [1, f'{token}']
                        elif date == min(entities['Date']):
                            return [1, f'{token}']
                    return [1, f'{token}']
            return [1, f'{token}']
    # replace unmatched united states tokens with country to reduce bias
    if index < len(captionTokens) - 1:
        nextToken = captionTokens[index + 1]
        if token.lower() == 'united' and nextToken.lower() == 'states':
            if 'U.S.' in cleanTitle:
                usIndex = cleanTitle.index('U.S.')
                captionTokens[index] = f'{token}'
                # drop the following "states" token so the pair counts once
                captionTokens.pop(index + 1)
                return [1, f'{token}']
            elif 'American' in cleanTitle:
                usIndex = cleanTitle.index('American')
                captionTokens[index] = f'{token}'
                captionTokens.pop(index + 1)
                return [1, f'{token}']
            else:
                captionTokens.pop(index + 1)
                captionTokens[index] = 'country'
                return [0, 'country']
        elif token.lower() == 'u.s.' or token.lower() == 'u.s':
            if 'U.S.' in cleanTitle:
                usIndex = cleanTitle.index('U.S.')
                captionTokens[index] = f'{token}'
                return [1, f'{token}']
            elif 'United' in cleanTitle and 'States' in cleanTitle:
                usIndex = cleanTitle.index('States')
                captionTokens[index] = f'{token}'
                return [1, f'{token}']
    # nothing matched: report the (normalised) token unchanged
    return [0, token]
def compareMultiColumnToken(captionTokens, index, cleanTitle, colData, cleanCols, entities):
    """Try to match one caption token against multi-column data, labels and title.

    The token at ``captionTokens[index]`` is lower-cased, stripped of commas
    and, when it is a spelled-out English number, converted to digits. It is
    then compared against the non-filler ``_``-separated parts of every cell
    of every column, the non-filler words of each column label, the title
    tokens, and finally the special cases "united states" / "u.s.".

    Returns whatever ``multiColumnTemplater`` returns for a data match,
    otherwise a two-element list ``[flag, text]`` where ``flag`` is 1 for a
    match and 0 for no match. In the title-based "U.S." branches ``text`` is
    a ``templateTitle[<index>]`` placeholder.

    Side effects: calls ``adjustMultiColumnLabel`` on a data match, and may
    rewrite or pop entries of ``captionTokens`` in the "united states" /
    "u.s." branches.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed copy of this function; in particular confirm that
    the column-label check belongs inside the per-column loop.
    """
    token = captionTokens[index].replace(',', '').lower()
    if is_word_number(token):
        token = str(text2num(token, 'en'))
    # iterate through x and y values
    for column, columnLabel, i in zip(colData, cleanCols, range(len(colData))):
        for cell, n in zip(column, range(len(column))):
            # iterate through values with multiple tokens in them, delimited by '_'
            cleanValues = [value for value in cell.split('_') if value.lower() not in fillers]
            for words in cleanValues:
                valueWord = words.replace(',', '').lower()
                if is_word_number(valueWord):
                    valueWord = str(text2num(valueWord, 'en'))
                if token == valueWord:
                    adjustMultiColumnLabel(1, n, i)
                    return multiColumnTemplater(token, column, valueWord, n, i)
                elif is_number(token) and are_numbers(column):
                    if numberComparison(float(token), captionTokens, index, float(valueWord), columnLabel):
                        adjustMultiColumnLabel(1, n, i)
                        return multiColumnTemplater(token, column, valueWord, n, i)
        # check if token in axis names
        # remove filler words from labels
        cleanLabels = [word for word in columnLabel.split('_') if word.lower() not in fillers]
        for labelToken, m in zip(cleanLabels, range(len(cleanLabels))):
            labelWord = labelToken.replace('_', ' ').lower()
            if str(token).lower() == labelWord:
                return [1, f'{token}']
            elif str(token).lower() in numbers:
                return [1, f'{token}']
    # check if token in title
    for titleToken, i in zip(cleanTitle, range(0, len(cleanTitle))):
        titleWord = titleToken.lower()
        if str(token).lower() == titleWord:
            # Every path below yields the same [1, token] result; the
            # look-ups only decide which return statement is reached.
            for subject, n in zip(entities['Subject'], range(0, len(entities['Subject']))):
                if titleWord in subject.lower():
                    return [1, f'{token}']
            for date, m in zip(entities['Date'], range(0, len(entities['Date']))):
                if titleWord == str(date).lower():
                    if len(entities['Date']) > 1:
                        # cant check for parallels in title
                        if date == max(entities['Date']):
                            return [1, f'{token}']
                        elif date == min(entities['Date']):
                            return [1, f'{token}']
                    return [1, f'{token}']
            return [1, f'{token}']
    # replace unmatched united states tokens with country to reduce bias
    if index < len(captionTokens) - 1:
        nextToken = captionTokens[index + 1]
        if token.lower() == 'united' and nextToken.lower() == 'states':
            if 'U.S.' in cleanTitle:
                usIndex = cleanTitle.index('U.S.')
                captionTokens[index] = f'{token}'
                # drop the following "states" token so the pair counts once
                captionTokens.pop(index + 1)
                return [1, f'{token}']
            elif 'American' in cleanTitle:
                usIndex = cleanTitle.index('American')
                captionTokens[index] = f'templateTitle[{usIndex}]'
                captionTokens.pop(index + 1)
                return [1, f'templateTitle[{usIndex}]']
            else:
                captionTokens.pop(index + 1)
                captionTokens[index] = 'country'
                return [0, 'country']
        elif token.lower() == 'u.s.' or token.lower() == 'u.s':
            if 'U.S.' in cleanTitle:
                usIndex = cleanTitle.index('U.S.')
                captionTokens[index] = f'templateTitle[{usIndex}]'
                return [1, f'templateTitle[{usIndex}]']
            elif 'United' in cleanTitle and 'States' in cleanTitle:
                usIndex = cleanTitle.index('States')
                captionTokens[index] = f'templateTitle[{usIndex}]'
                return [1, f'templateTitle[{usIndex}]']
    # nothing matched: report the (normalised) token unchanged
    return [0, token]
records = box_preproc2(entry) src_instance = " ".join(records) all_ents, players, teams, cities, total_players, total_teams, total_cities = get_ents(entry) home_players, vis_players = get_player_idxs(entry) box_score = entry["box_score"] player_name_map = {y: x for x, y in box_score['PLAYER_NAME'].iteritems()} home_line_score = entry["home_line"] vis_line_score = entry["vis_line"] summary = entry['summary'] else: args = line.split("|") name = args[0] record_type = args[2].strip() value = args[1] if not value.isdigit(): value = text2num(value) if record_type.startswith("PLAYER-"): record_type = record_type[len("PLAYER-"):] name = name.replace("UNK","").strip() if name == 'Los Angeles' and 'LA' in total_cities: name = 'LA' if name in total_players: pass elif name in total_teams: pass elif name in players: name = resolve_name(name, total_players) elif name == 'Los Angeles Clippers' and 'LA Clippers' in total_teams: name = 'LA Clippers' elif name in teams:
def test_text2num(self):
    """Portuguese cardinals from zero up to 999999, plus decimals."""
    integers = (
        ("zero", 0),
        ("um", 1),
        ("oito", 8),
        ("dez", 10),
        ("onze", 11),
        ("dezanove", 19),
        ("vinte", 20),
        ("vinte e um", 21),
        ("trinta", 30),
        ("trinta e um", 31),
        ("trinta e três", 33),
        ("trinta e nove", 39),
        ("noventa e nove", 99),
        ("cem", 100),
        ("cento e um", 101),
        ("duzentos", 200),
        ("duzentos e um", 201),
        ("mil", 1000),
        ("mil e um", 1001),
        ("dois mil", 2000),
        ("dois mil noventa e nove", 2099),
        ("nove mil novecentos noventa e nove", 9999),
        ("novecentos noventa e nove mil novecentos noventa e nove", 999999),
    )
    for phrase, expected in integers:
        self.assertEqual(text2num(phrase, "pt"), expected)

    decimals = (
        ("um vírgula um", "1,1"),
        ("um vírgula quatrocentos e um", "1,401"),
    )
    for phrase, expected in decimals:
        self.assertEqual(alpha2digit(phrase, "pt"), expected)
    # Known failure, kept disabled: alpha2digit("zero vírgula cinco", "pt")
    # should give "0,5".
    # TODO: the two large-number cases (53_020_243_724 and 51_578_302) are
    # still disabled; their phrases were left in Spanish and need Portuguese
    # wording before they can be enabled.

    more_integers = (
        ("oitenta e cinco", 85),
        ("oitenta e um", 81),
        ("quinze", 15),
        ("cento quinze", 115),
        ("setenta e cinco mil", 75000),
        ("mil novecentos vinte", 1920),
    )
    for phrase, expected in more_integers:
        self.assertEqual(text2num(phrase, "pt"), expected)
def translate_number_string(string):
    """Probe ``string`` with ``text2num(string, 'en')``.

    Returns 1 when the call raises, and ``None`` when it succeeds.

    NOTE(review): the parsed value is discarded on success; confirm with
    callers whether it was meant to be returned.
    """
    try:
        text2num(string, 'en')
    except Exception:
        return 1
    return None
def to_inch(input):
    """Convert a free-text length such as "5 feet 3 inches" to inches.

    The text is split on single spaces. "half"/"quarter" become 0.5/0.25,
    spelled-out numbers are converted with ``text2num(..., "en")``, digit
    strings with ``int``, and the indefinite articles "a"/"an" are dropped.
    A number directly followed by a feet, meter or inch unit contributes to
    the total; "X or Y" produces one result per alternative.

    Args:
        input: the text to parse (the name shadows the builtin but is kept
            for callers that pass it by keyword).

    Returns:
        A list of numbers (inches), or ``[None]`` when nothing usable was
        found or the total is zero.
    """
    meter_to_inch = 0  # inches calculated from meters
    feet_to_inch = 0   # inches calculated from feet
    inches = []        # inches stated explicitly (not calculated)
    result = []
    feet_ind = 0.0     # feet total for the case without explicit inches

    # First pass: turn words into numbers where possible.
    parsed = []
    for item in input.strip().split(" "):
        if item in ["half", "Half"]:
            parsed.append(0.5)
        elif item in ["quarter", "Quarter"]:
            parsed.append(0.25)
        else:
            try:
                parsed.append(text2num(item, "en"))
            except Exception:
                # Drop indefinite articles; the former `item != ("an" or "a")`
                # only ever compared against "an".
                if item not in ("an", "a"):
                    parsed.append(item)

    # Second pass: digit strings that text2num did not accept become ints.
    values = []
    for item in parsed:
        if isinstance(item, str):
            try:
                item = int(item)
            except ValueError:
                pass
        values.append(item)

    # Deeper check: look at each value together with its neighbours.
    # NOTE: for i == 0, values[i - 1] is the LAST element (negative index).
    for i in range(len(values)):
        # feet
        try:
            if values[i + 1] in ["feet", "foot", "ft"] and type(values[i - 1]) != int:
                feet_to_inch = values[i] * 12
        except Exception:
            pass
        # meters, converted to inches
        try:
            if values[i + 1] in ["meter", "meters", "m"] and type(values[i - 1]) != int:
                meter_to_inch = values[i] * 39.3700787
        except Exception:
            pass
        # individual inches (case 1 - unit is a separate word)
        try:
            if values[i + 1] in ["inch", "inches", "in"] and type(values[i - 1]) != int:
                inches.append(values[i])
        except Exception:
            pass
        # individual inches (case 2 - unit glued to the number, e.g. "3in")
        try:
            if values[i][-2:] == "in":
                inches.append(int(values[i][:-2]))
        except Exception:
            pass
        # individual inches in a range ("3 or 4 inches")
        try:
            if values[i + 1] == "or":
                inches.append(int(values[i]))
        except Exception:
            pass

    # Total of the calculated parts, added to every explicit inch value.
    feet = feet_to_inch + meter_to_inch
    if inches:
        for inch in inches:
            result.append(inch + feet)
    else:
        result.append(feet)

    # Case: no explicit inches anywhere in the input.
    if "in" not in input:
        if "or" in values:
            # "X or Y" with no unit word: both sides are treated as feet.
            z = values.index("or")
            alternatives = [values[z - 1] * 12, values[z + 1] * 12]
            # str * 12 repeats the string, so non-numeric sides are dropped here.
            alternatives = [item for item in alternatives if not isinstance(item, str)]
            return alternatives or [None]

        addends = 0
        for x in range(len(values)):
            try:
                if values[x - 1] not in ["foot", "feet"]:
                    feet_ind = feet_ind + values[x]
                    addends = addends + 1
            except Exception:
                # non-numeric values cannot be added; skip them
                pass

        if addends == 2:
            feet_ind = feet_ind * 12
            if feet_ind.is_integer():  # no decimals: report an int
                feet_ind = int(feet_ind)
            return [feet_ind]
        if addends > 2:  # input string seems to be incorrect
            return [None]
        # with fewer than two addends fall through to the general result

    # return none if bad input or zero result
    if len(result) == 0 or (len(result) == 1 and result[0] == 0):
        return [None]

    numeric = [item for item in result if not isinstance(item, str)]
    return numeric or [None]
def test_text2num_centuries(self):
    """French years spoken as "<tens> cent <rest>" parse as one number."""
    spoken_year = 'dix-neuf cent soixante-treize'
    parsed = text2num(spoken_year)
    self.assertEqual(parsed, 1973)