def test_alpha2digit_decimals(self):
    # Catalan: "coma" between number words is expected to produce a
    # comma-separated decimal in the transcription.
    spoken = (
        "dotze coma noranta-nou, cent vint coma zero cinc,"
        " u coma dos-cents trenta-sis, un coma dos tres sis."
    )
    self.assertEqual(
        alpha2digit(spoken, "ca"),
        "12,99, 120,05, 1,236, 1,2 3 6.",
    )
    # A leading "coma" with no integer part is expected to yield "0,".
    self.assertEqual(alpha2digit("coma quinze", "ca"), "0,15")
def test_one_as_noun_or_article(self):
    # Catalan: "un" used as an article must stay a word, while "un"/"u"
    # inside a run of numbers must become the digit 1.
    cases = (
        (
            "Un moment! trenta-un gats. Un dos tres quatre!",
            "Un moment! 31 gats. 1 2 3 4!",
        ),
        # End of segment
        ("Ni un. U un. Trenta-un", "Ni un. 1 1. 31"),
    )
    for phrase, wanted in cases:
        self.assertEqual(alpha2digit(phrase, "ca"), wanted)
def test_alpha2digit_formal(self):
    # Catalan: digit groups dictated phone-number style, with an optional
    # leading "més" rendered as "+" and leading zeros preserved.
    with_prefix = "més trenta-tres nou seixanta zero sis dotze vint-i-u"
    self.assertEqual(alpha2digit(with_prefix, "ca"), "+33 9 60 06 12 21")

    leading_zero = "zero nou seixanta zero sis dotze vint-i-u"
    self.assertEqual(alpha2digit(leading_zero, "ca"), "09 60 06 12 21")
def test_one_as_noun_or_article(self):
    # Portuguese: "um" as an article must stay a word, while "um" inside
    # a run of numbers must become the digit 1.
    cases = (
        (
            "Um momento por favor! trinta e um gatos. Um dois três quatro!",
            "Um momento por favor! 31 gatos. 1 2 3 4!",
        ),
        # End of segment
        ("Nem um. Um um. Trinta e um", "Nem um. 1 1. 31"),
    )
    for phrase, wanted in cases:
        self.assertEqual(alpha2digit(phrase, "pt"), wanted)
def test_alpha2digit_formal(self):
    # French: digit groups dictated phone-number style, with an optional
    # leading "plus" rendered as "+" and leading zeros preserved.
    # NOTE(review): no language argument is passed here; this relies on the
    # alpha2digit in scope accepting the text alone -- confirm.
    cases = (
        (
            "plus trente-trois neuf soixante zéro six douze vingt et un",
            "+33 9 60 06 12 21",
        ),
        (
            "zéro neuf soixante zéro six douze vingt et un",
            "09 60 06 12 21",
        ),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken), wanted)
def test_alpha2digit_decimals(self):
    # Spanish: "coma" between number words is expected to be transcribed
    # as a decimal number written with a period.
    spoken = (
        "doce coma noventa y nueve, ciento veinte coma cero cinco,"
        " uno coma doscientos treinta y seis, uno coma dos tres seis."
    )
    self.assertEqual(
        alpha2digit(spoken, "es"),
        "12.99, 120.05, 1.236, 1.2 3 6.",
    )
    # A leading "coma" with no integer part is expected to yield "0.".
    self.assertEqual(alpha2digit("coma quince", "es"), "0.15")
def test_alpha2digit_zero(self):
    # English: "zero" and the letter "o" are both expected to start a new
    # digit group with a leading 0 after a complete number.
    wanted = "13000 090"
    for phrase in (
        "thirteen thousand zero ninety",
        "thirteen thousand o ninety",
    ):
        self.assertEqual(alpha2digit(phrase, "en"), wanted)

    # A lone "zero" is transcribed too.
    self.assertEqual(alpha2digit("zero", "en"), "0")
def test_alpha2digit_formal(self):
    # Spanish: digit groups dictated phone-number style, with an optional
    # leading "mas" rendered as "+" and leading zeros preserved.
    with_prefix = "mas treinta y tres nueve sesenta cero seis doce veintiuno"
    self.assertEqual(alpha2digit(with_prefix, "es"), "+33 9 60 06 12 21")

    leading_zero = "cero nueve sesenta cero seis doce veintiuno"
    self.assertEqual(alpha2digit(leading_zero, "es"), "09 60 06 12 21")
def test_alpha2digit_decimals(self):
    # Portuguese: "vírgula" between number words is expected to produce a
    # comma-separated decimal in the transcription.
    spoken = (
        "doze vírgula noventa e nove, cento e vinte vírgula zero cinco, "
        "um vírgula duzentos e trinta e seis, um vírgula dois três seis."
    )
    self.assertEqual(
        alpha2digit(spoken, "pt"),
        "12,99, 120,05, 1,236, 1,2 3 6.",
    )
    # A leading "vírgula" with no integer part is expected to yield "0,".
    self.assertEqual(alpha2digit("vírgula quinze", "pt"), "0,15")
def test_alpha2digit_formal(self):
    # Portuguese: digit groups dictated phone-number style, with an
    # optional leading "mais" rendered as "+" and leading zeros preserved.
    cases = (
        (
            "mais trinta e três nove sessenta zero seis doze vinte e um",
            "+33 9 60 06 12 21",
        ),
        (
            "zero nove sessenta zero seis doze vinte e um",
            "09 60 06 12 21",
        ),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "pt"), wanted)
def test_one_as_noun_or_article(self):
    # Spanish: "un"/"uno" as an article or pronoun must stay a word, while
    # inside a run of numbers it must become the digit 1.
    first_phrase = "Un momento por favor! treinta y un gatos. Uno dos tres cuatro!"
    self.assertEqual(
        alpha2digit(first_phrase, "es"),
        "Un momento por favor! 31 gatos. 1 2 3 4!",
    )

    # End of segment
    second_phrase = "Ni uno. Uno uno. Treinta y uno"
    self.assertEqual(alpha2digit(second_phrase, "es"), "Ni uno. 1 1. 31")
def test_alpha2digit_decimals(self):
    # English: "point" between number words is expected to produce a
    # decimal; "o" is accepted as a spoken zero in the fractional part.
    spoken = (
        "twelve point ninety-nine, one hundred twenty point zero five,"
        " one hundred twenty point o five, one point two hundred thirty-six."
    )
    self.assertEqual(
        alpha2digit(spoken, "en"),
        "12.99, 120.05, 120.05, 1.236.",
    )
    # A leading "point" with no integer part is expected to yield "0.".
    self.assertEqual(alpha2digit("point fifteen", "en"), "0.15")
def test_text2num(self):
    # Spanish: table-driven checks of text2num on isolated number phrases,
    # plus a couple of alpha2digit decimal checks in between.
    leading_cases = (
        ("cero", 0),
        ("uno", 1),
        ("nueve", 9),
        ("diez", 10),
        ("once", 11),
        ("diecinueve", 19),
        ("veinte", 20),
        ("veintiuno", 21),
        ("treinta", 30),
        ("treinta y uno", 31),
        ("treinta y dos", 32),
        ("treinta y nueve", 39),
        ("noventa y nueve", 99),
        ("cien", 100),
        ("ciento uno", 101),
        ("doscientos", 200),
        ("doscientos uno", 201),
        ("mil", 1000),
        ("mil uno", 1001),
        ("dos mil", 2000),
        ("dos mil noventa y nueve", 2099),
        ("nueve mil novecientos noventa y nueve", 9999),
        ("novecientos noventa y nueve mil novecientos noventa y nueve", 999999),
        (
            "novecientos noventa y nueve mil novecientos noventa y nueve millones novecientos noventa y nueve mil novecientos noventa y nueve",
            999999999999,
        ),
    )
    for words, value in leading_cases:
        self.assertEqual(text2num(words, "es"), value)

    decimal_cases = (
        ("uno coma uno", "1.1"),
        ("uno coma cuatrocientos uno", "1.401"),
    )
    for words, rendered in decimal_cases:
        self.assertEqual(alpha2digit(words, "es"), rendered)

    # TODO:
    # self.assertEqual(alpha2digit("cero coma cinco", "es"), '0.5')

    trailing_cases = (
        (
            "cincuenta y tres mil veinte millones doscientos cuarenta y tres mil setecientos veinticuatro",
            53_020_243_724,
        ),
        (
            "cincuenta y un millones quinientos setenta y ocho mil trescientos dos",
            51_578_302,
        ),
        ("ochenta y cinco", 85),
        ("ochenta y uno", 81),
        ("quince", 15),
        ("ciento quince", 115),
        ("setenta y cinco mil", 75000),
        ("mil novecientos veinte", 1920),
    )
    for words, value in trailing_cases:
        self.assertEqual(text2num(words, "es"), value)
def test_one_as_noun_or_article(self):
    # English: "one" used as a pronoun or determiner must stay a word,
    # while "one" inside a run of numbers must become the digit 1.
    mixed = "This is the one I'm looking for. One moment please! Twenty one cats. One two three four!"
    self.assertEqual(
        alpha2digit(mixed, "en"),
        "This is the one I'm looking for. One moment please! 21 cats. 1 2 3 4!",
    )

    # Sentences with only pronoun uses must come back untouched.
    untouched = "No one is innocent. Another one bites the dust."
    self.assertEqual(alpha2digit(untouched, "en"), untouched)

    # End of segment
    at_segment_end = "No one. Another one. One one. Twenty one"
    self.assertEqual(
        alpha2digit(at_segment_end, "en"),
        "No one. Another one. 1 1. 21",
    )
def test_alpha2digit_ordinals(self):
    # English ordinals: the expected strings keep "first", "second" and
    # "third" as words and render the other ordinals as digits + suffix.
    cases = (
        (
            "Fifth third second twenty-first hundredth one thousand two hundred thirtieth twenty-fifth thirty-eighth forty-ninth.",
            "5th third second 21st 100th 1230th 25th 38th 49th.",
        ),
        (
            "first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth.",
            "first, second, third, 4th, 5th, 6th, 7th, 8th, 9th, 10th.",
        ),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "en"), wanted)
def test_relaxed(self):
    # French, relaxed mode: "quatre vingt quinze" without hyphens is
    # expected to merge into 95 unless punctuation separates the words.
    # NOTE(review): no language argument is passed here; this relies on the
    # alpha2digit in scope accepting the text alone -- confirm.
    cases = (
        ("un deux trois quatre vingt quinze.", "1 2 3 95."),
        ("Quatre, vingt, quinze.", "4, 20, 15."),
        ("trente-quatre = trente quatre", "34 = 34"),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, relaxed=True), wanted)
def test_relaxed(self):
    # Catalan, relaxed mode: tens and units written without a hyphen are
    # expected to merge unless punctuation separates the words.
    cases = (
        ("un dos tres quatre trenta cinc.", "1 2 3 4 35."),
        ("un dues tres quatre vint, cinc.", "1 2 3 4 20, 5."),
        ("trenta-quatre == trenta quatre", "34 == 34"),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "ca", relaxed=True), wanted)
def test_alpha2digit_signed(self):
    # French: "plus"/"moins" directly before a number are expected to be
    # rendered as a sign with the default settings.
    self.assertEqual(
        alpha2digit(
            "Il fait plus vingt degrés à l'intérieur et moins quinze à l'extérieur.",
            "fr",
        ),
        "Il fait +20 degrés à l'intérieur et -15 à l'extérieur.",
    )

    # With signed=False the words must be kept; with signed=True the
    # result must differ from that unsigned rendering.
    phrase = "J'en ai vu au moins trois dans le jardin, et non plus deux."
    unsigned_rendering = "J'en ai vu au moins 3 dans le jardin, et non plus 2."
    self.assertEqual(
        alpha2digit(phrase, "fr", signed=False), unsigned_rendering
    )
    self.assertNotEqual(
        alpha2digit(phrase, "fr", signed=True), unsigned_rendering
    )
def test_alpha2digit_zero(self):
    # French: "zéro" after a complete number is expected to start a new
    # digit group that keeps its leading 0.
    cases = (
        ("treize mille zéro quatre-vingt-dix", "13000 090"),
        ("treize mille zéro quatre-vingts", "13000 080"),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "fr"), wanted)

    # Disabled check kept from the original:
    # source = "Votre service est zéro !"
    # self.assertEqual(alpha2digit(source, "fr"), source)

    # A lone "zéro" is transcribed too.
    self.assertEqual(alpha2digit("zéro", "fr"), "0")
def test_relaxed(self):
    # Spanish, relaxed mode: tens and units written without "y" are
    # expected to merge unless punctuation separates the words.
    cases = (
        ("un dos tres cuatro treinta cinco.", "1 2 3 4 35."),
        ("un dos tres cuatro veinte, cinco.", "1 2 3 4 20, 5."),
        ("treinta y cuatro = treinta cuatro", "34 = 34"),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "es", relaxed=True), wanted)
def test_relaxed(self):
    # Portuguese, relaxed mode: tens and units are expected to merge with
    # or without "e", unless punctuation separates the words.
    cases = (
        ("um dois três quatro trinta e cinco.", "1 2 3 4 35."),
        ("um dois três quatro vinte, cinco.", "1 2 3 4 20, 5."),
        ("trinta e quatro = trinta quatro", "34 = 34"),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "pt", relaxed=True), wanted)
def test_relaxed(self):
    # English, relaxed mode: tens and units written without a hyphen are
    # expected to merge unless punctuation separates the words.
    cases = (
        ("one two three four twenty five.", "1 2 3 4 25."),
        ("one two three four twenty, five.", "1 2 3 4 20, 5."),
        ("thirty-four = thirty four", "34 = 34"),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "en", relaxed=True), wanted)
def test_text2num(self):
    # Portuguese: table-driven checks of text2num on isolated number
    # phrases, plus a couple of alpha2digit decimal checks in between.
    leading_cases = (
        ("zero", 0),
        ("um", 1),
        ("oito", 8),
        ("dez", 10),
        ("onze", 11),
        ("dezanove", 19),
        ("vinte", 20),
        ("vinte e um", 21),
        ("trinta", 30),
        ("trinta e um", 31),
        ("trinta e três", 33),
        ("trinta e nove", 39),
        ("noventa e nove", 99),
        ("cem", 100),
        ("cento e um", 101),
        ("duzentos", 200),
        ("duzentos e um", 201),
        ("mil", 1000),
        ("mil e um", 1001),
        ("dois mil", 2000),
        ("dois mil noventa e nove", 2099),
        ("nove mil novecentos noventa e nove", 9999),
        ("novecentos noventa e nove mil novecentos noventa e nove", 999999),
    )
    for words, value in leading_cases:
        self.assertEqual(text2num(words, "pt"), value)

    decimal_cases = (
        ("um vírgula um", "1,1"),
        ("um vírgula quatrocentos e um", "1,401"),
    )
    for words, rendered in decimal_cases:
        self.assertEqual(alpha2digit(words, "pt"), rendered)

    # fail
    # self.assertEqual(alpha2digit("zero vírgula cinco", "pt"), "0,5")

    # Disabled checks kept from the original (the phrases are Spanish):
    # test1 = "cincuenta y tres mil veinte millones doscientos cuarenta y tres mil setecientos veinticuatro"
    # self.assertEqual(text2num(test1, "pt"), 53_020_243_724)

    # test2 = (
    #     "cincuenta y un millones quinientos setenta y ocho mil trescientos dos"
    # )
    # self.assertEqual(text2num(test2, "pt"), 51_578_302)

    trailing_cases = (
        ("oitenta e cinco", 85),
        ("oitenta e um", 81),
        ("quinze", 15),
        ("cento quinze", 115),
        ("setenta e cinco mil", 75000),
        ("mil novecentos vinte", 1920),
    )
    for words, value in trailing_cases:
        self.assertEqual(text2num(words, "pt"), value)
def analyze_integer_response(self):
    """Compare the transcript of an answer with the integer stored in SurveyCTO.

    Reads ``self.surveycto_answer`` (the recorded value) and
    ``self.transcript_of_answer_only`` (the transcribed answer text).

    Returns:
        A ``(verdict, reason)`` tuple. ``verdict`` is ``True`` when the
        transcript supports the recorded value, ``False`` when the recorded
        value is not an integer or the last number found in the transcript
        differs from it, and ``None`` when nothing can be concluded.
        ``reason`` is a human-readable explanation of the verdict.
    """
    import math

    def to_int(value):
        # int() raises ValueError/TypeError on bad input and OverflowError
        # on infinities; anything else is a real bug and must propagate.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def to_finite_float(text):
        # "nan", "inf" and "infinity" parse as floats but cannot be turned
        # into an int, so they are treated as non-numeric tokens.
        try:
            number = float(text)
        except (TypeError, ValueError):
            return None
        return number if math.isfinite(number) else None

    recorded = to_int(self.surveycto_answer)
    if recorded is None:
        return False, f'Surveycto answer not a num: {self.surveycto_answer}'

    transcript = self.transcript_of_answer_only
    lowered = transcript.lower()
    lowered_words = lowered.split(" ")

    # Case: the correct integer was recorded.
    # First check whether the number appears as digits.
    # NOTE(review): this is a substring test, so "1" also matches "10".
    if str(recorded) in transcript:
        return True, f'Found {self.surveycto_answer} in transcript'

    # Then check whether the number appears spelled out in words.
    # NOTE(review): 'esp' is not an ISO code; presumably num2words falls
    # back to 'es' -- confirm against the installed version.
    recorded_in_words = num2words(self.surveycto_answer, lang='esp')
    if recorded_in_words in lowered:
        return True, f"Found {recorded_in_words} in transcript"

    # Words that stand for zero.
    if recorded == 0:
        for zero_word in ('ningun', 'no'):
            if zero_word in lowered_words:
                return True, f"'{zero_word}' is associated to 0 and apppears in response"

    # Collect every token that alpha2digit turns into a number, so the
    # last one can be compared with the recorded value.
    numeric_values_in_transcript = []
    for word in transcript.split(" "):
        number = to_finite_float(alpha2digit(word, "es"))
        if number is not None:
            numeric_values_in_transcript.append(int(number))

    if numeric_values_in_transcript:
        last_value = numeric_values_in_transcript[-1]
        if recorded != last_value:
            return False, f'Value {last_value} detected in answer, different to {recorded}'
        return True, f'Value {last_value} detected in answer'

    # Background noise reported in the transcription.
    if 'background' in lowered_words:
        return None, "background noise in transcription, can't conclude"

    return None, 'Could not conclude'
def test_text2num(self):
    # Catalan: table-driven checks of text2num on isolated number phrases
    # (including dialectal forms such as "huit"/"vuit"), plus a couple of
    # alpha2digit decimal checks in between.
    leading_cases = (
        ("zero", 0),
        ("un", 1),
        ("nou", 9),
        ("deu", 10),
        ("onze", 11),
        ("dinou", 19),
        ("vint", 20),
        ("vint-i-dues", 22),
        ("trenta", 30),
        ("trenta-u", 31),
        ("trenta-dos", 32),
        ("trenta-huit", 38),
        ("noranta-nou", 99),
        ("cent", 100),
        ("cent u", 101),
        ("dues-centes", 200),
        ("dues-centes una", 201),
        ("mil", 1000),
        ("mil un", 1001),
        ("dos mil", 2000),
        ("dos mil noranta-nou", 2099),
        ("nou mil nou-cents noranta-nou", 9999),
        ("nou-cents noranta-nou mil nou-cents noranta-nou", 999999),
        (
            "nou-cents noranta-nou mil nou-cents noranta-nou milions nou-cents noranta-nou mil nou-cents noranta-nou",
            999999999999,
        ),
    )
    for words, value in leading_cases:
        self.assertEqual(text2num(words, "ca"), value)

    decimal_cases = (
        ("un coma un", "1,1"),
        ("u coma quatre-cents u", "1,401"),
    )
    for words, rendered in decimal_cases:
        self.assertEqual(alpha2digit(words, "ca"), rendered)

    # FIXME: self.assertEqual(alpha2digit("zero coma cinc", "ca"), '0,5')

    trailing_cases = (
        (
            "cinquanta-tres mil vint milions dos-cents quaranta-tres mil set-cents vint-i-quatre",
            53020243724,
        ),
        (
            "cinquanta-un milions cinc-cents setanta-vuit mil tres-cents dos",
            51578302,
        ),
        ("huitanta-cinc", 85),
        ("vuitanta-un", 81),
        ("quinze", 15),
        ("cent quinze", 115),
        ("setanta-cinc mil", 75000),
        ("mil nou-cents vint", 1920),
    )
    for words, value in trailing_cases:
        self.assertEqual(text2num(words, "ca"), value)
def test_alpha2digit_formal(self):
    # English: digit groups dictated phone-number style; "zero" and the
    # letter "o" are both expected to read as 0 inside such a sequence.
    cases = (
        (
            "plus thirty-three nine sixty zero six twelve twenty-one",
            "+33 9 60 06 12 21",
        ),
        (
            "plus thirty-three nine sixty o six twelve twenty-one",
            "+33 9 60 06 12 21",
        ),
        (
            "zero nine sixty zero six twelve twenty-one",
            "09 60 06 12 21",
        ),
        (
            "o nine sixty o six twelve twenty-one",
            "09 60 06 12 21",
        ),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "en"), wanted)

    # A spelled-out name containing the letter "o" must come back untouched.
    spelled_name = "My name is o s c a r."
    self.assertEqual(alpha2digit(spelled_name, "en"), spelled_name)
def test_alpha2digit_integers(self):
    # Spanish: integers embedded in running text are replaced by digits
    # while the surrounding words and punctuation are kept.
    cases = (
        (
            "veinticinco vacas, doce gallinas y ciento veinticinco kg de patatas.",
            "25 vacas, 12 gallinas y 125 kg de patatas.",
        ),
        ("mil doscientos sesenta y seis dolares.", "1266 dolares."),
        ("un dos tres cuatro veinte quince", "1 2 3 4 20 15"),
        ("veintiuno, treinta y uno.", "21, 31."),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "es"), wanted)
def test_alpha2digit_integers(self):
    # Catalan: integers embedded in running text are replaced by digits
    # while the surrounding words and punctuation are kept.
    cases = (
        (
            "vint-i-cinc vaques, dotze gallines i cent vint-i-cinc kg de patates.",
            "25 vaques, 12 gallines i 125 kg de patates.",
        ),
        ("mil dos-cents seixanta-sis dòlars.", "1266 dòlars."),
        ("un dos tres quatre vint quinze", "1 2 3 4 20 15"),
        ("vint-i-un, trenta-un.", "21, 31."),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "ca"), wanted)
def test_alpha2digit_integers(self):
    # Portuguese: integers embedded in running text are replaced by digits
    # while the surrounding words and punctuation are kept.
    cases = (
        (
            "vinte cinco vacas, doze galinhas e cento vinte e cinco kg de batatas.",
            "25 vacas, 12 galinhas e 125 kg de batatas.",
        ),
        ("mil duzentos sessenta e seis dólares.", "1266 dólares."),
        ("um dois três quatro vinte quinze", "1 2 3 4 20 15"),
        ("vinte e um, trinta e um.", "21, 31."),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "pt"), wanted)
def test_alpha2digit_integers(self):
    # English: integers embedded in running text are replaced by digits
    # while the surrounding words and punctuation are kept.
    cases = (
        (
            "twenty-five cows, twelve chickens and one hundred twenty five kg of potatoes.",
            "25 cows, 12 chickens and 125 kg of potatoes.",
        ),
        ("one thousand two hundred sixty-six dollars.", "1266 dollars."),
        ("one two three four twenty fifteen", "1 2 3 4 20 15"),
        ("twenty-one, thirty-one.", "21, 31."),
    )
    for spoken, wanted in cases:
        self.assertEqual(alpha2digit(spoken, "en"), wanted)