def test_spaces_fr(self):
    """Check normalize() with lang="fr-fr" on space-padded phrases.

    Every expected string has no leading or trailing space.
    """
    cases = (
        (" c'est le test", "c'est test"),
        (" c'est le test ", "c'est test"),
        (" c'est un test", "c'est 1 test"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang="fr-fr"), expected)
def test_agressive_pruning_pt(self):
    """Check normalize() with lang="pt" on phrases whose filler words go.

    The expected strings omit words such as "esta", "o" and "nesse" and
    render "uma"/"um" as the digit 1.
    """
    cases = (
        ("uma palavra", "1 palavra"),
        ("esta palavra um", "palavra 1"),
        ("o homem batia-lhe", "homem batia"),
        ("quem disse asneira nesse dia", "quem disse asneira dia"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang="pt"), expected)
def test_spaces(self):
    """Check normalize() on space-padded phrases, with no lang argument.

    NOTE(review): the inputs look Czech, so this presumably relies on a
    default language configured outside this method -- confirm.
    """
    cases = (
        (" tohle je test", "tohle je test"),
        (" tohle je test ", "tohle je test"),
        (" tohle je jedna test", "tohle je 1 test"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase), expected)
def test_articles_es(self):
    """Check normalize() with lang="es" and remove_articles=True."""
    cases = (
        ("esta es la prueba", "esta es prueba"),
        ("y otra prueba", "y otra prueba"),
    )
    for phrase, expected in cases:
        actual = normalize(phrase, lang="es", remove_articles=True)
        self.assertEqual(actual, expected)
def test_spaces_pt(self):
    """Check normalize() with lang="pt" on space-padded phrases.

    The last case passes remove_articles=False explicitly; the first two
    leave that argument at its default.
    """
    first = normalize(" isto e o teste", lang="pt")
    self.assertEqual(first, "isto teste")

    second = normalize(" isto sao os testes ", lang="pt")
    self.assertEqual(second, "isto sao testes")

    third = normalize(" isto e um teste", lang="pt", remove_articles=False)
    self.assertEqual(third, "isto 1 teste")
def test_articles(self):
    """Check the remove_articles flag of normalize() with lang="de-de"."""
    cases = (
        # (phrase, remove_articles, expected)
        ("dies ist der test", True, "dies ist test"),
        ("und noch ein Test", True, "und noch 1 Test"),
        ("dies ist der Extra-Test", False, "dies ist der Extra-Test"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, lang="de-de",
                           remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_articles(self):
    """Check the remove_articles flag of normalize() with no lang given.

    With remove_articles=True the expected strings lack "a"/"the"; with
    remove_articles=False the phrase is expected back unchanged.
    """
    cases = (
        # (phrase, remove_articles, expected)
        ("this is a test", True, "this is test"),
        ("this is the test", True, "this is test"),
        ("and another test", True, "and another test"),
        ("this is an extra test", False, "this is an extra test"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_articles(self):
    """Check the remove_articles flag of normalize() for language LANG.

    LANG is passed positionally as the second argument and is defined
    outside this method (the inputs look Dutch -- TODO confirm).
    """
    cases = (
        # (phrase, remove_articles, expected)
        ("dit is de test", True, "dit is test"),
        ("en nog een Test", True, "en nog 1 Test"),
        ("dit is de Extra-Test", False, "dit is de Extra-Test"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, LANG, remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_numbers(self):
    """Check that normalize() with lang="da-dk" turns number words 1-12
    into digits."""
    cases = (
        ("dette er en to tre test", "dette er 1 2 3 test"),
        ("dette er fire fem seks test", "dette er 4 5 6 test"),
        ("dette er syv otte ni test", "dette er 7 8 9 test"),
        ("dette er ti elve tolv test", "dette er 10 11 12 test"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang="da-dk"), expected)
def test_agressive_pruning_ca(self):
    """Check normalize() with lang="ca" on phrases whose filler words go.

    The expected strings omit words such as "aquesta", "el" and "aquell"
    and render "una"/"un"/"u" as the digit 1.
    """
    cases = (
        ("una paraula", "1 paraula"),
        ("un mot", "1 mot"),
        ("aquesta paraula u", "paraula 1"),
        ("l'home el va pegar", "l'home va pegar"),
        ("qui va equivocar-se aquell dia", "qui va equivocar-se dia"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang="ca"), expected)
def test_articles(self):
    """Check the remove_articles flag of normalize() with lang="da-dk".

    In every case, including remove_articles=False, "en" is expected to
    come back as the digit 1.
    """
    cases = (
        # (phrase, remove_articles, expected)
        ("dette er en test", True, "dette er 1 test"),
        ("og endnu en test", True, "og endnu 1 test"),
        ("dette er en extra-test", False, "dette er 1 extra-test"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, lang="da-dk",
                           remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_spaces_ca(self):
    """Check normalize() with lang="ca" on space-padded phrases.

    Only the last case passes remove_articles (as False); the others
    leave it at its default.
    """
    default_cases = (
        (" això és el test", "això és test"),
        (" això és l'intent", "això és l'intent"),
        (" això són les proves ", "això són proves"),
    )
    for phrase, expected in default_cases:
        self.assertEqual(normalize(phrase, lang="ca"), expected)

    kept = normalize(" això és un test", lang="ca", remove_articles=False)
    self.assertEqual(kept, "això és 1 test")
def test_articles_pt(self):
    """Check the remove_articles flag of normalize() with lang="pt"."""
    cases = (
        # (phrase, remove_articles, expected)
        ("isto é o teste", True, "isto é teste"),
        ("isto é a frase", True, "isto é frase"),
        ("e outro teste", True, "outro teste"),
        ("isto é o teste extra", False, "isto é o teste extra"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, lang="pt",
                           remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_spaces_it(self):
    """Test cases for Italian (lang='it') space handling.

    Each input ends with a space; each expected string does not. The
    last two cases pass remove_articles=False explicitly.
    """
    cases = (
        # (phrase, extra keyword arguments, expected)
        ("questo è un test ", {}, "questo è 1 test"),
        ("un altro test ", {}, "1 altro test"),
        ("questa è un' altra amica ", {"remove_articles": False},
         "questa è 1 altra amica"),
        ("questo è un test ", {"remove_articles": False},
         "questo è 1 test"),
    )
    for phrase, extra, expected in cases:
        self.assertEqual(normalize(phrase, lang="it", **extra), expected)
def test_numbers(self):
    """Check that normalize() for language LANG turns number words 1-20
    into digits.

    LANG is passed positionally as the second argument and is defined
    outside this method (the inputs look Dutch -- TODO confirm).
    The original listed the "zeven acht negen" case twice; the verbatim
    duplicate added no coverage and is dropped here.
    """
    cases = (
        ("dit is een twee drie test", "dit is 1 2 3 test"),
        ("dit is vier vijf zes test", "dit is 4 5 6 test"),
        ("dit is zeven acht negen test", "dit is 7 8 9 test"),
        ("dit is tien elf twaalf test", "dit is 10 11 12 test"),
        ("dit is dertien veertien test", "dit is 13 14 test"),
        (u"dit is vijftien zestien zeventien", "dit is 15 16 17"),
        ("dit is achttien negentien twintig", "dit is 18 19 20"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, LANG), expected)
def test_articles_ca(self):
    """Check the remove_articles flag of normalize() with lang="ca"."""
    cases = (
        # (phrase, remove_articles, expected)
        ("aquesta és la prova", True, "és prova"),
        ("això és una frase", True, "això és 1 frase"),
        ("i una altra prova", True, "1 altra prova"),
        ("això és un test extra", False, "això és 1 test extra"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, lang="ca",
                           remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_numbers_pt(self):
    """Check that normalize() with lang="pt" turns number words into
    digits.

    Cases with an empty extras dict leave remove_articles at its default;
    the others pass remove_articles=False explicitly.
    """
    keep_articles = {"remove_articles": False}
    cases = (
        # (phrase, extra keyword arguments, expected)
        ("isto e o um dois três teste", {}, "isto 1 2 3 teste"),
        ("é a sete oito nove test", {}, "é 7 8 9 test"),
        ("teste zero dez onze doze treze", {}, "teste 0 10 11 12 13"),
        ("teste mil seiscentos e sessenta e seis", keep_articles,
         "teste 1000 600 60 6"),
        ("teste sete e meio", keep_articles, "teste 7 meio"),
        ("teste dois ponto nove", {}, "teste 2 ponto 9"),
        ("teste cento e nove", keep_articles, "teste 100 9"),
        ("teste vinte e 1", {}, "teste 20 1"),
    )
    for phrase, extra, expected in cases:
        self.assertEqual(normalize(phrase, lang="pt", **extra), expected)
def test_numbers(self):
    """Check that normalize() with lang='sv-se' turns number words into
    digits."""
    cases = (
        ("det här är ett ett två tre test", "det här är 1 1 2 3 test"),
        (" det är fyra fem sex test", "det är 4 5 6 test"),
        ("det är sju åtta nio test", "det är 7 8 9 test"),
        ("det är tio elva tolv test", "det är 10 11 12 test"),
        ("det är arton nitton tjugo test", "det är 18 19 20 test"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang='sv-se'), expected)
def test_numbers(self):
    """Check that normalize() with lang="de-de" turns number words 1-20
    into digits.

    The original listed the "sieben acht neun" case twice; the verbatim
    duplicate added no coverage and is dropped here.
    """
    cases = (
        ("dies ist eins zwei drei test", "dies ist 1 2 3 test"),
        ("es ist vier fünf sechs test", "es ist 4 5 6 test"),
        ("es ist sieben acht neun test", "es ist 7 8 9 test"),
        ("dies ist zehn elf zwölf test", "dies ist 10 11 12 test"),
        ("dies ist dreizehn vierzehn test", "dies ist 13 14 test"),
        ("dies ist fünfzehn sechzehn siebzehn", "dies ist 15 16 17"),
        ("dies ist achtzehn neunzehn zwanzig", "dies ist 18 19 20"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang="de-de"), expected)
def test_articles_fr(self):
    """Check the remove_articles flag of normalize() with lang="fr-fr"."""
    cases = (
        # (phrase, remove_articles, expected)
        ("c'est le test", True, "c'est test"),
        ("et l'autre test", True, "et autre test"),
        ("et la tentative", True, "et tentative"),
        ("la dernière tentative", False, "la dernière tentative"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, remove_articles=strip_articles,
                           lang="fr-fr")
        self.assertEqual(actual, expected)
def test_articles_it(self):
    """Test cases for Italian (lang='it') remove_articles handling."""
    cases = (
        # (phrase, remove_articles, expected)
        ("questo è il test", True, "questo è test"),
        ("questa è la frase", True, "questa è frase"),
        ("questo è lo scopo", True, "questo è scopo"),
        ("questo è il test extra", False, "questo è il test extra"),
    )
    for phrase, strip_articles, expected in cases:
        actual = normalize(phrase, lang='it',
                           remove_articles=strip_articles)
        self.assertEqual(actual, expected)
def test_numbers_ca(self):
    """Check that normalize() with lang="ca" turns number words into
    digits.

    Cases with an empty extras dict leave remove_articles at its default;
    the others pass remove_articles=False explicitly. Three compound-number
    cases remain disabled and are kept as TODO comments.
    """
    keep_articles = {"remove_articles": False}
    cases = (
        # (phrase, extra keyword arguments, expected)
        ("això és el test un dos tres", {}, "això és test 1 2 3"),
        ("és una prova set vuit nou huit", {}, "és 1 prova 7 8 9 8"),
        ("prova zero deu onze dotze tretze", {}, "prova 0 10 11 12 13"),
        # TODO: seixanta-sis > 66
        # ("prova 1000 600 seixanta-sis", keep_articles,
        #  "prova 1000 600 66"),
        # TODO: mil dotze > 1012
        # ("prova mil dotze", keep_articles, "prova 1012"),
        # TODO: dues-centes vint-i-quatre > 224
        # ("prova dues-centes vint-i-quatre", keep_articles, "prova 224"),
        ("test set i mig", keep_articles, "test 7 mig"),
        ("test dos punt nou", {}, "test 2 punt 9"),
        ("test cent i nou", keep_articles, "test 100 9"),
        ("test vint i 1", {}, "test 20 1"),
    )
    for phrase, extra, expected in cases:
        self.assertEqual(normalize(phrase, lang="ca", **extra), expected)
def test_numbers(self):
    """Check that normalize() turns number words into digits, with no lang
    argument.

    NOTE(review): the inputs look Polish, so this presumably relies on a
    default language configured outside this method -- confirm.
    """
    cases = (
        ("to jest jeden dwa trzy test", "to jest 1 2 3 test"),
        (" to jest cztery pięć sześć test", "to jest 4 5 6 test"),
        ("to jest dziesięć jedenaście dwanaście test",
         "to jest 10 11 12 test"),
        ("to jest osiemnaście dziewiętnaście dwadzieścia",
         "to jest 18 19 20"),
        ("to jest jeden dziewiętnaście dwadzieścia dwa",
         "to jest 1 19 20 2"),
        ("to jest jeden dwa dwadzieścia dwa", "to jest 1 2 20 2"),
        ("to jest jeden i pół", "to jest 1 pół"),
        ("to jest jeden i pół i pięć sześć", "to jest 1 pół 5 6"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase), expected)
def test_numbers_it(self):
    """Test cases for Italian (lang='it') number normalization.

    Cases with an empty extras dict leave remove_articles at its default;
    the others pass remove_articles=False explicitly.
    """
    keep_articles = {"remove_articles": False}
    cases = (
        # (phrase, extra keyword arguments, expected)
        ("è un test sette otto nove", {}, "è 1 test 7 8 9"),
        ("test zero dieci undici dodici tredici", {},
         "test 0 10 11 12 13"),
        ("test mille seicento sessanta e sei", keep_articles,
         "test 1000 600 60 e 6"),
        ("test sette e mezzo", keep_articles, "test 7 e 0.5"),
        ("test due punto nove", {}, "test 2 punto 9"),
        ("test cento e nove", keep_articles, "test 100 e 9"),
        ("test venti e 1", {}, "test 20 e 1"),
        ("test ventuno e ventisette", {}, "test 21 e 27"),
    )
    for phrase, extra, expected in cases:
        self.assertEqual(normalize(phrase, lang='it', **extra), expected)
def test_contractions(self):
    """Check that normalize() expands contractions, with no lang argument.

    Each pair is (contraction, expected expansion). Several contractions
    are ambiguous; the TODO notes record the alternative readings that the
    expected value does not cover.
    """
    cases = (
        ("ain't", "is not"),
        ("aren't", "are not"),
        ("can't", "can not"),
        ("could've", "could have"),
        ("couldn't", "could not"),
        ("didn't", "did not"),
        ("doesn't", "does not"),
        ("don't", "do not"),
        ("gonna", "going to"),
        ("gotta", "got to"),
        ("hadn't", "had not"),
        ("hadn't have", "had not have"),
        ("hasn't", "has not"),
        ("haven't", "have not"),
        # TODO: Ambiguous with "he had"
        ("he'd", "he would"),
        ("he'll", "he will"),
        # TODO: Ambiguous with "he has"
        ("he's", "he is"),
        # TODO: Ambiguous with "how would"
        ("how'd", "how did"),
        ("how'll", "how will"),
        # TODO: Ambiguous with "how has" and "how does"
        ("how's", "how is"),
        # TODO: Ambiguous with "I had"
        ("I'd", "I would"),
        ("I'll", "I will"),
        ("I'm", "I am"),
        ("I've", "I have"),
        ("I haven't", "I have not"),
        ("isn't", "is not"),
        ("it'd", "it would"),
        ("it'll", "it will"),
        # TODO: Ambiguous with "it has"
        ("it's", "it is"),
        ("it isn't", "it is not"),
        ("mightn't", "might not"),
        ("might've", "might have"),
        ("mustn't", "must not"),
        ("mustn't have", "must not have"),
        ("must've", "must have"),
        ("needn't", "need not"),
        ("oughtn't", "ought not"),
        ("shan't", "shall not"),
        # TODO: Ambiguous with "she had"
        ("she'd", "she would"),
        ("she hadn't", "she had not"),
        ("she'll", "she will"),
        ("she's", "she is"),
        ("she isn't", "she is not"),
        ("should've", "should have"),
        ("shouldn't", "should not"),
        ("shouldn't have", "should not have"),
        ("somebody's", "somebody is"),
        # TODO: Ambiguous with "someone had"
        ("someone'd", "someone would"),
        ("someone hadn't", "someone had not"),
        ("someone'll", "someone will"),
        # TODO: Ambiguous with "someone has"
        ("someone's", "someone is"),
        ("that'll", "that will"),
        # TODO: Ambiguous with "that has"
        ("that's", "that is"),
        # TODO: Ambiguous with "that had"
        ("that'd", "that would"),
        # TODO: Ambiguous with "there had"
        ("there'd", "there would"),
        ("there're", "there are"),
        # TODO: Ambiguous with "there has"
        ("there's", "there is"),
        # TODO: Ambiguous with "they had"
        ("they'd", "they would"),
        ("they'll", "they will"),
        ("they won't have", "they will not have"),
        ("they're", "they are"),
        ("they've", "they have"),
        ("they haven't", "they have not"),
        ("wasn't", "was not"),
        # TODO: Ambiguous with "we had"
        ("we'd", "we would"),
        ("we would've", "we would have"),
        ("we wouldn't", "we would not"),
        ("we wouldn't have", "we would not have"),
        ("we'll", "we will"),
        ("we won't have", "we will not have"),
        ("we're", "we are"),
        ("we've", "we have"),
        ("weren't", "were not"),
        ("what'd", "what did"),
        ("what'll", "what will"),
        ("what're", "what are"),
        # TODO: Ambiguous with "what has" / "what does"
        ("whats", "what is"),
        ("what's", "what is"),
        ("what've", "what have"),
        # TODO: Ambiguous with "when has"
        ("when's", "when is"),
        ("where'd", "where did"),
        # TODO: Ambiguous with "where has" / "where does"
        ("where's", "where is"),
        ("where've", "where have"),
        # TODO: Ambiguous with "who had" / "who did"
        ("who'd", "who would"),
        ("who'd've", "who would have"),
        ("who'll", "who will"),
        ("who're", "who are"),
        # TODO: Ambiguous with "who has" / "who does"
        ("who's", "who is"),
        ("who've", "who have"),
        ("why'd", "why did"),
        ("why're", "why are"),
        # TODO: Ambiguous with "why has" / "why does"
        ("why's", "why is"),
        ("won't", "will not"),
        ("won't've", "will not have"),
        ("would've", "would have"),
        ("wouldn't", "would not"),
        ("wouldn't've", "would not have"),
        ("ya'll", "you all"),
        ("y'all", "you all"),
        ("y'ain't", "you are not"),
        # TODO: Ambiguous with "you had"
        ("you'd", "you would"),
        ("you'd've", "you would have"),
        ("you'll", "you will"),
        ("you're", "you are"),
        ("you aren't", "you are not"),
        ("you've", "you have"),
        ("you haven't", "you have not"),
    )
    for contraction, expansion in cases:
        self.assertEqual(normalize(contraction), expansion)
def test_numbers_fr(self):
    """Check that normalize() with lang="fr-fr" turns number words into
    digits.

    Covers simple cardinals, compound cardinals (e.g. 531000, 360598),
    ordinals rendered with an "e"/"er" suffix, and "quatre-quart", which
    is expected to stay as a word.

    The original listed the "sept huit neuf" case twice; the verbatim
    duplicate added no coverage and is dropped here. The "365e jour"
    literal, which was split across a physical line break, is rejoined.
    """
    cases = (
        ("c'est un deux trois test", "c'est 1 2 3 test"),
        (" c'est le quatre cinq six test", "c'est 4 5 6 test"),
        ("c'est le sept huit neuf test", "c'est 7 8 9 test"),
        ("voilà le test dix onze douze", "voilà test 10 11 12"),
        ("voilà le treize quatorze test", "voilà 13 14 test"),
        ("ça fait quinze seize dix-sept", "ça fait 15 16 17"),
        ("ça fait dix-huit dix-neuf vingt", "ça fait 18 19 20"),
        ("ça fait mille cinq cents", "ça fait 1500"),
        ("voilà cinq cents trente et un mille euros",
         "voilà 531000 euros"),
        ("voilà trois cents soixante mille cinq"
         " cents quatre-vingt-dix-huit euros",
         "voilà 360598 euros"),
        ("voilà vingt et un euros", "voilà 21 euros"),
        ("joli zéro sur vingt", "joli 0 sur 20"),
        ("je veux du quatre-quart", "je veux quatre-quart"),
        ("pour la neuf centième fois", "pour 900e fois"),
        ("pour la première fois", "pour 1er fois"),
        ("le neuf cents quatre-vingt-dix"
         " millième épisode",
         "990000e épisode"),
        ("la septième clé", "7e clé"),
        ("la neuvième porte", "9e porte"),
        ("le cinquième jour", "5e jour"),
        ("le trois-cents-soixante-cinquième jour", "365e jour"),
        ("la 1ère fois", "1er fois"),
        ("le centième centime", "100e centime"),
        ("le millième millésime", "1000e millésime"),
        ("le trentième anniversaire", "30e anniversaire"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase, lang="fr-fr"), expected)
def test_combinations(self):
    """Check normalize() on phrases that combine several rewrites.

    The cases mix contractions with other words; some expected strings
    also show a number word as a digit or an article removed. No lang
    argument is passed.
    """
    cases = (
        ("I couldn't have guessed there'd be two",
         "I could not have guessed there would be 2"),
        ("I wouldn't have", "I would not have"),
        ("I hadn't been there", "I had not been there"),
        ("I would've", "I would have"),
        ("it hadn't", "it had not"),
        ("it hadn't have", "it had not have"),
        ("it would've", "it would have"),
        ("she wouldn't have", "she would not have"),
        ("she would've", "she would have"),
        ("someone wouldn't have", "someone would not have"),
        ("someone would've", "someone would have"),
        ("what's the weather like", "what is weather like"),
        ("that's what I told you", "that is what I told you"),
        ("whats 8 + 4", "what is 8 + 4"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase), expected)
def test_spaces(self):
    """Check normalize() on space-padded phrases, with no lang argument.

    NOTE(review): the inputs look Polish, so this presumably relies on a
    default language configured outside this method -- confirm.
    """
    cases = (
        (" to jest test", "to jest test"),
        (" to jest test ", "to jest test"),
        (" to jest jeden test", "to jest 1 test"),
    )
    for phrase, expected in cases:
        self.assertEqual(normalize(phrase), expected)
def test_spaces(self):
    """Check normalize() with lang="da-dk" on space-padded phrases.

    Both inputs are expected to give the same string, with no surrounding
    space and "en" rendered as the digit 1.
    """
    expected = "dette er 1 test"
    for phrase in (" dette er en test", " dette er en test "):
        self.assertEqual(normalize(phrase, lang="da-dk"), expected)
def testExtract_it(text, expected_date, expected_leftover):
    """Normalize *text*, run extractWithFormat on it and check both results.

    Element 0 of the result is compared with *expected_date* and element 1
    with *expected_leftover*; the failure message is 'per =' followed by
    the input text.

    NOTE(review): `self` and `extractWithFormat` are not parameters, so
    this is presumably a helper nested inside a test method -- confirm.
    NOTE(review): normalize() is called without a lang argument here --
    verify that this is intended for Italian input.
    """
    outcome = extractWithFormat(normalize(text))
    checks = ((0, expected_date), (1, expected_leftover))
    for position, wanted in checks:
        self.assertEqual(outcome[position], wanted, 'per =' + text)