def test_language(self):
    # Language recognition: for each sample sentence, the first item of the
    # text.language() result must equal the expected language code.
    samples = (
        (u"the cat sat on the mat", "en"),
        (u"de kat zat op de mat", "nl"),
        (u"le chat s'était assis sur le tapis", "fr"),
    )
    for sentence, expected in samples:
        guess = text.language(sentence)
        self.assertEqual(guess[0], expected)
    # Progress marker naming the function that was just exercised.
    print("pattern.text.language()")
def test_language(self):
    # Verify language recognition: text.language() must report the expected
    # code (first item of its result) for every sample sentence, in order.
    sentences = (
        "the cat sat on the mat",
        "de kat zat op de mat",
        "le chat s'était assis sur le tapis",
    )
    codes = ("en", "nl", "fr")
    for sentence, code in zip(sentences, codes):
        self.assertEqual(text.language(sentence)[0], code)
    # Progress marker naming the function that was just exercised.
    print("pattern.text.language()")
def predict_language(q=""):
    """Guess the language of *q*.

    Returns a dict with two entries: "language", the code produced by
    language(), and "confidence", the accompanying score rounded to two
    decimal places.
    """
    # language() takes some time to load the first time it is called.
    code, score = language(q)
    result = {"language": code}
    result["confidence"] = round(score, 2)
    return result
def predict_language(q=""):
    """Return the guessed language of *q* with its rounded confidence.

    The result is a dict holding the "language" code and a "confidence"
    value rounded to two decimal places, both taken from language(q).
    """
    # The first language() call is slow because it has to load first.
    code, score = language(q)
    return dict(language=code, confidence=round(score, 2))
# Parse a short phrase for each language code with the universal tagset.
for phrase, iso_code in (
    ("les chats noirs", "fr"),
    ("i gatti neri", "it"),
    ("de zwarte katten", "nl"),
):
    print(parse(phrase, chunks=False, language=iso_code, tagset=UNIVERSAL))
print("")

# The price of the universal tagset (in this example) is that information
# about plural nouns is lost (NNS => NN).
# In exchange, multilingual apps can be built on the universal constants
# (e.g., PRON, PREP, CONJ) without learning the Penn Treebank tagset by heart,
# and without wondering why the Italian "che" is tagged "PRP", "IN" or "CC"
# (in the universal tagset it is a PRON or a CONJ).

from pattern.text import parsetree

# Print only the words tagged as pronouns.
for sentence in parsetree("i gatti neri che sono la mia", language="it", tagset=UNIVERSAL):
    for word in sentence.words:
        if word.tag != PRON:
            continue
        print(word)

# pattern.text.language() guesses the language of a text and returns a
# (language code, confidence)-tuple. It can guess en, es, de, fr, it, nl.

from pattern.text import language

print("")
for sample in (
    u"the cat sat on the mat",              # ("en", 1.00)
    u"de kat zat op de mat",                # ("nl", 0.80)
    u"le chat s'était assis sur le tapis",  # ("fr", 0.86)
):
    print(language(sample))
# Parse a short phrase for each language code with the universal tagset.
for phrase, iso_code in (
    ("the black cats", "en"),
    ("los gatos negros", "es"),
    ("les chats noirs", "fr"),
    ("i gatti neri", "it"),
    ("de zwarte katten", "nl"),
):
    print(parse(phrase, chunks=False, language=iso_code, tagset=UNIVERSAL))
print()

# The price of the universal tagset (in this example) is that information
# about plural nouns is lost (NNS => NN).
# In exchange, multilingual apps can be built on the universal constants
# (e.g., PRON, PREP, CONJ) without learning the Penn Treebank tagset by heart,
# and without wondering why the Italian "che" is tagged "PRP", "IN" or "CC"
# (in the universal tagset it is a PRON or a CONJ).

from pattern.text import parsetree

# Print only the words tagged as pronouns.
for sentence in parsetree("i gatti neri che sono la mia", language="it", tagset=UNIVERSAL):
    for word in sentence.words:
        if word.tag != PRON:
            continue
        print(word)

# pattern.text.language() guesses the language of a text and returns a
# (language code, confidence)-tuple. It can guess en, es, de, fr, it, nl.

from pattern.text import language

print()
for sample in (
    u"the cat sat on the mat",              # ("en", 1.00)
    u"de kat zat op de mat",                # ("nl", 0.80)
    u"le chat s'était assis sur le tapis",  # ("fr", 0.86)
):
    print(language(sample))
def predict_language_paid(q="", key=None):
    """Return the guessed language code for *q*.

    The result is a dict whose single "language" entry is the first item
    of the language(q) result. The *key* argument is accepted but is not
    used inside this function.
    """
    guess = language(q)
    return {"language": guess[0]}