Beispiel #1
0
 def test_cleanSentence(self):
     """Run cleanSentence on an English sentence and compare with the gold string.

     The sentence-level steps are "removeUrl" and "removeUserMention"; the
     token-level steps are listed below in the order they are passed in.
     """
     # The stopword list is installed on the functions module before the
     # cleaning call (presumably consumed by the "stopwording" step -- confirm).
     functions.stopwords = load_stopwords("etc/stopwords_en.txt")

     sentence_steps = ["removeUrl", "removeUserMention"]
     token_steps = [
         "stemming",
         "toLowerCase",
         "removePunctuationAndNumbers",
         "stopwording",
         "removeSingleChar",
         "removeDoubleChar",
     ]
     raw_text = "At 8 o'clock on Thursday morning, the boys and girls didn't feel very good."
     expected = "oclock thursday morning boy girl feel good"

     self.assertEqual(cleanSentence(raw_text, sentence_steps, token_steps), expected)
Beispiel #2
0
 def test_cleanSentenceUnicode(self):
     """Run cleanSentence on a non-ASCII sentence and compare with the gold string.

     The input contains accented characters, a user mention, hashtags and a
     URL; the expected output is a unicode string that keeps the accents.
     """
     # The English stopword file is loaded here even though the input text is
     # Spanish; the gold string below reflects that configuration.
     functions.stopwords = load_stopwords("etc/stopwords_en.txt")

     sentence_steps = ["removeUrl", "removeUserMention"]
     token_steps = [
         "stemming",
         "toLowerCase",
         "removePunctuationAndNumbers",
         "stopwording",
         "removeSingleChar",
         "removeDoubleChar",
     ]
     raw_text = u"Según @NWS_PTWC, no hay riesgo generalizado de #tsunami tras el #sismo de Japón http://t.co/icErcNfSCf"
     expected = u"según hay riesgo generalizado tsunami tras sismo japón"

     self.assertEqual(cleanSentence(raw_text, sentence_steps, token_steps), expected)