def cleanUpText(self, text):
    """Lowercase ``text``, split it on single spaces and drop stopwords.

    The stopwords come from ``Vocabulary().getPTStopWords()``; ``Vocabulary``
    is defined outside this block.

    Args:
        text: The string to clean. It is lowercased before splitting.

    Returns:
        A list of the lowercased tokens, in their original order, that are
        not found in the Portuguese stopword collection.
    """
    # Split on a literal space rather than on arbitrary whitespace, so runs
    # of spaces yield empty-string tokens; these are kept unless the
    # stopword collection itself contains ''.
    words = text.lower().split(' ')

    # Fetch the stopwords once instead of once per word: the lookup does not
    # depend on the word being tested.
    # NOTE(review): if getPTStopWords() returns a list, wrapping it in a set
    # would make each membership test O(1) -- confirm the return type first.
    # TODO: Implement other languages tokenizers
    stopWords = Vocabulary().getPTStopWords()

    return [word for word in words if word not in stopWords]