コード例 #1
0
 def _cleanUp(self, line):
     """Return ``line`` with stop words and, optionally, digits/punctuation removed.

     The text is first passed through ``factory.EngLowercase().lowercase``
     (presumably a lower-casing helper; confirm against ``factory``), split
     on whitespace, and every token found in the discard set is dropped.
     The surviving tokens are re-joined with single spaces.

     Args:
         line: The raw text to clean.

     Returns:
         The cleaned text. When ``self.remove_digit`` is truthy every
         character for which ``str.isdigit`` is true is removed; when
         ``self.remove_punctuation`` is truthy the text is additionally
         run through ``str.translate`` with ``self.table``.
     """
     lowered = factory.EngLowercase().lowercase(line)

     # Discard set: the English stop-word list plus each single punctuation
     # character, so a token consisting of one punctuation mark is dropped.
     ignored = set(stopwords.words('english')).union(string.punctuation)

     kept = [token for token in lowered.split() if token not in ignored]
     result = " ".join(kept)

     if self.remove_digit:
         # Character-level filter: digits inside a token are stripped too.
         result = ''.join(filter(lambda ch: not ch.isdigit(), result))

     if self.remove_punctuation:
         # NOTE(review): self.table is presumably a str.maketrans table that
         # deletes punctuation -- confirm where it is built.
         result = result.translate(self.table)

     return result
def clean_up(line):
    """Return ``line`` with English stop words and lone punctuation tokens removed.

    The text is passed through ``factory.EngLowercase().lowercase``
    (presumably a lower-casing helper; confirm against ``factory``), split
    on whitespace, filtered against the discard set, and re-joined with
    single spaces.

    Args:
        line: The raw text to clean.

    Returns:
        The space-joined tokens that are not in the discard set.
    """
    lowered = factory.EngLowercase().lowercase(line)

    # Discard set: the English stop-word list plus each single punctuation
    # character from string.punctuation.
    ignored = set(stopwords.words('english'))
    ignored.update(string.punctuation)

    return " ".join(token for token in lowered.split() if token not in ignored)