def supported_languages():
    """Return capitalized names of the languages NLTK ships stopwords for.

    Returns an empty list when the NLTK data is not downloaded.
    """
    # get NLTK list of stopwords; corpus files are lowercase language names
    # (e.g. 'english'), which distinguishes them from README-style entries.
    stopwords_listdir = []
    try:
        stopwords_listdir = [file for file in os.listdir(stopwords._get_root())
                             if file.islower()]
    except LookupError:  # when no NLTK data is available
        pass
    return [file.capitalize() for file in stopwords_listdir]
def supported_languages():
    """List the languages for which an NLTK stopword corpus is installed."""
    try:
        root = stopwords._get_root()
        # Stopword corpora are stored as all-lowercase file names.
        languages = [name for name in os.listdir(root) if name.islower()]
    except LookupError:
        # No NLTK data has been downloaded yet -- report nothing.
        languages = []
    return [name.capitalize() for name in languages]
def supported_languages():
    """Return supported language names: Chinese plus NLTK stopword languages.

    NLTK-provided languages are sorted and capitalized; Chinese ('中文') is
    supported separately from NLTK, so it is prepended manually.
    Returns only Chinese when the NLTK data is not downloaded.
    """
    # get NLTK list of stopwords; corpus files are lowercase language names
    stopwords_listdir = []
    try:
        stopwords_listdir = [
            file for file in os.listdir(stopwords._get_root())
            if file.islower()
        ]
    except LookupError:  # when no NLTK data is available
        pass
    return ['中文'] + sorted(
        file.capitalize() for file in stopwords_listdir
    )
def from_file(self, path):
    """Load a newline-separated word list from *path*.

    An empty/None path clears the list; otherwise the file's encoding is
    detected and each stripped line becomes a word-list entry.
    """
    self.file_path = path
    if not path:
        self.word_list = []
    else:
        enc = detect_encoding(path)
        with open(path, encoding=enc) as f:
            # set comprehension: deduplicates without an intermediate list
            self.word_list = {line.strip() for line in f}


# get NLTK list of stopwords; corpus files are lowercase language names
stopwords_listdir = []
try:
    stopwords_listdir = [
        file for file in os.listdir(stopwords._get_root())
        if file.islower()
    ]
except LookupError:  # when no NLTK data is available
    pass


class StopwordsFilter(BaseTokenFilter, WordListMixin):
    """ Remove tokens present in NLTK's language specific lists or a file. """
    name = 'Stopwords'

    # Derived at import time from the installed NLTK stopword corpora.
    supported_languages = [file.capitalize() for file in stopwords_listdir]

    @wait_nltk_data
    def __init__(self, language='English', word_list=None):
        WordListMixin.__init__(self, word_list)
        super().__init__()
# NOTE(review): these two statements are the tail of a method whose `def`
# line lies outside this chunk -- kept verbatim, in order.
self.file_path = None
self.word_list = word_list or []


def from_file(self, path):
    """Load a newline-separated word list from *path*.

    An empty/None path clears the list; otherwise the file's encoding is
    detected and each stripped line becomes a word-list entry.
    """
    self.file_path = path
    if not path:
        self.word_list = []
    else:
        enc = detect_encoding(path)
        with open(path, encoding=enc) as f:
            # set comprehension: deduplicates without an intermediate list
            self.word_list = {line.strip() for line in f}


# get NLTK list of stopwords; corpus files are lowercase language names
stopwords_listdir = []
try:
    stopwords_listdir = [file for file in os.listdir(stopwords._get_root())
                         if file.islower()]
except LookupError:  # when no NLTK data is available
    pass


class StopwordsFilter(BaseTokenFilter, WordListMixin):
    """ Remove tokens present in NLTK's language specific lists or a file. """
    name = 'Stopwords'

    # Derived at import time from the installed NLTK stopword corpora.
    supported_languages = [file.capitalize() for file in stopwords_listdir]

    def __init__(self, language='English', word_list=None):
        WordListMixin.__init__(self, word_list)
        super().__init__()
        self.language = language