def __init__(self, use_stopword=True, stop_words_file=utils.default_stopwords_file(), max_iter=100, tol=0.0001, window=2):
    """Store the configuration and, if requested, load the stop-word set.

    Args:
        use_stopword: When truthy, the stop-word file is read during
            construction and its lines populate the stop-word set.
        stop_words_file: Path of the stop-word file. A falsy value falls
            back to ``utils.default_stopwords_file()``. Note that the
            signature default is evaluated once, when the function is
            defined.
        max_iter: Stored as-is on the instance.
        tol: Stored as-is on the instance.
        window: Stored as-is on the instance.

    Raises:
        OSError: If ``use_stopword`` is truthy and the file cannot be opened.
    """
    # Plain configuration values, kept verbatim.
    self.__window = window
    self.__tol = tol
    self.__max_iter = max_iter
    self.__use_stopword = use_stopword

    # The fallback path is always computed, then overridden by a truthy argument.
    fallback_path = utils.default_stopwords_file()
    self.__stop_words_file = stop_words_file if stop_words_file else fallback_path

    # Start from an empty set so the attribute exists even when loading is skipped.
    self.__stop_words = set()
    if not use_stopword:
        return

    # One entry per line, with surrounding whitespace (including the newline) removed.
    with open(self.__stop_words_file, 'r', encoding='utf-8') as handle:
        self.__stop_words.update(line.strip() for line in handle)
def __init__(self, use_stopword=True, stop_words_file=None, dict_path=None, max_iter=100, tol=0.0001):
    """Store the configuration and, if requested, load the stop-word set.

    Args:
        use_stopword: When truthy, the stop-word file is read during
            construction and its lines populate the stop-word set.
        stop_words_file: Path of the stop-word file. A falsy value falls
            back to ``utils.default_stopwords_file()``.
        dict_path: Must be falsy; any truthy value is rejected.
        max_iter: Stored as-is on the instance.
        tol: Stored as-is on the instance.

    Raises:
        RuntimeError: If ``dict_path`` is truthy.
        OSError: If ``use_stopword`` is truthy and the file cannot be opened.
    """
    if dict_path:
        # NOTE(review): the message text "True" is preserved unchanged; it
        # looks like a placeholder -- confirm the intended wording.
        raise RuntimeError("True")

    self.__use_stopword = use_stopword
    self.__dict_path = dict_path
    self.__max_iter = max_iter
    self.__tol = tol
    self.__stop_words = set()

    # Default path first, overridden by any truthy caller-supplied path.
    self.__stop_words_file = utils.default_stopwords_file()
    if stop_words_file:
        self.__stop_words_file = stop_words_file

    if use_stopword:
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.__stop_words_file, 'r', encoding='utf-8') as f:
            for word in f:
                # strip() drops the trailing newline and surrounding whitespace.
                self.__stop_words.add(word.strip())
def test_default_stopwords(self):
    """Check that the file named by ``utils.default_stopwords_file()`` is readable and non-empty."""
    path = utils.default_stopwords_file()
    # Close the file explicitly; a bare open() in the comprehension leaks the
    # handle and triggers ResourceWarning under the test runner.
    with open(path, 'r', encoding='utf-8') as f:
        stopwords = [line.strip() for line in f]
    self.assertTrue(len(stopwords) > 0)