def load_set_stopwords(filename):
    '''
    Collect the stopwords listed in a file into a set.

    Every line yielded by file_generator(filename) has its trailing
    carriage returns, newlines and spaces removed and is then decoded
    as UTF-8 before being stored.

    NOTE(review): the .decode() call implies file_generator yields
    byte strings -- confirm against its definition.

    Returns the set of decoded stopwords.
    '''
    return set(
        raw_line.rstrip('\r\n ').decode('utf-8')
        for raw_line in file_generator(filename)
    )
def pop_word_set_gen(file):
    '''
    Build a set of popular words from a word-list file.

    Each line produced by utility.file_generator(file) is stripped of
    surrounding whitespace and lower-cased, so the returned set holds
    the normalized form of every line.

    NOTE(review): the file is presumably laid out one word per line --
    confirm against the data files actually passed in.
    '''
    # Normalize lazily, then let set() consume the stream and dedupe.
    normalized_words = (
        entry.strip().lower()
        for entry in utility.file_generator(file)
    )
    return set(normalized_words)