def load_set_stopwords(filename):
    '''
    Load stopwords from a file into a set of text strings.

    Iterates over whatever file_generator(filename) yields (presumably one
    line per item -- confirm against file_generator), strips trailing
    carriage returns, newlines and spaces from each item, and collects the
    results in a set.

    Byte strings are decoded as UTF-8; items that are already text are used
    as-is, so the function works whether the generator yields bytes or text.

    Returns:
        set: the distinct stripped lines. A line holding only the stripped
        characters contributes the empty string.

    Raises:
        UnicodeDecodeError: if a byte-string line is not valid UTF-8.
    '''
    stopwordset = set()
    for line in file_generator(filename):
        # Decode only real byte strings: text has no .decode() on Python 3,
        # and on Python 2 `bytes` is `str`, so existing behaviour is kept.
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        # The stripped characters are ASCII, so stripping after decoding
        # gives the same result as stripping the raw bytes first.
        stopwordset.add(line.rstrip('\r\n '))
    return stopwordset
# Example #2
# 0
def pop_word_set_gen(file):
    '''
    Build a set of lower-cased popular words from a word-per-line file.

    Every item produced by utility.file_generator(file) is stripped of
    leading and trailing whitespace and converted to lower case before
    being added, so entries differing only in case or surrounding
    whitespace collapse into one. A whitespace-only line contributes the
    empty string.

    Returns the resulting set.
    '''
    return {
        entry.strip().lower()
        for entry in utility.file_generator(file)
    }