def str_to_dict(s):
    """Create a dictionary of stemmed words and their counts.

    Non-ASCII characters are dropped from ``s`` and the remainder is
    tokenized with the module-level ``WORDRE`` pattern. Each token is
    lowercased, stripped of surrounding single quotes, stemmed with
    ``EnglishStemmer`` and then counted.

    input: s string
    output: dictionary {word: count}
    """
    # Drop non-ASCII characters. On Python 3 encode() returns bytes and
    # str(bytes) would give the literal "b'...'" repr, so decode back to
    # text instead; on Python 2 the encoded value is already a native str.
    s = s.encode('ascii', 'ignore')
    if not isinstance(s, str):
        s = s.decode('ascii')

    stemmer = EnglishStemmer()  # loop-invariant: build once, not per word
    word_dict = {}
    for w in re.findall(WORDRE, s):
        # Lowercase, then remove single quotes from BOTH ends of the token
        # (the previous if/elif left the trailing quote on "'word'").
        w = w.lower().strip("'")
        if not w:
            # Token was empty or consisted only of quotes; nothing to count.
            continue
        w = stemmer.stem(w)
        # Keep keys as the interpreter's native str type: encode only when
        # the stemmer handed back a non-str text object (Python 2 unicode).
        if not isinstance(w, str):
            w = w.encode('ascii', 'ignore')
        if w:
            word_dict[w] = word_dict.get(w, 0) + 1
    return word_dict
def str_to_dict(s):
    """Create a dictionary of stemmed words and their counts.

    Non-ASCII characters are dropped from ``s`` and the remainder is
    tokenized with the module-level ``WORDRE`` pattern. Each token is
    lowercased, stripped of surrounding single quotes, stemmed with
    ``EnglishStemmer`` and then counted.

    input: s string
    output: dictionary {word: count}
    """
    # Drop non-ASCII characters. On Python 3 encode() returns bytes and
    # str(bytes) would give the literal "b'...'" repr, so decode back to
    # text instead; on Python 2 the encoded value is already a native str.
    s = s.encode('ascii', 'ignore')
    if not isinstance(s, str):
        s = s.decode('ascii')

    stemmer = EnglishStemmer()  # loop-invariant: build once, not per word
    word_dict = {}
    for w in re.findall(WORDRE, s):
        # Lowercase, then remove single quotes from BOTH ends of the token
        # (the previous if/elif left the trailing quote on "'word'").
        w = w.lower().strip("'")
        if not w:
            # Token was empty or consisted only of quotes; nothing to count.
            continue
        w = stemmer.stem(w)
        # Keep keys as the interpreter's native str type: encode only when
        # the stemmer handed back a non-str text object (Python 2 unicode).
        if not isinstance(w, str):
            w = w.encode('ascii', 'ignore')
        if w:
            word_dict[w] = word_dict.get(w, 0) + 1
    return word_dict