def cleanup(fname):
    """Write the words of a text file, one per line, to a ``*_clean.txt`` file.

    The file is read with ``fileio.read_file``, passed through
    ``remove_headers``, and split on whitespace. Every word is then written
    followed by a newline via ``fileio.write_file``.

    The output name is everything before the first ``'.txt'`` in ``fname``
    plus ``'_clean.txt'``. When ``fname`` contains no ``'.txt'`` the suffix is
    appended to the whole name.

    Args:
        fname: Path of the input text file.
    """
    text = remove_headers(fileio.read_file(fname))

    # One word per line, each line (including the last) newline-terminated.
    new_text = ''.join(word + '\n' for word in text.split())

    # str.find returns -1 when '.txt' is missing; slicing with that value
    # would silently chop the last character off the name, so guard it.
    ext_at = fname.find('.txt')
    stem = fname if ext_at == -1 else fname[:ext_at]

    fileio.write_file(new_text, stem + '_clean.txt')
def generate_chains(fname=FILE, key_size=KEY_SIZE):
    """Map each run of ``key_size`` consecutive words to the words that follow it.

    The text returned by ``reader.read_file(fname)`` is split on whitespace.
    A window of ``key_size`` words slides over the result one word at a time;
    the window's words joined by single spaces form the key, and the word
    immediately after the window is appended to that key's list.

    Args:
        fname: File to read; defaults to the module-level ``FILE``.
        key_size: Number of words per key; defaults to ``KEY_SIZE``.

    Returns:
        dict mapping each key string to a list of follower words, in order
        of occurrence (duplicates kept).
    """
    tokens = reader.read_file(fname).split()
    table = {}
    # Stop early enough that a follower word always exists after the window.
    for start in range(len(tokens) - key_size):
        stop = start + key_size
        prefix = ' '.join(tokens[start:stop])
        table.setdefault(prefix, []).append(tokens[stop])
    return table
def generate_chains(fname, key_size=2):
    """Build a table of word sequences and the words that follow them.

    The text returned by ``reader.read_file(fname)`` is split on whitespace.
    For every position, the ``key_size`` consecutive words starting there are
    joined with single spaces to form a key, and the next word is appended to
    that key's list of followers.

    Args:
        fname: File to read.
        key_size: Number of words per key. Defaults to 2, which matches the
            previous hard-coded two-word keys.

    Returns:
        dict mapping each key string to a list of follower words, in order
        of occurrence (duplicates kept).
    """
    words = reader.read_file(fname).split()
    chains = {}
    # Stop early enough that a follower word always exists after the key.
    for i in range(len(words) - key_size):
        key = " ".join(words[i:i + key_size])
        chains.setdefault(key, []).append(words[i + key_size])
    return chains