Esempio n. 1
0
def cleanup(fname):
    """Rewrite the text of *fname* as one word per line in a ``_clean.txt`` file.

    The content is read with ``fileio.read_file``, passed through
    ``remove_headers``, split on whitespace, and written back out with
    ``fileio.write_file`` under a name derived from *fname*.

    Args:
        fname: Path of the file to clean. Everything from the first
            ``'.txt'`` onwards is replaced by ``'_clean.txt'``; when the
            name contains no ``'.txt'`` the suffix is appended to the
            whole name.
    """
    text = remove_headers(fileio.read_file(fname))

    # Every word ends up on its own line, each terminated by a newline.
    new_text = ''.join(word + '\n' for word in text.split())

    # partition() returns the whole name when '.txt' is absent. The earlier
    # fname[:fname.find('.txt')] sliced with -1 in that case and silently
    # dropped the last character of the name.
    stem = fname.partition('.txt')[0]
    new_fname = stem + '_clean.txt'

    fileio.write_file(new_text, new_fname)
Esempio n. 2
0
def generate_chains( fname=FILE, key_size=KEY_SIZE ):
    """Build a word-sequence lookup table from the text in *fname*.

    The text returned by ``reader.read_file`` is split on whitespace. Each
    run of *key_size* consecutive words, joined by single spaces, becomes
    a key; the word that immediately follows the run is appended to that
    key's list. Followers are kept in order of appearance, duplicates
    included.

    Args:
        fname: File handed to ``reader.read_file``.
        key_size: Number of consecutive words that make up one key.

    Returns:
        dict mapping each key string to the list of words seen after it.
    """
    tokens = reader.read_file( fname ).split()
    table = {}
    # The last `key_size` words have no follower, so no window starts there.
    for start in range( len(tokens) - key_size ):
        stop = start + key_size
        prefix = ' '.join( tokens[start:stop] )
        table.setdefault( prefix, [] ).append( tokens[stop] )
    return table
Esempio n. 3
0
def generate_chains(fname, key_size=2):
    """Build a word-sequence lookup table from the text in *fname*.

    The text returned by ``reader.read_file`` is split on whitespace. Each
    run of *key_size* consecutive words, joined by single spaces, becomes
    a key; the word that immediately follows the run is appended to that
    key's list. Followers are kept in order of appearance, duplicates
    included.

    Args:
        fname: File handed to ``reader.read_file``.
        key_size: Number of consecutive words that make up one key.
            Defaults to 2, the width that was previously hard-coded.
            NOTE(review): intended for positive values; other values are
            not validated here.

    Returns:
        dict mapping each key string to the list of words seen after it.
        Empty when the text has no more than *key_size* words.
    """
    words = reader.read_file(fname).split()
    chains = {}
    # Stop `key_size` words before the end: every key needs a follower.
    for start in range(len(words) - key_size):
        end = start + key_size
        key = " ".join(words[start:end])
        chains.setdefault(key, []).append(words[end])
    return chains