except IOError:
        print "cache not found. Recomputing"
        c = fun(*args)
        np.save(fname, c)
        return c

# Bigram model values, obtained through cache_or_compute with
# "cache/bigram.npy" as the cache file and compute_bigram as the function to
# run when the cache cannot be used (presumably the file is loaded when it
# exists -- confirm against the top of cache_or_compute).
pi, A, pi_v, theta_v, word_freq = cache_or_compute("cache/bigram.npy", compute_bigram, debug=False)

# Segmentation
#
# Chunk boundaries are computed by get_chunk_starts on the slice of `data`
# selected by file_range, whose two entries are used as fractions of len(data).
# NOTE(review): the call below looks truncated in this part of the file -- no
# cache file name is passed as first argument and the closing parenthesis is
# missing; confirm against the full source.

starts, ends, chunks = cache_or_compute(
    lambda arg: get_chunk_starts(arg),
    data[int(file_range[0] * len(data)) : int(file_range[1] * len(data))],

# When the boundaries did not come back as a plain list (presumably arrays
# loaded from the .npy cache -- TODO confirm), cast them to int.
if not isinstance(starts, list):
    starts = starts.astype(int)
    ends = ends.astype(int)

# Optional diagnostic, enabled by the "Segmentation" entry of PRINT_SET.
if "Segmentation" in PRINT_SET:
    print "num_chunks", len(chunks)

# Result of find_spaces(data, starts, ends), cached in cache/spaces.npy.
spaces = cache_or_compute("cache/spaces.npy", find_spaces, data, starts, ends)

# NOTE(review): the roles of N, M and ii1 are not visible in this part of the
# file; ii1 starts as an array holding one zero per element of `data`.
N, M = 0, 1000000
ii1 = np.zeros(len(data))
Example: ./aggregate 7 sound7.*.wav
will aggregate the sound files
sound7.1.wav, sound7.2.wav, etc.
with their corresponding text files
text7.1.txt, text7.2.txt, etc.
The output aggregation is guaranteed to have perfect segmentation.
import sys
from mlalgs import (load_data, get_chunk_starts)
import scipy.io.wavfile
import numpy as np

# Accumulators for the aggregated output:
#   tot       -- arrays that are concatenated into the output wav file below
#   tot_txt   -- presumably the matching text pieces (not used in the lines
#                visible here -- TODO confirm)
#   tot_chars -- running total of len(text) over the input files
tot = []
tot_txt = []
tot_chars = 0
# sys.argv[1] is the output identifier; every later argument is a sound file.
for f in sys.argv[2:]:
    # Derive the text file name by dropping the first 5 and last 3 characters
    # of the sound file name, e.g. 'sound7.1.wav' -> 'text7.1.txt'.
    # Assumes a 5-character prefix such as 'sound' and a 3-character extension.
    text_file = 'text' + f[5:-3] + 'txt'
    rate, data, text = load_data(f, text_file)
    starts, _, _ = get_chunk_starts(data)
    # A file is reported as rejected when the number of detected chunk starts
    # differs from the length of its text.
    if len(starts) != len(text):
        print '%s rejected: %d != %d' % (f, len(starts), len(text))
    # NOTE(review): in the lines visible here a rejected file is only
    # reported -- it is still counted below, and nothing is ever appended to
    # tot or tot_txt. Confirm whether a `continue` and the append statements
    # are missing from this part of the file.
    tot_chars += len(text)

print 'Created data file with %d characters' % tot_chars
# Write the concatenated audio to sound<argv[1]>.wav. `rate` is whatever the
# last loaded file returned, since it is overwritten on every iteration.
scipy.io.wavfile.write('sound%s.wav' % sys.argv[1], rate, np.concatenate(tot))
# Open text<argv[1]>.txt for writing; the body of this `with` statement is
# not visible in this part of the file.
with open('text%s.txt' % sys.argv[1], "w") as f:
        if minj == letters.index(c):
            score += 1
    print ''.join(pred)
    print ''.join(real)
    return means, stds, score/float(len(test))

if __name__ == '__main__':
    if len(sys.argv) != 4:
        print 'Usage: %s training|test soundf textf' % sys.argv[0]

    soundf = sys.argv[2]
    textf = sys.argv[3]

    rate, data, text = load_data(soundf, textf)
    starts, ends, chunks = get_chunk_starts(data)
    f = get_features(data, starts, ends, include_fft=True, include_cepstrum=True)

    if sys.argv[1] == 'training':
        means, stds, score = naive_bayes(text, f)
        print 'Naive Bayes', score

        logreg_score, logreg = logistic_test(text, f)
        svm_score, svm = svm_test(text, f)
        joblib.dump(logreg, 'cache/logistic.pkl')
        print 'Logistic test', logreg_score
        print 'SVM test', svm_score
            logreg = joblib.load('cache/logistic.pkl')