Example #1
def train(callback=None, out_weights='weights.h5'):
    reload(audiotransform)
    reload(speechmodel)

    hz = 6000
    repeat = 1
    goalSize = 30000 # samples after padding
    embedSize = 10

    model = speechmodel.makeModel()

    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

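    # First pass: scan the sample set, keeping only clips that crop and pad
    # cleanly, and collect the vocabulary in the order words are first seen.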
    paths = []
    words = []
    for p in sampleSet1(): # or findSounds(words)
        try:
            raw = load(p, hz=hz)
            crop = audiotransform.autoCrop(raw, rate=hz)
            audiotransform.randomPad(crop, goalSize) # must not error
            print 'using %s cropped to %s samples' % (p, len(crop))
        except audiotransform.TooQuiet:
            print '%s too quiet' % p
            continue
        paths.append(p)
        word = soundFields(p)['word']
        if word not in words:
            words.append(word)

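    # Second pass: build the training matrices. Each clip contributes `repeat`
    # rows, re-padded and re-scaled each time as light augmentation.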
    x = numpy.zeros((len(paths) * repeat, goalSize), dtype=numpy.float)
    y = numpy.zeros((len(paths) * repeat, embedSize), dtype=numpy.float)

    for row, p in enumerate(paths * repeat):
        audio = load(p, hz=hz)
        audio = audiotransform.autoCrop(audio, rate=hz)
        #audio = audiotransform.rightPad(audio, goalSize)
        audio = audiotransform.randomPad(audio, goalSize, path=p)
        audio = audiotransform.randomScale(audio)
        x[row,:] = audio
        y[row,:] = np_utils.to_categorical(words.index(soundFields(p)['word']),
                                           embedSize)
        if callback:
            callback.loaded_sound(row, len(paths) * repeat)

    callbacks = []
    #callbacks.append(keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=1, write_graph=True))
    if callback:
        callbacks.append(callback)

    model.fit(x, y, batch_size=100, nb_epoch=20, validation_split=.0,
              shuffle=True,
              callbacks=callbacks)

    model.save_weights(out_weights)
    if callback:
        callback.on_save(out_weights, fileSize=os.path.getsize(out_weights))
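
Note: the `callback` argument in this example is used both as a progress
reporter (`loaded_sound`, `on_save`) and as a Keras training callback, since it
is also appended to the list passed to `model.fit`. A minimal sketch of an
object satisfying that interface follows; the method names come from the calls
above, while the class name and print formatting are assumptions.

import keras

class TrainProgress(keras.callbacks.Callback):  # hypothetical helper, not from the source
    def loaded_sound(self, row, total):
        # called once per training row as the audio is loaded and transformed
        print 'loaded sound %s of %s' % (row + 1, total)

    def on_save(self, path, fileSize):
        # called after the weights file has been written
        print 'saved %s (%s bytes)' % (path, fileSize)
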
Example #2
def findSounds(words):
    # incomplete, no user filtering
    top = FilePath('sounds/incoming/13EubbAsOYgy3eZX4LAHsB5Hzq72/will')
    for p in sorted(top.walk()):
        if p.isfile():
            word = soundFields(p.path)['word']
            yield p.path
Example #3
def findSounds(words):
    # incomplete, no user filtering
    top = FilePath('sounds/incoming/')
    for p in sorted(top.walk()):
        if p.isfile():
            word = soundFields(p.path)['word']
            if word not in words:
                continue
            yield p.path
Example #4
def get(self):
    top = FilePath('sounds')
    self.write({
        'sounds': [{
            'path': '/'.join(p.segmentsFrom(top)),
            'fields': soundFields('/'.join(p.segmentsFrom(top))),
        } for p in sorted(top.walk()) if p.isfile()],
        'hostname': socket.gethostname(),
    })
Example #5
def train(callback=None, out_weights='weights.h5'):
    reload(audiotransform)
    reload(speechmodel)

    model = speechmodel.makeModel()

    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.Nadam(lr=0.00002,
                                                   beta_1=0.9,
                                                   beta_2=0.999,
                                                   epsilon=1e-08,
                                                   schedule_decay=0.004),
                  metrics=['accuracy'])

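    # First pass: keep only clips that load and autocrop successfully, and
    # collect the vocabulary in the order words are first seen.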
    paths = []
    words = []
    for p in sampleSet2():  # or findSounds(words)
        try:
            raw = load(p, hz=speechmodel.rate)
        except Exception:
            print "load failed", p
            continue

        try:
            crop = audiotransform.autoCrop(raw, rate=speechmodel.rate)
            print 'using %s autocropped to %s samples' % (p, len(crop))
        except audiotransform.TooQuiet:
            print '%s too quiet' % p
            continue
        paths.append(p)
        word = soundFields(p)['word']
        if word not in words:
            words.append(word)

    repeat = 2
    x = numpy.zeros((len(paths) * repeat, speechmodel.xWidth),
                    dtype=numpy.float)
    y = numpy.zeros((len(paths) * repeat, speechmodel.embedSize),
                    dtype=numpy.float)

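    # Second pass: build the feature matrices. Unlike the raw-sample version
    # above, each clip is reduced to MFCC features before filling its row.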
    for row, p in enumerate(paths * repeat):
        audio = load(p, hz=speechmodel.rate)
        audio = audiotransform.autoCrop(audio, rate=speechmodel.rate)
        #audio = audiotransform.rightPad(audio, speechmodel.goalSize)
        audio = audiotransform.randomPad(audio, speechmodel.goalSize, path=p)
        audio = audiotransform.randomScale(audio)
        m = mfcc(audio, samplerate=speechmodel.rate)
        x[row, :] = m.reshape((1, speechmodel.xWidth))
        y[row, :] = np_utils.to_categorical(
            words.index(soundFields(p)['word']), speechmodel.embedSize)
        if callback:
            callback.loaded_sound(row, len(paths) * repeat)

    callbacks = []
    #callbacks.append(keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=1, write_graph=True))
    if callback:
        callbacks.append(callback)

    model.fit(x,
              y,
              batch_size=500,
              epochs=500,
              validation_split=.2,
              shuffle=True,
              callbacks=callbacks)

    model.save_weights(out_weights)
    with open(out_weights + '.words', 'w') as f:
        f.write(json.dumps(words) + '\n')
    if callback:
        callback.on_save(out_weights, fileSize=os.path.getsize(out_weights))
Example #6
def sampleSet3():
    return [
        p
        for p in glob.glob('sounds/incoming/d8Lo6MJMqZOGXeGDbnHkpXzeovY2/*/*')
        if soundFields(p)['word'] in ['i', 'like', 'pizza']
    ]