def train(callback=None, out_weights='weights.h5'):
    """Fit the speech model on padded raw audio and save its weights.

    Every path from sampleSet1() is loaded, auto-cropped and trial-padded
    once up front; files that raise audiotransform.TooQuiet are skipped.
    The surviving files are then reloaded, randomly padded to ``goalSize``
    samples and randomly scaled to fill the training matrix. The model is
    fit on that matrix and its weights are written to ``out_weights``.

    Args:
        callback: optional object. When given it is passed to
            ``model.fit`` in the callbacks list, and it is also notified
            through ``loaded_sound(row, total)`` after each training row
            is filled and ``on_save(path, fileSize=...)`` after the
            weights file has been written.
        out_weights: file name handed to ``model.save_weights``.
    """
    # Re-import the helper modules so their current code on disk is used.
    reload(audiotransform)
    reload(speechmodel)

    hz = 6000
    repeat = 1
    goalSize = 30000  # samples after padding
    embedSize = 10

    model = speechmodel.makeModel()
    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    paths = []
    words = []
    for p in sampleSet1():  # or findSounds(words)
        try:
            raw = load(p, hz=hz)
            crop = audiotransform.autoCrop(raw, rate=hz)
            # Trial run of the padding: any error other than TooQuiet
            # propagates here, before the training arrays are built.
            audiotransform.randomPad(crop, goalSize)  # must not error
            print('using %s cropped to %s samples' % (p, len(crop)))
        except audiotransform.TooQuiet:
            print('%s too quiet' % p)
            continue
        paths.append(p)
        word = soundFields(p)['word']
        if word not in words:
            words.append(word)

    total = len(paths) * repeat
    # Builtin float is the float64 dtype that the numpy.float alias used to
    # name; the alias itself no longer exists in current NumPy releases.
    x = numpy.zeros((total, goalSize), dtype=float)
    y = numpy.zeros((total, embedSize), dtype=float)

    for row, p in enumerate(paths * repeat):
        audio = load(p, hz=hz)
        audio = audiotransform.autoCrop(audio, rate=hz)
        #audio = audiotransform.rightPad(audio, goalSize)
        audio = audiotransform.randomPad(audio, goalSize, path=p)
        audio = audiotransform.randomScale(audio)
        x[row, :] = audio
        # The label is the word's position in `words`, encoded by
        # to_categorical with embedSize classes.
        y[row, :] = np_utils.to_categorical(
            words.index(soundFields(p)['word']), embedSize)
        if callback:
            callback.loaded_sound(row, total)

    callbacks = []
    #callbacks.append(keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=1, write_graph=True))
    if callback:
        callbacks.append(callback)

    model.fit(x, y, batch_size=100, nb_epoch=20, validation_split=.0,
              shuffle=True, callbacks=callbacks)

    model.save_weights(out_weights)
    if callback:
        callback.on_save(out_weights, fileSize=os.path.getsize(out_weights))
def findSounds(words):
    """Yield the string path of every file below one hard-coded folder.

    Incomplete: there is no user filtering, and ``words`` is accepted but
    never consulted. Entries are visited in sorted order.
    """
    root = FilePath('sounds/incoming/13EubbAsOYgy3eZX4LAHsB5Hzq72/will')
    for entry in sorted(root.walk()):
        if not entry.isfile():
            continue
        # The word itself is unused; the lookup stays so that any error it
        # raises for a path still surfaces at this point.
        soundFields(entry.path)['word']
        yield entry.path
def findSounds(words):
    """Yield paths of files under sounds/incoming/ whose word is wanted.

    Incomplete: there is no user filtering. Entries are visited in sorted
    order.

    Args:
        words: container of accepted words; a file is yielded only when
            ``soundFields(path)['word']`` is in it.

    Yields:
        The string path (``FilePath.path``) of each matching file.
    """
    top = FilePath('sounds/incoming/')
    for p in sorted(top.walk()):
        if not p.isfile():
            continue
        # Hand soundFields the plain string path (the same value that is
        # yielded) rather than the FilePath object itself.
        if soundFields(p.path)['word'] in words:
            yield p.path
def get(self):
    """Write a listing of every file under ``sounds`` plus the hostname.

    Each listing entry holds the file's '/'-joined path relative to the
    ``sounds`` directory and the result of soundFields() for that
    relative path. Entries appear in sorted order.
    """
    root = FilePath('sounds')
    listing = []
    for entry in sorted(root.walk()):
        if not entry.isfile():
            continue
        relative = '/'.join(entry.segmentsFrom(root))
        listing.append({
            'path': relative,
            'fields': soundFields(relative),
        })
    self.write({
        'sounds': listing,
        'hostname': socket.gethostname(),
    })
def train(callback=None, out_weights='weights.h5'):
    """Fit the speech model on MFCC features and save weights and word list.

    Every path from sampleSet2() is loaded and auto-cropped once up front;
    files that fail to load or raise audiotransform.TooQuiet are reported
    and skipped. Each surviving file is then used ``repeat`` times: it is
    reloaded, cropped, randomly padded and randomly scaled, converted with
    mfcc() and flattened into one row of the training matrix. After
    fitting, the weights go to ``out_weights`` and the word list is written
    as one line of JSON to ``out_weights + '.words'``.

    Args:
        callback: optional object. When given it is passed to
            ``model.fit`` in the callbacks list, and it is also notified
            through ``loaded_sound(row, total)`` after each training row
            is filled and ``on_save(path, fileSize=...)`` after the
            weights file has been written.
        out_weights: file name handed to ``model.save_weights``.
    """
    # Re-import the helper modules so their current code on disk is used.
    reload(audiotransform)
    reload(speechmodel)

    model = speechmodel.makeModel()
    model.compile(
        loss='mean_squared_error',
        optimizer=keras.optimizers.Nadam(lr=0.00002,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=1e-08,
                                         schedule_decay=0.004),
        metrics=['accuracy'])

    paths = []
    words = []
    for p in sampleSet2():  # or findSounds(words)
        try:
            raw = load(p, hz=speechmodel.rate)
        except Exception:
            # Best-effort skip of unreadable files. Catching Exception
            # (not a bare except) lets KeyboardInterrupt and SystemExit
            # still stop the run.
            print('load failed %s' % (p,))
            continue
        try:
            crop = audiotransform.autoCrop(raw, rate=speechmodel.rate)
            print('using %s autocropped to %s samples' % (p, len(crop)))
        except audiotransform.TooQuiet:
            print('%s too quiet' % p)
            continue
        paths.append(p)
        word = soundFields(p)['word']
        if word not in words:
            words.append(word)

    repeat = 2
    total = len(paths) * repeat
    # Builtin float is the float64 dtype that the numpy.float alias used to
    # name; the alias itself no longer exists in current NumPy releases.
    x = numpy.zeros((total, speechmodel.xWidth), dtype=float)
    y = numpy.zeros((total, speechmodel.embedSize), dtype=float)

    for row, p in enumerate(paths * repeat):
        audio = load(p, hz=speechmodel.rate)
        audio = audiotransform.autoCrop(audio, rate=speechmodel.rate)
        #audio = audiotransform.rightPad(audio, speechmodel.goalSize)
        audio = audiotransform.randomPad(audio, speechmodel.goalSize, path=p)
        audio = audiotransform.randomScale(audio)
        m = mfcc(audio, samplerate=speechmodel.rate)
        # Flatten the mfcc output into a single row of xWidth values.
        x[row, :] = m.reshape((1, speechmodel.xWidth))
        # The label is the word's position in `words`, encoded by
        # to_categorical with speechmodel.embedSize classes.
        y[row, :] = np_utils.to_categorical(
            words.index(soundFields(p)['word']), speechmodel.embedSize)
        if callback:
            callback.loaded_sound(row, total)

    callbacks = []
    #callbacks.append(keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=1, write_graph=True))
    if callback:
        callbacks.append(callback)

    model.fit(x, y, batch_size=500, epochs=500, validation_split=.2,
              shuffle=True, callbacks=callbacks)

    model.save_weights(out_weights)
    # Store the word order next to the weights; label indices refer to it.
    with open(out_weights + '.words', 'w') as f:
        f.write(json.dumps(words) + '\n')
    if callback:
        callback.on_save(out_weights, fileSize=os.path.getsize(out_weights))
def sampleSet3():
    """Return paths from one hard-coded folder whose word is i/like/pizza.

    Paths come from a two-level glob below the folder and are kept, in
    glob order, when ``soundFields(path)['word']`` is one of the three
    wanted words.
    """
    wanted = ['i', 'like', 'pizza']
    selected = []
    for path in glob.glob('sounds/incoming/d8Lo6MJMqZOGXeGDbnHkpXzeovY2/*/*'):
        if soundFields(path)['word'] in wanted:
            selected.append(path)
    return selected