def build_tensorboard(tmp_generator, tb_folder):
    """Create a Keras TensorBoard callback pre-loaded with one validation batch.

    Clears stale event files from *tb_folder*, then assembles the
    ``validation_data`` list in the layout the TensorBoard callback expects:
    input arrays, target arrays, one sample-weight vector per target, and the
    learning-phase flag (0.0 = test mode).

    Args:
        tmp_generator: iterator yielding ``(x, y)`` batches; ``x`` is assumed
            to be a list of input arrays — TODO confirm against the caller.
        tb_folder: directory holding TensorBoard event files.

    Returns:
        A configured ``TensorBoard`` callback instance.
    """
    # Remove old event files so TensorBoard shows only the current run.
    for a_file in os.listdir(tb_folder):
        file_path = join(tb_folder, a_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            # Best-effort cleanup: report the failure but keep going.
            print(e, file=sys.stderr)

    tb = TensorBoard(tb_folder, write_graph=False, histogram_freq=1,
                     write_grads=True, write_images=False)

    x, y = next(tmp_generator)
    # BUG FIX: copy the batch instead of aliasing it — the original code
    # assigned x directly and then mutated it in place, silently corrupting
    # the generator's own batch.
    tb.validation_data = list(x)
    tb.validation_data[1] = np.expand_dims(tb.validation_data[1], axis=-1)
    if isinstance(y, list):
        num_targets = len(y)
        tb.validation_data += list(y)  # original '[y[0]] + y[1:]' was just y
    else:
        tb.validation_data += [y]
        num_targets = 1
    # One uniform sample-weight vector per target, then the learning-phase flag.
    tb.validation_data += [np.ones(x[0].shape[0])] * num_targets + [0.0]
    return tb
def train(model_name):
    """Train *model_name* in rounds of ``epochs`` epochs until stopped.

    Loads the model from disk, then repeatedly pulls ``(inputs, outputs)``
    batches from ``train_generator`` and fits on them, checkpointing on loss
    improvement and logging to TensorBoard.  Training stops when the global
    ``running`` flag is cleared or when memory is exhausted.

    Args:
        model_name: base name used for the model file, the checkpoint file
            ``models/<name>.hdf5``, and the TensorBoard log directory.
    """
    model = load_model(model_name)
    train_gen = train_generator(model.output_shape[3], (['wnp'], ['fill']),
                                (height, width), 64)
    check_pointer = ModelCheckpoint(filepath='models/' + model_name + '.hdf5',
                                    verbose=1, save_best_only=True,
                                    monitor='loss')
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.3, verbose=1,
                                  cooldown=10)
    tensor_board = TensorBoard(log_dir='models/' + model_name + '/')
    # NOTE(review): validation_data is injected from a module-level global —
    # presumably prepared elsewhere in this file; verify it matches the model.
    tensor_board.validation_data = input_samples

    epoch = 0
    epochs = 20
    try:
        for i, o in train_gen:
            if not running:  # external stop flag set by another thread/signal
                break
            check_memory()
            calc_sample_results(model)
            # BUG FIX: the ReduceLROnPlateau callback was constructed but
            # never registered, so the LR schedule never ran; include it.
            model.fit(i, o, epochs=epoch + epochs, initial_epoch=epoch,
                      callbacks=[check_pointer, tensor_board, reduce_lr],
                      batch_size=6)
            epoch += epochs
    except MemoryError:
        # Out of memory: abandon training; the checkpoint file already holds
        # the best weights seen so far.
        return
def main():
    """Train an autoregressive generative model on the TIMIT corpus.

    Builds the model and mu-law quantizer, wires up training/validation data
    generators and callbacks (TensorBoard, checkpointing, LR reduction,
    NaN termination, early stopping), runs training, and saves the final
    weights to ``models/gm.h5``.
    """
    corpus = TIMIT(dirpath=TIMIT_DATA_DIR)
    quantizer = MuLawQuantizer(k=256)
    gm = AutoregressiveGenerativeModel(quantizer, nbChannels=corpus.nbChannels,
                                       nbFilters=64, name='gm')

    modelPath = os.path.join(CDIR, 'models')
    if not os.path.exists(modelPath):
        os.makedirs(modelPath)
    modelFilename = os.path.join(modelPath, 'gm.h5')

    # Timestamped log directory so successive runs do not overwrite each other.
    logPath = os.path.join(
        CDIR, 'logs', 'gm',
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(logPath):
        os.makedirs(logPath)

    gm.compile(optimizer=Adam(1e-2),
               loss='categorical_crossentropy',
               metrics=['categorical_accuracy'])
    # BUG FIX: Model.summary() prints the summary itself and returns None,
    # so the original print(gm.summary()) emitted a spurious "None" line.
    gm.summary()

    # NOTE: memory requirement for processing speech on the raw waveform is
    # too high to have a large batch size
    batchSize = 8

    trainData = corpus.trainData()
    trainGenerator = AutoregressiveGenerativeModelDataGenerator(
        gm, trainData, corpus.fs, batchSize)
    logger.info('Number of audio samples in training set: %d' % (len(trainData)))

    testData = corpus.testData()
    testGenerator = AutoregressiveGenerativeModelDataGenerator(
        gm, testData, corpus.fs, batchSize)
    logger.info('Number of audio samples in test set: %d' % (len(testData)))

    callbacks = []

    tensorboard = TensorBoard(logPath, histogram_freq=1, write_graph=False,
                              batch_size=batchSize, write_grads=True)
    # Small dedicated generator (first 10 test samples) feeds the histogram
    # summaries; layout is [inputs, targets, sample weights].
    tensorboardGenerator = AutoregressiveGenerativeModelDataGenerator(
        gm, testData[:10], corpus.fs, batchSize)
    x, y = tensorboardGenerator[0]
    tensorboard.validation_data = [
        x,                # X
        y,                # y
        np.ones(len(x)),  # sample weights
    ]
    callbacks.append(tensorboard)

    checkpointer = ModelCheckpoint(modelFilename, monitor='val_loss',
                                   save_best_only=True, save_weights_only=True)
    callbacks.append(checkpointer)
    callbacks.append(
        ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=1e-5))
    callbacks.append(TerminateOnNaN())
    callbacks.append(
        EarlyStopping(monitor='val_loss', min_delta=1e-6, patience=50,
                      mode='auto'))

    try:
        gm.fit_generator(trainGenerator,
                         epochs=200,
                         validation_data=testGenerator,
                         use_multiprocessing=False,
                         shuffle=True,
                         verbose=1,
                         callbacks=callbacks)
    except KeyboardInterrupt:
        logger.info("Training interrupted by the user")
        # BUG FIX: weights were saved twice on interrupt (once here, once
        # below); the unconditional save below is sufficient.

    gm.save_weights(modelFilename)
    logger.info('All done.')