def test_cnn_blstm_fit(self):
    """Smoke test: the CNN-BLSTM model trains for one epoch on the sample CSV.

    The test passes when building the generator, compiling the model and
    running ``fit_generator`` all complete without raising.
    """
    csv_path = "data_dir/sample_librivox-test-clean.csv"
    _props, frame = combine_all_wavs_and_trans_from_csvs(csv_path)

    # Spectrogram features: the model's input_dim (40) equals n_mels (40).
    generator_kwargs = dict(
        feature_type='spectrogram',
        batch_size=6,
        frame_length=320,
        hop_length=160,
        n_mels=40,
        epoch_length=0,
        shuffle=True,
    )
    batches = DataGenerator(frame, **generator_kwargs)

    network = models.cnn_blstm(
        units=256,
        input_dim=40,
        output_dim=29,
        dropout=0.2,
        cudnn=False,
        n_layers=1,
    )
    network.compile(loss=self.loss, optimizer=self.optimizer)

    # A single silent epoch is enough to exercise the training path.
    network.fit_generator(generator=batches, epochs=1, verbose=0)
def test_brnn_fit(self):
    """Smoke test: the BRNN model trains for one epoch on the sample CSV.

    The test passes when building the generator, compiling the model and
    running ``fit_generator`` all complete without raising.
    """
    csv_path = "data_dir/sample_librivox-test-clean.csv"
    _props, frame = combine_all_wavs_and_trans_from_csvs(csv_path)

    # MFCC features: the model's input_dim (26) equals mfcc_features (26).
    generator_kwargs = dict(
        feature_type='mfcc',
        batch_size=6,
        frame_length=320,
        hop_length=160,
        n_mels=40,
        mfcc_features=26,
        epoch_length=0,
        shuffle=True,
    )
    batches = DataGenerator(frame, **generator_kwargs)

    network = models.brnn(
        units=256,
        input_dim=26,
        output_dim=29,
        dropout=0.2,
        numb_of_dense=3,
    )
    network.compile(loss=self.loss, optimizer=self.optimizer)

    # A single silent epoch is enough to exercise the training path.
    network.fit_generator(generator=batches, epochs=1, verbose=0)
def main(args):
    """Train a CTC speech-recognition model from parsed command-line arguments.

    Steps:
      1. Load the training and validation CSVs into dataframes.
      2. Build batch generators for both sets.
      3. Load a model checkpoint, or build a new model for ``args.model_arch``.
      4. Compile with the requested optimiser and train with ``fit_generator``.

    Args:
        args: Parsed arguments. Reads ``train_files``, ``valid_files``,
            ``model_arch``, ``batchsize``, ``loadcheckpointpath``, ``fc_size``,
            ``rnn_size``, ``opt``, ``learning_rate``, ``train_steps``,
            ``valid_steps``, ``memcheck``, ``tensorboard``, ``name`` and
            ``epochs``. ``train_steps`` and ``valid_steps`` are overwritten
            in place when they are 0.

    Raises:
        ValueError: If ``args.loadcheckpointpath`` is set but is not a
            directory, or if ``args.model_arch`` is not a known architecture.
        Exception: If ``args.opt`` is not one of sgd / adam / nadam.
    """
    # 1. combine all data into 2 dataframes (train, valid)
    print("Getting data from arguments")
    _, df_train = combine_all_wavs_and_trans_from_csvs(args.train_files)
    _, df_valid = combine_all_wavs_and_trans_from_csvs(args.valid_files)

    # Architectures 2 and 5 consume spectrograms; every other one uses MFCCs.
    if args.model_arch in (2, 5):
        print("Spectrogram required")
        model_input_type = "spectrogram"
    else:
        model_input_type = "mfcc"

    # 2. init data generators
    print("Creating data batch generators")
    traindata = BatchGenerator(dataframe=df_train, training=True,
                               batch_size=args.batchsize,
                               model_input_type=model_input_type)
    validdata = BatchGenerator(dataframe=df_valid, training=False,
                               batch_size=args.batchsize,
                               model_input_type=model_input_type)

    # Peek at the first batch to learn the per-sample input/label shapes
    # (only the "own model" recipe, arch 3, consumes them).
    inputs, _ = traindata.get_batch(0)
    input_shape = inputs['the_input'].shape[1:]
    output_shape = inputs['the_labels'].shape[1:]

    output_dir = os.path.join('checkpoints/results', 'model')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # 3. Load existing or create new model
    if args.loadcheckpointpath:
        print("Loading model")
        cp = args.loadcheckpointpath
        # Explicit check instead of `assert`, which is stripped under -O.
        if not os.path.isdir(cp):
            raise ValueError(
                "checkpoint path is not a directory: {}".format(cp))
        model_path = os.path.join(cp, "model")
        model = load_model_checkpoint(model_path)
        print("Model loaded")
    else:
        # new model recipes here
        print('New model DS{}'.format(args.model_arch))
        if args.model_arch == 0:
            # DeepSpeech1 with Dropout
            model = ds1_dropout(input_dim=26, fc_size=args.fc_size,
                                rnn_size=args.rnn_size,
                                dropout=[0.1, 0.1, 0.1], output_dim=29)
        elif args.model_arch == 1:
            # DeepSpeech1 - no dropout
            model = ds1(input_dim=26, fc_size=args.fc_size,
                        rnn_size=args.rnn_size, output_dim=29)
        elif args.model_arch == 2:
            # DeepSpeech2 model
            model = ds2_gru_model(input_dim=161, fc_size=args.fc_size,
                                  rnn_size=args.rnn_size, output_dim=29)
        elif args.model_arch == 3:
            # own model
            model = ownModel(input_shape, output_shape,
                             fc_size=args.fc_size, rnn_size=args.rnn_size,
                             dropout=[0.1, 0.1, 0.1], output_dim=29)
        elif args.model_arch == 4:
            # graves model
            model = graves(input_dim=26, rnn_size=args.rnn_size,
                           output_dim=29, std=0.5)
        elif args.model_arch == 5:
            # cnn city
            model = cnn_city(input_dim=161, fc_size=args.fc_size,
                             rnn_size=args.rnn_size, output_dim=29)
        elif args.model_arch == 6:
            # constrained model
            model = const(input_dim=26, fc_size=args.fc_size,
                          rnn_size=args.rnn_size, output_dim=29)
        else:
            # Raising a bare string is itself a TypeError on Python 3;
            # raise a real exception so the message reaches the user.
            raise ValueError("model not found")

        # summary() prints the table itself and returns None, so it must not
        # be wrapped in print().
        model.summary(line_length=140)

        # required to save the JSON
        save_model(model, output_dir)

    optimiser_name = args.opt.lower()
    if optimiser_name == 'sgd':
        opt = SGD(lr=args.learning_rate, decay=1e-6, momentum=0.9,
                  nesterov=True, clipnorm=5)
    elif optimiser_name == 'adam':
        opt = Adam(lr=args.learning_rate, beta_1=0.9, beta_2=0.999,
                   epsilon=1e-8, clipnorm=5)
    elif optimiser_name == 'nadam':
        opt = Nadam(lr=args.learning_rate, beta_1=0.9, beta_2=0.999,
                    epsilon=1e-8, clipnorm=5)
    else:
        raise Exception("optimiser not recognised")

    model.compile(optimizer=opt, loss=ctc)

    # 4. train
    # A step count of 0 means "one full pass over the dataframe".
    if args.train_steps == 0:
        args.train_steps = len(df_train.index) // args.batchsize
    if args.valid_steps == 0:
        args.valid_steps = len(df_valid.index) // args.batchsize

    cb_list = [MemoryCallback()] if args.memcheck else []

    if args.tensorboard:
        tb_cb = TensorBoard(log_dir='./tensorboard/{}/'.format(args.name),
                            write_graph=False, write_images=True)
        cb_list.append(tb_cb)

    # Backend function mapping the network input to the first input of the
    # 'ctc' layer; handed to the report callback together with validdata.
    y_pred = model.get_layer('ctc').input[0]
    input_data = model.get_layer('the_input').input
    report = K.function([input_data, K.learning_phase()], [y_pred])
    report_cb = ReportCallback(report, validdata, model, args.name, save=True)
    cb_list.append(report_cb)

    model.fit_generator(
        generator=traindata.next_batch(),
        steps_per_epoch=args.train_steps,
        epochs=args.epochs,
        callbacks=cb_list,
        validation_data=validdata.next_batch(),
        validation_steps=args.valid_steps,
    )

    # These are the most important metrics
    print("Mean WER :", report_cb.mean_wer_log)
    print("Mean LER :", report_cb.mean_ler_log)
    print("NormMeanLER:", report_cb.norm_mean_ler_log)

    K.clear_session()
def main(args):
    """Evaluate a trained checkpoint on the test CSVs and print the report.

    Args:
        args: Parsed arguments. Reads ``test_files``, ``model_arch``,
            ``loadcheckpointpath`` and ``name``.

    Raises:
        ValueError: If ``args.loadcheckpointpath`` is empty (there is nothing
            to evaluate) or is not a directory.
    """
    print("Getting data from arguments")
    test_dataprops, df_test = combine_all_wavs_and_trans_from_csvs(
        args.test_files, sortagrad=False)

    # Architectures 2 and 5 consume spectrograms; every other one uses MFCCs.
    if args.model_arch in (2, 5):
        print("Spectrogram required")
        model_input_type = "spectrogram"
    else:
        model_input_type = "mfcc"

    # 2. init data generators
    print("Creating data batch generators")
    testdata = BatchGenerator(dataframe=df_test,
                              dataproperties=test_dataprops,
                              training=False, batch_size=1,
                              model_input_type=model_input_type)

    # 3. Load existing or error
    if not args.loadcheckpointpath:
        # Raising a bare string is itself a TypeError on Python 3; raise a
        # real exception so the message reaches the user.
        raise ValueError("You need to load an existing trained model")

    print("Loading model")
    cp = args.loadcheckpointpath
    # Explicit check instead of `assert`, which is stripped under -O.
    if not os.path.isdir(cp):
        raise ValueError("checkpoint path is not a directory: {}".format(cp))

    # Manual switch: set to True to evaluate the "TRIMMED_ds_model" checkpoint.
    trimmed = False
    model_name = "TRIMMED_ds_model" if trimmed else "model"
    model = load_model_checkpoint(os.path.join(cp, model_name))
    opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    print("Model loaded")

    model.compile(optimizer=opt, loss=ctc)

    # 4. test
    try:
        y_pred = model.get_layer('ctc').input[0]
    except Exception as e:
        # Best-effort fallback: use the 'out' layer when 'ctc' is unavailable.
        print("error", e)
        print(
            "couldn't find ctc layer, possibly a trimmed layer, trying other name"
        )
        y_pred = model.get_layer('out').output

    input_data = model.get_layer('the_input').input

    K.set_learning_phase(0)
    report = K.function([input_data, K.learning_phase()], [y_pred])
    report_cb = ReportCallback(report, testdata, model, args.name, save=False)
    # Invoke the epoch-end hook directly instead of going through training.
    report_cb.force_output = True
    report_cb.on_epoch_end(0, logs=None)

    K.clear_session()
def main(args):
    """Train a CTC speech model on LibriSpeech CSVs (Python 2 script entry point).

    Reads the three hard-coded CSV paths, builds training / validation / test
    data generators, loads or creates a model, attaches the optional
    callbacks, trains on CPU / one GPU / an even number of GPUs, and finally
    saves the model when ``args.model_save`` is set.

    Args:
        args: Parsed arguments. Reads ``batch_size``, ``epoch_len``,
            ``epochs``, ``lr``, ``log_file``, ``num_gpu``, ``feature_type``,
            ``mfccs``, ``mels``, ``model_type``, ``units``, ``dropout``,
            ``layers``, ``model_save``, ``checkpoint``, ``model_load``,
            ``load_multi``, ``save_best_val``, ``shuffle_indexes``,
            ``reduce_lr`` and ``early_stopping``.

    Note:
        Every ``Exception`` raised inside the ``try`` block -- including the
        ``ValueError``s raised below for invalid GPU counts -- is caught,
        printed, and not re-raised.
    """
    # Paths to .csv files
    path = "data_dir/librivox-train-clean-360.csv"
    path_validation = "data_dir/librivox-dev-clean.csv"
    path_test = "data_dir/librivox-test-clean.csv"

    # Create dataframes
    print "\nReading training data:"
    _, input_dataframe = combine_all_wavs_and_trans_from_csvs(path)
    print "\nReading validation data: "
    _, validation_df = combine_all_wavs_and_trans_from_csvs(path_validation)
    print "\nReading test data: "
    _, test_df = combine_all_wavs_and_trans_from_csvs(path_test)

    # Training params:
    batch_size = args.batch_size
    input_epoch_length = args.epoch_len
    epochs = args.epochs
    learning_rate = args.lr
    log_file = args.log_file

    # Multi GPU or single GPU / CPU training
    num_gpu = args.num_gpu

    # Preprocessing params
    feature_type = args.feature_type
    mfcc_features = args.mfccs
    n_mels = args.mels

    # Model params
    model_type = args.model_type
    units = args.units
    dropout = args.dropout
    n_layers = args.layers

    # Saving and loading params
    model_save = args.model_save
    checkpoint = args.checkpoint
    model_load = args.model_load
    load_multi = args.load_multi

    # Additional settings for training
    save_best = args.save_best_val  # Save model with best val_loss (on path "model_save" + "_best")
    shuffle = args.shuffle_indexes
    reduce_lr = args.reduce_lr  # Reduce learning rate on val_loss plateau
    early_stopping = args.early_stopping  # Stop training early if val_loss stops improving

    frequency = 16  # Sampling rate of data in khz (LibriSpeech is 16khz)
    cudnnlstm = False

    # Data generation parameters.
    # With `frequency` in kHz (samples per millisecond), 20 * frequency and
    # 10 * frequency are the sample counts of a 20 ms frame and a 10 ms hop.
    data_params = {
        'feature_type': feature_type,
        'batch_size': batch_size,
        'frame_length': 20 * frequency,
        'hop_length': 10 * frequency,
        'mfcc_features': mfcc_features,
        'n_mels': n_mels,
        'epoch_length': input_epoch_length,
        'shuffle': shuffle
    }

    # Data generators for training, validation and testing data
    training_generator = DataGenerator(input_dataframe, **data_params)
    validation_generator = DataGenerator(validation_df, **data_params)
    test_generator = DataGenerator(test_df, **data_params)

    # Model input shape: one value per MFCC coefficient, otherwise one per mel band
    if feature_type == 'mfcc':
        input_dim = mfcc_features
    else:
        input_dim = n_mels

    output_dim = 29  # Output dim: features to predict + 1 for the CTC blank prediction

    # Optimization algorithm used to update network weights
    optimizer = Adam(lr=learning_rate, epsilon=1e-8, clipnorm=2.0)

    # Dummy loss-function for compiling model, actual CTC loss-function defined as a lambda layer in model
    loss = {'ctc': lambda y_true, y_pred: y_pred}

    # Print training data at the beginning of training
    calc_epoch_length = training_generator.__len__()
    print "\n\nModel and training parameters: "
    print "Starting time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print " - epochs: ", epochs, "\n - batch size: ", batch_size, \
        "\n - input epoch length: ", input_epoch_length, "\n - network epoch length: ", calc_epoch_length, \
        "\n - training on ", calc_epoch_length * batch_size, " files", "\n - learning rate: ", learning_rate, \
        "\n - hidden units: ", units, "\n - mfcc features: ", mfcc_features, "\n - dropout: ", dropout, "\n"

    try:
        # Load previous model or create new. With device cpu ensures that the model is created/loaded on the cpu
        if model_load:
            with tf.device('/cpu:0'):
                # When loading custom objects, Keras needs to know where to find them.
                # The CTC lambda is a dummy function
                custom_objects = {
                    'clipped_relu': models.clipped_relu,
                    '<lambda>': lambda y_true, y_pred: y_pred
                }

                # When loading a parallel model saved *while* running on multiple GPUs, use load_multi
                if load_multi:
                    model = models.load_model(model_load, custom_objects=custom_objects)
                    # NOTE(review): presumably the second-to-last layer of the saved
                    # parallel wrapper is the underlying single model -- confirm.
                    model = model.layers[-2]
                    print "Loaded existing model at: ", model_load

                # Load single GPU/CPU model or model saved *after* finished training
                else:
                    model = models.load_model(model_load, custom_objects=custom_objects)
                    print "Loaded existing model at: ", model_load

        else:
            with tf.device('/cpu:0'):
                # Create new model
                model = models.model(model_type=model_type, units=units, input_dim=input_dim,
                                     output_dim=output_dim, dropout=dropout, cudnn=cudnnlstm, n_layers=n_layers)
                print "Creating new model: ", model_type

        # Loss callback parameters
        loss_callback_params = {
            'validation_gen': validation_generator,
            'test_gen': test_generator,
            'checkpoint': checkpoint,
            'path_to_save': model_save,
            'log_file_path': log_file
        }

        # Model training parameters
        model_train_params = {
            'generator': training_generator,
            'epochs': epochs,
            'verbose': 2,
            'validation_data': validation_generator,
            'workers': 1,
            'shuffle': shuffle
        }

        # Optional callbacks for added functionality
        # Reduces learning rate when val_loss stagnates.
        if reduce_lr:
            print "Reducing learning rate on plateau"
            reduce_lr_cb = ReduceLROnPlateau(factor=0.2, patience=5, verbose=0, epsilon=0.1, min_lr=0.0000001)
            callbacks = [reduce_lr_cb]
        else:
            callbacks = []

        # Stops the model early if the val_loss isn't improving
        if early_stopping:
            es_cb = EarlyStopping(min_delta=0, patience=5, verbose=0, mode='auto')
            callbacks.append(es_cb)

        # Saves the model if val_loss is improved at "model_save" + "_best"
        if save_best:
            # `save_best` is rebound here from the flag to the checkpoint path.
            save_best = model_save + str('_best')
            mcp_cb = ModelCheckpoint(save_best, verbose=1, save_best_only=True, period=1)
            callbacks.append(mcp_cb)

        # Train with parallel model on 2 or more GPUs, must be even number
        if num_gpu > 1:
            if num_gpu % 2 == 0:
                # Compile parallel model for training on GPUs > 1
                parallel_model = multi_gpu_model(model, gpus=num_gpu)
                parallel_model.compile(loss=loss, optimizer=optimizer)

                # Print model summary
                model.summary()

                # Creates a test function that takes sound input and outputs predictions
                # Used to calculate WER while training the network
                input_data = model.get_layer('the_input').input
                y_pred = model.get_layer('ctc').input[0]
                test_func = K.function([input_data], [y_pred])

                # The loss callback function that calculates WER while training.
                # It receives the template `model`, not `parallel_model`.
                loss_cb = LossCallback(test_func=test_func, model=model, **loss_callback_params)
                callbacks.append(loss_cb)

                # Run training
                parallel_model.fit_generator(callbacks=callbacks, **model_train_params)

            else:
                # Caught and printed by the except clause below.
                raise ValueError('Number of GPUs must be an even number')

        # Train with CPU or single GPU
        elif num_gpu == 1 or num_gpu == 0:
            # Compile model for training on GPUs < 2
            model.compile(loss=loss, optimizer=optimizer)

            # Print model summary
            model.summary()

            # Creates a test function that takes preprocessed sound input and outputs predictions
            # Used to calculate WER while training the network
            input_data = model.get_layer('the_input').input
            y_pred = model.get_layer('ctc').input[0]
            test_func = K.function([input_data], [y_pred])

            # The loss callback function that calculates WER while training
            loss_cb = LossCallback(test_func=test_func, model=model, **loss_callback_params)
            callbacks.append(loss_cb)

            # Run training
            model.fit_generator(callbacks=callbacks, **model_train_params)

        else:
            # Reached for negative GPU counts; caught and printed by the except clause below.
            raise ValueError('Not a valid number of GPUs: ', num_gpu)

        # In the multi-GPU branch this saves the template `model`, not `parallel_model`.
        if args.model_save:
            model.save(model_save)
            print "Model saved: ", model_save

    # ArithmeticError is a subclass of Exception, so listing it is redundant.
    # Errors are reported on stdout and swallowed; nothing propagates to the caller.
    except (Exception, ArithmeticError) as e:
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(e).__name__, e.args)
        print message

    finally:
        # Clear memory
        K.clear_session()
    print "Ending time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
def setUp(self):
    """Build the shared fixtures: the sample dataframe and a generator over it."""
    sample_csv = "data_dir/sample_librivox-test-clean.csv"
    # Only the second element of the returned pair (the dataframe) is kept.
    _, self.df = combine_all_wavs_and_trans_from_csvs(sample_csv)
    generator_options = {'batch_size': 10, 'epoch_length': 10}
    self.dg = DataGenerator(self.df, **generator_options)
def main(args):
    """Load a trained model and print its predictions for one batch.

    Optionally computes the average WER over the whole dataset first when
    ``args.calc_wer`` is set.

    Args:
        args: Parsed arguments. Reads ``model_load``, ``load_multi``,
            ``audio_dir``, ``batch_size``, ``batch_index``, ``mfccs``,
            ``mels``, ``feature_type`` and ``calc_wer``.

    Note:
        Ordinary errors (``Exception`` subclasses) are reported on stdout and
        not re-raised. ``KeyboardInterrupt`` and ``SystemExit`` are allowed
        to propagate so the script can still be interrupted; the Keras
        session is cleared in every case.
    """
    try:
        if not args.model_load:
            raise ValueError("Error within model arguments")
        audio_dir = args.audio_dir

        print("\nReading test data: ")
        _, df = combine_all_wavs_and_trans_from_csvs(audio_dir)

        batch_size = args.batch_size
        batch_index = args.batch_index

        mfcc_features = args.mfccs
        n_mels = args.mels
        # Sampling rate of data in khz (LibriSpeech is 16khz)
        frequency = 16

        model_load = args.model_load
        load_multi = args.load_multi

        # Sets the full dataset in audio_dir to be available through data_generator
        # The data_generator doesn't actually load the audio files until they are requested through __get_item__()
        epoch_length = 0

        # Load trained model
        # When loading custom objects, Keras needs to know where to find them.
        # The CTC lambda is a dummy function
        custom_objects = {
            'clipped_relu': models.clipped_relu,
            '<lambda>': lambda y_true, y_pred: y_pred
        }

        model = models.load_model(model_load, custom_objects=custom_objects)
        if load_multi:
            # A parallel model saved *while* running on GPU: the model to use
            # is taken from the second-to-last layer of the loaded object.
            model = model.layers[-2]
        print("\nLoaded existing model: ", model_load)

        # Dummy loss-function to compile model, actual CTC loss-function defined as a lambda layer in model
        loss = {'ctc': lambda y_true, y_pred: y_pred}
        model.compile(loss=loss, optimizer='Adam')

        feature_shape = model.input_shape[0][2]

        # Without an explicit feature type, infer it from the model's input
        # width: 26 is treated as MFCC, anything else as spectrogram.
        if args.feature_type:
            feature_type = args.feature_type
        elif feature_shape == 26:
            feature_type = 'mfcc'
        else:
            feature_type = 'spectrogram'
        print("Feature type: ", feature_type)

        # Data generation parameters
        data_params = {
            'feature_type': feature_type,
            'batch_size': batch_size,
            'frame_length': 20 * frequency,
            'hop_length': 10 * frequency,
            'mfcc_features': mfcc_features,
            'n_mels': n_mels,
            'epoch_length': epoch_length,
            'shuffle': False
        }

        data_generator = DataGenerator(df, **data_params)

        model.summary()

        # Backend function mapping the network input to the first input of
        # the 'ctc' layer; used for both WER calculation and predictions.
        input_data = model.get_layer('the_input').input
        y_pred = model.get_layer('ctc').input[0]
        test_func = K.function([input_data], [y_pred])

        if args.calc_wer:
            print("\n - Calculation WER on ", audio_dir)
            wer = calc_wer(test_func, data_generator)
            print("Average WER: ", wer[1])

        predictions = predict_on_batch(data_generator, test_func, batch_index)
        print("\n - Predictions from batch index: ", batch_index,
              "\nFrom: ", audio_dir, "\n")
        for original, predicted in ((p[0], p[1]) for p in predictions):
            print("Original: ", original)
            print("Predicted: ", predicted, "\n")

    # Catch Exception only: the previous tuple included BaseException, which
    # also swallowed KeyboardInterrupt and SystemExit.
    except Exception as e:
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(e).__name__, e.args)
        print("e.args: ", e.args)
        print(message)

    finally:
        # Clear memory
        K.clear_session()
print( "couldn't find ctc layer, possibly a trimmed layer, trying other name" ) y_pred = model.get_layer('out').output input_data = model.get_layer('the_input').input K.set_learning_phase(0) #2. record data and put it in live folder LOOP while 1: startloop(1) args.test_files = "./data/live/live.csv" print("Getting data from arguments") test_dataprops, df_test = combine_all_wavs_and_trans_from_csvs( args.test_files, sortagrad=False) ## x. init data generators print("Creating data batch generators") testdata = BatchGenerator(dataframe=df_test, dataproperties=test_dataprops, training=False, batch_size=1, model_input_type=model_input_type) ## RUN TEST report = K.function([input_data, K.learning_phase()], [y_pred]) report_cb = ReportCallback(report, testdata, model, args.name, save=False)
def main(args):
    """Load a trained model and print its predictions for sampled utterances.

    Optionally computes the average WER over the whole dataset first when
    ``args.calc_wer`` is set. Any error propagates to the caller; the Keras
    session is cleared in every case.

    Args:
        args: Parsed arguments. Reads ``model_load``, ``audio_dir``,
            ``batch_size``, ``mfccs``, ``mels``, ``feature_type`` and
            ``calc_wer``.

    Raises:
        ValueError: If ``args.model_load`` is empty.
    """
    # try/finally only: the former except clause just re-raised what it caught.
    try:
        if not args.model_load:
            raise ValueError("args.model_load must point to a trained model")
        audio_dir = args.audio_dir

        print("\nReading test data: ")
        _, df = combine_all_wavs_and_trans_from_csvs(audio_dir)

        batch_size = args.batch_size
        mfcc_features = args.mfccs
        n_mels = args.mels
        frequency = 22  # Sampling rate of data in khz (heroico is 22khz)

        model_load = args.model_load
        # epoch_length is passed through to DataGenerator unchanged.
        epoch_length = 0

        # Load trained model. Keras needs the custom objects to deserialize
        # it; the CTC lambda is a dummy function.
        custom_objects = {
            'clipped_relu': models.clipped_relu,
            '<lambda>': lambda y_true, y_pred: y_pred
        }

        # Load single GPU/CPU model or model saved *after* finished training
        model = mo.load_model(model_load, custom_objects=custom_objects)
        print("\nLoaded existing model: ", model_load)

        # Dummy loss-function to compile model, actual CTC loss-function defined as a lambda layer in model
        loss = {'ctc': lambda y_true, y_pred: y_pred}
        model.compile(loss=loss, optimizer='Adam')

        feature_shape = model.input_shape[0][2]

        # Without an explicit feature type, infer it from the model's input
        # width: 26 is treated as MFCC, anything else as spectrogram.
        if args.feature_type:
            feature_type = args.feature_type
        elif feature_shape == 26:
            feature_type = 'mfcc'
        else:
            feature_type = 'spectrogram'
        print("Feature type: ", feature_type)

        # Data generation parameters
        data_params = {
            'feature_type': feature_type,
            'batch_size': batch_size,
            'frame_length': 20 * frequency,
            'hop_length': 10 * frequency,
            'mfcc_features': mfcc_features,
            'n_mels': n_mels,
            'epoch_length': epoch_length,
            'shuffle': False
        }

        data_generator = DataGenerator(df, **data_params)

        model.summary()

        # Backend function mapping the network input to the first input of
        # the 'ctc' layer; used for both WER calculation and predictions.
        input_data = model.get_layer('the_input').input
        y_pred = model.get_layer('ctc').input[0]
        test_func = K.function([input_data], [y_pred])

        if args.calc_wer:
            print("\n - Calculation WER on ", audio_dir)
            wer = calc_wer(test_func, data_generator)
            print("Average WER: ", wer[1])

        predictions = predict_samples(data_generator, test_func)
        for i in predictions:
            print("Original: ", i[0])
            print("Predicted: ", i[1], "\n")

    finally:
        # Clear memory
        K.clear_session()