import numpy as np

# NOTE: DataGenerator is the HDF5-backed generator defined elsewhere in this repo.


def main():
    gains = [40, 50, 60]
    tx_beams = np.arange(0, 24)
    num_samples_tot_gain_tx_beam = 10000
    # Index order is gain, then tx beam
    indexes = np.arange(0, num_samples_tot_gain_tx_beam * len(tx_beams) * len(gains))
    batch_size = 32
    data_path = '/media/michele/rx-12-tx-tm-0-rx-tm-1.h5'
    num_blocks_per_frame = 15
    how_many_blocks_per_frame = 1
    num_samples_per_block = 2048
    num_tx_beams = len(tx_beams)
    input_size = 1024

    dg = DataGenerator(indexes, batch_size, data_path, num_tx_beams,
                       num_blocks_per_frame, input_size, num_samples_per_block,
                       how_many_blocks_per_frame, shuffle=False, is_2d=False)

    # Batches per (gain, tx_beam) pair; note 10000 / 32 = 312.5, so this is
    # fractional under true division
    batch_gain_tx_beam = num_samples_tot_gain_tx_beam / batch_size

    # for i_g, val_g in enumerate(gains):
    #     print("Gain: " + str(val_g))
    #     for i_t, val_t in enumerate(tx_beams):
    #         print("Beam idx: " + str(val_t))
    #         batch_index = (i_g * len(tx_beams) * batch_gain_tx_beam) + i_t * batch_gain_tx_beam
    #         print("Batch idx: " + str(batch_index))
    #         batch, batch_y = dg[batch_index]
    #         print("tx_beam %d y %s" % (val_t, batch_y[0]))
    #         # print(batch_y[0])

    for i in range(len(dg)):
        print("Batch idx: " + str(i))
        batch, batch_y = dg[i]
        print("tx_beam %s %s y %s %s"
              % (batch[0][0], batch[-1][0], batch_y[0], batch_y[-1]))
        print("batch_x_size: %s, batch_y_size: %s"
              % (str(batch.shape), str(batch_y.shape)))
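# The loop in main() only relies on the generator exposing __len__ (number of
# batches) and __getitem__ (one (x, y) batch), i.e. the keras.utils.Sequence
# protocol. Below is a minimal sketch of that contract with placeholder data
# instead of the real HDF5 reads; the class and field names are illustrative
# assumptions, not the repo's actual implementation.
from keras.utils import Sequence


class MinimalGenerator(Sequence):
    def __init__(self, indexes, batch_size, input_size):
        self.indexes = indexes
        self.batch_size = batch_size
        self.input_size = input_size

    def __len__(self):
        # Number of whole batches per epoch
        return len(self.indexes) // self.batch_size

    def __getitem__(self, i):
        # The i-th batch as an (x, y) pair of numpy arrays
        sel = self.indexes[i * self.batch_size:(i + 1) * self.batch_size]
        x = np.zeros((len(sel), self.input_size))  # placeholder features
        y = np.zeros((len(sel),))                  # placeholder labels
        return x, y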
# Assumed imports for this training script; the project-local modules (models,
# data, DataGenerator, LossCallback) live elsewhere in this repo, and the
# Keras imports follow the Keras 2.x API used below.
from datetime import datetime

import tensorflow as tf
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam
from keras.utils import multi_gpu_model

import models
from data import combine_all_wavs_and_trans_from_csvs
from DataGenerator import DataGenerator
from LossCallback import LossCallback


def main(args):
    # Paths to .csv files
    path = "data_dir/librivox-train-clean-360.csv"
    path_validation = "data_dir/librivox-dev-clean.csv"
    path_test = "data_dir/librivox-test-clean.csv"

    # Create dataframes
    print("\nReading training data:")
    _, input_dataframe = combine_all_wavs_and_trans_from_csvs(path)
    print("\nReading validation data:")
    _, validation_df = combine_all_wavs_and_trans_from_csvs(path_validation)
    print("\nReading test data:")
    _, test_df = combine_all_wavs_and_trans_from_csvs(path_test)

    # Training params
    batch_size = args.batch_size
    input_epoch_length = args.epoch_len
    epochs = args.epochs
    learning_rate = args.lr
    log_file = args.log_file

    # Multi-GPU or single-GPU / CPU training
    num_gpu = args.num_gpu

    # Preprocessing params
    feature_type = args.feature_type
    mfcc_features = args.mfccs
    n_mels = args.mels

    # Model params
    model_type = args.model_type
    units = args.units
    dropout = args.dropout
    n_layers = args.layers

    # Saving and loading params
    model_save = args.model_save
    checkpoint = args.checkpoint
    model_load = args.model_load
    load_multi = args.load_multi

    # Additional settings for training
    save_best = args.save_best_val        # Save model with best val_loss (at path "model_save" + "_best")
    shuffle = args.shuffle_indexes
    reduce_lr = args.reduce_lr            # Reduce learning rate on val_loss plateau
    early_stopping = args.early_stopping  # Stop training early if val_loss stops improving

    frequency = 16   # Sampling rate of the data in kHz (LibriSpeech is 16 kHz)
    cudnnlstm = False

    # Data generation parameters; frame_length and hop_length are in samples
    # (a 20 ms window with a 10 ms hop at 16 kHz)
    data_params = {'feature_type': feature_type,
                   'batch_size': batch_size,
                   'frame_length': 20 * frequency,
                   'hop_length': 10 * frequency,
                   'mfcc_features': mfcc_features,
                   'n_mels': n_mels,
                   'epoch_length': input_epoch_length,
                   'shuffle': shuffle}

    # Data generators for training, validation and test data
    training_generator = DataGenerator(input_dataframe, **data_params)
    validation_generator = DataGenerator(validation_df, **data_params)
    test_generator = DataGenerator(test_df, **data_params)

    # Model input shape
    if feature_type == 'mfcc':
        input_dim = mfcc_features
    else:
        input_dim = n_mels

    output_dim = 29  # Output dim: characters to predict + 1 for the CTC blank

    # Optimization algorithm used to update network weights
    optimizer = Adam(lr=learning_rate, epsilon=1e-8, clipnorm=2.0)

    # Dummy loss function for compiling the model; the actual CTC loss is
    # defined as a Lambda layer inside the model
    loss = {'ctc': lambda y_true, y_pred: y_pred}

    # Print training parameters at the beginning of training
    calc_epoch_length = len(training_generator)
    print("\n\nModel and training parameters:")
    print("Starting time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    print(" - epochs: ", epochs,
          "\n - batch size: ", batch_size,
          "\n - input epoch length: ", input_epoch_length,
          "\n - network epoch length: ", calc_epoch_length,
          "\n - training on ", calc_epoch_length * batch_size, " files",
          "\n - learning rate: ", learning_rate,
          "\n - hidden units: ", units,
          "\n - mfcc features: ", mfcc_features,
          "\n - dropout: ", dropout, "\n")

    try:
        # Load a previous model or create a new one. tf.device('/cpu:0')
        # ensures the model is created/loaded on the CPU.
        if model_load:
            with tf.device('/cpu:0'):
                # When loading custom objects, Keras needs to know where to find them.
                # The CTC lambda is a dummy function
                custom_objects = {'clipped_relu': models.clipped_relu,
                                  '<lambda>': lambda y_true, y_pred: y_pred}

                # When loading a parallel model saved *while* running on
                # multiple GPUs, use load_multi
                if load_multi:
                    model = models.load_model(model_load, custom_objects=custom_objects)
                    model = model.layers[-2]
                    print("Loaded existing model at: ", model_load)

                # Load a single-GPU/CPU model, or a model saved *after* training finished
                else:
                    model = models.load_model(model_load, custom_objects=custom_objects)
                    print("Loaded existing model at: ", model_load)

        else:
            with tf.device('/cpu:0'):
                # Create new model
                model = models.model(model_type=model_type, units=units,
                                     input_dim=input_dim, output_dim=output_dim,
                                     dropout=dropout, cudnn=cudnnlstm,
                                     n_layers=n_layers)
            print("Creating new model: ", model_type)

        # Loss callback parameters
        loss_callback_params = {'validation_gen': validation_generator,
                                'test_gen': test_generator,
                                'checkpoint': checkpoint,
                                'path_to_save': model_save,
                                'log_file_path': log_file}

        # Model training parameters
        model_train_params = {'generator': training_generator,
                              'epochs': epochs,
                              'verbose': 2,
                              'validation_data': validation_generator,
                              'workers': 1,
                              'shuffle': shuffle}

        # Optional callbacks for added functionality

        # Reduces the learning rate when val_loss stagnates
        if reduce_lr:
            print("Reducing learning rate on plateau")
            reduce_lr_cb = ReduceLROnPlateau(factor=0.2, patience=5, verbose=0,
                                             epsilon=0.1, min_lr=0.0000001)
            callbacks = [reduce_lr_cb]
        else:
            callbacks = []

        # Stops training early if val_loss isn't improving
        if early_stopping:
            es_cb = EarlyStopping(min_delta=0, patience=5, verbose=0, mode='auto')
            callbacks.append(es_cb)

        # Saves the model whenever val_loss improves, at "model_save" + "_best"
        if save_best:
            save_best = model_save + '_best'
            mcp_cb = ModelCheckpoint(save_best, verbose=1, save_best_only=True, period=1)
            callbacks.append(mcp_cb)

        # Train with a parallel model on 2 or more GPUs (must be an even number)
        if num_gpu > 1:
            if num_gpu % 2 == 0:
                # Compile the parallel model for training on more than one GPU
                parallel_model = multi_gpu_model(model, gpus=num_gpu)
                parallel_model.compile(loss=loss, optimizer=optimizer)

                # Print model summary
                model.summary()

                # Creates a test function that takes sound input and outputs
                # predictions; used to calculate WER while training the network
                input_data = model.get_layer('the_input').input
                y_pred = model.get_layer('ctc').input[0]
                test_func = K.function([input_data], [y_pred])

                # The loss callback that calculates WER while training
                loss_cb = LossCallback(test_func=test_func, model=model,
                                       **loss_callback_params)
                callbacks.append(loss_cb)

                # Run training
                parallel_model.fit_generator(callbacks=callbacks, **model_train_params)
            else:
                raise ValueError('Number of GPUs must be an even number')

        # Train with CPU or a single GPU
        elif num_gpu == 1 or num_gpu == 0:
            # Compile the model for training on at most one GPU
            model.compile(loss=loss, optimizer=optimizer)

            # Print model summary
            model.summary()

            # Creates a test function that takes preprocessed sound input and
            # outputs predictions; used to calculate WER while training the network
            input_data = model.get_layer('the_input').input
            y_pred = model.get_layer('ctc').input[0]
            test_func = K.function([input_data], [y_pred])

            # The loss callback that calculates WER while training
            loss_cb = LossCallback(test_func=test_func, model=model,
                                   **loss_callback_params)
            callbacks.append(loss_cb)

            # Run training
            model.fit_generator(callbacks=callbacks, **model_train_params)

        else:
            raise ValueError('Not a valid number of GPUs: %d' % num_gpu)
        if args.model_save:
            model.save(model_save)
            print("Model saved: ", model_save)

    except Exception as e:
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(e).__name__, e.args)
        print(message)

    finally:
        # Clear memory
        K.clear_session()
        print("Ending time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
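# The dummy loss above ({'ctc': lambda y_true, y_pred: y_pred}) only works
# because the real CTC loss is computed inside the model, in a Lambda layer
# named 'ctc' whose output *is* the loss value. Below is a minimal sketch of
# that standard Keras pattern; apart from the 'the_input' and 'ctc' layer
# names, which the training code looks up, all names here are assumptions
# rather than this repo's actual model code.
from keras import backend as K
from keras.layers import Lambda


def ctc_lambda_func(args):
    # K.ctc_batch_cost returns one CTC loss value per batch element
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


# Inside the model definition, something along these lines ties it together:
#   loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
#       [y_pred, labels, input_length, label_length])
# so compiling with loss={'ctc': lambda y_true, y_pred: y_pred} simply passes
# the Lambda layer's output through as the quantity to minimize.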
    datatype='imgs_to_gray',
    datadirs=paths,
    is_label_to_categorical=False,
    is_normalize_image_datatype=True,
    is_apply_text_preprocessing=FLAGS.is_apply_text_preprocessing,
    is_apply_sequence_preprocessing=FLAGS.is_apply_sequence_preprocessing)

n_clusters = FLAGS.n_classes
print("n_clusters")
print(n_clusters)

# MiniBatchKMeans (sklearn.cluster) is required here: unlike plain KMeans it
# accepts batch_size and supports partial_fit for incremental training.
kmeans = MiniBatchKMeans(n_clusters=n_clusters, n_init=20,
                         batch_size=FLAGS.batch_size)
# random_state=0, n_jobs=4

y_pred_kmeans = kmeans
if True:  # TODO: temporary switch; set to False to fit on a single batch only
    for bi in range(len(data)):
        x, y = data.__getitem__(bi, True, is_return_only_x=False)
        # partial_fit returns the fitted estimator, so this accumulates over batches
        y_pred_kmeans = y_pred_kmeans.partial_fit(x[:, :])
        # print("y ", y.shape, y_pred_kmeans.labels_.shape)
        # print(metrics.acc(y, y_pred_kmeans.labels_))
else:
    # Moved outside the loop to speed things up; it is unclear whether this
    # changes the behaviour of the algorithm
    x, y = data.__getitem__(0, True, is_return_only_x=False)
    # print(metrics.acc(y, y_pred_kmeans.labels_))

# Pad entire y

# Layer sizes for the network (input dim first); alternatives tried earlier:
# dims = [x.shape[-1], 500, 500, 2000, 10]
# dims = [x.shape[-1], 500, 500, 600, 10]
dims = [x.shape[-1], 500, 500, 600, 234]
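# The commented-out metrics.acc(y, ...) calls above refer to unsupervised
# clustering accuracy: k-means cluster IDs are arbitrary, so accuracy is taken
# under the best one-to-one mapping between cluster IDs and true labels, found
# with the Hungarian algorithm. Below is a sketch of that metric, assuming
# this is what the repo's metrics.acc computes.
import numpy as np
from scipy.optimize import linear_sum_assignment


def cluster_acc(y_true, y_pred):
    """Accuracy under the best one-to-one cluster-to-label assignment."""
    y_true = np.asarray(y_true, dtype=np.int64)
    y_pred = np.asarray(y_pred, dtype=np.int64)
    n = max(y_pred.max(), y_true.max()) + 1
    # w[i, j] counts samples assigned to cluster i whose true label is j
    w = np.zeros((n, n), dtype=np.int64)
    for p, t in zip(y_pred, y_true):
        w[p, t] += 1
    # Maximize the matched counts by minimizing (max - w)
    row, col = linear_sum_assignment(w.max() - w)
    return float(w[row, col].sum()) / y_pred.size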