def run_training_process(model_name, bock_cv_path, bock_annotations_path,
                         bock_feature_path, output_path, ii):
    # organize the dataset for fold ii: collect training features, labels,
    # sample weights and the scaler, excluding the held-out test fold
    test_cv_filename = os.path.join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = \
        concatenateFeatureLabelSampleweights(train_fns, bock_feature_path,
                                             n_pattern=15, nlen=7, scaling=True)

    # create the temp bock folder if it does not exist
    temp_folder_bock = os.path.join(bock_feature_path, 'temp')
    if not os.path.exists(temp_folder_bock):
        os.makedirs(temp_folder_bock)

    filename_train_validation_set = os.path.join(temp_folder_bock, 'feature_bock_' + str(ii) + '.h5')
    filename_labels_train_validation_set = os.path.join(temp_folder_bock, 'labels_bock_' + str(ii) + '.pkl')
    filename_sample_weights = os.path.join(temp_folder_bock, 'sample_weights_bock_' + str(ii) + '.pkl')
    filename_scaler = os.path.join(temp_folder_bock, 'scaler_bock_' + str(ii) + '.pkl')

    saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all, scaler,
                                  filename_train_validation_set,
                                  filename_labels_train_validation_set,
                                  filename_sample_weights,
                                  filename_scaler)

    print('Finished organizing dataset.')

    # filename_train_validation_set_scratch = filename_train_validation_set

    file_path_model = os.path.join(output_path, model_name + str(ii) + '.h5')
    file_path_log = os.path.join(output_path, model_name + str(ii) + '.csv')

    input_dim = (80, 15)

    # train the model on this fold's training set
    train_model_validation(filename_train_validation_set=filename_train_validation_set,
                           filename_labels_train_validation_set=filename_labels_train_validation_set,
                           filename_sample_weights=filename_sample_weights,
                           filter_density=1,
                           dropout=0.5,
                           input_shape=input_dim,
                           file_path_model=file_path_model,
                           filename_log=file_path_log,
                           model_name=model_name)

    # clean up the temporary training files
    os.remove(filename_train_validation_set)
    os.remove(filename_labels_train_validation_set)
    os.remove(filename_sample_weights)
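# For context, a minimal driver sketch in the spirit of the per-fold loops used
# elsewhere in this repo: it simply calls run_training_process() once per fold of
# the 8-fold cross validation. The paths and the model name below are
# placeholders for illustration, not values taken from the original code.
if __name__ == '__main__':
    for ii in range(8):
        run_training_process(model_name='baseline',
                             bock_cv_path='/path/to/bock/cv',
                             bock_annotations_path='/path/to/bock/annotations',
                             bock_feature_path='/path/to/bock/features',
                             output_path='/path/to/output',
                             ii=ii)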
print(model_0.count_params())

model_train(model_0, batch_size, patience, input_shape,
            filename_train_validation_set,
            filenames_train, Y_train, sample_weights_train,
            filenames_validation, Y_validation, sample_weights_validation,
            filenames_features, Y_train_validation, sample_weights, class_weights,
            file_path_model, filename_log)


if __name__ == '__main__':
    for ii in range(8):
        test_cv_filename = join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
        train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
        feature_all, label_all, sample_weights_all, scaler = \
            concatenateFeatureLabelSampleweights(train_fns, schluter_feature_data_path)

        filename_train_validation_set = join(schluter_feature_data_path, 'temp',
                                             'feature_all_temporal_temp.h5')
        filename_labels_train_validation_set = join(schluter_feature_data_path, 'temp',
                                                    'labels_train_set_all_temporal_temp.pickle.gz')
        filename_sample_weights = join(schluter_feature_data_path, 'temp',
                                       'sample_weights_all_temporal_temp.pickle.gz')
        filename_scaler = join(schluter_feature_data_path, 'temp',
                               'scaler_temporal_' + str(ii) + '.pickle.gz')
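# Sketch of the behaviour assumed for getTrainingFilenames() above (the real
# implementation lives elsewhere in the repo): return every annotated clip name
# that is not listed in the held-out test fold file. The function name suffix and
# the file-layout details here are hypothetical.
import os


def getTrainingFilenames_sketch(annotations_path, test_cv_filename):
    with open(test_cv_filename) as f:
        test_fns = set(line.strip() for line in f if line.strip())
    all_fns = [os.path.splitext(fn)[0] for fn in os.listdir(annotations_path)]
    return [fn for fn in all_fns if fn not in test_fns]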
def syllableSeg_jordi_madmom_basecode(part, ii, model_name='jordi_timbral_schluter'):
    # organize the dataset for fold ii
    test_cv_filename = join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = \
        concatenateFeatureLabelSampleweights(train_fns,
                                             bock_feature_data_path_madmom_simpleSampleWeighting,
                                             n_pattern=15, nlen=7, scaling=True)

    filename_train_validation_set = join(bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
                                         'feature_all_' + model_name + '_temp_' + str(ii) + '.h5')
    filename_labels_train_validation_set = join(bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
                                                'labels_train_set_all_' + model_name + '_temp_' + str(ii) + '.pickle.gz')
    filename_sample_weights = join(bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
                                   'sample_weights_all_' + model_name + '_temp_' + str(ii) + '.pickle.gz')
    filename_scaler = join(bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
                           'scaler_' + model_name + '_madmom_simpleSampleWeighting_early_stopping_' + str(ii) + '.pickle.gz')

    saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all, scaler,
                                  filename_train_validation_set,
                                  filename_labels_train_validation_set,
                                  filename_sample_weights,
                                  filename_scaler)

    # copy the feature file to the cluster scratch disk to speed up training I/O
    timestamp1 = time.time()
    filename_train_validation_set_scratch = join('/scratch/rgongcnnSyllableSeg_part' + str(part) +
                                                 '_' + model_name + '/syllableSeg',
                                                 'feature_all_' + model_name + '_temp_' + str(ii) + '.h5')
    shutil.copy2(filename_train_validation_set, filename_train_validation_set_scratch)
    timestamp2 = time.time()
    print("Copying to scratch took %.2f seconds" % (timestamp2 - timestamp1))

    # train the model
    file_path_model = '/homedtic/rgong/cnnSyllableSeg/out/schulter_' + model_name + \
                      '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' + str(ii) + '.h5'
    file_path_log = '/homedtic/rgong/cnnSyllableSeg/out/log/schulter_' + model_name + \
                    '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' + str(ii) + '.csv'

    # filename_train_validation_set_scratch = filename_train_validation_set
    # file_path_model = '../../temp/schulter_' + model_name + '_madmom_simpleSampleWeighting_cv_' + str(ii) + '.h5'
    # file_path_log = '../../temp/schulter_' + model_name + '_madmom_simpleSampleWeighting_cv_' + str(ii) + '.csv'

    input_dim = (80, 15)

    train_model_validation(filename_train_validation_set=filename_train_validation_set_scratch,
                           filename_labels_train_validation_set=filename_labels_train_validation_set,
                           filename_sample_weights=filename_sample_weights,
                           filter_density=1,
                           dropout=0.5,
                           input_shape=input_dim,
                           file_path_model=file_path_model,
                           filename_log=file_path_log,
                           model_name=model_name)

    # clean up the temporary training files
    os.remove(filename_train_validation_set)
    os.remove(filename_labels_train_validation_set)
    os.remove(filename_sample_weights)
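# Sketch of the serialization assumed for saveFeatureLabelSampleweights(): the
# feature matrix goes to HDF5, while labels, sample weights and the scaler go to
# (optionally gzipped) pickles, matching the .h5 and .pickle.gz filenames used
# above. The helper name and the exact dataset layout are assumptions, not the
# original code.
import gzip
import pickle

import h5py


def saveFeatureLabelSampleweights_sketch(feature_all, label_all, sample_weights_all, scaler,
                                         fn_feature, fn_label, fn_sample_weights, fn_scaler):
    with h5py.File(fn_feature, 'w') as h5f:
        h5f.create_dataset('feature_all', data=feature_all, dtype='float32')
    for obj, fn in ((label_all, fn_label),
                    (sample_weights_all, fn_sample_weights),
                    (scaler, fn_scaler)):
        opener = gzip.open if fn.endswith('.gz') else open
        with opener(fn, 'wb') as f:
            pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)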
help="Path of the pretrained model") args = parser.parse_args() for ii_fold in range(0, 8): if args.architecture in [ 'baseline', 'relu_dense', 'no_dense', 'temporal', '9_layers_cnn', '5_layers_cnn', 'retrained', 'feature_extractor_a', 'feature_extractor_b' ]: # organize dataset ----------------------------------------------------------------------------------------- test_cv_filename = os.path.join( args.path_cv, '8-fold_cv_random_' + str(ii_fold) + '.fold') train_fns = getTrainingFilenames(args.path_annotation, test_cv_filename) feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights( train_fns, args.path_input, n_pattern=15, nlen=7, scaling=True) # create the temp bock folder if not exists temp_folder_bock = os.path.join(args.path_input, 'temp') if not os.path.exists(temp_folder_bock): os.makedirs(temp_folder_bock) filename_train_validation_set = os.path.join( temp_folder_bock, 'feature_bock_' + str(ii_fold) + '.h5') filename_labels_train_validation_set = os.path.join( temp_folder_bock, 'labels_bock_' + str(ii_fold) + '.pkl') filename_sample_weights = os.path.join( temp_folder_bock, 'sample_weights_bock_' + str(ii_fold) + '.pkl')
def run_bock_training(path_input, path_output, bock_cv_path, bock_annotations_path,
                      len_seq, ii, bidi=True):
    file_path_model = os.path.join(path_output, 'bidi_lstms_' + str(len_seq) + str(ii) + '.h5')
    file_path_log = os.path.join(path_output, 'bidi_lstms_' + str(len_seq) + str(ii) + '.csv')

    bock_feature_data_scratch_path = os.path.join(path_input)

    test_cv_filename = os.path.join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_validation_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)

    # split the training set into train and validation sets
    train_fns, validation_fns = None, None
    rs = ShuffleSplit(n_splits=1, test_size=.1)
    for train_idx, validation_idx in rs.split(train_validation_fns):
        train_fns = [train_validation_fns[ti] for ti in train_idx]
        validation_fns = [train_validation_fns[vi] for vi in validation_idx]

    # the scaler is a binary pickle, so open the file in 'rb' mode
    scaler = pickle.load(open(os.path.join(path_input, 'scaler_bock_phrase.pkl'), 'rb'))

    batch_size = 256
    nb_epochs = 500
    best_val_loss = 1.0  # initialize the val_loss
    counter = 0
    patience = 15        # early stopping patience
    overlap = 10         # overlap frames between consecutive segments

    input_shape = (batch_size, len_seq, 1, 80, 15)

    # initialize the training model
    model = jan_original(filter_density=1,
                         dropout=0.5,
                         input_shape=input_shape,
                         batchNorm=False,
                         dense_activation='sigmoid',
                         channel=1,
                         stateful=False,
                         bidi=bidi)

    input_shape_val = (1, len_seq, 1, 80, 15)

    # initialize the validation model (batch size 1)
    model_val = jan_original(filter_density=1,
                             dropout=0.5,
                             input_shape=input_shape_val,
                             batchNorm=False,
                             dense_activation='sigmoid',
                             channel=1,
                             stateful=False,
                             bidi=bidi)

    for ii_epoch in range(nb_epochs):
        batch_counter = 0

        # initialize the tensors
        mfcc_line_tensor = np.zeros(input_shape, dtype='float32')
        label_tensor = np.zeros((batch_size, len_seq, 1), dtype='int')
        sample_weights_tensor = np.zeros((batch_size, len_seq))

        # training
        for tfn in train_fns:
            mfcc_line, label, sample_weights = featureLabelSampleWeightsLoad(bock_feature_data_scratch_path,
                                                                             tfn,
                                                                             scaler)
            mfcc_line_pad, label_pad, sample_weights_pad, _ = \
                featureLabelSampleWeightsPad(mfcc_line, label, sample_weights, overlap)

            # slide a len_seq-frame window over the padded line with a hop of `overlap`
            # (integer division so that range() receives an int; the loop variable is
            # ii_seg to avoid shadowing the fold index ii)
            for ii_seg in range((len(mfcc_line_pad) - len_seq) // overlap + 1):
                idx_start = ii_seg * overlap
                idx_end = idx_start + len_seq
                mfcc_seg = mfcc_line_pad[idx_start:idx_end]
                label_seg = label_pad[idx_start:idx_end]
                sample_weights_seg = sample_weights_pad[idx_start:idx_end]

                # feed the tensor
                mfcc_line_tensor[batch_counter, :, 0, :, :] = mfcc_seg
                label_tensor[batch_counter, :, 0] = label_seg
                sample_weights_tensor[batch_counter, :] = sample_weights_seg

                if batch_counter >= batch_size - 1:
                    train_loss, train_acc = model.train_on_batch(mfcc_line_tensor,
                                                                 label_tensor,
                                                                 sample_weight=sample_weights_tensor)
                    batch_counter = 0
                else:
                    batch_counter += 1

        # copy the trained weights into the batch-size-1 validation model
        weights_trained = model.get_weights()
        model_val.set_weights(weights_trained)

        # calculate losses
        train_loss = loss_cal(train_fns, bock_feature_data_scratch_path, scaler, model_val, len_seq)
        val_loss = loss_cal(validation_fns, bock_feature_data_scratch_path, scaler, model_val, len_seq)

        # save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
            model.save_weights(file_path_model)
        else:
            counter += 1

        # write validation loss to csv
        writeValLossCsv(file_path_log, ii_epoch, val_loss, train_loss)

        # early stopping
        if counter >= patience:
            break

        random.shuffle(train_fns)
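# Sketch of an assumed loss_cal() helper (the real implementation lives elsewhere
# in the repo): average the weighted loss of the batch-size-1 validation model
# over all files, using the same padding and segmentation scheme as the training
# loop above. The name suffix, the default overlap and the use of Keras
# test_on_batch() with two return values are assumptions for illustration.
import numpy as np


def loss_cal_sketch(fns, data_path, scaler, model_val, len_seq, overlap=10):
    losses = []
    for fn in fns:
        mfcc_line, label, sample_weights = featureLabelSampleWeightsLoad(data_path, fn, scaler)
        mfcc_pad, label_pad, sw_pad, _ = \
            featureLabelSampleWeightsPad(mfcc_line, label, sample_weights, overlap)
        for ii_seg in range((len(mfcc_pad) - len_seq) // overlap + 1):
            idx_start = ii_seg * overlap
            idx_end = idx_start + len_seq
            # reshape one segment to the validation model's input shape (1, len_seq, 1, 80, 15)
            x = mfcc_pad[idx_start:idx_end][np.newaxis, :, np.newaxis, :, :]
            y = label_pad[idx_start:idx_end].reshape(1, len_seq, 1)
            sw = sw_pad[idx_start:idx_end].reshape(1, len_seq)
            loss, _ = model_val.test_on_batch(x, y, sample_weight=sw)
            losses.append(loss)
    return float(np.mean(losses))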