def run_training_process(model_name, bock_cv_path, bock_annotations_path,
                         bock_feature_path, output_path, ii):
    """Train one cross-validation fold: assemble features, train, clean up."""
    test_cv_filename = os.path.join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = \
        concatenateFeatureLabelSampleweights(train_fns,
                                             bock_feature_path,
                                             n_pattern=15,
                                             nlen=7,
                                             scaling=True)

    # create the temp bock folder if it does not exist
    temp_folder_bock = os.path.join(bock_feature_path, 'temp')
    if not os.path.exists(temp_folder_bock):
        os.makedirs(temp_folder_bock)

    filename_train_validation_set = os.path.join(temp_folder_bock, 'feature_bock_' + str(ii) + '.h5')
    filename_labels_train_validation_set = os.path.join(temp_folder_bock, 'labels_bock_' + str(ii) + '.pkl')
    filename_sample_weights = os.path.join(temp_folder_bock, 'sample_weights_bock_' + str(ii) + '.pkl')
    filename_scaler = os.path.join(temp_folder_bock, 'scaler_bock_' + str(ii) + '.pkl')

    saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all, scaler,
                                  filename_train_validation_set,
                                  filename_labels_train_validation_set,
                                  filename_sample_weights,
                                  filename_scaler)

    print('Finished organizing dataset.')

    file_path_model = os.path.join(output_path, model_name + str(ii) + '.h5')
    file_path_log = os.path.join(output_path, model_name + str(ii) + '.csv')

    input_dim = (80, 15)

    train_model_validation(filename_train_validation_set=filename_train_validation_set,
                           filename_labels_train_validation_set=filename_labels_train_validation_set,
                           filename_sample_weights=filename_sample_weights,
                           filter_density=1,
                           dropout=0.5,
                           input_shape=input_dim,
                           file_path_model=file_path_model,
                           filename_log=file_path_log,
                           model_name=model_name)

    # remove the temporary fold files once training is done
    os.remove(filename_train_validation_set)
    os.remove(filename_labels_train_validation_set)
    os.remove(filename_sample_weights)
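# A minimal driver over the eight cross-validation folds might look like the
# sketch below. This block is an illustration, not part of the original
# script: the paths and the 'baseline' model name are placeholder assumptions
# to be replaced with the project's actual configuration.
if __name__ == '__main__':
    for fold in range(8):  # fold files are named 8-fold_cv_random_<fold>.fold
        run_training_process(model_name='baseline',
                             bock_cv_path='/path/to/bock/cv',
                             bock_annotations_path='/path/to/bock/annotations',
                             bock_feature_path='/path/to/bock/features',
                             output_path='/path/to/output',
                             ii=fold)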
def main():
    tmp_folder = '/tmp/phoneEmbeddingModelsTraining'

    if not os.path.isdir(tmp_folder):
        os.mkdir(tmp_folder)

    path_dataset = '/homedtic/rgong/phoneEmbeddingModelsTraining/dataset/'

    filename_feature_teacher = os.path.join(path_dataset, 'feature_phn_embedding_train_teacher.pkl')
    filename_list_key_teacher = os.path.join(path_dataset, 'list_key_teacher.pkl')
    filename_feature_student = os.path.join(path_dataset, 'feature_phn_embedding_train_student.pkl')
    filename_list_key_student = os.path.join(path_dataset, 'list_key_student.pkl')
    filename_scaler = os.path.join(path_dataset, 'scaler_phn_embedding_train_teacher_student.pkl')

    filename_train_validation_set = os.path.join(tmp_folder, 'feature_frame.h5')
    filename_labels_train_validation_set = os.path.join(tmp_folder, 'labels.pkl')

    path_model = '/homedtic/rgong/phoneEmbeddingModelsTraining/out/'

    # local alternative:
    # path_dataset = '/Users/ronggong/Documents_using/MTG document/dataset/phoneEmbedding'
    # filename_train_validation_set = '../../temp/feature_frame.h5'
    # filename_labels_train_validation_set = '../../temp/labels.pkl'
    # path_model = '../../temp'

    input_dim = (80, 15)
    output_shape = 2  # 2 for the binary teacher/student case, 54 for all phoneme classes

    # load features, keys and scaler
    list_feature_teacher = pickle.load(open(filename_feature_teacher, 'rb'))
    list_key_teacher = pickle.load(open(filename_list_key_teacher, 'rb'))
    list_feature_student = pickle.load(open(filename_feature_student, 'rb'))
    list_key_student = pickle.load(open(filename_list_key_student, 'rb'))
    scaler = pickle.load(open(filename_scaler, 'rb'))

    array_feature_replicated_teacher, array_labels_teacher, labels_teacher = \
        load_data_embedding_to_frame_level_teacher_student(list_feature=list_feature_teacher,
                                                           list_key=list_key_teacher,
                                                           scaler=scaler,
                                                           data_str='_teacher')

    array_feature_replicated_student, array_labels_student, labels_student = \
        load_data_embedding_to_frame_level_teacher_student(list_feature=list_feature_student,
                                                           list_key=list_key_student,
                                                           scaler=scaler,
                                                           data_str='_student')

    array_feature_replicated = \
        np.concatenate((array_feature_replicated_teacher, array_feature_replicated_student), axis=0)
    array_labels = np.concatenate((array_labels_teacher, array_labels_student))

    # 2-class case: collapse the labels into two classes at index 26
    if output_shape == 2:
        array_labels[array_labels <= 26] = 0
        array_labels[array_labels > 26] = 1
        model_name = 'wide_frame_level_emb_teacher_student_2_class'
    else:
        model_name = 'wide_frame_level_emb_teacher_student'

    # write features and labels to files
    h5f = h5py.File(filename_train_validation_set, 'w')
    h5f.create_dataset('feature_all', data=array_feature_replicated)
    h5f.close()

    pickle.dump(array_labels, open(filename_labels_train_validation_set, 'wb'))

    # register cleanup before training so the temporary feature and label
    # files are removed at exit even if a training run fails
    atexit.register(exit_handler, filenames=[filename_train_validation_set,
                                             filename_labels_train_validation_set])

    for ii in range(5):
        file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
        file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')

        train_model_validation(filename_train_validation_set=filename_train_validation_set,
                               filename_labels_train_validation_set=filename_labels_train_validation_set,
                               filter_density=4,
                               dropout=0.32,
                               input_shape=input_dim,
                               output_shape=output_shape,
                               file_path_model=file_path_model,
                               filename_log=file_path_log,
                               channel=1)
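# exit_handler is registered with atexit above but not defined in this
# snippet. A minimal sketch consistent with its usage (an assumption, not
# the repository's actual implementation) would simply delete the
# temporary files if they still exist:
def exit_handler(filenames):
    """Remove the temporary feature/label files at interpreter exit."""
    for fn in filenames:
        if os.path.isfile(fn):
            os.remove(fn)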
def syllableSeg_jordi_madmom_basecode(part, ii, model_name='jordi_timbral_schluter'):
    test_cv_filename = join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(
        train_fns,
        bock_feature_data_path_madmom_simpleSampleWeighting,
        n_pattern=15,
        nlen=7,
        scaling=True)

    filename_train_validation_set = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'feature_all_' + model_name + '_temp_' + str(ii) + '.h5')
    filename_labels_train_validation_set = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'labels_train_set_all_' + model_name + '_temp_' + str(ii) + '.pickle.gz')
    filename_sample_weights = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'sample_weights_all_' + model_name + '_temp_' + str(ii) + '.pickle.gz')
    filename_scaler = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'scaler_' + model_name + '_madmom_simpleSampleWeighting_early_stopping_' + str(ii) + '.pickle.gz')

    saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all, scaler,
                                  filename_train_validation_set,
                                  filename_labels_train_validation_set,
                                  filename_sample_weights,
                                  filename_scaler)

    # copy the feature file to the node-local scratch disk for faster I/O
    timestamp1 = time.time()
    filename_train_validation_set_scratch = join(
        '/scratch/rgongcnnSyllableSeg_part' + str(part) + '_' + model_name + '/syllableSeg',
        'feature_all_' + model_name + '_temp_' + str(ii) + '.h5')
    shutil.copy2(filename_train_validation_set, filename_train_validation_set_scratch)
    timestamp2 = time.time()
    print("Copying to scratch took %.2f seconds" % (timestamp2 - timestamp1))

    # train the model
    file_path_model = '/homedtic/rgong/cnnSyllableSeg/out/schulter_' + model_name + \
                      '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' + str(ii) + '.h5'
    file_path_log = '/homedtic/rgong/cnnSyllableSeg/out/log/schulter_' + model_name + \
                    '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' + str(ii) + '.csv'

    # local alternative:
    # filename_train_validation_set_scratch = filename_train_validation_set
    # file_path_model = '../../temp/schulter_' + model_name + '_madmom_simpleSampleWeighting_cv_' + str(ii) + '.h5'
    # file_path_log = '../../temp/schulter_' + model_name + '_madmom_simpleSampleWeighting_cv_' + str(ii) + '.csv'

    input_dim = (80, 15)

    train_model_validation(filename_train_validation_set=filename_train_validation_set_scratch,
                           filename_labels_train_validation_set=filename_labels_train_validation_set,
                           filename_sample_weights=filename_sample_weights,
                           filter_density=1,
                           dropout=0.5,
                           input_shape=input_dim,
                           file_path_model=file_path_model,
                           filename_log=file_path_log,
                           model_name=model_name)

    # remove the temporary files (the scratch copy is not deleted here)
    os.remove(filename_train_validation_set)
    os.remove(filename_labels_train_validation_set)
    os.remove(filename_sample_weights)
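# On the cluster this function is presumably invoked once per (part, fold)
# pair by a job script. A hypothetical command-line entry point, not shown
# in the original source, could be:
if __name__ == '__main__':
    import sys

    # e.g. `python this_script.py 1 3` trains part 1, fold 3
    part_arg = int(sys.argv[1])
    fold_arg = int(sys.argv[2])
    syllableSeg_jordi_madmom_basecode(part_arg, fold_arg)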
file_path_model = os.path.join(args.path_output, args.architecture + str(ii_fold) + '.h5')
file_path_log = os.path.join(args.path_output, args.architecture + str(ii_fold) + '.csv')

# architecture -------------------------------------------------------------------------------------------------
if args.architecture in ['baseline', 'relu_dense', 'no_dense', 'temporal',
                         '9_layers_cnn', '5_layers_cnn']:
    # these architectures are trained from scratch
    train_model_validation(filename_train_validation_set,
                           filename_labels_train_validation_set,
                           filename_sample_weights,
                           filter_density=1,
                           dropout=0.5,
                           input_shape=input_dim,
                           file_path_model=file_path_model,
                           filename_log=file_path_log,
                           model_name=args.architecture,
                           channel=1)
elif args.architecture in ['retrained', 'feature_extractor_a', 'feature_extractor_b']:
    # these architectures are fine-tuned from a pretrained model
    finetune_model_validation(filename_train_validation_set,
                              filename_labels_train_validation_set,
                              filename_sample_weights,
                              filter_density=1,
                              dropout=0.5,
                              input_shape=input_dim,
                              file_path_model=file_path_model,
                              filename_log=file_path_log,
                              model_name=args.architecture,
                              channel=1)
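# The fragment above assumes an `args` namespace with `path_output` and
# `architecture`, plus a fold index `ii_fold`. A plausible parser -- a sketch
# based only on the names used above, not the repository's actual CLI -- is:
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Train or fine-tune one CV fold.')
    parser.add_argument('--path_output', type=str,
                        help='directory for the model (.h5) and training log (.csv)')
    parser.add_argument('--architecture', type=str,
                        choices=['baseline', 'relu_dense', 'no_dense', 'temporal',
                                 '9_layers_cnn', '5_layers_cnn',
                                 'retrained', 'feature_extractor_a', 'feature_extractor_b'],
                        help='model variant; the last three are fine-tuned')
    return parser.parse_args()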
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from models import train_model_validation

if __name__ == '__main__':
    nlen = 15
    input_dim = (80, nlen)

    # change these two paths
    filename_train_validation_set = '/Users/gong/Documents/MTG document/dataset/acousticModels/feature_hsmm_am.h5'
    filename_labels_train_validation_set = '/Users/gong/Documents/MTG document/dataset/acousticModels/labels_hsmm_am.pickle.gz'

    for ii in range(1, 5):
        # change these two paths
        file_path_model = '/homedtic/rgong/acousticModelsTraining/out/hsmm_am_timbral_' + str(ii) + '.h5'
        file_path_log = '/homedtic/rgong/acousticModelsTraining/out/log/hsmm_am_timbral_' + str(ii) + '.csv'

        train_model_validation(filename_train_validation_set,
                               filename_labels_train_validation_set,
                               filter_density=4,
                               dropout=0.32,
                               input_shape=input_dim,
                               file_path_model=file_path_model,
                               filename_log=file_path_log,
                               channel=1)
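# Before a long run it can help to sanity-check the inputs. The sketch below
# assumes the feature file uses the 'feature_all' dataset key (as in the
# embedding script above) and that the .pickle.gz labels are a gzipped pickle;
# both are assumptions inferred from this repository's file naming.
import gzip
import pickle
import h5py

with h5py.File(filename_train_validation_set, 'r') as h5f:
    print('feature shape:', h5f['feature_all'].shape)

with gzip.open(filename_labels_train_validation_set, 'rb') as f:
    labels = pickle.load(f)
print('number of labels:', len(labels))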