def run_training_process(model_name,
                         bock_cv_path,
                         bock_annotations_path,
                         bock_feature_path,
                         output_path,
                         ii):
    """Prepare the training data for fold `ii` and train `model_name` on it.

    The training file names are obtained from `getTrainingFilenames` using
    the fold file ``8-fold_cv_random_<ii>.fold`` found in `bock_cv_path`.
    Features, labels and sample weights are concatenated, dumped to a
    ``temp`` sub-folder of `bock_feature_path`, and handed to
    `train_model_validation` by file name.

    :param model_name: architecture name forwarded to
        `train_model_validation`; also the stem of the output file names.
    :param bock_cv_path: directory containing the ``.fold`` files.
    :param bock_annotations_path: annotation directory passed to
        `getTrainingFilenames`.
    :param bock_feature_path: feature directory; the ``temp`` working
        folder is created inside it.
    :param output_path: directory receiving ``<model_name><ii>.h5`` and
        ``<model_name><ii>.csv``.
    :param ii: fold index, used in every generated file name.
    :return: None

    The temporary feature, label and sample-weight files are removed when
    the function exits, whether training succeeded or raised. The scaler
    file is deliberately left in place, as in the original code
    (presumably it is needed later to scale test data -- confirm).
    """
    fold = str(ii)

    test_cv_filename = os.path.join(bock_cv_path, '8-fold_cv_random_' + fold + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(
        train_fns,
        bock_feature_path,
        n_pattern=15,
        nlen=7,
        scaling=True)

    # Create the temp folder if it does not exist. The re-check after a
    # failed makedirs tolerates another process creating the folder between
    # our test and our call (several folds may be launched in parallel).
    temp_folder_bock = os.path.join(bock_feature_path, 'temp')
    if not os.path.isdir(temp_folder_bock):
        try:
            os.makedirs(temp_folder_bock)
        except OSError:
            if not os.path.isdir(temp_folder_bock):
                raise

    filename_train_validation_set = os.path.join(temp_folder_bock, 'feature_bock_' + fold + '.h5')
    filename_labels_train_validation_set = os.path.join(temp_folder_bock, 'labels_bock_' + fold + '.pkl')
    filename_sample_weights = os.path.join(temp_folder_bock, 'sample_weights_bock_' + fold + '.pkl')
    filename_scaler = os.path.join(temp_folder_bock, 'scaler_bock_' + fold + '.pkl')

    file_path_model = os.path.join(output_path, model_name + fold + '.h5')
    file_path_log = os.path.join(output_path, model_name + fold + '.csv')

    input_dim = (80, 15)

    try:
        saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all, scaler,
                                      filename_train_validation_set, filename_labels_train_validation_set,
                                      filename_sample_weights, filename_scaler)

        print('Finished organizing dataset.')

        train_model_validation(filename_train_validation_set=filename_train_validation_set,
                               filename_labels_train_validation_set=filename_labels_train_validation_set,
                               filename_sample_weights=filename_sample_weights,
                               filter_density=1,
                               dropout=0.5,
                               input_shape=input_dim,
                               file_path_model=file_path_model,
                               filename_log=file_path_log,
                               model_name=model_name)
    finally:
        # Always drop the temporary dumps, even when saving or training
        # failed part-way, so aborted runs do not fill up the feature folder.
        for temp_file in (filename_train_validation_set,
                          filename_labels_train_validation_set,
                          filename_sample_weights):
            if os.path.exists(temp_file):
                os.remove(temp_file)
def main():
    """Build the frame-level teacher/student dataset and train five models.

    Steps:

    1. Load the pickled teacher and student feature lists, key lists and
       the scaler from the hard-coded dataset directory.
    2. Convert both sets with
       `load_data_embedding_to_frame_level_teacher_student` and concatenate
       the resulting feature arrays and label arrays.
    3. When `output_shape` is 2, collapse the labels in place to two
       classes: labels <= 26 become 0, labels > 26 become 1.
    4. Write the features to an HDF5 file (dataset ``feature_all``) and the
       labels to a pickle file in a temporary folder.
    5. Call `train_model_validation` five times (ii = 0..4), each time with
       its own model and log file path.

    The two temporary files are removed by `exit_handler`, which is
    registered with `atexit` before training starts.

    NOTE: all paths below are specific to the original author's cluster
    account (``/homedtic/rgong/...``); edit `path_dataset`, `tmp_folder`
    and `path_model` to run elsewhere.
    """

    def _load_pickle(path):
        # Close the handle deterministically instead of relying on garbage
        # collection. NOTE(review): pickle can execute arbitrary code while
        # loading -- only point this at dataset files you produced yourself.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    tmp_folder = '/tmp/phoneEmbeddingModelsTraining'
    if not os.path.isdir(tmp_folder):
        os.mkdir(tmp_folder)

    path_dataset = '/homedtic/rgong/phoneEmbeddingModelsTraining/dataset/'

    filename_feature_teacher = os.path.join(path_dataset, 'feature_phn_embedding_train_teacher.pkl')
    filename_list_key_teacher = os.path.join(path_dataset, 'list_key_teacher.pkl')
    filename_feature_student = os.path.join(path_dataset, 'feature_phn_embedding_train_student.pkl')
    filename_list_key_student = os.path.join(path_dataset, 'list_key_student.pkl')
    filename_scaler = os.path.join(path_dataset, 'scaler_phn_embedding_train_teacher_student.pkl')

    filename_train_validation_set = os.path.join(tmp_folder, 'feature_frame.h5')
    filename_labels_train_validation_set = os.path.join(tmp_folder, 'labels.pkl')

    path_model = '/homedtic/rgong/phoneEmbeddingModelsTraining/out/'

    input_dim = (80, 15)
    output_shape = 2  # 54

    # feature, label, scaler loading
    list_feature_teacher = _load_pickle(filename_feature_teacher)
    list_key_teacher = _load_pickle(filename_list_key_teacher)
    list_feature_student = _load_pickle(filename_feature_student)
    list_key_student = _load_pickle(filename_list_key_student)
    scaler = _load_pickle(filename_scaler)

    # The third return value of the loader is not needed here.
    array_feature_replicated_teacher, array_labels_teacher, _ = \
        load_data_embedding_to_frame_level_teacher_student(list_feature=list_feature_teacher,
                                                           list_key=list_key_teacher,
                                                           scaler=scaler,
                                                           data_str='_teacher')

    array_feature_replicated_student, array_labels_student, _ = \
        load_data_embedding_to_frame_level_teacher_student(list_feature=list_feature_student,
                                                           list_key=list_key_student,
                                                           scaler=scaler,
                                                           data_str='_student')

    array_feature_replicated = \
        np.concatenate((array_feature_replicated_teacher, array_feature_replicated_student), axis=0)

    array_labels = np.concatenate((array_labels_teacher, array_labels_student))

    # 2 class case
    if output_shape == 2:
        # Order matters: zero the low labels first, then everything still
        # above 26 becomes 1. Presumably labels 0..26 are the teacher
        # classes and 27..53 the student classes -- TODO confirm.
        array_labels[array_labels <= 26] = 0
        array_labels[array_labels > 26] = 1
        model_name = 'wide_frame_level_emb_teacher_student_2_class'
    else:
        model_name = 'wide_frame_level_emb_teacher_student'

    # write feature and label to files; the context managers guarantee both
    # files are flushed and closed before training reads them back
    with h5py.File(filename_train_validation_set, 'w') as h5f:
        h5f.create_dataset('feature_all', data=array_feature_replicated)

    with open(filename_labels_train_validation_set, 'wb') as labels_file:
        pickle.dump(array_labels, labels_file)

    # clean the feature file: register the cleanup before training so the
    # temporary files are also removed when one of the runs below raises
    atexit.register(exit_handler, filenames=[filename_train_validation_set, filename_labels_train_validation_set])

    for ii in range(5):
        file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
        file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')

        train_model_validation(filename_train_validation_set=filename_train_validation_set,
                               filename_labels_train_validation_set=filename_labels_train_validation_set,
                               filter_density=4,
                               dropout=0.32,
                               input_shape=input_dim,
                               output_shape=output_shape,
                               file_path_model=file_path_model,
                               filename_log=file_path_log,
                               channel=1)
def syllableSeg_jordi_madmom_basecode(part,
                                      ii,
                                      model_name='jordi_timbral_schluter'):
    """Prepare the data for fold `ii`, copy it to scratch and train a model.

    Reads the module-level settings `bock_cv_path`, `bock_annotations_path`
    and `bock_feature_data_path_madmom_simpleSampleWeighting`.

    :param part: job part number; only used to build the scratch directory
        name ``/scratch/rgongcnnSyllableSeg_part<part>_<model_name>/syllableSeg``.
        That directory is not created here and must already exist.
    :param ii: fold index; selects ``8-fold_cv_random_<ii>.fold`` and is
        embedded in every generated file name.
    :param model_name: architecture name forwarded to
        `train_model_validation` and embedded in the file names.
    :return: None

    The temporary feature, label and sample-weight files, as well as the
    scratch copy of the feature file, are removed when the function exits,
    whether training succeeded or raised. The scaler file is kept, as in
    the original code.
    """
    fold = str(ii)

    test_cv_filename = join(bock_cv_path, '8-fold_cv_random_' + fold + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(
        train_fns,
        bock_feature_data_path_madmom_simpleSampleWeighting,
        n_pattern=15,
        nlen=7,
        scaling=True)

    # Make sure the temp folder exists (run_training_process does the same).
    # The re-check after a failed makedirs tolerates a parallel job having
    # created the folder in the meantime.
    temp_folder = join(bock_feature_data_path_madmom_simpleSampleWeighting, 'temp')
    if not os.path.isdir(temp_folder):
        try:
            os.makedirs(temp_folder)
        except OSError:
            if not os.path.isdir(temp_folder):
                raise

    # common suffix of the per-fold temporary file names
    temp_tag = model_name + '_temp_' + fold

    filename_train_validation_set = join(
        temp_folder, 'feature_all_' + temp_tag + '.h5')
    filename_labels_train_validation_set = join(
        temp_folder, 'labels_train_set_all_' + temp_tag + '.pickle.gz')
    filename_sample_weights = join(
        temp_folder, 'sample_weights_all_' + temp_tag + '.pickle.gz')
    filename_scaler = join(
        temp_folder,
        'scaler_' + model_name +
        '_madmom_simpleSampleWeighting_early_stopping_' + fold +
        '.pickle.gz')

    filename_train_validation_set_scratch = join(
        '/scratch/rgongcnnSyllableSeg_part' + str(part) + '_' + model_name +
        '/syllableSeg',
        'feature_all_' + temp_tag + '.h5')

    # model and log outputs share the same stem
    out_stem = ('schulter_' + model_name +
                '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' +
                fold)
    file_path_model = '/homedtic/rgong/cnnSyllableSeg/out/' + out_stem + '.h5'
    file_path_log = '/homedtic/rgong/cnnSyllableSeg/out/log/' + out_stem + '.csv'

    # NOTE: for a local run without scratch storage, train directly from
    # filename_train_validation_set and point the two output paths at a
    # local folder.

    input_dim = (80, 15)

    try:
        saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all,
                                      scaler, filename_train_validation_set,
                                      filename_labels_train_validation_set,
                                      filename_sample_weights, filename_scaler)

        # training reads the feature file from scratch, so copy it there
        timestamp1 = time.time()
        shutil.copy2(filename_train_validation_set,
                     filename_train_validation_set_scratch)
        timestamp2 = time.time()
        print("Copying to scratch took %.2f seconds" % (timestamp2 - timestamp1))

        # train the model
        train_model_validation(
            filename_train_validation_set=filename_train_validation_set_scratch,
            filename_labels_train_validation_set=filename_labels_train_validation_set,
            filename_sample_weights=filename_sample_weights,
            filter_density=1,
            dropout=0.5,
            input_shape=input_dim,
            file_path_model=file_path_model,
            filename_log=file_path_log,
            model_name=model_name)
    finally:
        # Always remove the temporary dumps and the scratch copy, even when
        # a step above failed, so aborted folds do not leave large feature
        # files behind.
        for temp_file in (filename_train_validation_set,
                          filename_labels_train_validation_set,
                          filename_sample_weights,
                          filename_train_validation_set_scratch):
            if os.path.exists(temp_file):
                os.remove(temp_file)
Beispiel #4
0
            file_path_model = os.path.join(
                args.path_output, args.architecture + str(ii_fold) + '.h5')
            file_path_log = os.path.join(
                args.path_output, args.architecture + str(ii_fold) + '.csv')

            # architecture -------------------------------------------------------------------------------------------------
            if args.architecture in [
                    'baseline', 'relu_dense', 'no_dense', 'temporal',
                    '9_layers_cnn', '5_layers_cnn'
            ]:

                train_model_validation(filename_train_validation_set,
                                       filename_labels_train_validation_set,
                                       filename_sample_weights,
                                       filter_density=1,
                                       dropout=0.5,
                                       input_shape=input_dim,
                                       file_path_model=file_path_model,
                                       filename_log=file_path_log,
                                       model_name=args.architecture,
                                       channel=1)

            elif args.architecture in [
                    'retrained', 'feature_extractor_a', 'feature_extractor_b'
            ]:
                finetune_model_validation(filename_train_validation_set,
                                          filename_labels_train_validation_set,
                                          filename_sample_weights,
                                          filter_density=1,
                                          dropout=0.5,
                                          input_shape=input_dim,
                                          file_path_model=file_path_model,
import sys, os

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import shutil
from models import train_model_validation

if __name__ == '__main__':
    # Script entry point: calls train_model_validation once per run index
    # (1 to 4) on the same feature/label files, passing a distinct model
    # path and log path for each run.

    nlen = 15
    input_dim = (80, nlen)

    # change these two paths
    filename_train_validation_set = '/Users/gong/Documents/MTG document/dataset/acousticModels/feature_hsmm_am.h5'
    filename_labels_train_validation_set = '/Users/gong/Documents/MTG document/dataset/acousticModels/labels_hsmm_am.pickle.gz'

    # settings that are identical for every run
    shared_settings = dict(filter_density=4,
                           dropout=0.32,
                           input_shape=input_dim,
                           channel=1)

    for ii in (1, 2, 3, 4):
        run_id = str(ii)

        # change these two paths
        file_path_model = '/homedtic/rgong/acousticModelsTraining/out/hsmm_am_timbral_' + run_id + '.h5'
        file_path_log = '/homedtic/rgong/acousticModelsTraining/out/log/hsmm_am_timbral_' + run_id + '.csv'

        train_model_validation(filename_train_validation_set,
                               filename_labels_train_validation_set,
                               file_path_model=file_path_model,
                               filename_log=file_path_log,
                               **shared_settings)