Code Example #1
import os

# getTrainingFilenames, concatenateFeatureLabelSampleweights,
# saveFeatureLabelSampleweights and train_model_validation are helpers
# from the project's own modules.
def run_training_process(model_name,
                         bock_cv_path,
                         bock_annotations_path,
                         bock_feature_path,
                         output_path,
                         ii):

    test_cv_filename = os.path.join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(train_fns,
                                                                                              bock_feature_path,
                                                                                              n_pattern=15,
                                                                                              nlen=7,
                                                                                              scaling=True)

    # create the temp bock folder if it does not exist
    temp_folder_bock = os.path.join(bock_feature_path, 'temp')
    if not os.path.exists(temp_folder_bock):
        os.makedirs(temp_folder_bock)

    filename_train_validation_set = os.path.join(temp_folder_bock, 'feature_bock_' + str(ii) + '.h5')
    filename_labels_train_validation_set = os.path.join(temp_folder_bock, 'labels_bock_' + str(ii) + '.pkl')
    filename_sample_weights = os.path.join(temp_folder_bock, 'sample_weights_bock_' + str(ii) + '.pkl')
    filename_scaler = os.path.join(temp_folder_bock, 'scaler_bock_' + str(ii) + '.pkl')

    saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all, scaler,
                                  filename_train_validation_set, filename_labels_train_validation_set,
                                  filename_sample_weights, filename_scaler)

    print('Finished organizing dataset.')

    # filename_train_validation_set_scratch = filename_train_validation_set
    file_path_model = os.path.join(output_path, model_name+str(ii)+'.h5')
    file_path_log = os.path.join(output_path, model_name+str(ii)+'.csv')

    input_dim = (80, 15)

    train_model_validation(filename_train_validation_set=filename_train_validation_set,
                           filename_labels_train_validation_set=filename_labels_train_validation_set,
                           filename_sample_weights=filename_sample_weights,
                           filter_density=1,
                           dropout=0.5,
                           input_shape=input_dim,
                           file_path_model=file_path_model,
                           filename_log=file_path_log,
                           model_name=model_name)

    os.remove(filename_train_validation_set)
    os.remove(filename_labels_train_validation_set)
    os.remove(filename_sample_weights)
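
A minimal driver for this function might look like the following sketch; the paths are placeholders, not the project's actual layout.

# Hypothetical usage sketch: train one model per cross-validation fold.
if __name__ == '__main__':
    for fold in range(8):
        run_training_process(model_name='baseline',
                             bock_cv_path='/path/to/bock/cv',
                             bock_annotations_path='/path/to/bock/annotations',
                             bock_feature_path='/path/to/bock/features',
                             output_path='/path/to/output',
                             ii=fold)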
Code Example #2
    # model_0 has been built earlier in the source file this excerpt is taken from
    print(model_0.count_params())

    model_train(model_0, batch_size, patience, input_shape,
                filename_train_validation_set, filenames_train, Y_train,
                sample_weights_train, filenames_validation, Y_validation,
                sample_weights_validation, filenames_features,
                Y_train_validation, sample_weights, class_weights,
                file_path_model, filename_log)


if __name__ == '__main__':

    # bock_cv_path, bock_annotations_path and schluter_feature_data_path
    # are module-level path constants in the original script.
    for ii in range(8):
        test_cv_filename = join(bock_cv_path,
                                '8-fold_cv_random_' + str(ii) + '.fold')
        train_fns = getTrainingFilenames(bock_annotations_path,
                                         test_cv_filename)
        feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(
            train_fns, schluter_feature_data_path)

        filename_train_validation_set = join(schluter_feature_data_path,
                                             'temp',
                                             'feature_all_temporal_temp.h5')
        filename_labels_train_validation_set = join(
            schluter_feature_data_path, 'temp',
            'labels_train_set_all_temporal_temp.pickle.gz')
        filename_sample_weights = join(
            schluter_feature_data_path, 'temp',
            'sample_weights_all_temporal_temp.pickle.gz')
        filename_scaler = join(schluter_feature_data_path, 'temp',
                               'scaler_temporal_' + str(ii) + '.pickle.gz')
Code Example #3
import os
import shutil
import time
from os.path import join

def syllableSeg_jordi_madmom_basecode(part,
                                      ii,
                                      model_name='jordi_timbral_schluter'):
    test_cv_filename = join(bock_cv_path,
                            '8-fold_cv_random_' + str(ii) + '.fold')
    train_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)
    feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(
        train_fns,
        bock_feature_data_path_madmom_simpleSampleWeighting,
        n_pattern=15,
        nlen=7,
        scaling=True)
    filename_train_validation_set = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'feature_all_' + model_name + '_temp_' + str(ii) + '.h5')
    filename_labels_train_validation_set = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'labels_train_set_all_' + model_name + '_temp_' + str(ii) +
        '.pickle.gz')
    filename_sample_weights = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'sample_weights_all_' + model_name + '_temp_' + str(ii) + '.pickle.gz')
    filename_scaler = join(
        bock_feature_data_path_madmom_simpleSampleWeighting, 'temp',
        'scaler_' + model_name +
        '_madmom_simpleSampleWeighting_early_stopping_' + str(ii) +
        '.pickle.gz')

    saveFeatureLabelSampleweights(feature_all, label_all, sample_weights_all,
                                  scaler, filename_train_validation_set,
                                  filename_labels_train_validation_set,
                                  filename_sample_weights, filename_scaler)

    timestamp1 = time.time()
    filename_train_validation_set_scratch = join(
        '/scratch/rgongcnnSyllableSeg_part' + str(part) + '_' + model_name +
        '/syllableSeg',
        'feature_all_' + model_name + '_temp_' + str(ii) + '.h5')
    shutil.copy2(filename_train_validation_set,
                 filename_train_validation_set_scratch)
    timestamp2 = time.time()
    print("Copying to scratch took %.2f seconds" % (timestamp2 - timestamp1))

    # train the model
    file_path_model = '/homedtic/rgong/cnnSyllableSeg/out/schulter_' + \
        model_name + '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' + \
        str(ii) + '.h5'
    file_path_log = '/homedtic/rgong/cnnSyllableSeg/out/log/schulter_' + \
        model_name + '_madmom_simpleSampleWeighting_early_stopping_adam_jan_params' + \
        str(ii) + '.csv'

    # filename_train_validation_set_scratch = filename_train_validation_set
    # file_path_model = '../../temp/schulter_'+model_name+'_madmom_simpleSampleWeighting_cv_'+str(ii)+'.h5'
    # file_path_log = '../../temp/schulter_'+model_name+'_madmom_simpleSampleWeighting_cv_'+str(ii)+'.csv'

    input_dim = (80, 15)

    train_model_validation(
        filename_train_validation_set=filename_train_validation_set_scratch,
        filename_labels_train_validation_set=filename_labels_train_validation_set,
        filename_sample_weights=filename_sample_weights,
        filter_density=1,
        dropout=0.5,
        input_shape=input_dim,
        file_path_model=file_path_model,
        filename_log=file_path_log,
        model_name=model_name)

    os.remove(filename_train_validation_set)
    os.remove(filename_labels_train_validation_set)
    os.remove(filename_sample_weights)
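
The original script presumably drives this function over the 8 folds; a hypothetical invocation (part selects the /scratch partition used in the hard-coded paths above):

# Hypothetical usage sketch.
if __name__ == '__main__':
    for fold in range(8):
        syllableSeg_jordi_madmom_basecode(part=1, ii=fold)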
Code Example #4
                        help="Path of the pretrained model")

    args = parser.parse_args()

    for ii_fold in range(0, 8):

        if args.architecture in [
                'baseline', 'relu_dense', 'no_dense', 'temporal',
                '9_layers_cnn', '5_layers_cnn', 'retrained',
                'feature_extractor_a', 'feature_extractor_b'
        ]:

            # organize dataset -----------------------------------------------------------------------------------------
            test_cv_filename = os.path.join(
                args.path_cv, '8-fold_cv_random_' + str(ii_fold) + '.fold')
            train_fns = getTrainingFilenames(args.path_annotation,
                                             test_cv_filename)
            feature_all, label_all, sample_weights_all, scaler = concatenateFeatureLabelSampleweights(
                train_fns, args.path_input, n_pattern=15, nlen=7, scaling=True)

            # create the temp bock folder if it does not exist
            temp_folder_bock = os.path.join(args.path_input, 'temp')
            if not os.path.exists(temp_folder_bock):
                os.makedirs(temp_folder_bock)

            filename_train_validation_set = os.path.join(
                temp_folder_bock, 'feature_bock_' + str(ii_fold) + '.h5')
            filename_labels_train_validation_set = os.path.join(
                temp_folder_bock, 'labels_bock_' + str(ii_fold) + '.pkl')
            filename_sample_weights = os.path.join(
                temp_folder_bock,
                'sample_weights_bock_' + str(ii_fold) + '.pkl')
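
The argparse setup above this excerpt is cut off. A parser consistent with the attributes the fragment reads (args.architecture, args.path_cv, args.path_annotation, args.path_input) might look like the sketch below; the option names are assumptions, not the original script's definitions.

# Hypothetical argparse setup; all option names are assumed.
import argparse

parser = argparse.ArgumentParser(
    description='Train onset detection models on the Bock dataset.')
parser.add_argument('--architecture', type=str, default='baseline',
                    help='Model architecture to train')
parser.add_argument('--path_cv', type=str,
                    help='Path of the cross-validation folds')
parser.add_argument('--path_annotation', type=str,
                    help='Path of the annotations')
parser.add_argument('--path_input', type=str,
                    help='Path of the input features')
parser.add_argument('--path_pretrained', type=str,
                    help='Path of the pretrained model')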
Code Example #5
import os
import pickle
import random

import numpy as np
from sklearn.model_selection import ShuffleSplit

# jan_original, getTrainingFilenames, featureLabelSampleWeightsLoad,
# featureLabelSampleWeightsPad, loss_cal and writeValLossCsv are helpers
# from the project's own modules.
def run_bock_training(path_input,
                      path_output,
                      bock_cv_path,
                      bock_annotations_path,
                      len_seq,
                      ii,
                      bidi=True):

    file_path_model = os.path.join(path_output, 'bidi_lstms_' + str(len_seq) + str(ii) + '.h5')
    file_path_log = os.path.join(path_output, 'bidi_lstms_' + str(len_seq) + str(ii) + '.csv')

    bock_feature_data_scratch_path = path_input

    test_cv_filename = os.path.join(bock_cv_path, '8-fold_cv_random_' + str(ii) + '.fold')
    train_validation_fns = getTrainingFilenames(bock_annotations_path, test_cv_filename)

    # split the training set into train and validation sets
    train_fns, validation_fns = None, None
    rs = ShuffleSplit(n_splits=1, test_size=.1)
    for train_idx, validation_idx in rs.split(train_validation_fns):
        train_fns = [train_validation_fns[ti] for ti in train_idx]
        validation_fns = [train_validation_fns[vi] for vi in validation_idx]

    with open(os.path.join(path_input, 'scaler_bock_phrase.pkl'), 'rb') as f:
        scaler = pickle.load(f)

    batch_size = 256
    nb_epochs = 500
    best_val_loss = np.inf  # initialize so the first epoch's loss always counts as an improvement
    counter = 0
    patience = 15  # early stopping patience
    overlap = 10  # overlap frames

    input_shape = (batch_size, len_seq, 1, 80, 15)

    # initialize the model
    model = jan_original(filter_density=1,
                         dropout=0.5,
                         input_shape=input_shape,
                         batchNorm=False,
                         dense_activation='sigmoid',
                         channel=1,
                         stateful=False,
                         bidi=bidi)

    input_shape_val = (1, len_seq, 1, 80, 15)

    # initialize a parallel model with batch size 1 for validation
    model_val = jan_original(filter_density=1,
                             dropout=0.5,
                             input_shape=input_shape_val,
                             batchNorm=False,
                             dense_activation='sigmoid',
                             channel=1,
                             stateful=False,
                             bidi=bidi)

    for ii_epoch in range(nb_epochs):

        batch_counter = 0

        # initialize the tensors
        mfcc_line_tensor = np.zeros(input_shape, dtype='float32')
        label_tensor = np.zeros((batch_size, len_seq, 1), dtype='int')
        sample_weights_tensor = np.zeros((batch_size, len_seq))

        # training
        for tfn in train_fns:

            mfcc_line, label, sample_weights = featureLabelSampleWeightsLoad(bock_feature_data_scratch_path,
                                                                             tfn,
                                                                             scaler)

            mfcc_line_pad, label_pad, sample_weights_pad, _ = \
                featureLabelSampleWeightsPad(mfcc_line, label, sample_weights, overlap)

            # slide a len_seq-frame window with a hop of `overlap` frames;
            # integer division keeps the range bound an int, and ii_seg
            # avoids shadowing the fold index ii
            for ii_seg in range((len(mfcc_line_pad) - len_seq) // overlap + 1):

                idx_start = ii_seg * overlap
                idx_end = idx_start + len_seq

                mfcc_seg = mfcc_line_pad[idx_start:idx_end]
                label_seg = label_pad[idx_start:idx_end]
                sample_weights_seg = sample_weights_pad[idx_start:idx_end]

                # feed the tensor
                mfcc_line_tensor[batch_counter, :, 0, :, :] = mfcc_seg
                label_tensor[batch_counter, :, 0] = label_seg
                sample_weights_tensor[batch_counter, :] = sample_weights_seg

                if batch_counter >= batch_size - 1:
                    train_loss, train_acc = model.train_on_batch(mfcc_line_tensor,
                                                                 label_tensor,
                                                                 sample_weight=sample_weights_tensor)
                    batch_counter = 0
                else:
                    batch_counter += 1

        # copy the trained weights into the validation model
        weights_trained = model.get_weights()
        model_val.set_weights(weights_trained)

        # calculate losses
        train_loss = loss_cal(train_fns, bock_feature_data_scratch_path, scaler, model_val, len_seq)
        val_loss = loss_cal(validation_fns, bock_feature_data_scratch_path, scaler, model_val, len_seq)

        # save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
            model.save_weights(file_path_model)
        else:
            counter += 1

        # write validation loss to csv
        writeValLossCsv(file_path_log, ii_epoch, val_loss, train_loss)

        # early stopping
        if counter >= patience:
            break

        random.shuffle(train_fns)
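
A hypothetical call, with placeholder paths and a 100-frame sequence length, might look like this:

# Hypothetical usage sketch: train a bidirectional model on fold 0.
if __name__ == '__main__':
    run_bock_training(path_input='/path/to/bock/features_phrase',
                      path_output='/path/to/output',
                      bock_cv_path='/path/to/bock/cv',
                      bock_annotations_path='/path/to/bock/annotations',
                      len_seq=100,
                      ii=0,
                      bidi=True)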