Example 1

# NOTE: getRecordings, annotationCvParser, getMFCCBands2DMadmom and the
# module-level globals fs and hopsize_t are project helpers not shown here.
from os.path import join
import numpy as np

def getTrainingFilenames(annotation_path, cv_filename):
    """
    annotation filenames - cv test filenames
    :param annotation_path:
    :param cv_filename:
    :return:
    """
    annotation_fns = getRecordings(annotation_path)
    test_fns = annotationCvParser(cv_filename)
    train_fns = [x for x in annotation_fns if x not in test_fns]
    return train_fns
def dump_feature_onset_helper(audio_path, annotation_path, fn, channel):
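    """Load the feature matrix and onset annotations for one recording.

    Returns the madmom MFCC-band features, the onset frame indices, and the
    first and last frame of the line.
    """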

    audio_fn = join(audio_path, fn + '.flac')
    annotation_fn = join(annotation_path, fn + '.onsets')

    mfcc = getMFCCBands2DMadmom(audio_fn, fs, hopsize_t, channel)

    print('Collecting features ...', fn)

    times_onset = annotationCvParser(annotation_fn)
    times_onset = [float(to) for to in times_onset]
    # syllable onset frames
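    # e.g. if hopsize_t were 0.01 s, an onset at 1.234 s would map to frame round(1.234 / 0.01) = 123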
    frames_onset = np.array(np.around(np.array(times_onset) / hopsize_t), dtype=int)

    # line start and end frames
    frame_start = 0
    frame_end = mfcc.shape[0] - 1

    return mfcc, frames_onset, frame_start, frame_end
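
A minimal usage sketch combining the two helpers above; the directory layout, fold filename, and channel value are illustrative assumptions, not values from the source.

# Hypothetical usage -- all paths below are placeholders.
audio_path = './bock_audio'
annotation_path = './bock_annotations'
cv_filename = './8-fold_cv_random_0.fold'

train_fns = getTrainingFilenames(annotation_path, cv_filename)
for fn in train_fns:
    mfcc, frames_onset, frame_start, frame_end = \
        dump_feature_onset_helper(audio_path, annotation_path, fn, channel=1)
    # mfcc: (n_frames, n_features) array; frames_onset: onset frame indices
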
Example 3

# NOTE: assumed standard imports plus Keras for load_model; annotationCvParser,
# batch_process_onset_detection, batch_process_onset_detection_phrase, eval_bock,
# append_or_write, write_results_2_txt_schluter and varin are project helpers
# not shown here.
import os
import gzip
import pickle
import shutil
from os.path import join
from keras.models import load_model

def schluter_eval_subroutine(nfolds, pp_threshold, obs_cal, len_seq,
                             architecture, bock_cv_path, bock_cnn_model_path,
                             bock_audio_path, bock_annotations_path,
                             bock_results_path, detection_results_path,
                             jingju_cnn_model_path, full_path_jingju_scaler):

    for ii in range(nfolds):
        # load scaler
        if 'bidi_lstms' not in architecture:  # not CRNN
            # scaler for the model trained on the jingju + schluter datasets:
            # scaler_name_0 = 'scaler_jan_madmom_simpleSampleWeighting_early_stopping_schluter_jingju_dataset_'
            #                 + str(ii) + '.pickle.gz'

            if 'pretrained' in architecture:
                with open(full_path_jingju_scaler, 'rb') as f:
                    scaler_0 = pickle.load(f)
            else:
                if 'temporal' in architecture:
                    scaler_name_0 = 'scaler_bock_' + str(ii) + '.pickle.gz'
                else:
                    scaler_name_0 = 'scaler_bock_temporal_' + str(ii) + '.pickle.gz'

                with gzip.open(join(bock_cnn_model_path, scaler_name_0),
                               'rb') as f:
                    scaler_0 = pickle.load(f)
        else:  # CRNN
            scaler_name_0 = 'scaler_bock_phrase.pkl'
            with open(join(bock_cnn_model_path, scaler_name_0), 'rb') as f:
                scaler_0 = pickle.load(f)

        # load model
        if 'pretrained' in architecture:
            model_name_0 = '5_layers_cnn0'
        else:
            model_name_0 = architecture + str(ii)

        model_name_1 = ''

        if obs_cal != 'tocal':
            model_keras_cnn_0 = None
            stateful = None
        else:
            if 'bidi_lstms' not in architecture:
                if 'pretrained' in architecture:
                    model_keras_cnn_0 = load_model(
                        join(jingju_cnn_model_path, model_name_0 + '.h5'))
                else:
                    model_keras_cnn_0 = load_model(
                        join(bock_cnn_model_path, model_name_0 + '.h5'))
                model_keras_cnn_0.summary()  # summary() prints itself and returns None
            else:
                from training_scripts.models_CRNN import jan_original
                # initialize the model
                stateful = False
                bidi = True
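                # assumed layout: (batch, time steps, channel, freq bins, context frames),
                # i.e. 80-band spectrogram slices with a 15-frame context window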
                input_shape = (1, len_seq, 1, 80, 15)
                model_keras_cnn_0 = jan_original(filter_density=1,
                                                 dropout=0.5,
                                                 input_shape=input_shape,
                                                 batchNorm=False,
                                                 dense_activation='sigmoid',
                                                 channel=1,
                                                 stateful=stateful,
                                                 training=False,
                                                 bidi=bidi)
                # load weights
                model_keras_cnn_0.load_weights(
                    join(bock_cnn_model_path, model_name_0 + '.h5'))

        # load cross validation filenames
        test_cv_filename = join(bock_cv_path,
                                '8-fold_cv_random_' + str(ii) + '.fold')
        test_filenames = annotationCvParser(test_cv_filename)

        if 'pretrained' in architecture:
            model_name_0 = architecture + str(ii)

        # delete detection results path if it exists
        detection_results_path_model = join(detection_results_path,
                                            model_name_0)
        if os.path.isdir(detection_results_path_model):
            shutil.rmtree(detection_results_path_model)

        for fn in test_filenames:
            if 'bidi_lstms' not in architecture:
                batch_process_onset_detection(
                    audio_path=bock_audio_path,
                    annotation_path=bock_annotations_path,
                    filename=fn,
                    scaler_0=scaler_0,
                    model_keras_cnn_0=model_keras_cnn_0,
                    model_name_0=model_name_0,
                    model_name_1=model_name_1,
                    pp_threshold=pp_threshold,
                    channel=1,
                    obs_cal=obs_cal,
                    architecture=architecture,
                    detection_results_path=detection_results_path)
            else:
                batch_process_onset_detection_phrase(
                    audio_path=bock_audio_path,
                    annotation_path=bock_annotations_path,
                    filename=fn,
                    scaler_0=scaler_0,
                    model_keras_cnn_0=model_keras_cnn_0,
                    model_name_0=model_name_0,
                    model_name_1=model_name_1,
                    pp_threshold=pp_threshold,
                    stateful=stateful,
                    obs_cal=obs_cal,
                    len_seq=len_seq,
                    detection_results_path=detection_results_path)

    print('threshold', pp_threshold)
    recall_precision_f1_fold, recall_precision_f1_overall = eval_bock(
        architecture=architecture,
        detection_results_path=detection_results_path,
        bock_annotations_path=bock_annotations_path)

    log_path = join(bock_results_path, varin['sample_weighting'],
                    architecture + '_' + 'threshold.txt')
    # log_path = join(schluter_results_path, weighting, 'schluter_jingju_model_threshold.txt')
    append_write = append_or_write(log_path)
    write_results_2_txt_schluter(log_path, append_write, pp_threshold,
                                 recall_precision_f1_overall)

    return recall_precision_f1_fold, recall_precision_f1_overall
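
A hedged driver sketch for the subroutine above, sweeping the peak-picking threshold for one architecture; every path, the fold count, and the len_seq value are placeholder assumptions, not values from the source.

# Hypothetical driver -- paths and parameter values are placeholders.
for pp_threshold in [round(0.1 * t, 1) for t in range(1, 10)]:
    fold_results, overall_results = schluter_eval_subroutine(
        nfolds=8,
        pp_threshold=pp_threshold,
        obs_cal='tocal',                 # compute observations with the models
        len_seq=100,                     # assumed CRNN phrase length
        architecture='baseline',
        bock_cv_path='./bock_cv',
        bock_cnn_model_path='./bock_models',
        bock_audio_path='./bock_audio',
        bock_annotations_path='./bock_annotations',
        bock_results_path='./bock_results',
        detection_results_path='./detections',
        jingju_cnn_model_path='./jingju_models',
        full_path_jingju_scaler='./jingju_scaler.pkl')
    print(overall_results)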