Example #1
def print_overlap_audio_df_best(all_songs):
    print('Overlap audio and df-best on this audio')
    audio_types = ['CHF_2017'] + filehandler.MIREX_SUBMISSION_NAMES

    result = dict()

    for audio_type in audio_types:
        wcsr_numerator = 0
        wcsr_denominator = 0

        for song_key in all_songs:
            song = all_songs[song_key]

            if audio_type == 'CHF_2017':
                audio_lab_str = song.full_chordify_chord_labs_path
            else:
                audio_lab_str = filehandler.get_full_mirex_chord_labs_path(
                    song, audio_type)
            df_lab_str = filehandler.get_data_fusion_path(
                song_key, 'DF', 'BEST', audio_type)

            if filehandler.file_exists(
                    audio_lab_str) and filehandler.file_exists(df_lab_str):
                wcsr_numerator += compare_chord_labels(
                    audio_lab_str, df_lab_str) * song.duration
                wcsr_denominator += song.duration

        print('Overlap between ' + audio_type + ' and ' + audio_type +
              '-DF-BEST (WCSR):' + str(wcsr_numerator / wcsr_denominator))

        result[audio_type] = wcsr_numerator / wcsr_denominator

    result_series = pandas.Series(result)
    return result_series
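
The two accumulators above implement a Weighted Chord Symbol Recall (WCSR): each song's pairwise overlap score is weighted by its duration. A minimal, self-contained sketch of that aggregation, with the per-song score abstracted away (compare_chord_labels is a project-specific helper), could look like this:

def weighted_csr(scores_and_durations):
    """Aggregate (overlap_score, duration_in_seconds) pairs into a WCSR value."""
    numerator = sum(score * duration for score, duration in scores_and_durations)
    denominator = sum(duration for _, duration in scores_and_durations)
    # Guard against an empty selection, which the snippet above does not do
    return numerator / denominator if denominator > 0 else 0.0

# Hypothetical usage: weighted_csr([(0.82, 201.3), (0.74, 187.0)])
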
Example #2
def evaluate_tabs(all_songs) -> None:
    """
    Evaluate all lab files based on tab parsing and alignment.

    :param all_songs: All songs in our data set.
    """
    result_csv_path = filehandler.TABLABS_RESULTS_PATH
    if not path.isfile(result_csv_path):
        # Results were not calculated yet
        with open(result_csv_path, 'w') as write_file:
            for song_key in all_songs:
                song = all_songs[song_key]
                for tab_path in song.full_tab_paths:
                    tab_write_path = filehandler.get_full_tab_chord_labs_path(
                        tab_path)
                    if filehandler.file_exists(tab_write_path):
                        likelihood, transposition = filehandler.read_log_likelihood(
                            song_key, tab_write_path)
                        csr, overseg, underseg, seg = evaluate(
                            song.full_ground_truth_chord_labs_path,
                            tab_write_path)
                        write_file.write(
                            '{0};{1};{2};{3};{4};{5};{6};{7};{8}\n'.format(
                                song_key, song.duration,
                                filehandler.get_relative_path(tab_write_path),
                                likelihood, transposition, csr, overseg,
                                underseg, seg))
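
The results file written above is a headerless, semicolon-separated table. Reading it back for analysis might look like the sketch below; the column names are assumptions inferred from the write order, not names defined by the project.

import pandas

columns = ['song_key', 'duration', 'tab_lab_path', 'log_likelihood',
           'transposition', 'csr', 'overseg', 'underseg', 'seg']
# TABLABS_RESULTS_PATH is the same file the function above writes to
tab_results = pandas.read_csv(filehandler.TABLABS_RESULTS_PATH, sep=';',
                              header=None, names=columns)
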
Example #3
def print_overlap_df_best_methods(all_songs):
    print('Overlap audio types (df best)')

    result = dict()

    audio_types = ['CHF_2017'] + filehandler.MIREX_SUBMISSION_NAMES

    for audio_1 in audio_types:
        result[audio_1 + '-DF-BEST'] = dict()
        for audio_2 in audio_types:
            wcsr_numerator = 0
            wcsr_denominator = 0

            for song_key, song in all_songs.items():
                audio_1_df_lab = filehandler.get_data_fusion_path(
                    song_key, 'DF', 'BEST', audio_1)
                audio_2_df_lab = filehandler.get_data_fusion_path(
                    song_key, 'DF', 'BEST', audio_2)

                if filehandler.file_exists(
                        audio_1_df_lab) and filehandler.file_exists(
                            audio_2_df_lab):
                    wcsr_numerator += compare_chord_labels(
                        audio_1_df_lab, audio_2_df_lab) * song.duration
                    wcsr_denominator += song.duration

            result[audio_1 + '-DF-BEST'][audio_2 + '-DF-BEST'] = \
                wcsr_numerator / wcsr_denominator

            print('Overlap between ' + audio_1 + '-DF-BEST and ' + audio_2 +
                  '-DF-BEST:' + str(wcsr_numerator / wcsr_denominator))

    result_df = pandas.DataFrame(result)
    return result_df
Example #4
def predict_single_song(song: Song, hmm_parameters: HMMParameters) -> None:
    """
    Estimate chords for each tab matched to the song and export them to a lab file.

    :param song: Song for which we estimate tab-based chords
    :param hmm_parameters: Parameters of the trained HMM
    """
    for full_tab_path in song.full_tab_paths:
        tab_chord_path = filehandler.get_chords_from_tab_filename(
            full_tab_path)
        tab_write_path = filehandler.get_full_tab_chord_labs_path(
            full_tab_path)
        if not filehandler.file_exists(tab_write_path):
            log_likelihood, transposition_semitone = \
                jump_align(tab_chord_path, song.full_audio_path, tab_write_path, hmm_parameters)
            if log_likelihood is not None:
                # We found an alignment, write this to our log-likelihoods file
                if not tab_write_path.startswith(filehandler.DATA_PATH):
                    print('WRITING ERROR')
                # Remove start of path
                tab_write_path = tab_write_path[len(filehandler.DATA_PATH) +
                                                1:]
                filehandler.write_log_likelihood(song.key, tab_write_path,
                                                 log_likelihood,
                                                 transposition_semitone)
Example #5
def export_audio_features_for_song(song: Song) -> None:
    """
    Export the audio features of this song to a file.

    For this purpose, we use the python package librosa. First, we convert the audio file to mono. Then, we use the
    HPSS function to separate the harmonic and percussive elements of the audio. Then, we extract chroma from the
    harmonic part, using constant-Q transform with a sampling rate of 22050 and a hop length of 256 samples. Now we
    have chroma features for each sample, but we expect that the great majority of chord changes occurs on a beat.
    Therefore, we beat-synchronize the features: we run a beat-extraction function on the percussive part of the audio
    and average the chroma features between the consecutive beat positions. The chord annotations need to be
    beat-synchronized as well. We do this by taking the most prevalent chord label between beats. Each mean feature
    vector with the corresponding beat-synchronized chord label is regarded as one frame.

    :param song: Song for which we export the audio features
    """
    if song.full_ground_truth_chord_labs_path != '':
        # There are chord labels for this song
        write_path = filehandler.get_full_audio_features_path(song.key)
        if filehandler.file_exists(write_path):
            # We already extracted the audio features
            song.audio_features_path = write_path
        else:
            # We still need to extract the audio features.
            times_features_class = get_feature_ground_truth_matrix(
                song.full_audio_path, song.full_ground_truth_chord_labs_path)

            # Export the beat, feature and class matrix to the write_path (a binary .npy file)
            song.audio_features_path = write_path
            np.save(write_path, times_features_class)
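
The docstring above describes a fairly standard librosa pipeline (mono load, HPSS, constant-Q chroma at 22050 Hz with a 256-sample hop, beat synchronisation). get_feature_ground_truth_matrix is a project-internal helper; a rough sketch of just the feature-extraction part, under those assumptions, could look like this:

import librosa
import numpy as np

def extract_beat_synced_chroma(audio_path: str):
    # Load the audio as mono at 22050 Hz
    audio, sampling_rate = librosa.load(audio_path, sr=22050, mono=True)
    # Split into harmonic and percussive components
    harmonic, percussive = librosa.effects.hpss(audio)
    # Constant-Q chroma on the harmonic part, hop length of 256 samples
    chroma = librosa.feature.chroma_cqt(y=harmonic, sr=sampling_rate, hop_length=256)
    # Beat tracking on the percussive part
    _, beat_frames = librosa.beat.beat_track(y=percussive, sr=sampling_rate, hop_length=256)
    # Average the chroma frames between consecutive beats (one vector per beat interval)
    beat_chroma = librosa.util.sync(chroma, beat_frames, aggregate=np.mean)
    beat_times = librosa.frames_to_time(beat_frames, sr=sampling_rate, hop_length=256)
    return beat_times, beat_chroma
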
Example #6
def align_single_song(
        song: Song,
        alignment_parameters: Optional[AlignmentParameters] = None):
    """
    Align each MIDI file that is matched to this song to the song. As part of the procedure, each MIDI will be
    synthesized and the alignment of each MIDI will be written to a file.

    :param alignment_parameters: Parameters for alignment
    :param song: The Song object for which we align each MIDI file
    """
    # Make sure to have alignment parameters
    if alignment_parameters is None:
        alignment_parameters = AlignmentParameters()

    audio_loaded = False
    audio_cqt = np.ndarray([])
    audio_times = np.ndarray([])

    for midi_path in song.full_midi_paths:
        midi_name = fh.get_file_name_from_full_path(midi_path)
        write_path = fh.get_full_alignment_path(midi_name)
        if not fh.file_exists(write_path):
            # There is no alignment yet for this audio-midi combination, so let's calculate the alignment
            try:
                synthesized_midi_path = fh.get_full_synthesized_midi_path(
                    midi_name)
                if not fh.file_exists(synthesized_midi_path):
                    # The MIDI has not been synthesized yet
                    synthesizer.synthesize_midi_to_wav(
                        midi_path, alignment_parameters.sampling_rate)

                if not audio_loaded:
                    # Load audio if it is not loaded yet
                    audio_data, _ = librosa.load(
                        song.full_audio_path,
                        sr=alignment_parameters.sampling_rate)
                    audio_cqt, audio_times = _compute_cqt(
                        audio_data, alignment_parameters)
                    audio_loaded = True
                align_midi(audio_cqt, audio_times, synthesized_midi_path,
                           write_path, alignment_parameters)
                fh.remove_file(synthesized_midi_path)
            except Exception:
                print(write_path + " failed.")
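
_compute_cqt and align_midi are project internals; the underlying idea is to compare a constant-Q representation of the audio with one of the synthesized MIDI and time-warp one onto the other. A rough sketch under that assumption follows; the hop length, the cosine metric and the use of librosa's DTW are illustrative choices, not the project's actual AlignmentParameters.

import librosa
import numpy as np

def compute_cqt(audio_data: np.ndarray, sampling_rate: int, hop_length: int = 1024):
    # Log-magnitude constant-Q spectrogram plus the time stamp of each frame
    cqt = np.abs(librosa.cqt(audio_data, sr=sampling_rate, hop_length=hop_length))
    cqt_db = librosa.amplitude_to_db(cqt, ref=np.max)
    times = librosa.frames_to_time(np.arange(cqt_db.shape[1]),
                                   sr=sampling_rate, hop_length=hop_length)
    return cqt_db, times

def dtw_alignment_path(audio_cqt: np.ndarray, midi_cqt: np.ndarray) -> np.ndarray:
    # Dynamic time warping between the two CQT sequences (columns are frames);
    # librosa returns the warping path from end to start, so reverse it.
    _, warping_path = librosa.sequence.dtw(X=audio_cqt, Y=midi_cqt, metric='cosine')
    return warping_path[::-1]
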
Example #7
def classify_all_tabs_of_song(song: Song) -> None:
    """
    Classify all tabs of a song, by (1) LineType classification; (2) Segmenting lines; (3) System and Chord extraction.
    :param song: A Song in our data set, for which we want to parse all tabs
    """
    for tab_path in song.full_tab_paths:
        write_path = filehandler.get_chords_from_tab_filename(tab_path)
        if not filehandler.file_exists(write_path):
            untimed_chord_sequence = classify_tabs_from_file(tab_path)
            write_untimed_chord_sequence(write_path, untimed_chord_sequence)
Example #8
def print_overlap_audio_methods(all_songs):
    print('Overlap audio types (audio only)')

    result = dict()

    audio_types = ['CHF_2017'] + filehandler.MIREX_SUBMISSION_NAMES

    for audio_1 in audio_types:
        result[audio_1] = dict()
        for audio_2 in audio_types:
            wcsr_numerator = 0
            wcsr_denominator = 0

            for song_key, song in all_songs.items():
                if audio_1 == 'CHF_2017':
                    audio_1_lab = song.full_chordify_chord_labs_path
                else:
                    audio_1_lab = filehandler.get_full_mirex_chord_labs_path(
                        song, audio_1)

                if audio_2 == 'CHF_2017':
                    audio_2_lab = song.full_chordify_chord_labs_path
                else:
                    audio_2_lab = filehandler.get_full_mirex_chord_labs_path(
                        song, audio_2)

                if filehandler.file_exists(
                        audio_1_lab) and filehandler.file_exists(audio_2_lab):
                    wcsr_numerator += compare_chord_labels(
                        audio_1_lab, audio_2_lab) * song.duration
                    wcsr_denominator += song.duration

            result[audio_1][audio_2] = wcsr_numerator / wcsr_denominator

            print('Overlap between ' + audio_1 + ' and ' + audio_2 + ':' +
                  str(wcsr_numerator / wcsr_denominator))

    result_df = pandas.DataFrame(result)
    return result_df
Example #9
def data_fuse_song_with_actual_best_midi_and_tab(
        song: Song, chord_vocabulary: ChordVocabulary):
    """
    Data fuse a song with its actual best tab (and optionally MIDI) labels, and write the final labels to .lab files.

    :param song: The song on which we want to apply data fusion
    :param chord_vocabulary: The chord vocabulary
    """
    # Check if data fusion has already been calculated  TODO: make this check more robust
    if path.isfile(
            filehandler.get_data_fusion_path(song.key, 'df', 'actual-best',
                                             'CHF_2017')):
        return

    # Get list of audio lab files
    audio_labs = song.full_mirex_chord_lab_paths
    audio_labs['CHF_2017'] = song.full_chordify_chord_labs_path

    # Sample every 10ms, so 100 samples per second
    song_duration = song.duration
    nr_of_samples = int(ceil(song_duration * 100))

    # Turn the chord vocabulary (a list of (key, mode-str, chroma-list) tuples) into a ChordAlphabet (a list of chord label strings)
    alphabet = ChordAlphabet(chord_vocabulary)

    selection_name = 'actual-best'
    lab_list = [get_actual_best_tab_lab(song)]
    # lab_list = [get_actual_best_midi_lab(song), get_actual_best_tab_lab(song)]

    # Fill a numpy array with chord labels for each of the lab files
    chord_matrix = np.zeros((len(lab_list) + 1, nr_of_samples), dtype=int)
    for lab_nr in range(len(lab_list)):
        load_lab_file_into_chord_matrix(lab_list[lab_nr], lab_nr, chord_matrix,
                                        alphabet, nr_of_samples)

    # Iterate over the audio types:
    for audio_name, audio_lab in audio_labs.items():
        if filehandler.file_exists(audio_lab):
            # Add the lab file to our chord matrix
            load_lab_file_into_chord_matrix(audio_lab, len(lab_list),
                                            chord_matrix, alphabet,
                                            nr_of_samples)
            final_labels_data_fusion = _data_fusion_chord_label_combination(
                chord_matrix, nr_of_samples, alphabet)
            _write_final_labels(
                final_labels_data_fusion,
                filehandler.get_data_fusion_path(song.key, 'df',
                                                 selection_name, audio_name),
                alphabet)
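
load_lab_file_into_chord_matrix is a project helper; based on the surrounding code (100 samples per second, integer chord indices from a ChordAlphabet), an equivalent routine might look roughly like the sketch below. The .lab layout assumed here is the common "start end label" line-per-segment convention, and label_to_index stands in for the alphabet lookup.

import numpy as np

def load_lab_into_row(lab_path: str, row: int, chord_matrix: np.ndarray,
                      label_to_index, nr_of_samples: int) -> None:
    # Sample the annotation every 10 ms and write integer chord indices into one row
    with open(lab_path, 'r') as lab_file:
        for line in lab_file:
            start_str, end_str, label = line.split()
            first_sample = max(int(float(start_str) * 100), 0)
            last_sample = min(int(float(end_str) * 100), nr_of_samples)
            chord_matrix[row, first_sample:last_sample] = label_to_index(label)
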
Example #10
def get_expected_best_tab_lab(song: Song) -> str:
    """
    Find the lab file of the expected best tab for this Song (based on log-likelihood returned by Jump Alignment)

    :param song: Song in our data set
    """
    best_tab_lab, best_tab_quality = '', 0

    for tab_path in song.full_tab_paths:
        tab_write_path = filehandler.get_full_tab_chord_labs_path(tab_path)
        if filehandler.file_exists(tab_write_path):
            tab_quality, _ = filehandler.read_log_likelihood(
                song.key, tab_path)
            if tab_quality > best_tab_quality:
                best_tab_lab, best_tab_quality = tab_write_path, tab_quality

    return best_tab_lab
Example #11
def classify_aligned_midis_for_song(song: Song,
                                    chord_vocabulary: ChordVocabulary,
                                    segmenter: MIDISegmenterInterface):
    """
    Find chord labels for all re-aligned MIDIs of this song

    :param song: Song object for which we want to find the chord labels
    :param chord_vocabulary: List of all chords
    :param segmenter: Bar or beat segmenter
    """
    for full_midi_path in song.full_midi_paths:
        midi_name = filehandler.get_file_name_from_full_path(full_midi_path)
        full_alignment_path = filehandler.get_full_alignment_path(midi_name)
        write_path = filehandler.get_full_midi_chord_labs_path(
            midi_name, segmenter.segmenter_name)
        if not filehandler.file_exists(write_path):
            # The file does not exist yet, so we need to find the chords
            # Realign the MIDI using the alignment path
            realigned_midi = RealignedMIDI(full_midi_path, full_alignment_path)
            # Find Events, using the specified partition method
            events = segmenter.find_events(realigned_midi)
            # Assign most likely chords to each event
            most_likely_chords = _assign_most_likely_chords(
                events, chord_vocabulary)
            # Compute average chord probability
            midi_chord_probability = _compute_midi_chord_probability(
                most_likely_chords)
            # Concatenate annotation items with the same chord labels into one annotation.
            concatenated_annotation = _get_midi_chord_annotation(
                most_likely_chords)
            # Export results
            export_chord_annotation(concatenated_annotation, write_path)
            filehandler.write_midi_chord_probability(segmenter.segmenter_name,
                                                     midi_name,
                                                     midi_chord_probability)
Example #12
#     pool.apply_async(prepare_song, args=(all_songs[song_key],), callback=print)
# pool.close()
# pool.join()

for song_key in all_songs:
    print(prepare_song(all_songs[song_key]))

print('Pre-processing finished')

# Train HMM parameters for jump alignment
kf = KFold(n_splits=10, shuffle=True, random_state=42)
hmm_parameter_dict = {}
song_keys = list(all_songs.keys())
for train_indices, test_indices in kf.split(all_songs):
    hmm_parameters_path = filehandler.get_hmm_parameters_path(train_indices)
    if filehandler.file_exists(hmm_parameters_path):
        hmm_parameters = hmm_parameter_io.read_hmm_parameters_file(
            hmm_parameters_path)
    else:
        hmm_parameters = jump_alignment.train(chord_vocabulary, {
            song_keys[i]: all_songs[song_keys[i]]
            for i in list(train_indices)
        })
        hmm_parameter_io.write_hmm_parameters_file(hmm_parameters,
                                                   hmm_parameters_path)

    for test_index in test_indices:
        song_key = song_keys[test_index]
        hmm_parameter_dict[song_key] = hmm_parameters

print('HMM parameter training finished')
Example #13
def export_result_image(song: Song,
                        chords_vocabulary: ChordVocabulary,
                        midi: bool = True,
                        tab: bool = True,
                        audio: str = 'CHF_2017',
                        df: bool = True):
    """
    Export visualisation to a png file.

    :param song: Song for which we want to export the visualisation
    :param chords_vocabulary: Chord vocabulary
    :param midi: Show MIDI files?
    :param tab: Show Tab files?
    :param audio: Audio ACE method
    :param df: Show all DF results?
    """
    if filehandler.file_exists(
            filehandler.get_lab_visualisation_path(song, audio)):
        return song.title + " was already visualised for the ACE method " + audio + "."

    nr_of_samples = int(ceil(song.duration * 100))
    alphabet = ChordAlphabet(chords_vocabulary)

    # Select labs based on parameter setting
    label_data = [{
        'name': 'Ground truth',
        'index': 0,
        'lab_path': song.full_ground_truth_chord_labs_path,
        'csr': 1.0,
        'ovs': 1.0,
        'uns': 1.0,
        'seg': 1.0
    }]
    i = 1
    best_indices = []  # For expected best MIDI and tab
    if midi:
        duplicate_midis = filehandler.find_duplicate_midis(song)
        best_midi_name, best_segmentation = data_fusion.get_expected_best_midi(
            song)
        full_midi_paths = song.full_midi_paths
        full_midi_paths.sort()
        for full_midi_path in full_midi_paths:
            midi_name = filehandler.get_file_name_from_full_path(
                full_midi_path)
            for segmentation_method in ['bar', 'beat']:
                full_midi_chords_path = filehandler.get_full_midi_chord_labs_path(
                    midi_name, segmentation_method)
                if filehandler.file_exists(full_midi_chords_path) \
                        and midi_name not in duplicate_midis:
                    # Evaluate song
                    csr, ovs, uns, seg = evaluate(
                        song.full_ground_truth_chord_labs_path,
                        full_midi_chords_path)
                    # Save evaluation values to label_data
                    label_data.append({
                        'name':
                        'MIDI ' + midi_name + ' | ' + segmentation_method,
                        'index': i,
                        'lab_path': full_midi_chords_path,
                        'csr': csr,
                        'ovs': ovs,
                        'uns': uns,
                        'seg': seg
                    })
                    # Check if this is the expected best MIDI & segmentation method for this song
                    if midi_name == best_midi_name and segmentation_method == best_segmentation:
                        best_indices.append(i)
                    i += 1

    if tab:
        best_tab = data_fusion.get_expected_best_tab_lab(song)
        for tab_counter, full_tab_path in enumerate(song.full_tab_paths, 1):
            tab_chord_labs_path = filehandler.get_full_tab_chord_labs_path(
                full_tab_path)
            if filehandler.file_exists(tab_chord_labs_path):
                # Evaluate song
                csr, ovs, uns, seg = evaluate(
                    song.full_ground_truth_chord_labs_path,
                    tab_chord_labs_path)
                # Save evaluation values to label_data
                label_data.append({
                    'name': 'Tab ' + str(tab_counter),
                    'index': i,
                    'lab_path': tab_chord_labs_path,
                    'csr': csr,
                    'ovs': ovs,
                    'uns': uns,
                    'seg': seg
                })
                if tab_chord_labs_path == best_tab:
                    best_indices.append(i)
                i += 1
    if df:
        csr, ovs, uns, seg = evaluate(
            song.full_ground_truth_chord_labs_path,
            filehandler.get_full_mirex_chord_labs_path(song, audio))
        label_data.append({
            'name': audio,
            'index': i,
            'lab_path': filehandler.get_full_mirex_chord_labs_path(song, audio),
            'csr': csr,
            'ovs': ovs,
            'uns': uns,
            'seg': seg
        })
        i += 1

        for selection_name in 'all', 'best':
            for combination_name in 'rnd', 'mv', 'df':
                df_lab_path = filehandler.get_data_fusion_path(
                    song.key, combination_name, selection_name, audio)
                csr, ovs, uns, seg = evaluate(
                    song.full_ground_truth_chord_labs_path, df_lab_path)
                label_data.append({
                    'name': audio + '-' + combination_name.upper() + '-' +
                            selection_name.upper(),
                    'index': i,
                    'lab_path': df_lab_path,
                    'csr': csr,
                    'ovs': ovs,
                    'uns': uns,
                    'seg': seg
                })
                i += 1

    # Fill a numpy array with chord labels for each of the lab files
    chord_matrix = np.zeros((len(label_data), nr_of_samples), dtype=int)
    for lab_nr in range(len(label_data)):
        data_fusion.load_lab_file_into_chord_matrix(
            label_data[lab_nr]['lab_path'], lab_nr, chord_matrix, alphabet,
            nr_of_samples)

    all_chords = [chord_matrix[x] for x in range(len(label_data))]

    # Find names
    names = [label_dict['name'] for label_dict in label_data]

    # Find results
    results = ['CSR  OvS  UnS  Seg']
    for label_dict in label_data[1:]:
        results.append(' '.join([
            str(round(label_dict[measure], 2)).ljust(4, '0')
            for measure in ['csr', 'ovs', 'uns', 'seg']
        ]))

    # Show result
    plt1 = _show_chord_sequences(song, all_chords, best_indices, names,
                                 results, alphabet)

    plt1.savefig(filehandler.get_lab_visualisation_path(song, audio),
                 bbox_inches="tight",
                 pad_inches=0)

    return song.title + " was visualised for the ACE method " + audio + "."
Example #14
def data_fuse_song(song: Song, chord_vocabulary: ChordVocabulary):
    """
    Data fuse a song using all combinations of selection and combination methods, and write the final labels to .lab files.

    :param song: The song on which we want to apply data fusion
    :param chord_vocabulary: The chord vocabulary
    """
    # Check if data fusion has already been calculated  TODO: make this check more robust
    if path.isfile(
            filehandler.get_data_fusion_path(song.key, 'df', 'besttab',
                                             'CHF_2017')):
        return

    # Get list of symbolic lab files (all / expected best)
    # well_aligned_midis = get_well_aligned_midis(song)
    # all_symbolic_lab_paths = \
    #     [filehandler.get_full_midi_chord_labs_path(wam, 'bar') for wam in well_aligned_midis] + \
    #     [filehandler.get_full_midi_chord_labs_path(wam, 'beat') for wam in well_aligned_midis] + \
    #     [filehandler.get_full_tab_chord_labs_path(t) for t in song.full_tab_paths]
    # expected_best_symbolic_lab_paths = []
    # if well_aligned_midis:
    #     expected_best_symbolic_lab_paths.append(
    #         filehandler.get_full_midi_chord_labs_path(*get_expected_best_midi(song)))
    # if [filehandler.get_full_tab_chord_labs_path(t) for t in song.full_tab_paths]:
    #     expected_best_symbolic_lab_paths.append(
    #         filehandler.get_full_tab_chord_labs_path(get_expected_best_tab_lab(song)))

    well_aligned_midis = get_well_aligned_midis(song)
    all_midi_bar_lab_paths = [
        filehandler.get_full_midi_chord_labs_path(wam, 'bar')
        for wam in well_aligned_midis if filehandler.file_exists(
            filehandler.get_full_midi_chord_labs_path(wam, 'bar'))
    ]
    all_midi_beat_lab_paths = [
        filehandler.get_full_midi_chord_labs_path(wam, 'beat')
        for wam in well_aligned_midis if filehandler.file_exists(
            filehandler.get_full_midi_chord_labs_path(wam, 'beat'))
    ]
    all_midi_lab_paths = all_midi_bar_lab_paths + all_midi_beat_lab_paths
    all_tab_lab_paths = [
        filehandler.get_full_tab_chord_labs_path(t)
        for t in song.full_tab_paths
        if filehandler.file_exists(filehandler.get_full_tab_chord_labs_path(t))
    ]
    all_audio_lab_paths = {
        **song.full_mirex_chord_lab_paths,
        **{
            'CHF_2017': song.full_chordify_chord_labs_path
        }
    }

    expected_best_midi_lab_paths = []
    if well_aligned_midis:
        expected_best_midi_lab_paths.append(
            filehandler.get_full_midi_chord_labs_path(
                *get_expected_best_midi(song)))
    expected_best_tab_lab_paths = []
    if [
            filehandler.get_full_tab_chord_labs_path(t)
            for t in song.full_tab_paths
    ]:
        expected_best_tab_lab_paths.append(
            filehandler.get_full_tab_chord_labs_path(
                get_expected_best_tab_lab(song)))

    all_symbolic_lab_paths = all_midi_lab_paths + all_tab_lab_paths
    expected_best_symbolic_lab_paths = expected_best_midi_lab_paths + expected_best_tab_lab_paths

    # # Remove non-existing files (e.g. tab files in which too little chords were observed)
    # all_symbolic_lab_paths = [lab for lab in all_symbolic_lab_paths if filehandler.file_exists(lab)]
    # expected_best_symbolic_lab_paths = [lab for lab in expected_best_symbolic_lab_paths if filehandler.file_exists(lab)]

    # Get list of audio lab files
    # audio_labs = song.full_mirex_chord_lab_paths
    # audio_labs['CHF_2017'] = song.full_chordify_chord_labs_path

    # Sample every 10ms, so 100 samples per second
    song_duration = song.duration
    nr_of_samples = int(ceil(song_duration * 100))

    # Turn the chord vocabulary (a list of (key, mode-str, chroma-list) tuples) into a ChordAlphabet (a list of chord label strings)
    alphabet = ChordAlphabet(chord_vocabulary)

    selection_dict = {
        'all': all_symbolic_lab_paths,
        'best': expected_best_symbolic_lab_paths,
        'allmidi': all_midi_lab_paths,
        'bestmidi': expected_best_midi_lab_paths,
        'alltab': all_tab_lab_paths,
        'besttab': expected_best_tab_lab_paths
    }

    # Iterate over the two types of selection (all / best)
    # for lab_list_i in [0, 1]:
    #     lab_list = [all_symbolic_lab_paths, expected_best_symbolic_lab_paths,
    #                 all_midi_lab_paths, expected_best_midi_lab_paths,
    #                 all_tab_lab_paths, expected_best_tab_lab_paths][lab_list_i]
    #     lab_list = [i for i in lab_list if i != '']
    # selection_name = ['all', 'best', 'allmidi', 'bestmidi', 'alltab', 'besttab'][lab_list_i]

    for selection_name, lab_list in selection_dict.items():
        lab_list = [i for i in lab_list if i != '']

        # Fill a numpy array with chord labels for each of the lab files
        chord_matrix = np.zeros((len(lab_list) + 1, nr_of_samples), dtype=int)
        for lab_nr in range(len(lab_list)):
            load_lab_file_into_chord_matrix(lab_list[lab_nr], lab_nr,
                                            chord_matrix, alphabet,
                                            nr_of_samples)

        # Iterate over the audio types:
        for audio_name, audio_lab in all_audio_lab_paths.items():
            if filehandler.file_exists(audio_lab):
                if any([
                        not filehandler.file_exists(
                            filehandler.get_data_fusion_path(
                                song.key, df_type_str, selection_name,
                                audio_name))
                        for df_type_str in ['rnd', 'mv', 'df']
                ]):
                    # Add the lab file to our chord matrix
                    load_lab_file_into_chord_matrix(audio_lab, len(lab_list),
                                                    chord_matrix, alphabet,
                                                    nr_of_samples)

                    # Iterate over the three combination types; calculate labels and write them:
                    if not filehandler.file_exists(
                            filehandler.get_data_fusion_path(
                                song.key, 'rnd', selection_name, audio_name)):
                        final_labels_random = _random_chord_label_combination(
                            chord_matrix, nr_of_samples)
                        _write_final_labels(
                            final_labels_random,
                            filehandler.get_data_fusion_path(
                                song.key, 'rnd', selection_name, audio_name),
                            alphabet)

                    if not filehandler.file_exists(
                            filehandler.get_data_fusion_path(
                                song.key, 'mv', selection_name, audio_name)):
                        final_labels_majority = _majority_vote_chord_label_combination(
                            chord_matrix, nr_of_samples, alphabet)
                        _write_final_labels(
                            final_labels_majority,
                            filehandler.get_data_fusion_path(
                                song.key, 'mv', selection_name, audio_name),
                            alphabet)

                    if not filehandler.file_exists(
                            filehandler.get_data_fusion_path(
                                song.key, 'df', selection_name, audio_name)):
                        final_labels_data_fusion = _data_fusion_chord_label_combination(
                            chord_matrix, nr_of_samples, alphabet)
                        _write_final_labels(
                            final_labels_data_fusion,
                            filehandler.get_data_fusion_path(
                                song.key, 'df', selection_name, audio_name),
                            alphabet)
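
_random_chord_label_combination, _majority_vote_chord_label_combination and _data_fusion_chord_label_combination are internal helpers. As an illustration of the simplest of the three, a per-sample majority vote over the rows of the chord matrix could be sketched as follows; the tie-breaking rule is an assumption, not necessarily what the project does.

import numpy as np
from collections import Counter

def majority_vote_labels(chord_matrix: np.ndarray, nr_of_samples: int) -> np.ndarray:
    # For each 10 ms sample, keep the chord index proposed by the most sources;
    # ties are broken by taking the smallest index (an arbitrary choice).
    final_labels = np.zeros(nr_of_samples, dtype=int)
    for sample_nr in range(nr_of_samples):
        counts = Counter(int(label) for label in chord_matrix[:, sample_nr])
        best_count = max(counts.values())
        final_labels[sample_nr] = min(label for label, count in counts.items()
                                      if count == best_count)
    return final_labels
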