コード例 #1
0
def test_encoding():
    """Check that every SymbTr txt and mu2 score file is UTF-8 encoded.

    Runs the POSIX ``file -i`` command on each score file and fails the
    test if any file reports a charset other than utf-8 (or its us-ascii
    subset).
    """
    symbtr_folder = './'
    symbtr_txt_folder = os.path.join(symbtr_folder, 'txt/')
    symbtr_mu2_folder = os.path.join(symbtr_folder, 'mu2/')

    symbtrtxtfiles = get_filenames_in_dir(symbtr_txt_folder,
                                          keyword='*.txt')[0]
    symbtrmu2files = get_filenames_in_dir(symbtr_mu2_folder,
                                          keyword='*.mu2')[0]

    isallvalid = True
    # The txt and mu2 loops were identical; check both lists in one pass.
    for score_file in symbtrtxtfiles + symbtrmu2files:
        # Pass the command as an argument list (shell=False) so file names
        # containing spaces or shell metacharacters are handled safely;
        # decode the bytes output so the substring checks below work on
        # Python 3 as well.
        out = subprocess.check_output(['file', '-i', score_file])
        out = out.decode('utf-8')
        # If there are no "Turkish" characters in a file, the encoding will
        # be reported as us-ascii, which is a subset of UTF-8.
        if not any(charset in out for charset in ['utf-8', 'us-ascii']):
            print(out)
            isallvalid = False

    assert isallvalid
コード例 #2
0
ファイル: format_tests.py プロジェクト: MTG/SymbTr
def test_encoding():
    """Check that every SymbTr txt and mu2 score file is UTF-8 encoded.

    Runs the POSIX ``file -i`` command on each score file and fails the
    test if any file reports a charset other than utf-8 (or its us-ascii
    subset).
    """
    symbtr_folder = './'
    symbtr_txt_folder = os.path.join(symbtr_folder, 'txt/')
    symbtr_mu2_folder = os.path.join(symbtr_folder, 'mu2/')

    symbtrtxtfiles = get_filenames_in_dir(symbtr_txt_folder,
                                          keyword='*.txt')[0]
    symbtrmu2files = get_filenames_in_dir(symbtr_mu2_folder,
                                          keyword='*.mu2')[0]

    isallvalid = True
    # The txt and mu2 loops were identical; check both lists in one pass.
    for score_file in symbtrtxtfiles + symbtrmu2files:
        # Pass the command as an argument list (shell=False) so file names
        # containing spaces or shell metacharacters are handled safely;
        # decode the bytes output so the substring checks below work on
        # Python 3 as well.
        out = subprocess.check_output(['file', '-i', score_file])
        out = out.decode('utf-8')
        # If there are no "Turkish" characters in a file, the encoding will
        # be reported as us-ascii, which is a subset of UTF-8.
        if not any(charset in out for charset in ['utf-8', 'us-ascii']):
            print(out)
            isallvalid = False

    assert isallvalid
def test_metadata():
    """
    This test checks if the MBIDs in annotations.json and the metadata and
    pitch files in the ./data folder are consistent.
    """
    # "with" closes the annotation file deterministically (the original
    # json.load(open(...)) leaked the handle until garbage collection)
    with open('./annotations.json') as anno_file:
        anno_mbids = json.load(anno_file)
    anno_mbids = set(os.path.split(aa['mbid'])[-1] for aa in anno_mbids)

    meta_mbids = get_filenames_in_dir('./data', keyword='*.json')[2]
    meta_mbids = set(os.path.splitext(mm)[0] for mm in meta_mbids)

    # NOTE(review): this uses the same '*.json' keyword as the metadata
    # lookup above, so pitch_mbids is always equal to meta_mbids and the
    # pitch checks below can never fail independently. The keyword should
    # probably be '*.pitch' -- confirm against the data layout.
    pitch_mbids = get_filenames_in_dir('./data', keyword='*.json')[2]
    pitch_mbids = set(os.path.splitext(mm)[0] for mm in pitch_mbids)

    missing_meta = anno_mbids - meta_mbids
    if missing_meta:
        print("Missing MBIDS in the metadata files in ./data folder. "
              "Please add them!")
        for mm in missing_meta:
            print('   {}'.format(mm))

        # Assert after the loop so every missing MBID is listed before the
        # test fails (the original asserted inside the loop, reporting only
        # the first one).
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and the metadata files in ./data folder"

    missing_anno = meta_mbids - anno_mbids
    if missing_anno:
        print("Extra MBIDS in the metadata files in ./data folder. "
              "Please remove them!")
        for ma in missing_anno:
            print('   {}'.format(ma))

        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and the metadata files in ./data folder"

    missing_pitch = anno_mbids - pitch_mbids
    if missing_pitch:
        print("Missing MBIDS in the pitch files in ./data folder. "
              "Please add them!")
        for mp in missing_pitch:
            print('   {}'.format(mp))

        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and the pitch files in ./data folder"

    missing_anno = pitch_mbids - anno_mbids
    if missing_anno:
        print("Extra MBIDS in the pitch files in ./data folder. "
              "Please remove them!")
        for ma in missing_anno:
            print('   {}'.format(ma))

        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and the pitch files in ./data folder"
コード例 #4
0
def test_metadata():
    """
    This test checks if the MBIDs in annotations.json and ./metadata folder
    are consistent
    """
    # "with" closes the annotation file deterministically (the original
    # json.load(open(...)) leaked the handle until garbage collection)
    with open('./annotations.json') as anno_file:
        anno_mbids = json.load(anno_file)
    anno_mbids = set(anno_mbids.keys())

    meta_mbids = get_filenames_in_dir('./metadata', keyword='*.json')[2]
    meta_mbids = set(os.path.splitext(mm)[0] for mm in meta_mbids)

    missing_meta = anno_mbids - meta_mbids
    if missing_meta:
        # print() calls for Python 3 compatibility (the original used
        # Python 2 print statements); message typo "the in" fixed.
        print("Missing these MBIDS in the ./metadata folder. "
              "Please add them!")
        for mm in missing_meta:
            print('   {}'.format(mm))

        # Assert after the loop so every missing MBID is listed before the
        # test fails (the original asserted inside the loop, reporting only
        # the first one).
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and ./metadata folder"

    missing_anno = meta_mbids - anno_mbids
    if missing_anno:
        print("Extra MBIDS in the ./metadata folder. Please remove them!")
        for ma in missing_anno:
            print('   {}'.format(ma))

        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and ./metadata folder"
コード例 #5
0
ファイル: pitch.py プロジェクト: altugkarakurt/morty
    def extract(cls, audiodir, start_idx=0):
        """Extract the predominant melody of every audio recording found
        under the input folder and its subfolders.

        :param audiodir: the audio directory
        :param start_idx: index into the discovered recording list at which
               to start extraction; useful when several extractor instances
               are run on the same folder in parallel
        """
        # Discover the mp3 files and derive a sibling .pitch path for each.
        audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
        pitch_files = []
        for audio_path in audio_files:
            folder = os.path.dirname(audio_path)
            stem = os.path.basename(os.path.splitext(audio_path)[0])
            pitch_files.append(os.path.join(folder, stem + '.pitch'))

        if start_idx:  # skip recordings handled by another instance
            audio_files = audio_files[start_idx:]
            pitch_files = pitch_files[start_idx:]

        for num, (audio_path, pitch_path) in enumerate(
                zip(audio_files, pitch_files)):
            print(' ')
            print("{0:d}: {1:s}".format(num + 1,
                                        os.path.basename(audio_path)))

            if os.path.isfile(pitch_path):  # output already computed
                print("   > Already exist; skipped.")
                continue

            # extract the predominant melody, then post-filter it
            results = cls.extractor.run(audio_path)
            pitch_track = cls.filter.run(results['pitch'])

            # keep only the frequency column and save it compactly with a
            # fixed number of decimals
            pitch_track = np.array(pitch_track)[:, 1]
            decimal_str = '%.' + str(cls.DECIMAL) + 'f'

            np.savetxt(pitch_path, pitch_track, fmt=decimal_str)
コード例 #6
0
    def run(cls):
        """Fetch MusicBrainz metadata for every mp3 under ../data and save
        it next to the audio file as <name>.json.

        Recordings that already have a saved json are skipped after a
        sanity check that the stored MBID is consistent with the file name.
        """
        data_folder = os.path.join('..', 'data')
        mp3_files = get_filenames_in_dir(data_folder, keyword='*.mp3')[0]
        audio_metadata = AudioMetadata(get_work_attributes=True,
                                       print_warnings=True)

        for ii, m in enumerate(mp3_files):
            save_file = os.path.splitext(m)[0] + '.json'
            if os.path.exists(save_file):
                # Already crawled: only verify the stored MBID matches the
                # file name. "with" closes the handle (the original
                # json.load(open(...)) leaked it).
                with open(save_file) as json_file:
                    temp_mbid = json.load(json_file)['mbid']
                if temp_mbid not in m:
                    print(m + ": does not match " + temp_mbid)
                continue

            print('{0:d}: {1:s}'.format(ii, m))
            # Get audio metadata
            audio_meta = audio_metadata.from_musicbrainz(m)

            # Collect the performers' voice/instrument types so the
            # instrumentation/voicing of the recording can be classified.
            vocal_instrument = []
            for a in audio_meta['artists']:
                choir_bool = a['type'] == 'vocal' and \
                             'attribute-list' in a.keys() and \
                             'choir_vocals' in a['attribute-list']
                if choir_bool:
                    vocal_instrument.append(a['attribute-list'])
                elif a['type'] in ['conductor']:
                    pass  # conductors do not contribute to the voicing
                else:
                    vocal_instrument.append(a['type'])

            audio_meta['instrumentation_voicing'] = \
                cls.check_voice_instrumentation(vocal_instrument)

            with open(save_file, 'w') as json_file:
                json.dump(audio_meta, json_file, indent=4)
コード例 #7
0
def test_metadata():
    """
    This test checks if the MBIDs in annotations.json and ./metadata folder
    are consistent
    """
    # "with" closes the annotation file deterministically (the original
    # json.load(open(...)) leaked the handle until garbage collection)
    with open('./annotations.json') as anno_file:
        anno_mbids = json.load(anno_file)
    anno_mbids = set(anno_mbids.keys())

    meta_mbids = get_filenames_in_dir('./metadata', keyword='*.json')[2]
    meta_mbids = set(os.path.splitext(mm)[0] for mm in meta_mbids)

    missing_meta = anno_mbids - meta_mbids
    if missing_meta:
        # print() calls for Python 3 compatibility (the original used
        # Python 2 print statements); message typo "the in" fixed.
        print("Missing these MBIDS in the ./metadata folder. "
              "Please add them!")
        for mm in missing_meta:
            print('   {}'.format(mm))

        # Assert after the loop so every missing MBID is listed before the
        # test fails (the original asserted inside the loop, reporting only
        # the first one).
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and ./metadata folder"

    missing_anno = meta_mbids - anno_mbids
    if missing_anno:
        print("Extra MBIDS in the ./metadata folder. Please remove them!")
        for ma in missing_anno:
            print('   {}'.format(ma))

        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and ./metadata folder"
コード例 #8
0
def getsymbtrnames():
    """Collect the SymbTr score names available in each storage format.

    :return: a 6-tuple of name sets: scores found as txt, mu2, pdf, xml
             and mid files, plus the names listed in symbTr_mbid.json
    """
    symbtr_folder = './'

    def name_set(subfolder, extension):
        # List the files with the given extension, drop hidden files
        # (leading dot) and strip the extension from the remaining names.
        # This replaces five copy-pasted stanzas in the original.
        folder = os.path.join(symbtr_folder, subfolder)
        fnames = get_filenames_in_dir(folder, keyword='*' + extension)[2]
        fnames = [s for s in fnames if not s[0] == '.']
        return set(os.path.splitext(s)[0] for s in fnames)

    symbtrtxtnames = name_set('txt/', '.txt')
    symbtrmu2names = name_set('mu2/', '.mu2')
    symbtrpdfnames = name_set('SymbTr-pdf/', '.pdf')
    symbtrxmlnames = name_set('MusicXML/', '.xml')
    symbtrmidnames = name_set('midi/', '.mid')

    # "with" closes the work file deterministically (the original
    # json.load(open(...)) leaked the handle)
    symbtr_work_file = os.path.join(symbtr_folder, 'symbTr_mbid.json')
    with open(symbtr_work_file, 'r') as work_file:
        symbtr_work = json.load(work_file)
    symbtrjsonnames = set(s['name'] for s in symbtr_work)

    return (symbtrtxtnames, symbtrmu2names, symbtrpdfnames, symbtrxmlnames,
            symbtrmidnames, symbtrjsonnames)
コード例 #9
0
ファイル: format_tests.py プロジェクト: MTG/SymbTr
def getsymbtrnames():
    """Collect the SymbTr score names available in each storage format.

    :return: a 6-tuple of name sets: scores found as txt, mu2, pdf, xml
             and mid files, plus the names listed in symbTr_mbid.json
    """
    symbtr_folder = './'

    def name_set(subfolder, extension):
        # List the files with the given extension, drop hidden files
        # (leading dot) and strip the extension from the remaining names.
        # This replaces five copy-pasted stanzas in the original.
        folder = os.path.join(symbtr_folder, subfolder)
        fnames = get_filenames_in_dir(folder, keyword='*' + extension)[2]
        fnames = [s for s in fnames if not s[0] == '.']
        return set(os.path.splitext(s)[0] for s in fnames)

    symbtrtxtnames = name_set('txt/', '.txt')
    symbtrmu2names = name_set('mu2/', '.mu2')
    symbtrpdfnames = name_set('SymbTr-pdf/', '.pdf')
    symbtrxmlnames = name_set('MusicXML/', '.xml')
    symbtrmidnames = name_set('midi/', '.mid')

    # "with" closes the work file deterministically (the original
    # json.load(open(...)) leaked the handle)
    symbtr_work_file = os.path.join(symbtr_folder, 'symbTr_mbid.json')
    with open(symbtr_work_file, 'r') as work_file:
        symbtr_work = json.load(work_file)
    symbtrjsonnames = set(s['name'] for s in symbtr_work)

    return (symbtrtxtnames, symbtrmu2names, symbtrpdfnames,
            symbtrxmlnames, symbtrmidnames, symbtrjsonnames)
コード例 #10
0
    def extract(cls, audiodir, start_idx=0):
        """Extract the predominant melody of every audio recording found
        under the input folder and its subfolders.

        :param audiodir: the audio directory
        :param start_idx: index into the discovered recording list at which
               to start extraction; useful when several extractor instances
               are run on the same folder in parallel
        """
        # Discover the mp3 files and derive a sibling .pitch path for each.
        audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
        pitch_files = []
        for audio_path in audio_files:
            folder = os.path.dirname(audio_path)
            stem = os.path.basename(os.path.splitext(audio_path)[0])
            pitch_files.append(os.path.join(folder, stem + '.pitch'))

        if start_idx:  # skip recordings handled by another instance
            audio_files = audio_files[start_idx:]
            pitch_files = pitch_files[start_idx:]

        for num, (audio_path, pitch_path) in enumerate(
                zip(audio_files, pitch_files)):
            print(' ')
            print("{0:d}: {1:s}".format(num + 1,
                                        os.path.basename(audio_path)))

            if os.path.isfile(pitch_path):  # output already computed
                print("   > Already exist; skipped.")
                continue

            # extract the predominant melody, then post-filter it
            results = cls.extractor.run(audio_path)
            pitch_track = cls.filter.run(results['pitch'])

            # keep only the frequency column and save it compactly with a
            # fixed number of decimals
            pitch_track = np.array(pitch_track)[:, 1]
            decimal_str = '%.' + str(cls.DECIMAL) + 'f'

            np.savetxt(pitch_path, pitch_track, fmt=decimal_str)
コード例 #11
0
def search_min_peak_ratio(step_size, kernel_width, distribution_type,
                          min_peak_ratio):
    """Count how often the annotated tonic coincides with a detected peak.

    Loads the pre-computed distribution features for the given parameter
    combination, detects the peaks above ``min_peak_ratio``, converts the
    peak bins to frequencies and evaluates each against the annotated
    tonic.

    :param step_size: distribution bin step size used to compute the
           features
    :param kernel_width: smoothing kernel width used to compute the
           features
    :param distribution_type: feature distribution type
    :param min_peak_ratio: minimum peak ratio passed to peak detection
    :return: (number of recordings where a peak matched the tonic,
              total number of detected peaks)
    """
    base_folder = os.path.join('data', 'features')
    feature_folder = os.path.abspath(io.get_folder(
        base_folder, distribution_type, step_size, kernel_width))
    files = get_filenames_in_dir(feature_folder, keyword='*pdf.json')[0]
    evaluator = Evaluator()
    num_peaks = 0
    num_tonic_in_peaks = 0
    for f in files:
        # "with" closes each feature file deterministically (the original
        # json.load(open(f)) leaked a handle per iteration)
        with open(f) as json_file:
            dd = json.load(json_file)
        dd['feature'] = PitchDistribution.from_dict(dd['feature'])

        peak_idx = dd['feature'].detect_peaks(min_peak_ratio=min_peak_ratio)[0]
        peak_cents = dd['feature'].bins[peak_idx]
        peak_freqs = Converter.cent_to_hz(peak_cents, dd['tonic'])

        ev = [evaluator.evaluate_tonic(pp, dd['tonic'])['tonic_eval']
              for pp in peak_freqs]

        # any(ev) adds 1 when at least one peak evaluated as the tonic
        num_tonic_in_peaks += any(ev)
        num_peaks += len(ev)

    return num_tonic_in_peaks, num_peaks
コード例 #12
0
# Get the input index from the command line. An empty list is used as the
# "no index given" sentinel: it is falsy, so the `if idx:` check below
# leaves the full file lists untouched.
if len(sys.argv) == 1:
    idx = []
elif len(sys.argv) == 2:  # for parallelization
    # NOTE(review): idx == 0 is also falsy, so passing "0" processes every
    # recording instead of only the first one -- confirm this is intended.
    idx = int(sys.argv[1])
else:
    raise ValueError('Only accepts zero or one argument')

print(idx)

extractor = PredominantMelodyMakam()
audiodir = './'  # audio folder and sub folders

# text file
# Derive a sibling .pitch output path for each discovered mp3.
audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
txtfiles = [os.path.join(os.path.dirname(f), os.path.basename(
    os.path.splitext(f)[0]) + '.pitch') for f in audio_files]

if idx:  # if an index is given, process only that single recording
    audio_files = [audio_files[idx]]
    txtfiles = [txtfiles[idx]]

for ii, mp3 in enumerate(audio_files):
    print("{0:d}: {1:s}".format(ii + 1, os.path.basename(mp3)))

    if os.path.isfile(txtfiles[ii]):  # already exists
        print("   > Already exist; skipped.")
    else:
        # NOTE(review): `results` is not written out in this excerpt; the
        # save step presumably follows -- confirm against the full script.
        results = extractor.run(mp3)
コード例 #13
0
ファイル: melodyextraction.py プロジェクト: nomadics/morty
# Get the input index from the command line. An empty list is used as the
# "no index given" sentinel: it is falsy, so the `if idx:` check below
# leaves the full file lists untouched.
if len(sys.argv) == 1:
    idx = []
elif len(sys.argv) == 2:  # for parallelization
    # NOTE(review): idx == 0 is also falsy, so passing "0" processes every
    # recording instead of only the first one -- confirm this is intended.
    idx = int(sys.argv[1])
else:
    raise ValueError('Only accepts zero or one argument')

print(idx)

extractor = PredominantMelodyMakam()
audiodir = './'  # audio folder and sub folders

# text file
# Derive a sibling .pitch output path for each discovered mp3.
audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
txtfiles = [
    os.path.join(os.path.dirname(f),
                 os.path.basename(os.path.splitext(f)[0]) + '.pitch')
    for f in audio_files
]

if idx:  # if an index is given, process only that single recording
    audio_files = [audio_files[idx]]
    txtfiles = [txtfiles[idx]]

for ii, mp3 in enumerate(audio_files):
    print("{0:d}: {1:s}".format(ii + 1, os.path.basename(mp3)))

    if os.path.isfile(txtfiles[ii]):  # already exists; skip this recording
        print("   > Already exist; skipped.")
コード例 #14
0
ファイル: foldgenerator.py プロジェクト: nomadics/morty
    def stratified_k_fold(cls,
                          data_dir,
                          annotation_in,
                          n_folds=10,
                          random_state=None):
        """
        Generates stratified k folds from the audio_recordings in the
        data_dir. The stratification is applied according to the makam
        annotations
        :param data_dir: (str) data directory
        :param annotation_in: (str) json file or dictionary, which stores the
               annotations
               The loaded variable is a list of dictionaries, where each
               dictionary have the "mbid", "tonic" (frequency) and "makam"
               (name) keys, e.g.
               [
                 {
                   "mbid": "0db48ce4-f018-4d7d-b75e-66a64db72067",
                   "tonic": 151.1,
                   "makam": "Hicaz"
                 },
                 {
                   "mbid": "2c88acdf-685d-42c7-913d-1a9f2005587e",
                   "tonic": 292.5,
                   "makam": "Hicaz"
                 }
                 ...
               ]
        :param n_folds: (int) number of stratified folds requested
        :param random_state: (None, int or RandomState) pseudo-random number
               generator state used for shuffling. If None, use default numpy
               RNG for shuffling.
        :return: list of folds. each fold is organized as a dict with two keys
               "test" and "train". These keys store a list of dicts, where each
               dict has the "file", recording "MBID", (annotated) "tonic"
               and (annotated) "mode" keys, e.g:
               {'test': [
                   {'file': '0b45417b-acb4-4f8a-b180-5ad45be889af.pitch',
                    'mbid': u'0b45417b-acb4-4f8a-b180-5ad45be889af',
                    'mode': u'Saba',
                    'tonic': 328.3},
                   {'file': '3c25f0d8-a6df-4bde-87ef-e4af708b861d.pitch',
                    'mbid': u'3c25f0d8-a6df-4bde-87ef-e4af708b861d',
                    'mode': u'Hicaz',
                    'tonic': 150.0},
                    ...],
                'train': [
                   {...}]
        """
        modes = cls._get_mode_names(data_dir)
        [file_paths, base_folders,
         file_names] = get_filenames_in_dir(data_dir, keyword='*.pitch')

        # EAFP type dispatch: open() raises TypeError when annotation_in is
        # already the loaded list of dicts rather than a file path.
        try:  # json file path
            with open(annotation_in, 'r') as anno_file:
                annotations = json.load(anno_file)
        except TypeError:  # list of dict
            annotations = annotation_in

        file_modes, mbids, tonics = cls._parse_mbid_mode_tonic(
            annotations, file_names, base_folders)

        # get the stratified folds; the stratification labels are the mode
        # indices, so each fold preserves the overall mode proportions
        mode_idx = [modes.index(m) for m in file_modes]
        # NOTE(review): sklearn's cross_validation module is the legacy
        # (pre-0.18) API; newer versions moved StratifiedKFold to
        # model_selection with a different call signature -- confirm the
        # pinned sklearn version before upgrading.
        skf = cross_validation.StratifiedKFold(mode_idx,
                                               n_folds=n_folds,
                                               shuffle=True,
                                               random_state=random_state)

        folds = cls._organize_folds(skf, file_paths, mbids, file_modes, tonics)

        return folds
コード例 #15
0
def get_txt_filenames():
    """Return the SymbTr txt scores found under ./txt/.

    The result is whatever get_filenames_in_dir yields for '*.txt'
    (file paths, base folders and file names).
    """
    return get_filenames_in_dir('./txt/', keyword='*.txt')
コード例 #16
0
ファイル: foldgenerator.py プロジェクト: altugkarakurt/morty
    def stratified_k_fold(cls, data_dir, annotation_in, n_folds=10,
                          random_state=None):
        """
        Generates stratified k folds from the audio_recordings in the
        data_dir. The stratification is applied according to the makam
        annotations
        :param data_dir: (str) data directory
        :param annotation_in: (str) json file or dictionary, which stores the
               annotations
               The loaded variable is a list of dictionaries, where each
               dictionary have the "mbid", "tonic" (frequency) and "makam"
               (name) keys, e.g.
               [
                 {
                   "mbid": "0db48ce4-f018-4d7d-b75e-66a64db72067",
                   "tonic": 151.1,
                   "makam": "Hicaz"
                 },
                 {
                   "mbid": "2c88acdf-685d-42c7-913d-1a9f2005587e",
                   "tonic": 292.5,
                   "makam": "Hicaz"
                 }
                 ...
               ]
        :param n_folds: (int) number of stratified folds requested
        :param random_state: (None, int or RandomState) pseudo-random number
               generator state used for shuffling. If None, use default numpy
               RNG for shuffling.
        :return: list of folds. each fold is organized as a dict with two keys
               "test" and "train". These keys store a list of dicts, where each
               dict has the "file", recording "MBID", (annotated) "tonic"
               and (annotated) "mode" keys, e.g:
               {'test': [
                   {'file': '0b45417b-acb4-4f8a-b180-5ad45be889af.pitch',
                    'mbid': u'0b45417b-acb4-4f8a-b180-5ad45be889af',
                    'mode': u'Saba',
                    'tonic': 328.3},
                   {'file': '3c25f0d8-a6df-4bde-87ef-e4af708b861d.pitch',
                    'mbid': u'3c25f0d8-a6df-4bde-87ef-e4af708b861d',
                    'mode': u'Hicaz',
                    'tonic': 150.0},
                    ...],
                'train': [
                   {...}]
        """
        modes = cls._get_mode_names(data_dir)
        [file_paths, base_folders, file_names] = get_filenames_in_dir(
            data_dir, keyword='*.pitch')

        # EAFP type dispatch: open() raises TypeError when annotation_in is
        # already the loaded list of dicts rather than a file path.
        try:  # json file path
            with open(annotation_in, 'r') as anno_file:
                annotations = json.load(anno_file)
        except TypeError:  # list of dict
            annotations = annotation_in

        file_modes, mbids, tonics = cls._parse_mbid_mode_tonic(
            annotations, file_names, base_folders)

        # get the stratified folds; the stratification labels are the mode
        # indices, so each fold preserves the overall mode proportions
        mode_idx = [modes.index(m) for m in file_modes]
        # NOTE(review): sklearn's cross_validation module is the legacy
        # (pre-0.18) API; newer versions moved StratifiedKFold to
        # model_selection with a different call signature -- confirm the
        # pinned sklearn version before upgrading.
        skf = cross_validation.StratifiedKFold(
            mode_idx, n_folds=n_folds, shuffle=True, random_state=random_state)

        folds = cls._organize_folds(skf, file_paths, mbids, file_modes, tonics)

        return folds
コード例 #17
0
def get_mu2_filenames():
    """Return the SymbTr mu2 scores found under ./mu2/.

    The result is whatever get_filenames_in_dir yields for '*.mu2'
    (file paths, base folders and file names).
    """
    return get_filenames_in_dir('./mu2/', keyword='*.mu2')
コード例 #18
0
def test(step_size, kernel_width, distribution_type,
         model_type, fold_idx, experiment_type, dis_measure, k_neighbor,
         min_peak_ratio, rank, save_folder, overwrite=False):
    """Run one testing fold of the tonic/mode/joint estimation experiment.

    Loads the fold definition and the trained model for ``fold_idx``,
    classifies every test sample according to ``experiment_type`` ('tonic',
    'mode' or 'joint') and, if everything succeeded, merges the
    per-recording outputs into a single results.json in the fold's test
    folder (removing the individual files).

    :param step_size: distribution bin step size of the features
    :param kernel_width: smoothing kernel width of the features
    :param distribution_type: feature distribution type
    :param model_type: 'multi' or single-model training type
    :param fold_idx: index of the fold to test
    :param experiment_type: 'tonic', 'mode' or 'joint'
    :param dis_measure: distance measure used by the classifier
    :param k_neighbor: number of nearest neighbors
    :param min_peak_ratio: minimum peak ratio for tonic candidate peaks
    :param rank: number of ranked estimations requested
    :param save_folder: root folder of the experiment data
    :param overwrite: if True, wipe any previous results for this fold
    :return: dict with 'saved', 'failed' and 'skipped' file lists, or a
             message string when results already exist
    """

    # file to save the results
    res_dict = {'saved': [], 'failed': [], 'skipped': []}
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(save_folder, 'testing', experiment_type), model_type,
        distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio), 'fold{0:d}'.format(fold_idx)))
    results_file = os.path.join(test_folder, 'results.json')
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)
    else:
        if overwrite:
            shutil.rmtree(test_folder, ignore_errors=True)
            os.makedirs(test_folder)
        elif os.path.exists(results_file):
            return u"{0:s} already has results.".format(test_folder)

    # load the fold definition; "with" closes the handle (the original
    # json.load(open(...)) calls throughout this function leaked handles)
    fold_file = os.path.join(save_folder, 'folds.json')
    with open(fold_file) as fold_handle:
        folds = json.load(fold_handle)
    test_fold = []
    for f in folds:
        if f[0] == fold_idx:
            test_fold = f[1]['testing']
            break

    assert len(test_fold) == 100, "There should be 100 samples in the test " \
                                  "fold"

    # load training model
    training_folder = os.path.abspath(io.get_folder(
        os.path.join(save_folder, 'training'), model_type,
        distribution_type, step_size, kernel_width))

    model_file = os.path.join(training_folder,
                              u'fold{0:d}.json'.format(fold_idx))
    with open(model_file) as model_handle:
        model = json.load(model_handle)
    # instantiate the PitchDistributions
    for i, m in enumerate(model):
        # EAFP dispatch: path joining/opening raises TypeError or
        # AttributeError when the entry is already a loaded dict
        try:  # filepath given
            with open(os.path.join(save_folder, m)) as entry_handle:
                model[i] = json.load(entry_handle)
        except (TypeError, AttributeError):  # dict already loaded
            assert isinstance(m['feature'], dict), "Unknown model."
        model[i]['feature'] = PitchDistribution.from_dict(
            model[i]['feature'])
        # guard against test recordings leaking into the training model
        try:
            if any(test_sample['source'] in model[i]['sources']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')
        except KeyError:
            if any(test_sample['source'] == model[i]['source']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')

    for test_sample in test_fold:
        # get MBID from pitch file
        mbid = test_sample['source']
        save_file = os.path.join(test_folder, u'{0:s}.json'.format(mbid))
        if not overwrite and os.path.exists(save_file):
            res_dict['skipped'].append(save_file)
            continue

        # instantiate the classifier and evaluator object
        classifier = KNNClassifier(
            step_size=step_size, kernel_width=kernel_width,
            feature_type=distribution_type, model=copy.deepcopy(model))

        # if the model_type is multi and the test data is in the model,
        # remove it
        if model_type == 'multi':
            for i, m in enumerate(classifier.model):
                if mbid in m:
                    del classifier.model[i]
                    break

        try:
            # we use the pitch instead of the distribution already computed in
            # the feature extraction. those distributions are normalized wrt
            # tonic to one of the bins centers will exactly correspond to
            # the tonic freq. therefore it would be cheating
            pitch = np.loadtxt(test_sample['pitch'])
            if experiment_type == 'tonic':  # tonic identification
                results = classifier.estimate_tonic(
                    pitch, test_sample['mode'], min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            elif experiment_type == 'mode':  # mode recognition
                results = classifier.estimate_mode(
                    pitch, test_sample['tonic'], distance_method=dis_measure,
                    k_neighbor=k_neighbor, rank=rank)
            elif experiment_type == 'joint':  # joint estimation
                results = classifier.estimate_joint(
                    pitch, min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            else:
                raise ValueError("Unknown experiment_type")

            # save results
            with open(save_file, 'w') as save_handle:
                json.dump(results, save_handle)
            res_dict['saved'].append(save_file)
        except Exception:
            # narrowed from a bare "except:", which also swallowed
            # SystemExit/KeyboardInterrupt; failed estimations are recorded
            # (best-effort) and reported via res_dict instead of crashing
            res_dict['failed'].append(save_file)

    if not res_dict['failed']:
        computed = get_filenames_in_dir(test_folder, keyword='*.json')[0]
        assert len(computed) == 100, 'There should have been 100 tested files.'

        # merge the per-recording jsons into a single results file, then
        # remove the individual files
        results = {}
        for c in computed:
            mbid = os.path.splitext(os.path.split(c)[-1])[0]
            with open(c) as computed_handle:
                results[mbid] = json.load(computed_handle)

        with open(results_file, 'w') as results_handle:
            json.dump(results, results_handle, indent=4)
        for c in computed:
            os.remove(c)
    return res_dict
コード例 #19
0
def evaluate(step_size, kernel_width, distribution_type, model_type,
             experiment_type, dis_measure, k_neighbor, min_peak_ratio,
             result_folder):
    """Evaluate the test results of a single parameter combination.

    Reads the per-fold ``*results.json`` files produced in testing,
    evaluates each recording against the ground-truth annotations, writes
    an ``evaluation.json`` next to each results file and an aggregated
    ``overall_eval.json`` in the test folder.

    :param step_size: pitch-distribution bin size in cents
    :param kernel_width: kernel width used in training (folder naming)
    :param distribution_type: distribution type used in training
    :param model_type: training model type (folder naming)
    :param experiment_type: 'tonic', 'mode' or 'joint'
    :param dis_measure: distance measure used in testing (folder naming)
    :param k_neighbor: k in k-NN estimation (folder naming)
    :param min_peak_ratio: minimum peak ratio (folder naming)
    :param result_folder: root folder of the experiment results
    :return: unicode status string "<test_folder> done"
    """
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(result_folder, 'testing', experiment_type), model_type,
        distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio)))
    result_files = get_filenames_in_dir(test_folder,
                                        keyword='*results.json')[0]

    anno_file = './data/ottoman_turkish_makam_recognition_dataset' \
                '/annotations.json'
    with open(anno_file) as f:
        annotations = json.load(f)
    makam_labels = np.unique([a['makam'] for a in annotations]).tolist()
    evaluator = Evaluator()

    # Accumulator over all folds; which fields exist depends on the
    # experiment type.
    tmp_bins = np.arange(0, 1200, step_size)
    if experiment_type == 'tonic':
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None)}
    elif experiment_type == 'mode':
        eval_folds = {'num_correct_mode': 0, 'mode_accuracy': 0,
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }
    else:  # joint estimation: tonic + mode fields together
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'num_correct_mode': 0, 'mode_accuracy': 0,
                      'num_correct_joint': 0, 'joint_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None),
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }

    for rf in result_files:
        with open(rf) as f:
            res = json.load(f)
        eval_file = os.path.join(os.path.dirname(rf), 'evaluation.json')

        # evaluate each recording of this fold against its annotation
        rec_ev = []
        for anno in annotations:
            mbid = os.path.split(anno['mbid'])[-1]
            if mbid not in res:  # recording not in this fold's test data
                continue

            if experiment_type == 'tonic':
                rec_ev.append(evaluator.evaluate_tonic(
                    res[mbid][0][0], anno['tonic'], mbid))
                # convert numpy bools to Python types for JSON dumping
                rec_ev[-1]['tonic_eval'] = rec_ev[-1]['tonic_eval'].tolist()
                rec_ev[-1]['same_octave'] = \
                    rec_ev[-1]['same_octave'].tolist()
            elif experiment_type == 'mode':
                rec_ev.append(evaluator.evaluate_mode(
                    res[mbid][0][0], anno['makam'], mbid))
            else:  # joint estimation
                rec_ev.append(evaluator.evaluate_joint(
                    [res[mbid][0][0][0], anno['tonic']],
                    [res[mbid][0][0][1], anno['makam']], mbid))
                rec_ev[-1]['tonic_eval'] = rec_ev[-1]['tonic_eval'].tolist()
                rec_ev[-1]['same_octave'] = \
                    rec_ev[-1]['same_octave'].tolist()
                try:
                    rec_ev[-1]['joint_eval'] = \
                        rec_ev[-1]['joint_eval'].tolist()
                except AttributeError:
                    # joint_eval is already a plain bool/int
                    # TODO: find out why this exception was needed
                    pass

        ev = {'per_recording': rec_ev, 'overall': {}}

        # Tonic evaluation; KeyError means the per-recording results carry
        # no tonic fields (mode-only experiment), so skip silently.
        try:
            num_tonic = sum(re['tonic_eval'] for re in rec_ev)
            ev['overall']['num_correct_tonic'] = num_tonic
            # float() forces true division; plain "/" on ints truncates
            # under Python 2 and would zero out the accuracy
            ev['overall']['tonic_accuracy'] = num_tonic / float(len(rec_ev))

            ev['overall']['tonic_deviation_distribution'] = \
                PitchDistribution.from_cent_pitch(
                    [re['cent_diff'] for re in rec_ev], ref_freq=None,
                    step_size=step_size, kernel_width=0)
            try:  # force to pitch-class distribution
                ev['overall']['tonic_deviation_distribution'].to_pcd()
            except AssertionError:
                pass  # already a PCD

            eval_folds['num_correct_tonic'] += num_tonic
            eval_folds['tonic_deviation_distribution'].vals += \
                ev['overall']['tonic_deviation_distribution'].vals

            ev['overall']['tonic_deviation_distribution'] = \
                ev['overall']['tonic_deviation_distribution'].to_dict()
        except KeyError:
            pass

        # Mode evaluation; skipped the same way for tonic-only experiments.
        try:
            num_mode = sum(re['mode_eval'] for re in rec_ev)
            ev['overall']['num_correct_mode'] = num_mode
            ev['overall']['mode_accuracy'] = num_mode / float(len(rec_ev))

            ev['overall']['confusion_matrix'] = {
                'matrix': confusion_matrix(
                    [re['annotated_mode'] for re in rec_ev],
                    [re['estimated_mode'] for re in rec_ev],
                    labels=makam_labels),
                'labels': makam_labels}

            eval_folds['num_correct_mode'] += num_mode
            eval_folds['confusion_matrix']['matrix'] += \
                ev['overall']['confusion_matrix']['matrix']

            ev['overall']['confusion_matrix']['matrix'] = \
                ev['overall']['confusion_matrix']['matrix'].astype(
                    int).tolist()
        except KeyError:
            pass

        # Joint evaluation; only present in joint experiments.
        try:
            num_joint = sum(re['joint_eval'] for re in rec_ev)
            ev['overall']['num_correct_joint'] = num_joint
            ev['overall']['joint_accuracy'] = num_joint / float(len(rec_ev))

            eval_folds['num_correct_joint'] += num_joint
        except KeyError:
            pass

        with open(eval_file, 'w') as f:
            json.dump(ev, f)

    # aggregate over the folds; the experiments use 10-fold stratification
    num_folds = 10.0
    if experiment_type == 'tonic':
        eval_folds['tonic_accuracy'] = \
            eval_folds['num_correct_tonic'] / num_folds
        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
    elif experiment_type == 'mode':
        eval_folds['mode_accuracy'] = \
            eval_folds['num_correct_mode'] / num_folds
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].astype(int).tolist()
    else:
        eval_folds['tonic_accuracy'] = \
            eval_folds['num_correct_tonic'] / num_folds
        eval_folds['mode_accuracy'] = \
            eval_folds['num_correct_mode'] / num_folds
        eval_folds['joint_accuracy'] = \
            eval_folds['num_correct_joint'] / num_folds

        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
        # cast to int for consistency with the mode-only branch; the
        # accumulator is a float array (np.zeros) holding integer counts
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].astype(int).tolist()

    with open(os.path.join(test_folder, 'overall_eval.json'), 'w') as f:
        json.dump(eval_folds, f)

    return u'{0:s} done'.format(test_folder)