def test_encoding():
    """Check that every SymbTr txt and mu2 score is UTF-8 encoded.

    Files without any "Turkish" characters are reported as us-ascii by
    ``file -i``; since us-ascii is a subset of UTF-8, both are accepted.
    """
    symbtr_folder = './'
    symbtr_txt_folder = os.path.join(symbtr_folder, 'txt/')
    symbtr_mu2_folder = os.path.join(symbtr_folder, 'mu2/')

    symbtrtxtfiles = get_filenames_in_dir(symbtr_txt_folder,
                                          keyword='*.txt')[0]
    symbtrmu2files = get_filenames_in_dir(symbtr_mu2_folder,
                                          keyword='*.mu2')[0]

    # check both file sets with the same helper (the original duplicated
    # the loop body verbatim for txt and mu2)
    isallvalid = _all_files_utf8(symbtrtxtfiles)
    isallvalid = _all_files_utf8(symbtrmu2files) and isallvalid

    assert isallvalid


def _all_files_utf8(files):
    """Return True if ``file -i`` reports utf-8 or us-ascii for every
    file in ``files``; print the report of each offending file."""
    valid = True
    for ff in files:
        # pass the command as an argument list (shell=False): filenames
        # with spaces or shell metacharacters can no longer break or
        # inject into the command line
        out = subprocess.check_output(['file', '-i', ff]).decode('utf-8')
        if not any(charset in out for charset in ['utf-8', 'us-ascii']):
            print(out)
            valid = False
    return valid
def test_metadata():
    """Check that the MBIDs in annotations.json are consistent with the
    metadata and pitch files in the ./data folder."""
    anno_mbids = json.load(open('./annotations.json'))
    anno_mbids = set(os.path.split(aa['mbid'])[-1] for aa in anno_mbids)

    meta_mbids = get_filenames_in_dir('./data', keyword='*.json')[2]
    meta_mbids = set(os.path.splitext(mm)[0] for mm in meta_mbids)

    # NOTE(review): the pitch files are globbed with the same '*.json'
    # keyword as the metadata files, so pitch_mbids is always identical
    # to meta_mbids and the pitch checks can never fail independently --
    # confirm whether the keyword should be '*.pitch'
    pitch_mbids = get_filenames_in_dir('./data', keyword='*.json')[2]
    pitch_mbids = set(os.path.splitext(mm)[0] for mm in pitch_mbids)

    _check_mbid_consistency(anno_mbids, meta_mbids, 'metadata files')
    _check_mbid_consistency(anno_mbids, pitch_mbids, 'pitch files')


def _check_mbid_consistency(anno_mbids, file_mbids, file_desc):
    """Assert that the annotated MBIDs and the MBIDs found on disk
    match, printing the offending MBIDs before failing.

    :param anno_mbids: set of MBIDs read from annotations.json
    :param file_mbids: set of MBIDs derived from the file names on disk
    :param file_desc: description used in the messages, e.g.
        'metadata files' or 'pitch files'
    """
    missing = anno_mbids - file_mbids
    if missing:
        print("Missing MBIDS in the {} in ./data folder. "
              "Please add them!".format(file_desc))
        for mm in missing:
            print(' {}'.format(mm))
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and the {} in ./data folder".format(file_desc)

    extra = file_mbids - anno_mbids
    if extra:
        print("Extra MBIDS in the {} in ./data folder. "
              "Please remove them!".format(file_desc))
        for ma in extra:
            print(' {}'.format(ma))
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and the {} in ./data folder".format(file_desc)
def test_metadata():
    """Check that the MBIDs in annotations.json and the ./metadata
    folder are consistent."""
    anno_mbids = json.load(open('./annotations.json'))
    anno_mbids = set(anno_mbids.keys())

    meta_mbids = get_filenames_in_dir('./metadata', keyword='*.json')[2]
    meta_mbids = set(os.path.splitext(mm)[0] for mm in meta_mbids)

    missing_meta = anno_mbids - meta_mbids
    if missing_meta:
        # print() calls (not Python-2-only print statements), consistent
        # with the rest of the codebase and valid in both Python 2 and 3
        print("Missing these MBIDS the in ./metadata folder. Please add them!")
        for mm in missing_meta:
            print(' {}'.format(mm))
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and ./metadata folder"

    missing_anno = meta_mbids - anno_mbids
    if missing_anno:
        print("Extra MBIDS in the ./metadata folder. Please remove them!")
        for ma in missing_anno:
            print(' {}'.format(ma))
        assert False, "Mismatch between the MBIDs in annotations.json " \
                      "and ./metadata folder"
def extract(cls, audiodir, start_idx=0):
    """Extract the predominant melody of every audio recording found
    under ``audiodir`` (searched recursively) and save it as a '.pitch'
    file next to the recording.

    :param audiodir: the audio directory
    :param start_idx: index into the list of found recordings at which
        extraction starts; useful when several extractor instances are
        run in parallel
    """
    audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]

    def _pitch_path(audio_path):
        # same folder and stem as the recording, '.pitch' extension
        stem = os.path.basename(os.path.splitext(audio_path)[0])
        return os.path.join(os.path.dirname(audio_path), stem + '.pitch')

    if start_idx:  # skip the recordings before the given index
        audio_files = audio_files[start_idx:]
    pitch_files = [_pitch_path(f) for f in audio_files]

    for num, (audio_path, pitch_path) in enumerate(
            zip(audio_files, pitch_files)):
        print(' ')
        print("{0:d}: {1:s}".format(num + 1, os.path.basename(audio_path)))

        if os.path.isfile(pitch_path):  # already exists
            print(" > Already exist; skipped.")
            continue

        # extract, post-filter, and keep only the pitch-value column
        extraction = cls.extractor.run(audio_path)
        filtered = cls.filter.run(extraction['pitch'])
        compact = np.array(filtered)[:, 1]
        np.savetxt(pitch_path, compact,
                   fmt='%.' + str(cls.DECIMAL) + 'f')
def run(cls):
    """Fetch MusicBrainz metadata for every mp3 under ../data and write
    it next to the recording as a .json file, skipping recordings that
    already have one."""
    data_folder = os.path.join('..', 'data')
    mp3_files = get_filenames_in_dir(data_folder, keyword='*.mp3')[0]

    crawler = AudioMetadata(get_work_attributes=True, print_warnings=True)

    for idx, mp3_path in enumerate(mp3_files):
        save_file = os.path.splitext(mp3_path)[0] + '.json'
        if os.path.exists(save_file):
            # already fetched: warn if the stored MBID does not match
            # the file name, then skip
            temp_mbid = json.load(open(save_file))['mbid']
            if temp_mbid not in mp3_path:
                print(mp3_path + ": does not match " + temp_mbid)
            continue

        print('{0:d}: {1:s}'.format(idx, mp3_path))

        # Get audio metadata
        audio_meta = crawler.from_musicbrainz(mp3_path)

        vocal_instrument = []
        for artist in audio_meta['artists']:
            is_choir = (artist['type'] == 'vocal' and
                        'attribute-list' in artist.keys() and
                        'choir_vocals' in artist['attribute-list'])
            if is_choir:
                vocal_instrument.append(artist['attribute-list'])
            elif artist['type'] in ['conductor']:
                pass  # conductors do not contribute to the voicing
            else:
                vocal_instrument.append(artist['type'])

        audio_meta['instrumentation_voicing'] = \
            cls.check_voice_instrumentation(vocal_instrument)

        json.dump(audio_meta, open(save_file, 'w'), indent=4)
def getsymbtrnames():
    """Collect the SymbTr score names available in each representation.

    :return: a 6-tuple of name sets, in order: txt, mu2, pdf, xml, midi
        files, and the names listed in symbTr_mbid.json
    """
    symbtr_folder = './'

    def _names_in(folder, extension):
        # base names (without extension) of the non-hidden files in
        # ``folder`` carrying ``extension``
        names = get_filenames_in_dir(folder, keyword='*' + extension)[2]
        names = [s for s in names if not s[0] == '.']
        return set(os.path.splitext(s)[0] for s in names)

    # the original repeated this stanza verbatim for all five formats
    symbtrtxtnames = _names_in(os.path.join(symbtr_folder, 'txt/'), '.txt')
    symbtrmu2names = _names_in(os.path.join(symbtr_folder, 'mu2/'), '.mu2')
    symbtrpdfnames = _names_in(
        os.path.join(symbtr_folder, 'SymbTr-pdf/'), '.pdf')
    symbtrxmlnames = _names_in(
        os.path.join(symbtr_folder, 'MusicXML/'), '.xml')
    symbtrmidnames = _names_in(os.path.join(symbtr_folder, 'midi/'), '.mid')

    symbtr_work_file = os.path.join(symbtr_folder, 'symbTr_mbid.json')
    symbtr_work = json.load(open(symbtr_work_file, 'r'))
    symbtrjsonnames = set(s['name'] for s in symbtr_work)

    return (symbtrtxtnames, symbtrmu2names, symbtrpdfnames, symbtrxmlnames,
            symbtrmidnames, symbtrjsonnames)
def extract(cls, audiodir, start_idx=0):
    """Run predominant-melody extraction on all mp3 recordings under
    ``audiodir`` and its subfolders, writing one '.pitch' file per
    recording.

    :param audiodir: the audio directory
    :param start_idx: index in the list of found recordings at which to
        start; handy when running several extractor instances at once
    """
    mp3_paths = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
    pitch_paths = []
    for mp3 in mp3_paths:
        # target file: same folder, same stem, '.pitch' extension
        folder = os.path.dirname(mp3)
        stem = os.path.basename(os.path.splitext(mp3)[0])
        pitch_paths.append(os.path.join(folder, stem + '.pitch'))

    if start_idx:  # if index is given, drop the earlier recordings
        mp3_paths = mp3_paths[start_idx:]
        pitch_paths = pitch_paths[start_idx:]

    counter = 0
    for mp3, pitch_path in zip(mp3_paths, pitch_paths):
        counter += 1
        print(' ')
        print("{0:d}: {1:s}".format(counter, os.path.basename(mp3)))

        if os.path.isfile(pitch_path):  # nothing to do
            print(" > Already exist; skipped.")
            continue

        # extract, filter, then store only the pitch values compactly
        raw = cls.extractor.run(mp3)
        filtered = cls.filter.run(raw['pitch'])
        vals = np.array(filtered)[:, 1]
        np.savetxt(pitch_path, vals, fmt='%.' + str(cls.DECIMAL) + 'f')
def search_min_peak_ratio(step_size, kernel_width, distribution_type,
                          min_peak_ratio):
    """Count how often the annotated tonic falls among the distribution
    peaks detected with the given ``min_peak_ratio``.

    :param step_size: bin size of the stored pitch distributions
    :param kernel_width: smoothing width of the stored distributions
    :param distribution_type: feature type selecting the folder to scan
    :param min_peak_ratio: minimum peak ratio passed to peak detection
    :return: (number of features whose peaks contain the tonic,
              total number of detected peaks)
    """
    base_folder = os.path.join('data', 'features')
    feature_folder = os.path.abspath(io.get_folder(
        base_folder, distribution_type, step_size, kernel_width))
    feature_files = get_filenames_in_dir(feature_folder,
                                         keyword='*pdf.json')[0]

    evaluator = Evaluator()
    num_peaks = 0
    num_tonic_in_peaks = 0
    for feature_file in feature_files:
        data = json.load(open(feature_file))
        data['feature'] = PitchDistribution.from_dict(data['feature'])

        # detected peaks in cents, converted to Hz wrt the annotated tonic
        peak_idx = data['feature'].detect_peaks(
            min_peak_ratio=min_peak_ratio)[0]
        peak_cents = data['feature'].bins[peak_idx]
        peak_freqs = Converter.cent_to_hz(peak_cents, data['tonic'])

        evals = [evaluator.evaluate_tonic(freq, data['tonic'])['tonic_eval']
                 for freq in peak_freqs]
        num_tonic_in_peaks += any(evals)
        num_peaks += len(evals)

    return num_tonic_in_peaks, num_peaks
# get the input index
if len(sys.argv) == 1:
    idx = None  # no argument: process all recordings
elif len(sys.argv) == 2:  # for parallelization
    idx = int(sys.argv[1])
else:
    raise ValueError('Only accepts zero or one argument')
print(idx)

extractor = PredominantMelodyMakam()

audiodir = './'  # audio folder and sub folders

# text file
audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
txtfiles = [os.path.join(os.path.dirname(f), os.path.basename(
    os.path.splitext(f)[0]) + '.pitch') for f in audio_files]

# check "is not None" (not truthiness): the original used an empty-list
# sentinel with "if idx:", which silently ignored a requested index of 0
if idx is not None:  # if index is given
    audio_files = [audio_files[idx]]
    txtfiles = [txtfiles[idx]]

for ii, mp3 in enumerate(audio_files):
    print("{0:d}: {1:s}".format(ii + 1, os.path.basename(mp3)))
    if os.path.isfile(txtfiles[ii]):  # already exists
        print(" > Already exist; skipped.")
    else:
        results = extractor.run(mp3)
# get the input index
if len(sys.argv) == 1:
    idx = None  # no argument: process all recordings
elif len(sys.argv) == 2:  # for parallelization
    idx = int(sys.argv[1])
else:
    raise ValueError('Only accepts zero or one argument')
print(idx)

extractor = PredominantMelodyMakam()

audiodir = './'  # audio folder and sub folders

# text file
audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
txtfiles = [
    os.path.join(os.path.dirname(f),
                 os.path.basename(os.path.splitext(f)[0]) + '.pitch')
    for f in audio_files
]

# check "is not None" (not truthiness): the original used an empty-list
# sentinel with "if idx:", which silently ignored a requested index of 0
if idx is not None:  # if index is given
    audio_files = [audio_files[idx]]
    txtfiles = [txtfiles[idx]]

for ii, mp3 in enumerate(audio_files):
    print("{0:d}: {1:s}".format(ii + 1, os.path.basename(mp3)))
    if os.path.isfile(txtfiles[ii]):  # already exists
        print(" > Already exist; skipped.")
def stratified_k_fold(cls, data_dir, annotation_in, n_folds=10,
                      random_state=None):
    """Generate stratified k folds from the recordings in ``data_dir``,
    stratifying on the makam annotations.

    :param data_dir: (str) data directory
    :param annotation_in: (str) path of the annotation json file, or the
        already-loaded annotation list; each entry is a dict with the
        "mbid", "tonic" (frequency) and "makam" (name) keys
    :param n_folds: (int) number of stratified folds requested
    :param random_state: (None, int or RandomState) pseudo-random number
        generator state used for shuffling; None falls back to the
        default numpy RNG
    :return: list of folds; each fold is a dict with "test" and "train"
        keys, each holding a list of dicts with the "file", recording
        "mbid", annotated "tonic" and annotated "mode" of a sample
    """
    modes = cls._get_mode_names(data_dir)
    file_paths, base_folders, file_names = get_filenames_in_dir(
        data_dir, keyword='*.pitch')

    try:
        # annotation_in is the path of a json file
        annotations = json.load(open(annotation_in, 'r'))
    except TypeError:
        # annotation_in is already the list of annotation dicts
        annotations = annotation_in

    file_modes, mbids, tonics = cls._parse_mbid_mode_tonic(
        annotations, file_names, base_folders)

    # the stratification is computed over the mode indices
    mode_idx = [modes.index(m) for m in file_modes]
    skf = cross_validation.StratifiedKFold(
        mode_idx, n_folds=n_folds, shuffle=True, random_state=random_state)

    return cls._organize_folds(skf, file_paths, mbids, file_modes, tonics)
def get_txt_filenames():
    """Return the filenames of the SymbTr txt scores in ./txt/."""
    return get_filenames_in_dir('./txt/', keyword='*.txt')
def stratified_k_fold(cls, data_dir, annotation_in, n_folds=10,
                      random_state=None):
    """Split the recordings in ``data_dir`` into stratified k folds.

    The stratification uses the annotated makam of each recording.

    :param data_dir: (str) data directory
    :param annotation_in: (str) json file path, or the annotation list
        itself; entries are dicts with the "mbid", "tonic" (frequency)
        and "makam" (name) keys
    :param n_folds: (int) number of stratified folds requested
    :param random_state: (None, int or RandomState) state of the
        pseudo-random number generator used for shuffling; None uses
        the default numpy RNG
    :return: list of folds, each a dict with "test" and "train" keys
        whose values are lists of dicts carrying the "file", recording
        "mbid", annotated "tonic" and annotated "mode" of a sample
    """
    mode_names = cls._get_mode_names(data_dir)
    pitch_paths, folders, names = get_filenames_in_dir(
        data_dir, keyword='*.pitch')

    try:  # annotation_in is the path of a json file
        annotations = json.load(open(annotation_in, 'r'))
    except TypeError:  # annotation_in is already loaded
        annotations = annotation_in

    sample_modes, sample_mbids, sample_tonics = cls._parse_mbid_mode_tonic(
        annotations, names, folders)

    # stratify on the index of each sample's mode
    stratify_on = [mode_names.index(mode) for mode in sample_modes]
    skf = cross_validation.StratifiedKFold(
        stratify_on, n_folds=n_folds, shuffle=True,
        random_state=random_state)

    folds = cls._organize_folds(skf, pitch_paths, sample_mbids,
                                sample_modes, sample_tonics)
    return folds
def get_mu2_filenames():
    """Return the filenames of the SymbTr mu2 scores in ./mu2/."""
    return get_filenames_in_dir('./mu2/', keyword='*.mu2')
def test(step_size, kernel_width, distribution_type, model_type, fold_idx,
         experiment_type, dis_measure, k_neighbor, min_peak_ratio, rank,
         save_folder, overwrite=False):
    """Run the tonic/mode/joint estimation experiment on a single fold.

    Loads the fold's test samples and the matching training model, runs
    the KNN classifier on every test sample, saves per-sample results,
    and merges them into a single results.json when all samples succeed.

    :param fold_idx: index of the fold to test
    :param experiment_type: 'tonic', 'mode' or 'joint'
    :param overwrite: recompute the fold even if results already exist
    :return: status string if results already exist, otherwise a dict
        with 'saved', 'failed' and 'skipped' per-sample file lists
    """
    # file to save the results
    res_dict = {'saved': [], 'failed': [], 'skipped': []}
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(save_folder, 'testing', experiment_type),
        model_type, distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio), 'fold{0:d}'.format(fold_idx)))
    results_file = os.path.join(test_folder, 'results.json')
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)
    else:
        if overwrite:
            shutil.rmtree(test_folder, ignore_errors=True)
            os.makedirs(test_folder)
        elif os.path.exists(results_file):
            return u"{0:s} already has results.".format(test_folder)

    # load fold
    fold_file = os.path.join(save_folder, 'folds.json')
    folds = json.load(open(fold_file))
    test_fold = []
    for f in folds:
        if f[0] == fold_idx:
            test_fold = f[1]['testing']
            break
    assert len(test_fold) == 100, "There should be 100 samples in the test " \
                                  "fold"

    # load training model
    training_folder = os.path.abspath(io.get_folder(
        os.path.join(save_folder, 'training'), model_type,
        distribution_type, step_size, kernel_width))
    model_file = os.path.join(training_folder,
                              u'fold{0:d}.json'.format(fold_idx))
    model = json.load(open(model_file))

    # instantiate the PitchDistributions
    for i, m in enumerate(model):
        try:  # filepath given
            model[i] = json.load(open(os.path.join(save_folder, m)))
        except (TypeError, AttributeError):  # dict already loaded
            assert isinstance(m['feature'], dict), "Unknown model."
        model[i]['feature'] = PitchDistribution.from_dict(
            model[i]['feature'])

        # sanity check: no test sample may appear in the training model
        try:
            if any(test_sample['source'] in model[i]['sources']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')
        except KeyError:  # single-source model entry
            if any(test_sample['source'] == model[i]['source']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')

    for test_sample in test_fold:
        # get MBID from pitch file
        mbid = test_sample['source']
        save_file = os.path.join(test_folder, u'{0:s}.json'.format(mbid))
        if not overwrite and os.path.exists(save_file):
            res_dict['skipped'].append(save_file)
            continue

        # instantiate the classifier and evaluator object
        classifier = KNNClassifier(
            step_size=step_size, kernel_width=kernel_width,
            feature_type=distribution_type, model=copy.deepcopy(model))

        # if the model_type is multi and the test data is in the model,
        # remove it
        if model_type == 'multi':
            for i, m in enumerate(classifier.model):
                if mbid in m:
                    del classifier.model[i]
                    break

        try:
            # we use the pitch instead of the distribution already
            # computed in the feature extraction. those distributions are
            # normalized wrt tonic to one of the bins centers will
            # exactly correspond to the tonic freq. therefore it would
            # be cheating
            pitch = np.loadtxt(test_sample['pitch'])
            if experiment_type == 'tonic':  # tonic identification
                results = classifier.estimate_tonic(
                    pitch, test_sample['mode'],
                    min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            elif experiment_type == 'mode':  # mode recognition
                results = classifier.estimate_mode(
                    pitch, test_sample['tonic'],
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            elif experiment_type == 'joint':  # joint estimation
                results = classifier.estimate_joint(
                    pitch, min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            else:
                raise ValueError("Unknown experiment_type")

            # save results
            json.dump(results, open(save_file, 'w'))
            res_dict['saved'].append(save_file)
        except Exception:
            # was a bare "except:", which also swallowed SystemExit and
            # KeyboardInterrupt; record the sample as failed instead.
            # NOTE(review): this still absorbs the "Unknown
            # experiment_type" ValueError raised above -- confirm whether
            # that should propagate instead
            res_dict['failed'].append(save_file)

    if not res_dict['failed']:
        # merge the per-sample results into a single results.json
        computed = get_filenames_in_dir(test_folder, keyword='*.json')[0]
        assert len(computed) == 100, 'There should have been 100 tested files.'
        results = {}
        for c in computed:
            mbid = os.path.splitext(os.path.split(c)[-1])[0]
            results[mbid] = json.load(open(c))
        json.dump(results, open(results_file, 'w'), indent=4)
        for c in computed:
            os.remove(c)

    return res_dict
def evaluate(step_size, kernel_width, distribution_type, model_type,
             experiment_type, dis_measure, k_neighbor, min_peak_ratio,
             result_folder):
    """Evaluate the test results of a tonic/mode/joint experiment.

    Reads each fold's results.json under the experiment's test folder,
    compares the estimations against the dataset annotations, writes an
    evaluation.json next to each results file, and an overall_eval.json
    (aggregated over the folds) in the test folder.

    :param step_size: pitch-distribution bin size in cents
    :param kernel_width: smoothing kernel width of the distributions
    :param distribution_type: feature type used in the experiment
    :param model_type: training model type
    :param experiment_type: 'tonic', 'mode' or 'joint' (any other value
        is treated as 'joint' by the branches below)
    :param dis_measure: distance measure used in testing
    :param k_neighbor: number of nearest neighbors used in testing
    :param min_peak_ratio: minimum peak ratio used in tonic detection
    :param result_folder: root folder holding the 'testing' results
    :return: unicode status string "<test_folder> done"
    """
    # folder holding the per-fold results of this parameter combination
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(result_folder, 'testing', experiment_type),
        model_type, distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio)))
    result_files = get_filenames_in_dir(test_folder,
                                        keyword='*results.json')[0]

    anno_file = './data/ottoman_turkish_makam_recognition_dataset' \
                '/annotations.json'
    annotations = json.load(open(anno_file))
    makam_labels = np.unique([a['makam'] for a in annotations]).tolist()

    evaluator = Evaluator()
    tmp_bins = np.arange(0, 1200, step_size)
    # accumulator for the aggregated (all-folds) evaluation; its keys
    # depend on what the experiment type produces
    if experiment_type == 'tonic':
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None)}
    elif experiment_type == 'mode':
        eval_folds = {'num_correct_mode': 0, 'mode_accuracy': 0,
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }
    else:
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'num_correct_mode': 0, 'mode_accuracy': 0,
                      'num_correct_joint': 0, 'joint_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None),
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }

    for rf in result_files:
        res = json.load(open(rf))
        eval_file = os.path.join(os.path.dirname(rf), 'evaluation.json')
        rec_ev = []  # per-recording evaluations of this fold
        for aa in annotations:
            mbid = os.path.split(aa['mbid'])[-1]
            if mbid in res.keys():  # in testing data
                if experiment_type == 'tonic':
                    rec_ev.append(evaluator.evaluate_tonic(res[mbid][0][0],
                                                           aa['tonic'],
                                                           mbid))
                    # convert the evaluation values so they are
                    # json-serializable below
                    rec_ev[-1]['tonic_eval'] = rec_ev[-1]['tonic_eval'].\
                        tolist()
                    rec_ev[-1]['same_octave'] = rec_ev[-1]['same_octave'].\
                        tolist()
                elif experiment_type == 'mode':
                    rec_ev.append(evaluator.evaluate_mode(res[mbid][0][0],
                                                          aa['makam'],
                                                          mbid))
                else:  # joint estimation
                    rec_ev.append(evaluator.evaluate_joint(
                        [res[mbid][0][0][0], aa['tonic']],
                        [res[mbid][0][0][1], aa['makam']], mbid))
                    rec_ev[-1]['tonic_eval'] = rec_ev[-1]['tonic_eval'].\
                        tolist()
                    rec_ev[-1]['same_octave'] = rec_ev[-1]['same_octave'].\
                        tolist()
                    try:
                        rec_ev[-1]['joint_eval'] = rec_ev[-1]['joint_eval'].\
                            tolist()
                    except AttributeError:
                        # TODO: find out why i've put an exception here
                        pass

        ev = {'per_recording': rec_ev, 'overall': {}}
        # tonic statistics; a KeyError means this experiment type did
        # not produce tonic evaluations
        try:
            ev['overall']['num_correct_tonic'] = sum(re['tonic_eval']
                                                     for re in rec_ev)
            ev['overall']['tonic_accuracy'] = (
                ev['overall']['num_correct_tonic'] / len(rec_ev))
            ev['overall']['tonic_deviation_distribution'] = \
                PitchDistribution.from_cent_pitch(
                    [re['cent_diff'] for re in rec_ev],
                    ref_freq=None, step_size=step_size, kernel_width=0)
            try:  # force to pcd
                ev['overall']['tonic_deviation_distribution'].to_pcd()
            except AssertionError:
                pass
            eval_folds['num_correct_tonic'] += ev['overall'][
                'num_correct_tonic']
            eval_folds['tonic_deviation_distribution'].vals +=\
                ev['overall']['tonic_deviation_distribution'].vals
            ev['overall']['tonic_deviation_distribution'] = \
                ev['overall']['tonic_deviation_distribution'].to_dict()
        except KeyError:
            pass
        # mode statistics; skipped (KeyError) for tonic-only experiments
        try:
            ev['overall']['num_correct_mode'] = sum(re['mode_eval']
                                                    for re in rec_ev)
            ev['overall']['mode_accuracy'] = (
                ev['overall']['num_correct_mode'] / len(rec_ev))
            ev['overall']['confusion_matrix'] = {
                'matrix': confusion_matrix(
                    [re['annotated_mode'] for re in rec_ev],
                    [re['estimated_mode'] for re in rec_ev],
                    labels=makam_labels),
                'labels': makam_labels}
            eval_folds['num_correct_mode'] += ev['overall'][
                'num_correct_mode']
            eval_folds['confusion_matrix']['matrix'] +=\
                ev['overall']['confusion_matrix']['matrix']
            ev['overall']['confusion_matrix']['matrix'] = \
                ev['overall']['confusion_matrix']['matrix'].astype(
                    int).tolist()
        except KeyError:
            pass
        # joint statistics; only present in joint experiments
        try:
            ev['overall']['num_correct_joint'] = sum(re['joint_eval']
                                                     for re in rec_ev)
            ev['overall']['joint_accuracy'] = (
                ev['overall']['num_correct_joint'] / len(rec_ev))
            eval_folds['num_correct_joint'] += ev['overall'][
                'num_correct_joint']
        except KeyError:
            pass

        json.dump(ev, open(eval_file, 'w'))

    # finalize the aggregated statistics
    # NOTE(review): the division by 10 looks like a hard-coded number of
    # folds -- confirm it matches the actual fold count
    if experiment_type == 'tonic':
        eval_folds['tonic_accuracy'] = eval_folds['num_correct_tonic'] / 10
        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
    elif experiment_type == 'mode':
        eval_folds['mode_accuracy'] = eval_folds['num_correct_mode'] / 10
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].astype(int).tolist()
    else:
        eval_folds['tonic_accuracy'] = eval_folds['num_correct_tonic'] / 10
        eval_folds['mode_accuracy'] = eval_folds['num_correct_mode'] / 10
        eval_folds['joint_accuracy'] = eval_folds['num_correct_joint'] / 10
        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].tolist()

    json.dump(eval_folds, open(os.path.join(test_folder,
                                            'overall_eval.json'), 'w'))
    return u'{0:s} done'.format(test_folder)