def _cent_pitch_to_feature(self, pitch_cent, ref_freq):
    """
    Build the distribution feature from a pitch track given in cents.

    The distribution is created with PitchDistribution.from_cent_pitch using
    the kernel_width and step_size stored on the instance. If
    self.feature_type equals 'pcd', to_pcd() is additionally called on the
    distribution before it is returned.

    :param pitch_cent: pitch values, forwarded as-is to from_cent_pitch
    :param ref_freq: reference frequency, forwarded as-is to from_cent_pitch
    :return: the object created by PitchDistribution.from_cent_pitch
    """
    distribution = PitchDistribution.from_cent_pitch(
        pitch_cent,
        ref_freq=ref_freq,
        kernel_width=self.kernel_width,
        step_size=self.step_size)

    # any feature type other than 'pcd' is returned untouched
    if self.feature_type != 'pcd':
        return distribution

    # the return value of to_pcd() is not used; the same object is returned
    distribution.to_pcd()
    return distribution
def deserialize(seyir_features):
    """
    Replace the 'pitch_distribution' entry of every feature, in place, with
    the object returned by PitchDistribution.from_dict.

    If from_dict raises AttributeError the stored value is left as it is,
    provided it is falsy (e.g. an empty list); a truthy value trips the
    assertion instead.

    :param seyir_features: iterable of dict-like features, each holding a
        'pitch_distribution' key
    :return: None, the features are modified in place
    """
    key = 'pitch_distribution'
    for feature in seyir_features:
        try:
            feature[key] = PitchDistribution.from_dict(feature[key])
        except AttributeError:
            # empty pitch distribution: nothing to convert, but make sure
            # the stored value really is empty
            assert not feature[key], \
                'non-empty, non-object pitch distribution encountered'
def _compute_seyir_features_per_interval(self, pp, tt, t_intervals,
                                         t_center):
    """
    Compute one feature dictionary per time interval.

    For each (interval, center) pair the pitch is sliced with
    self._slice_pitch. An interval without any pitch sample yields a
    placeholder record (empty distribution, NaN average pitch, no stable
    pitches). Otherwise a pitch distribution is computed, converted to Hz
    and rescaled, its peaks are stored as stable pitches and the mean pitch
    is converted to Hz.

    :param pp: pitch values, forwarded to self._slice_pitch
    :param tt: time stamps, forwarded to self._slice_pitch
    :param t_intervals: sequence of (start, end) pairs
    :param t_center: sequence of interval centers, paired with t_intervals
    :return: list of dicts with the keys 'pitch_distribution',
        'average_pitch', 'stable_pitches', 'time_interval', 'time_center'
    """
    # the longest interval is the reference for the duration weighting
    longest_dur = max(interval[1] - interval[0] for interval in t_intervals)

    features = []
    for interval, center in zip(t_intervals, t_center):
        cents, sliced = self._slice_pitch(pp, interval, tt)

        record = {'pitch_distribution': [],
                  'average_pitch': np.nan,
                  'stable_pitches': [],
                  'time_interval': interval,
                  'time_center': center}

        if cents.size == 0:  # silence
            features.append(record)
            continue

        distribution = PitchDistribution.from_cent_pitch(
            cents, ref_freq=self._dummy_ref_freq,
            kernel_width=self.kernel_width, step_size=self.step_size)

        # reconvert to Hz
        distribution.cent_to_hz()

        # scale by the peak value (instead of the area under the curve),
        # weighted by the ratio of the number of samples and by the
        # interval duration relative to the longest interval
        peak_height = max(distribution.vals)
        sample_ratio = float(len(cents)) / len(sliced)
        duration_ratio = (interval[1] - interval[0]) / longest_dur
        distribution.vals = (distribution.vals * sample_ratio *
                             duration_ratio / peak_height)

        # the stable pitches are the peaks of the distribution
        peak_indices, peak_values = distribution.detect_peaks()
        stable = []
        for index, value in zip(peak_indices, peak_values):
            stable.append({'frequency': float(distribution.bins[index]),
                           'value': float(value)})

        # average pitch, converted from cent to Hz
        mean_pitch = Converter.cent_to_hz(np.mean(cents),
                                          self._dummy_ref_freq)

        record['pitch_distribution'] = distribution
        record['average_pitch'] = mean_pitch
        record['stable_pitches'] = stable
        features.append(record)

    return features
def _get_stablepitch_distribution(self, note_trajectories,
                                  theoretical_interval, ref_freq=None):
    """
    Compute the pitch distribution of the given note trajectories and pick
    the stable pitch close to the theoretical interval.

    :param note_trajectories: iterable of arrays of pitch values; they are
        stacked into a single array
    :param theoretical_interval: expected position of the stable pitch, in
        the same (cent) scale as the distribution bins
    :param ref_freq: reference frequency of the distribution. If None, the
        value returned by self._get_median_pitch for the stacked pitch
        values is used
    :return: tuple (peak_freq, distribution). peak_freq is None when no
        peak lies within self.pitch_threshold of theoretical_interval.
        cent_to_hz() has been called on the returned distribution
    """
    # np.hstack requires a sequence; handing it a generator is deprecated
    # in NumPy and rejected by recent releases, so materialize a list first
    temp_pitch_vals = np.hstack(list(note_trajectories))

    # useful to keep the bins coinciding with a desired value,
    # e.g. tonic frequency
    if ref_freq is None:
        ref_freq = self._get_median_pitch(temp_pitch_vals)

    distribution = PitchDistribution.from_hz_pitch(
        temp_pitch_vals, ref_freq=ref_freq,
        kernel_width=self.kernel_width, step_size=self.step_size,
        norm_type=None)

    # get the stable pitch as the highest peaks among the peaks close to
    # the theoretical pitch TODO
    peaks = distribution.detect_peaks()
    peak_bins = distribution.bins[peaks[0]]
    peak_vals = distribution.vals[peaks[0]]

    try:
        cand_bool = (abs(peak_bins - theoretical_interval) <
                     self.pitch_threshold)
        stable_pitch_cand = peak_bins[cand_bool]
        cand_occr = peak_vals[cand_bool]

        # np.argmax raises ValueError when there is no candidate
        peak_cent = stable_pitch_cand[np.argmax(cand_occr)]

        # convert to hz scale
        peak_freq = Converter.cent_to_hz(peak_cent, ref_freq)
    except ValueError:
        # no stable pitch in the vicinity, probably a misalignment
        peak_freq = None

    # convert to hz scale
    distribution.cent_to_hz()

    return peak_freq, distribution
def search_min_peak_ratio(step_size, kernel_width, distribution_type,
                          min_peak_ratio):
    """
    Count how often the annotated tonic is among the distribution peaks
    detected with the given min_peak_ratio.

    The '*pdf.json' feature files are read from the folder obtained from
    io.get_folder under data/features. For each file the peaks of the
    stored distribution are converted to Hz with respect to the stored
    tonic and each peak is evaluated against that tonic.

    :param step_size: forwarded to io.get_folder to locate the features
    :param kernel_width: forwarded to io.get_folder to locate the features
    :param distribution_type: forwarded to io.get_folder
    :param min_peak_ratio: forwarded to detect_peaks
    :return: tuple (num_tonic_in_peaks, num_peaks): the number of files
        with at least one peak evaluated as tonic, and the total number of
        peaks over all files
    """
    base_folder = os.path.join('data', 'features')
    feature_folder = os.path.abspath(io.get_folder(
        base_folder, distribution_type, step_size, kernel_width))
    files = get_filenames_in_dir(feature_folder, keyword='*pdf.json')[0]
    evaluator = Evaluator()

    num_peaks = 0
    num_tonic_in_peaks = 0
    for f in files:
        # close every feature file as soon as it is parsed
        with open(f) as feature_fp:
            dd = json.load(feature_fp)
        dd['feature'] = PitchDistribution.from_dict(dd['feature'])

        peak_idx = dd['feature'].detect_peaks(
            min_peak_ratio=min_peak_ratio)[0]
        peak_cents = dd['feature'].bins[peak_idx]
        peak_freqs = Converter.cent_to_hz(peak_cents, dd['tonic'])

        ev = [evaluator.evaluate_tonic(pp, dd['tonic'])['tonic_eval']
              for pp in peak_freqs]

        # a file counts once if any of its peaks matches the tonic
        num_tonic_in_peaks += any(ev)
        num_peaks += len(ev)

    return num_tonic_in_peaks, num_peaks
def get_models(self, pitch, alignednotes, tonic_symbol):
    """
    Compute the note models from the pitch track and the aligned notes.

    A model is created for each distinct note symbol found in the aligned
    notes and listed in data/note_dict.json (next to this file). The pitch
    trajectories are distributed to the models, models without aligned
    notes are removed, a temporary tonic frequency is estimated from the
    tonic note model, and the per-note histograms, the tuning and the
    overall recording distribution are computed and normalized.

    :param pitch: pitch track; converted to a numpy array
    :param alignednotes: list of aligned notes, each with a 'Symbol' key.
        A deep copy is processed, the input is not modified here
    :param tonic_symbol: symbol of the tonic note
    :return: tuple (note_models, recording_distribution, newtonic)
    :raises KeyError: if there is no note model for tonic_symbol
    """
    note_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'data', 'note_dict.json')
    # read the note dictionary and release the file handle right away
    with open(note_file, 'r') as note_fp:
        note_dict = json.load(note_fp)

    pitch = np.array(pitch)
    alignednotes_ext = deepcopy(alignednotes)

    note_names = {an['Symbol'] for an in alignednotes_ext}
    note_models = {}
    for nn in note_names:
        try:
            note_models[nn] = {
                'notes': [], 'distribution': [], 'stable_pitch': [],
                'performed_interval': [],
                'theoretical_interval': {
                    'Value': (note_dict[nn]['Value'] -
                              note_dict[tonic_symbol]['Value']),
                    'Unit': 'cent'},
                'theoretical_pitch': []}
        except KeyError:
            # NOTE(review): a tonic_symbol missing from note_dict also ends
            # up here, once for every note - confirm this is acceptable
            logging.warning(
                u"The note {0:s} is not in the note_dict.".format(nn))

    # compute note trajectories and add to each model
    self._distribute_pitch_trajectories(alignednotes_ext, note_models,
                                        pitch)

    # remove models without any aligned note
    self._remove_unaligned_notes(note_models)

    # update the tonic frequency temporarily
    # NOTE: extremely unlikely but this value might shift to the next bin
    # in the note model computation. Hence we don't assign it to the
    # final tonic value.
    tonic_trajectories = [nn['PitchTrajectory'][:, 1]
                          for nn in note_models[tonic_symbol]['notes']]
    temp_tonic_freq = self._get_stablepitch_distribution(
        tonic_trajectories,
        note_models[tonic_symbol]['theoretical_interval']['Value'])[0]

    # compute the histogram for each model
    self._get_note_histogram(note_models, temp_tonic_freq)

    # update the new tonic frequency
    newtonic = {'alignment': {
        'Value': note_models[tonic_symbol]['stable_pitch']['Value'],
        'Unit': 'Hz', 'Symbol': tonic_symbol,
        'Method': 'alignedNoteModel', 'OctaveWrapped': False,
        'Citation': 'SenturkPhDThesis',
        'Procedure': 'Tonic identified from the tonic note model obtained '
                     'from audio-score alignment'}}

    # get the distances wrt tonic
    self._get_tunings(newtonic, note_models)

    # compute the complete histogram without normalization
    recording_distribution = PitchDistribution.from_hz_pitch(
        pitch, ref_freq=temp_tonic_freq, kernel_width=self.kernel_width,
        step_size=self.step_size, norm_type=None)
    recording_distribution.cent_to_hz()

    # normalize all the distributions
    recording_distribution, note_models = self._normalize_distributions(
        recording_distribution, note_models)

    return note_models, recording_distribution, newtonic
def identify(self, pitch, plot=False):
    """
    Identify the tonic by detecting the last note and extracting the
    frequency

    The trailing silence is trimmed and only the last 10% of the remaining
    pitch track is decomposed into chunks. Starting from the last chunk,
    the median frequency of each chunk is checked as a tonic candidate
    against the pitch distribution of the complete pitch track.

    :param pitch: pitch track with the time in the first column and the
        frequency in the second column
    :param plot: if True, self.plot is called with the results
    :return: tuple (tonic, pitch_sliced, pitch_chunks, distribution).
        tonic['value'] stays None if no chunk yields a candidate; the
        distribution is converted to Hz only when a tonic is found
    """
    track = np.array(deepcopy(pitch))

    # trim silence in the end, i.e. remove the trailing zeros
    voiced_len = len(np.trim_zeros(track[:, 1], 'b'))
    track = track[:voiced_len, :]

    # slice the pitch track to only include the last 10% of the track
    # for performance reasons
    tail_len = int(track.shape[0] * 0.1)
    pitch_sliced = track[-tail_len:, :]

    # compute the pitch distribution of the complete (unsliced) track
    dummy_freq = 440.0
    frequencies = np.array(pitch)[:, 1]
    distribution = PitchDistribution.from_hz_pitch(
        frequencies, ref_freq=dummy_freq,
        kernel_width=self.kernel_width, step_size=self.step_size)

    # get pitch chunks
    chunk_filter = PitchFilter(
        lower_interval_thres=self.lower_interval_thres,
        upper_interval_thres=self.upper_interval_thres,
        min_freq=self.min_freq, max_freq=self.max_freq)
    pitch_chunks = chunk_filter.post_filter_chunks(
        chunk_filter.decompose_into_chunks(pitch_sliced))

    tonic = {"value": None, "unit": "Hz",
             "timeInterval": {"value": None, "unit": 'sec'},
             "octaveWrapped": False,  # octave correction is done
             "procedure": "Tonic identification by last note detection",
             "citation": 'Atlı, H. S., Bozkurt, B., Şentürk, S. (2015). '
                         'A Method for Tonic Frequency Identification of '
                         'Turkish Makam Music Recordings. In Proceedings '
                         'of 5th International Workshop on Folk Music '
                         'Analysis, pages 119–122, Paris, France.'}

    # try all chunks starting from the last as the tonic candidate,
    # considering the octaves
    for chunk in reversed(pitch_chunks):
        last_note = median(chunk[:, 1])

        # check all the pitch classes of the last note as a tonic candidate
        # by checking the vicinity in the stable pitches; the check gets
        # its own copy of the distribution
        candidate = self.check_tonic_with_octave_correction(
            last_note, deepcopy(distribution))

        if candidate is None:
            continue  # no estimation from this chunk, try the previous one

        # assign the tonic
        tonic['value'] = candidate
        tonic['timeInterval']['value'] = [chunk[0, 0], chunk[-1, 0]]

        # convert distribution bins to frequency
        distribution.cent_to_hz()
        break

    if plot:
        self.plot(pitch_sliced, tonic, pitch_chunks, distribution)

    return tonic, pitch_sliced, pitch_chunks, distribution
def test(step_size, kernel_width, distribution_type, model_type, fold_idx,
         experiment_type, dis_measure, k_neighbor, min_peak_ratio, rank,
         save_folder, overwrite=False):
    """
    Run the tonic, mode or joint estimation on the test samples of a fold
    and store the results.

    The results of each test sample are first saved to an individual json
    file in the test folder. If no sample failed, these files are merged
    into 'results.json' and deleted.

    :param step_size: step size given to the classifier; also used to
        locate the training/testing folders
    :param kernel_width: kernel width given to the classifier; also used
        to locate the training/testing folders
    :param distribution_type: feature type given to the classifier
    :param model_type: type of the training model; for 'multi' the model
        entry containing the tested recording is removed before estimation
    :param fold_idx: index of the fold to test
    :param experiment_type: 'tonic', 'mode' or 'joint'
    :param dis_measure: distance method given to the estimation
    :param k_neighbor: number of neighbors given to the estimation
    :param min_peak_ratio: given to the tonic and joint estimation
    :param rank: given to the estimation
    :param save_folder: folder holding 'folds.json' and the 'training' and
        'testing' subfolders
    :param overwrite: if True, the test folder is removed and recomputed
    :return: a message string if 'results.json' already exists and
        overwrite is False; otherwise a dict with the lists 'saved',
        'failed' and 'skipped' of the per-sample result files
    :raises RuntimeError: if a test sample is also in the training model
    """
    # file to save the results
    res_dict = {'saved': [], 'failed': [], 'skipped': []}
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(save_folder, 'testing', experiment_type), model_type,
        distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio), 'fold{0:d}'.format(fold_idx)))
    results_file = os.path.join(test_folder, 'results.json')
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)
    else:
        if overwrite:
            shutil.rmtree(test_folder, ignore_errors=True)
            os.makedirs(test_folder)
        elif os.path.exists(results_file):
            return u"{0:s} already has results.".format(test_folder)

    # load fold
    fold_file = os.path.join(save_folder, 'folds.json')
    with open(fold_file) as fold_fp:
        folds = json.load(fold_fp)
    test_fold = []
    for f in folds:
        if f[0] == fold_idx:
            test_fold = f[1]['testing']
            break

    assert len(test_fold) == 100, "There should be 100 samples in the test " \
                                  "fold"

    # load training model
    training_folder = os.path.abspath(io.get_folder(
        os.path.join(save_folder, 'training'), model_type,
        distribution_type, step_size, kernel_width))

    model_file = os.path.join(training_folder,
                              u'fold{0:d}.json'.format(fold_idx))
    with open(model_file) as model_fp:
        model = json.load(model_fp)

    # instantiate the PitchDistributions
    for i, m in enumerate(model):
        try:  # filepath given
            # os.path.join fails for an entry that is not a path, which
            # sends the already-loaded dicts to the except branch
            with open(os.path.join(save_folder, m)) as entry_fp:
                model[i] = json.load(entry_fp)
        except (TypeError, AttributeError):  # dict already loaded
            assert isinstance(m['feature'], dict), "Unknown model."
        model[i]['feature'] = PitchDistribution.from_dict(
            model[i]['feature'])

        # the training entry lists either several 'sources' or a single
        # 'source'; none of them may be a test sample
        try:
            if any(test_sample['source'] in model[i]['sources']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')
        except KeyError:
            if any(test_sample['source'] == model[i]['source']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')

    for test_sample in test_fold:
        # get MBID from pitch file
        mbid = test_sample['source']
        save_file = os.path.join(test_folder, u'{0:s}.json'.format(mbid))
        if not overwrite and os.path.exists(save_file):
            res_dict['skipped'].append(save_file)
            continue

        # instantiate the classifier and evaluator object
        classifier = KNNClassifier(
            step_size=step_size, kernel_width=kernel_width,
            feature_type=distribution_type, model=copy.deepcopy(model))

        # if the model_type is multi and the test data is in the model,
        # remove it
        if model_type == 'multi':
            for i, m in enumerate(classifier.model):
                if mbid in m:
                    # safe to delete during iteration, the loop ends here
                    del classifier.model[i]
                    break

        try:
            # we use the pitch instead of the distribution already computed
            # in the feature extraction. those distributions are normalized
            # wrt tonic so one of the bin centers will exactly correspond
            # to the tonic freq. therefore it would be cheating
            pitch = np.loadtxt(test_sample['pitch'])
            if experiment_type == 'tonic':  # tonic identification
                results = classifier.estimate_tonic(
                    pitch, test_sample['mode'],
                    min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            elif experiment_type == 'mode':  # mode recognition
                results = classifier.estimate_mode(
                    pitch, test_sample['tonic'],
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            elif experiment_type == 'joint':  # joint estimation
                results = classifier.estimate_joint(
                    pitch, min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            else:
                raise ValueError("Unknown experiment_type")

            # save results; the file has to be closed (flushed) since it
            # is read back below when the results are merged
            with open(save_file, 'w') as save_fp:
                json.dump(results, save_fp)
            res_dict['saved'].append(save_file)
        except Exception:
            # best effort: record the failed sample and go on with the
            # rest. KeyboardInterrupt and SystemExit are not swallowed
            res_dict['failed'].append(save_file)

    if not res_dict['failed']:
        computed = get_filenames_in_dir(test_folder, keyword='*.json')[0]
        assert len(computed) == 100, 'There should have been 100 tested files.'

        # merge the individual result files into a single file
        results = {}
        for c in computed:
            mbid = os.path.splitext(os.path.split(c)[-1])[0]
            with open(c) as computed_fp:
                results[mbid] = json.load(computed_fp)

        with open(results_file, 'w') as results_fp:
            json.dump(results, results_fp, indent=4)

        # the individual files are not needed after merging
        for c in computed:
            os.remove(c)

    return res_dict
def evaluate(step_size, kernel_width, distribution_type, model_type,
             experiment_type, dis_measure, k_neighbor, min_peak_ratio,
             result_folder):
    """
    Evaluate the stored test results against the annotations.

    For each '*results.json' file found under the test folder, the
    per-recording and overall evaluation is written to 'evaluation.json'
    in the same folder as the results file. The evaluation accumulated
    over all results files is written to 'overall_eval.json' in the test
    folder.

    :param step_size: step size of the tonic deviation distribution; also
        used to locate the test folder
    :param kernel_width: used to locate the test folder
    :param distribution_type: used to locate the test folder
    :param model_type: used to locate the test folder
    :param experiment_type: 'tonic' or 'mode'; any other value is
        evaluated as the joint experiment
    :param dis_measure: used to locate the test folder
    :param k_neighbor: used to locate the test folder
    :param min_peak_ratio: used to locate the test folder
    :param result_folder: folder holding the 'testing' subfolder
    :return: message string stating that the test folder is done
    """
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(result_folder, 'testing', experiment_type), model_type,
        distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio)))
    result_files = get_filenames_in_dir(test_folder,
                                        keyword='*results.json')[0]

    anno_file = './data/ottoman_turkish_makam_recognition_dataset' \
                '/annotations.json'
    with open(anno_file) as anno_fp:
        annotations = json.load(anno_fp)
    makam_labels = np.unique([a['makam'] for a in annotations]).tolist()
    evaluator = Evaluator()

    # accumulators over all results files; the keys depend on the
    # experiment type
    tmp_bins = np.arange(0, 1200, step_size)
    if experiment_type == 'tonic':
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None)}
    elif experiment_type == 'mode':
        eval_folds = {'num_correct_mode': 0, 'mode_accuracy': 0,
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }
    else:
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'num_correct_mode': 0, 'mode_accuracy': 0,
                      'num_correct_joint': 0, 'joint_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None),
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }

    for rf in result_files:
        with open(rf) as result_fp:
            res = json.load(result_fp)
        eval_file = os.path.join(os.path.dirname(rf), 'evaluation.json')

        rec_ev = []
        for aa in annotations:
            mbid = os.path.split(aa['mbid'])[-1]

            if mbid in res:  # in testing data
                if experiment_type == 'tonic':
                    rec_ev.append(evaluator.evaluate_tonic(
                        res[mbid][0][0], aa['tonic'], mbid))
                    # convert to plain python types for json
                    rec_ev[-1]['tonic_eval'] = \
                        rec_ev[-1]['tonic_eval'].tolist()
                    rec_ev[-1]['same_octave'] = \
                        rec_ev[-1]['same_octave'].tolist()
                elif experiment_type == 'mode':
                    rec_ev.append(evaluator.evaluate_mode(
                        res[mbid][0][0], aa['makam'], mbid))
                else:
                    rec_ev.append(evaluator.evaluate_joint(
                        [res[mbid][0][0][0], aa['tonic']],
                        [res[mbid][0][0][1], aa['makam']], mbid))

                    rec_ev[-1]['tonic_eval'] = \
                        rec_ev[-1]['tonic_eval'].tolist()
                    rec_ev[-1]['same_octave'] = \
                        rec_ev[-1]['same_octave'].tolist()
                    try:
                        rec_ev[-1]['joint_eval'] = \
                            rec_ev[-1]['joint_eval'].tolist()
                    except AttributeError:
                        # TODO: find out why i've put an exception here
                        pass

        ev = {'per_recording': rec_ev, 'overall': {}}

        # Each of the three sections below is skipped through the KeyError
        # raised when the per-recording evaluations (or the accumulators)
        # lack the keys of that section for the current experiment type.
        try:
            ev['overall']['num_correct_tonic'] = sum(
                rec['tonic_eval'] for rec in rec_ev)
            ev['overall']['tonic_accuracy'] = (
                ev['overall']['num_correct_tonic'] / len(rec_ev))

            ev['overall']['tonic_deviation_distribution'] = \
                PitchDistribution.from_cent_pitch(
                    [rec['cent_diff'] for rec in rec_ev],
                    ref_freq=None, step_size=step_size, kernel_width=0)

            try:  # force to pcd
                ev['overall']['tonic_deviation_distribution'].to_pcd()
            except AssertionError:
                pass

            eval_folds['num_correct_tonic'] += ev['overall'][
                'num_correct_tonic']
            eval_folds['tonic_deviation_distribution'].vals += \
                ev['overall']['tonic_deviation_distribution'].vals

            # make the distribution json serializable
            ev['overall']['tonic_deviation_distribution'] = \
                ev['overall']['tonic_deviation_distribution'].to_dict()
        except KeyError:
            pass

        try:
            ev['overall']['num_correct_mode'] = sum(
                rec['mode_eval'] for rec in rec_ev)
            ev['overall']['mode_accuracy'] = (
                ev['overall']['num_correct_mode'] / len(rec_ev))

            ev['overall']['confusion_matrix'] = {
                'matrix': confusion_matrix(
                    [rec['annotated_mode'] for rec in rec_ev],
                    [rec['estimated_mode'] for rec in rec_ev],
                    labels=makam_labels),
                'labels': makam_labels}

            eval_folds['num_correct_mode'] += ev['overall'][
                'num_correct_mode']
            eval_folds['confusion_matrix']['matrix'] += \
                ev['overall']['confusion_matrix']['matrix']

            ev['overall']['confusion_matrix']['matrix'] = \
                ev['overall']['confusion_matrix']['matrix'].astype(
                    int).tolist()
        except KeyError:
            pass

        try:
            ev['overall']['num_correct_joint'] = sum(
                rec['joint_eval'] for rec in rec_ev)
            ev['overall']['joint_accuracy'] = (
                ev['overall']['num_correct_joint'] / len(rec_ev))

            eval_folds['num_correct_joint'] += ev['overall'][
                'num_correct_joint']
        except KeyError:
            pass

        # close the file explicitly so the evaluation is flushed to disk
        with open(eval_file, 'w') as eval_fp:
            json.dump(ev, eval_fp)

    # NOTE(review): the divisor 10 is hard-coded; presumably there are 10
    # folds of 100 recordings each, which makes the value a percentage -
    # confirm against the fold setup
    if experiment_type == 'tonic':
        eval_folds['tonic_accuracy'] = eval_folds['num_correct_tonic'] / 10
        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
    elif experiment_type == 'mode':
        eval_folds['mode_accuracy'] = eval_folds['num_correct_mode'] / 10
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].astype(int).tolist()
    else:
        eval_folds['tonic_accuracy'] = eval_folds['num_correct_tonic'] / 10
        eval_folds['mode_accuracy'] = eval_folds['num_correct_mode'] / 10
        eval_folds['joint_accuracy'] = eval_folds['num_correct_joint'] / 10

        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
        # NOTE(review): unlike the 'mode' branch, the matrix is not cast
        # to int here, so the counts are written as floats - confirm
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].tolist()

    overall_file = os.path.join(test_folder, 'overall_eval.json')
    with open(overall_file, 'w') as overall_fp:
        json.dump(eval_folds, overall_fp)

    return u'{0:s} done'.format(test_folder)