def getModelDistribution(pitchVals, kernel_width=2.5):
    """Build a pitch distribution whose bins are expressed in Hz.

    The pitch values are converted to cents, the distribution is generated
    in the cent domain and its bins are then converted back to Hz.

    Args:
        pitchVals: Pitch values in Hz.
        kernel_width: Forwarded to ``PitchDistribution.generate_pd`` as
            ``kernel_width``.

    Returns:
        The object produced by ``PitchDistribution.generate_pd`` with its
        ``bins`` attribute replaced by Hz values.
    """
    # The same reference is used for both conversions, so it only anchors
    # the intermediate cent axis.
    reference_hz = 440.0
    cents = PitchDistribution.hz_to_cent(pitchVals, reference_hz)
    model = PitchDistribution.generate_pd(
        cents, ref_freq=reference_hz, kernel_width=kernel_width,
        step_size=7.5)
    model.bins = PitchDistribution.cent_to_hz(model.bins, reference_hz)
    return model
def generate_pd(pitch_track, ref_freq=440, smooth_factor=7.5, cent_ss=7.5,
                source='', segment='all'):
    """Estimate a pitch distribution from a pitch track with a Gaussian KDE.

    Args:
        pitch_track: One-dimensional sequence of pitch values. The bins of
            the result use the same unit (presumably cents relative to
            ``ref_freq`` -- confirm against the callers).
        ref_freq: Reference frequency stored on the returned distribution.
        smooth_factor: Standard deviation of the Gaussian smoothing kernel,
            in the unit of ``pitch_track``.
        cent_ss: Step size between consecutive bins.
        source: Stored on the returned distribution.
        segment: Stored on the returned distribution.

    Returns:
        A ``p_d.PitchDistribution`` holding the bins and the density
        evaluated at those bins.

    Raises:
        ValueError: If ``pitch_track`` is empty.
    """
    if len(pitch_track) == 0:
        raise ValueError("pitch_track must not be empty")

    lowest = min(pitch_track)
    highest = max(pitch_track)

    # gaussian_kde multiplies the data's standard deviation by a scalar
    # bw_method, so dividing by that deviation makes the kernel's standard
    # deviation equal to smooth_factor.
    smoothening = smooth_factor * np.sqrt(1 / np.cov(pitch_track))

    # Some extra interval is added to the beginning and end since the
    # superposed Gaussian for smoothing would vanish after 3 sigmas. Both the
    # limits and the padding are multiples of the step size, so every bin is
    # a multiple of cent_ss even when smooth_factor differs from cent_ss.
    padding = np.ceil(5 * smooth_factor / cent_ss) * cent_ss
    min_bin = (lowest - (lowest % cent_ss)) - padding
    max_bin = (highest + (cent_ss - (highest % cent_ss))) + padding
    pd_bins = np.arange(min_bin, max_bin, cent_ss)

    kde = stats.gaussian_kde(pitch_track, bw_method=smoothening)
    pd_vals = kde.evaluate(pd_bins)
    return p_d.PitchDistribution(pd_bins, pd_vals, kernel_width=smooth_factor,
                                 source=source, ref_freq=ref_freq,
                                 segment=segment)
def mode_estimate(self, dist, mode_dists, distance_method='euclidean', metric='pcd'):
    """Compare the piece's distribution with every candidate mode.

    Given the tonic (or candidate tonic), computes the distance between
    ``dist`` and each distribution in ``mode_dists``.

    Args:
        dist: Distribution of the piece.
        mode_dists: Candidate mode distributions.
        distance_method: Forwarded to the distance helpers as ``method``.
        metric: ``'pcd'`` or ``'pd'``.

    Returns:
        One-dimensional numpy array with one distance per entry of
        ``mode_dists``, in the same order.

    Raises:
        ValueError: If ``metric`` is neither ``'pcd'`` nor ``'pd'``.
    """
    if metric == 'pcd':
        # A single zero shift is requested, so the helper presumably yields a
        # matrix with one row (one column per mode) -- TODO confirm. It is
        # flattened so that callers can index the result with the flat
        # position reported by np.argmin.
        return np.array(
            mf.generate_distance_matrix(dist, [0], mode_dists,
                                        method=distance_method)).ravel()

    if metric == 'pd':
        distance_vector = np.zeros(len(mode_dists))
        for i, candidate in enumerate(mode_dists):
            # Work on a fresh copy so the padding never touches ``dist``.
            trial = p_d.PitchDistribution(dist.bins, dist.vals,
                                          kernel_width=dist.kernel_width,
                                          source=dist.source,
                                          ref_freq=dist.ref_freq,
                                          segment=dist.segmentation)
            trial, mode_trial = mf.pd_zero_pad(trial, candidate,
                                               cent_ss=self.cent_ss)
            distance_vector[i] = mf.distance(trial, mode_trial,
                                             method=distance_method)
        return distance_vector

    raise ValueError("metric must be 'pcd' or 'pd', got %r" % (metric,))
def getModels(pitch, alignednotes, tonic, kernel_width=2.5):
    """Build a pitch model for every note symbol found in the alignment.

    Args:
        pitch: Two-dimensional numpy array. Column 0 is compared against the
            note intervals and column 1 holds the pitch values.
        alignednotes: Sequence of dicts with the keys ``'Symbol'``,
            ``'Interval'`` (start, end) and ``'Pitch'`` (a dict with
            ``'Value'``). Every note that contributes to a model gets a
            ``'trajectory'`` key added in place.
        tonic: Reference frequency used to express the peak candidates in
            cents before comparing them with the note's ``Pitch.Value``.
        kernel_width: Forwarded to ``getModelDistribution``.

    Returns:
        Dict keyed by note symbol plus the key ``'all'``. Each value is a
        dict with ``'notes'``, ``'distribution'`` and ``'stablepitch'``.
        Symbols without any usable note keep their empty placeholders.
    """
    noteNames = set(an['Symbol'] for an in alignednotes)
    noteModels = dict((nn, {'notes': [], 'distribution': [], 'stablepitch': []})
                      for nn in noteNames)

    # get the complete histogram
    all_model = getModelDistribution(pitch[:, 1], kernel_width=kernel_width)
    noteModels['all'] = {'distribution': all_model, 'notes': None,
                         'stablepitch': None}

    # compute note trajectories and add to each model
    times = pitch[:, 0]
    for an in alignednotes:
        start, end = an['Interval'][0], an['Interval'][1]
        if start == end:  # not aligned
            continue
        trajectory = pitch[(times >= start) & (times <= end)]
        if len(trajectory) == 0:
            # No pitch sample falls inside the interval, so the note cannot
            # contribute to the model.
            continue
        an['trajectory'] = trajectory
        noteModels[an['Symbol']]['notes'].append(an)

    # compute the histogram for each model
    for key, model in noteModels.items():
        if key == 'all' or not model['notes']:
            continue

        tempPitchVals = np.hstack([nn['trajectory'][:, 1]
                                   for nn in model['notes']])
        distribution = getModelDistribution(tempPitchVals,
                                            kernel_width=kernel_width)
        model['distribution'] = distribution

        # get the stable pitch: the detected peak closest to the theoretical
        # value of the note
        theoreticalpeak = model['notes'][0]['Pitch']['Value']
        peakCandIdx = distribution.detect_peaks()[0]
        peakCandFreqs = [distribution.bins[i] for i in peakCandIdx]
        peakCandCents = PitchDistribution.hz_to_cent(peakCandFreqs, tonic)
        minId = abs(peakCandCents - theoreticalpeak).argmin()
        stable_freq = peakCandFreqs[minId]
        model['stablepitch'] = stable_freq

        # scale according to relative usage of each note: the stable peak is
        # given the height the complete histogram has at the same pitch
        stablepitchVal = distribution.vals[peakCandIdx[minId]]
        allhistbin_id = abs(PitchDistribution.hz_to_cent(
            all_model.bins, stable_freq)).argmin()
        allhistval = all_model.vals[allhistbin_id]
        distribution.vals = distribution.vals * allhistval / stablepitchVal

    return noteModels
def load_collection(self, mode_name, metric, dist_dir='./'):
    """Load every distribution stored for a mode.

    The file ``<mode_name>_<metric>.json`` inside ``dist_dir`` is read; the
    list stored under the key ``mode_name`` is turned into
    ``p_d.PitchDistribution`` objects.

    Args:
        mode_name: Name of the mode; part of the file name and the key
            looked up in the JSON document.
        metric: Part of the file name.
        dist_dir: Directory holding the file, with or without a trailing
            path separator.

    Returns:
        List of ``p_d.PitchDistribution`` objects, in file order.
    """
    import os

    # os.path.join keeps the path valid whether or not dist_dir ends with a
    # separator.
    path = os.path.join(dist_dir, mode_name + '_' + metric + '.json')
    with open(path) as f:
        dist_list = json.load(f)[mode_name]

    return [
        p_d.PitchDistribution(np.array(d['bins']), np.array(d['vals']),
                              kernel_width=d['kernel_width'],
                              source=d['source'], ref_freq=d['ref_freq'],
                              segment=d['segmentation'])
        for d in dist_list
    ]
def generate_pcd(pd):
    """Fold a pitch distribution into a single 1200-wide range.

    Args:
        pd: Distribution exposing ``bins``, ``vals``, ``step_size``,
            ``kernel_width``, ``source``, ``ref_freq`` and ``segmentation``.

    Returns:
        A ``p_d.PitchDistribution`` whose bins run from 0 up to (but not
        including) 1200 in steps of ``pd.step_size``; every value of ``pd``
        is accumulated into the bin its position maps to modulo 1200.
    """
    step = pd.step_size
    pcd_bins = np.arange(0, 1200, step)
    pcd_vals = np.zeros(len(pcd_bins))

    # Octave wrapping
    for k, position in enumerate(pd.bins):
        target = int((position % 1200) / step)
        pcd_vals[target] += pd.vals[k]

    return p_d.PitchDistribution(pcd_bins, pcd_vals,
                                 kernel_width=pd.kernel_width,
                                 source=pd.source, ref_freq=pd.ref_freq,
                                 segment=pd.segmentation)
def tonic_estimate(self, dist, peak_idxs, mode_dist, distance_method="euclidean", metric='pcd'):
    """Compare the piece's distribution with one mode at every candidate tonic.

    Given the mode (or candidate mode), the distance between ``dist`` and
    ``mode_dist`` is computed for every shift listed in ``peak_idxs``.

    Args:
        dist: Distribution of the piece.
        peak_idxs: Candidate shifts handed to ``mf.generate_distance_matrix``.
        mode_dist: Distribution of the mode.
        distance_method: Forwarded to ``mf.generate_distance_matrix`` as
            ``method``.
        metric: ``'pcd'`` or ``'pd'``.

    Returns:
        First column of the distance matrix as a numpy array, or ``None``
        when ``metric`` is neither ``'pcd'`` nor ``'pd'``.
    """
    if metric == 'pcd':
        piece, reference = dist, mode_dist
    elif metric == 'pd':
        # Mode is known, tonic is estimated: work on a copy of the piece's
        # distribution.
        piece = p_d.PitchDistribution(dist.bins, dist.vals,
                                      kernel_width=dist.kernel_width,
                                      source=dist.source,
                                      ref_freq=dist.ref_freq,
                                      segment=dist.segmentation)
        piece, reference = mf.pd_zero_pad(piece, mode_dist,
                                          cent_ss=self.cent_ss)
        # Fill both sides of vals with zeros, to make sure that the shifts
        # won't drop any non-zero values.
        leading = np.zeros(abs(max(peak_idxs)))
        trailing = np.zeros(abs(min(peak_idxs)))
        piece.vals = np.concatenate((leading, piece.vals, trailing))
        reference.vals = np.concatenate((leading, reference.vals, trailing))
    else:
        return None

    distances = mf.generate_distance_matrix(piece, peak_idxs, [reference],
                                            method=distance_method)
    return np.array(distances)[:, 0]
### Initializations pt_dir = 'Examples/Pitch Tracks/' pd_dir = 'Examples/PD/' pcd_dir = 'Examples/PCD/' b = be.BozkurtEstimation() ###--------------------------------------------------------------------------------------- ### Loading the pitch tracks pt1 = mf.load_track('semahat', pt_dir)[:, 1] ###--------------------------------------------------------------------------------------- ### Loading the existing pitch distributions. The JSON related issues are handled ### internally, no need to import json. pcd1 = p_d.load('semahat_pcd.json', pcd_dir) pcd2 = p_d.load('gec_kalma_pcd.json', pcd_dir) pcd3 = p_d.load('murat_derya_pcd.json', pcd_dir) ### You don't need to worry about KDE, if you just want to use the function as it is. KDE ### returns the Kernel Density Estimation, in case you might use in another analysis. pd = p_d.load('gec_kalma_pd.json', pd_dir) ### They can plotted like this. #pcd1.plot() # This is Figure 1 #pd.plot() # This is Figure 2 ###--------------------------------------------------------------------------------------- ### Here comes the actual training part. After the following lines, the joint distributions ### of the modes should be saved in your working directory. ussak_pcd = b.train('ussak_pcd', [(pt_dir + 'semahat'), (pt_dir + 'gec_kalma'),
def estimate(self, pitch_track, mode_names=[], mode_name='', mode_dir='./', est_tonic=True,
             est_mode=True, rank=1, distance_method="euclidean", metric='pcd', ref_freq=440):
    """Estimate the tonic, the mode, or both, of a pitch track.

    This is the high level function that users are expected to interact
    with for estimation purposes. The ``est_*`` flags select what is
    estimated.

    Args:
        pitch_track: Pitch values in Hz.
        mode_names: Names of the candidate modes. For each one the file
            ``<name>_<metric>.json`` is loaded from ``mode_dir``. The default
            list is never mutated.
        mode_name: Name of the known mode; required when only the tonic is
            estimated.
        mode_dir: Directory holding the mode distributions.
        est_tonic: Whether the tonic is estimated.
        est_mode: Whether the mode is estimated.
        rank: Number of best candidates to return.
        distance_method: Forwarded to the distance helpers.
        metric: ``'pcd'`` or ``'pd'``.
        ref_freq: Reference frequency of the Hz/cent conversions.

    Returns:
        ``(mode_list, tonic_list)`` when both flags are set, ``tonic_list``
        when only ``est_tonic`` is set, ``mode_list`` when only ``est_mode``
        is set and ``0`` when neither is set. The lists hold ``rank``
        entries, best candidate first. ``None`` is returned when a tonic is
        requested with a metric other than ``'pcd'`` or ``'pd'``.

    Raises:
        ValueError: If only the tonic is requested and ``mode_name`` is
            empty.
    """
    def pop_minimum(distances):
        """Return the index of the smallest entry and mask it out.

        The entry is overwritten with ``max + 1`` so that the next call
        yields the next best candidate.
        """
        best = np.unravel_index(np.argmin(distances), distances.shape)
        distances[best] = np.amax(distances) + 1
        return best

    # Preliminaries before the estimations
    cent_track = mf.hz_to_cent(pitch_track, ref_freq=ref_freq)
    dist = mf.generate_pd(cent_track, ref_freq=ref_freq,
                          smooth_factor=self.smooth_factor,
                          cent_ss=self.cent_ss)
    if metric == 'pcd':
        dist = mf.generate_pcd(dist)
    mode_dists = [p_d.load(m + '_' + metric + '.json', mode_dir)
                  for m in mode_names]
    mode_dist = (p_d.load(mode_name + '_' + metric + '.json', mode_dir)
                 if mode_name != '' else None)
    tonic_list = np.zeros(rank)
    mode_list = [''] * rank

    if est_tonic:
        if metric == 'pcd':
            # Shifting to the global minima to prevent wrong detection of
            # peaks
            shift_factor = int(np.argmin(dist.vals))
            dist = dist.shift(shift_factor)
            anti_freq = mf.cent_to_hz([dist.bins[shift_factor]],
                                      ref_freq=ref_freq)[0]
            peak_idxs, _ = dist.detect_peaks()
        elif metric == 'pd':
            peak_idxs, _ = dist.detect_peaks()
            # Offset of every peak from the zero-cent bin, counted in bins.
            # It is derived from the bin value itself, so it also works when
            # the track does not straddle ref_freq and no bin equals zero.
            shift_idxs = [int(np.rint(dist.bins[idx] / self.cent_ss))
                          for idx in peak_idxs]

    # Call to actual estimation functions
    if est_tonic and est_mode:
        if metric == 'pcd':
            dist_mat = np.array(mf.generate_distance_matrix(
                dist, peak_idxs, mode_dists, method=distance_method))
            for r in range(rank):
                min_row, min_col = pop_minimum(dist_mat)
                tonic_list[r] = mf.cent_to_hz(
                    [dist.bins[peak_idxs[min_row]]], anti_freq)[0]
                mode_list[r] = mode_names[min_col]
            return mode_list, tonic_list
        elif metric == 'pd':
            # Rows are candidate tonics, columns are candidate modes.
            dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
            for m, candidate_mode in enumerate(mode_dists):
                dist_mat[:, m] = self.tonic_estimate(
                    dist, shift_idxs, candidate_mode,
                    distance_method=distance_method, metric=metric)
            for r in range(rank):
                min_row, min_col = pop_minimum(dist_mat)
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
                mode_list[r] = mode_names[min_col]
            return mode_list, tonic_list

    elif est_tonic:
        if mode_dist is None:
            raise ValueError(
                "mode_name is required when only the tonic is estimated")
        if metric == 'pcd':
            distance_vector = np.asarray(self.tonic_estimate(
                dist, peak_idxs, mode_dist,
                distance_method=distance_method, metric=metric))
            for r in range(rank):
                (idx,) = pop_minimum(distance_vector)
                tonic_list[r] = mf.cent_to_hz(
                    [dist.bins[peak_idxs[idx]]], anti_freq)[0]
            return tonic_list
        elif metric == 'pd':
            distance_vector = np.asarray(self.tonic_estimate(
                dist, shift_idxs, mode_dist,
                distance_method=distance_method, metric=metric))
            for r in range(rank):
                (idx,) = pop_minimum(distance_vector)
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[idx] * self.cent_ss], ref_freq)[0]
            return tonic_list

    elif est_mode:
        # Flattened so that the flat index from the ranking is valid even if
        # the distances come back as a one-row matrix.
        distance_vector = np.asarray(self.mode_estimate(
            dist, mode_dists, distance_method=distance_method,
            metric=metric)).ravel()
        for r in range(rank):
            (idx,) = pop_minimum(distance_vector)
            mode_list[r] = mode_names[idx]
        return mode_list

    else:
        # Nothing is expected to be estimated
        return 0
def estimate(self, pitch_track, mode_names=[], mode_name='', mode_dir='./', est_tonic=True,
             est_mode=True, rank=1, distance_method="euclidean", metric='pcd', ref_freq=440):
    """Estimate the tonic, the mode, or both, of a pitch track.

    Every mode is represented by a collection of distributions loaded with
    ``self.load_collection``. The ``est_*`` flags select what is estimated.

    Args:
        pitch_track: Pitch values in Hz.
        mode_names: Names of the candidate modes whose collections are
            loaded. The default list is never mutated.
        mode_name: Name of the known mode; required when only the tonic is
            estimated.
        mode_dir: Directory holding the collections.
        est_tonic: Whether the tonic is estimated.
        est_mode: Whether the mode is estimated.
        rank: Number of best candidates to return.
        distance_method: Forwarded to the distance helpers.
        metric: ``'pcd'`` or ``'pd'``.
        ref_freq: Reference frequency of the Hz/cent conversions.

    Returns:
        ``(mode_list, tonic_list)`` when both flags are set, ``tonic_list``
        when only ``est_tonic`` is set, ``mode_list`` when only ``est_mode``
        is set and ``0`` when neither is set. The lists hold ``rank``
        entries, best candidate first; each mode entry is the
        ``(source, segmentation)`` pair of the matching distribution.
        ``None`` is returned when both are requested with a metric other
        than ``'pcd'`` or ``'pd'``.

    Raises:
        ValueError: If only the tonic is requested and ``mode_name`` is
            empty or ``metric`` is neither ``'pcd'`` nor ``'pd'``.
    """
    def pop_minimum(distances):
        """Return the index of the smallest entry and mask it out.

        The entry is overwritten with ``max + 1`` so that the next call
        yields the next best candidate.
        """
        best = np.unravel_index(np.argmin(distances), distances.shape)
        distances[best] = np.amax(distances) + 1
        return best

    # Preliminaries before the estimations
    cent_track = mf.hz_to_cent(pitch_track, ref_freq)
    dist = mf.generate_pd(cent_track, ref_freq=ref_freq,
                          smooth_factor=self.smooth_factor,
                          cent_ss=self.cent_ss)
    if metric == 'pcd':
        dist = mf.generate_pcd(dist)

    # The loop variable must not be called ``dist``: under Python 2 a list
    # comprehension leaks its variable and would overwrite the piece's
    # distribution.
    mode_dists = []
    for mode in mode_names:
        mode_dists.extend(
            self.load_collection(mode, metric, dist_dir=mode_dir))
    mode_dist = (self.load_collection(mode_name, metric, dist_dir=mode_dir)
                 if mode_name != '' else None)
    tonic_list = np.zeros(rank)
    mode_list = [''] * rank

    if est_tonic:
        if metric == 'pcd':
            # Shifting to the global minima to prevent wrong detection of
            # peaks
            shift_factor = int(np.argmin(dist.vals))
            dist = dist.shift(shift_factor)
            anti_freq = mf.cent_to_hz([dist.bins[shift_factor]],
                                      ref_freq=ref_freq)[0]
            peak_idxs, _ = dist.detect_peaks()
        elif metric == 'pd':
            peak_idxs, _ = dist.detect_peaks()
            # Offset of every peak from the zero-cent bin, counted in bins.
            # It is derived from the bin value itself, so it also works when
            # the track does not straddle ref_freq and no bin equals zero.
            shift_idxs = [int(np.rint(dist.bins[idx] / self.cent_ss))
                          for idx in peak_idxs]

    # Call to actual estimation functions
    if est_tonic and est_mode:
        if metric == 'pcd':
            dist_mat = np.array(mf.generate_distance_matrix(
                dist, peak_idxs, mode_dists, method=distance_method))
            for r in range(rank):
                min_row, min_col = pop_minimum(dist_mat)
                tonic_list[r] = mf.cent_to_hz(
                    [dist.bins[peak_idxs[min_row]]], anti_freq)[0]
                mode_list[r] = (mode_dists[min_col].source,
                                mode_dists[min_col].segmentation)
            return mode_list, tonic_list
        elif metric == 'pd':
            # Rows are candidate tonics, columns are training distributions.
            dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
            for m, candidate_mode in enumerate(mode_dists):
                dist_mat[:, m] = self.tonic_estimate(
                    dist, shift_idxs, candidate_mode,
                    distance_method=distance_method, metric=metric)
            for r in range(rank):
                min_row, min_col = pop_minimum(dist_mat)
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
                mode_list[r] = (mode_dists[min_col].source,
                                mode_dists[min_col].segmentation)
            return mode_list, tonic_list

    elif est_tonic:
        if mode_dist is None:
            raise ValueError(
                "mode_name is required when only the tonic is estimated")
        if metric == 'pcd':
            candidates = peak_idxs
        elif metric == 'pd':
            candidates = shift_idxs
        else:
            raise ValueError("metric must be 'pcd' or 'pd', got %r" % (metric,))

        # One column per distribution of the known mode, one row per
        # candidate tonic. tonic_estimate builds a fresh copy of the piece's
        # distribution on every call, so the zero padding of the 'pd' metric
        # cannot pile up from one training distribution to the next.
        dist_mat = np.column_stack([
            self.tonic_estimate(dist, candidates, trained,
                                distance_method=distance_method,
                                metric=metric)
            for trained in mode_dist])
        for r in range(rank):
            min_row, _ = pop_minimum(dist_mat)
            if metric == 'pcd':
                tonic_list[r] = mf.cent_to_hz(
                    [dist.bins[peak_idxs[min_row]]], anti_freq)[0]
            else:
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
        return tonic_list

    elif est_mode:
        # Flattened so that the flat index from the ranking is valid even if
        # the distances come back as a one-row matrix.
        distance_vector = np.asarray(self.mode_estimate(
            dist, mode_dists, distance_method=distance_method,
            metric=metric)).ravel()
        for r in range(rank):
            (idx,) = pop_minimum(distance_vector)
            mode_list[r] = (mode_dists[idx].source,
                            mode_dists[idx].segmentation)
        return mode_list

    else:
        return 0
def estimate(self, pitch_track, mode_names=[], mode_name='', mode_dir='./', est_tonic=True,
             est_mode=True, rank=1, distance_method="euclidean", metric='pcd', ref_freq=440):
    """Estimate the tonic, the mode, or both, of a pitch track.

    This is the high level function that users are expected to interact
    with for estimation purposes. The ``est_*`` flags select what is
    estimated.

    Args:
        pitch_track: Pitch values in Hz.
        mode_names: Names of the candidate modes. For each one the file
            ``<name>_<metric>.json`` is loaded from ``mode_dir``. The default
            list is never mutated.
        mode_name: Name of the known mode; required when only the tonic is
            estimated.
        mode_dir: Directory holding the mode distributions.
        est_tonic: Whether the tonic is estimated.
        est_mode: Whether the mode is estimated.
        rank: Number of best candidates to return.
        distance_method: Forwarded to the distance helpers.
        metric: ``'pcd'`` or ``'pd'``.
        ref_freq: Reference frequency of the Hz/cent conversions.

    Returns:
        ``(mode_list, tonic_list)`` when both flags are set, ``tonic_list``
        when only ``est_tonic`` is set, ``mode_list`` when only ``est_mode``
        is set and ``0`` when neither is set. The lists hold ``rank``
        entries, best candidate first. ``None`` is returned when a tonic is
        requested with a metric other than ``'pcd'`` or ``'pd'``.

    Raises:
        ValueError: If only the tonic is requested and ``mode_name`` is
            empty.
    """
    def take_best(distances):
        """Return the index of the smallest entry and mask it out.

        The entry is overwritten with ``max + 1`` so that the next call
        yields the next best candidate.
        """
        best = np.unravel_index(np.argmin(distances), distances.shape)
        distances[best] = np.amax(distances) + 1
        return best

    # Preliminaries before the estimations
    cent_track = mf.hz_to_cent(pitch_track, ref_freq=ref_freq)
    dist = mf.generate_pd(cent_track, ref_freq=ref_freq,
                          smooth_factor=self.smooth_factor,
                          cent_ss=self.cent_ss)
    if metric == 'pcd':
        dist = mf.generate_pcd(dist)
    mode_dists = [p_d.load(m + '_' + metric + '.json', mode_dir)
                  for m in mode_names]
    mode_dist = (p_d.load(mode_name + '_' + metric + '.json', mode_dir)
                 if mode_name != '' else None)
    tonic_list = np.zeros(rank)
    mode_list = [''] * rank

    if est_tonic:
        if metric == 'pcd':
            # Shifting to the global minima to prevent wrong detection of
            # peaks
            shift_factor = int(np.argmin(dist.vals))
            dist = dist.shift(shift_factor)
            anti_freq = mf.cent_to_hz([dist.bins[shift_factor]],
                                      ref_freq=ref_freq)[0]
            peak_idxs, _ = dist.detect_peaks()
        elif metric == 'pd':
            peak_idxs, _ = dist.detect_peaks()
            # Offset of every peak from the zero-cent bin, counted in bins.
            # It is derived from the bin value itself, so it also works when
            # the track does not straddle ref_freq and no bin equals zero.
            shift_idxs = [int(np.rint(dist.bins[idx] / self.cent_ss))
                          for idx in peak_idxs]

    # Call to actual estimation functions
    if est_tonic and est_mode:
        if metric == 'pcd':
            dist_mat = np.array(mf.generate_distance_matrix(
                dist, peak_idxs, mode_dists, method=distance_method))
            for r in range(rank):
                min_row, min_col = take_best(dist_mat)
                tonic_list[r] = mf.cent_to_hz(
                    [dist.bins[peak_idxs[min_row]]], anti_freq)[0]
                mode_list[r] = mode_names[min_col]
            return mode_list, tonic_list
        elif metric == 'pd':
            # Rows are candidate tonics, columns are candidate modes.
            dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
            for m, candidate_mode in enumerate(mode_dists):
                dist_mat[:, m] = self.tonic_estimate(
                    dist, shift_idxs, candidate_mode,
                    distance_method=distance_method, metric=metric)
            for r in range(rank):
                min_row, min_col = take_best(dist_mat)
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
                mode_list[r] = mode_names[min_col]
            return mode_list, tonic_list

    elif est_tonic:
        if mode_dist is None:
            raise ValueError(
                "mode_name is required when only the tonic is estimated")
        if metric == 'pcd':
            distance_vector = np.asarray(self.tonic_estimate(
                dist, peak_idxs, mode_dist,
                distance_method=distance_method, metric=metric))
            for r in range(rank):
                (idx,) = take_best(distance_vector)
                tonic_list[r] = mf.cent_to_hz(
                    [dist.bins[peak_idxs[idx]]], anti_freq)[0]
            return tonic_list
        elif metric == 'pd':
            distance_vector = np.asarray(self.tonic_estimate(
                dist, shift_idxs, mode_dist,
                distance_method=distance_method, metric=metric))
            for r in range(rank):
                (idx,) = take_best(distance_vector)
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[idx] * self.cent_ss], ref_freq)[0]
            return tonic_list

    elif est_mode:
        # Flattened so that the flat index from the ranking is valid even if
        # the distances come back as a one-row matrix.
        distance_vector = np.asarray(self.mode_estimate(
            dist, mode_dists, distance_method=distance_method,
            metric=metric)).ravel()
        for r in range(rank):
            (idx,) = take_best(distance_vector)
            mode_list[r] = mode_names[idx]
        return mode_list

    else:
        # Nothing is expected to be estimated
        return 0
# Directories holding the example data.
pt_dir = 'Examples/Pitch Tracks/'
pd_dir = 'Examples/PD/'
pcd_dir = 'Examples/PCD/'

b = be.BozkurtEstimation()

# ---------------------------------------------------------------------------
# Load a pitch track and keep only its second column.
pt1 = mf.load_track('semahat', pt_dir)[:, 1]

# ---------------------------------------------------------------------------
# Load the existing pitch distributions. The JSON handling is done inside
# p_d.load, so there is no need to import json here.
pcd1 = p_d.load('semahat_pcd.json', pcd_dir)
pcd2 = p_d.load('gec_kalma_pcd.json', pcd_dir)
pcd3 = p_d.load('murat_derya_pcd.json', pcd_dir)

# You don't need to worry about KDE if you just want to use the function as
# it is. KDE returns the Kernel Density Estimation, in case you might use it
# in another analysis.
pd = p_d.load('gec_kalma_pd.json', pd_dir)

# The distributions can be plotted like this:
#   pcd1.plot()  # This is Figure 1
#   pd.plot()    # This is Figure 2

# ---------------------------------------------------------------------------
# Here comes the actual training part. After the following call, the joint
# distributions of the modes should be saved in your working directory.
ussak_pcd = b.train(
    'ussak_pcd',
    [pt_dir + 'semahat', pt_dir + 'gec_kalma', pt_dir + 'murat_derya'],
    [199, 396.3525, 334.9488],
    metric='pcd')