def train(self, mode_name, pt_list, ref_freq_list, pt_dir='', metric='pcd', save_dir='./'):
    """---------------------------------------------------------------------------------------
    Supervised training step for a single mode.

    Each pitch track file named in pt_list is loaded from pt_dir, converted
    to cents relative to the entry of ref_freq_list at the same position
    (the known tonic of that recording) and pooled into one long track. The
    joint distribution of the mode is generated from the pooled track and,
    when metric is 'pcd', passed through mf.generate_pcd. The result is
    saved as '<mode_name>_<metric>.json' in save_dir.

    mode_name     : name of the mode; used as the distribution source and
                    in the output file name
    pt_list       : names of the pitch track files of the training set
    ref_freq_list : reference frequencies, parallel to pt_list
    pt_dir        : directory the pitch tracks are loaded from
    metric        : 'pcd' triggers the mf.generate_pcd conversion; any other
                    value keeps the distribution returned by mf.generate_pd
    save_dir      : directory handed to the save call of the distribution
    ---------------------------------------------------------------------------------------"""
    pooled_cents = []
    for position, track_file in enumerate(pt_list):
        print(track_file)
        hz_track = mf.load_track(track_file, pt_dir)
        cent_track = mf.hz_to_cent(hz_track, ref_freq=ref_freq_list[position])
        pooled_cents.extend(cent_track)

    distribution = mf.generate_pd(pooled_cents,
                                  smooth_factor=self.smooth_factor,
                                  cent_ss=self.cent_ss,
                                  source=mode_name,
                                  segment='all')
    if metric == 'pcd':
        distribution = mf.generate_pcd(distribution)

    out_name = mode_name + '_' + metric + '.json'
    distribution.save(out_name, save_dir=save_dir)
def train(self, mode_name, pt_list, ref_freq_list, pt_dir='', metric='pcd', save_dir='./'):
    """---------------------------------------------------------------------------------------
    Builds and stores the joint distribution of one mode from its training
    recordings.

    The pitch track files listed in pt_list are read from pt_dir one by one.
    Every track is converted to cents using the value of ref_freq_list found
    at the same index (the known tonic of the recording), and all converted
    samples are gathered in a single list. mf.generate_pd turns that list
    into the distribution; if metric equals 'pcd' the distribution is further
    converted with mf.generate_pcd. The outcome is saved under the name
    mode_name + '_' + metric + '.json' with save_dir as the target directory.
    ---------------------------------------------------------------------------------------"""
    all_cents = []
    for track_idx, track_name in enumerate(pt_list):
        print(track_name)
        tonic_hz = ref_freq_list[track_idx]
        all_cents.extend(
            mf.hz_to_cent(mf.load_track(track_name, pt_dir), ref_freq=tonic_hz))

    mode_dist = mf.generate_pd(all_cents, smooth_factor=self.smooth_factor,
                               cent_ss=self.cent_ss, source=mode_name,
                               segment='all')
    if metric == 'pcd':
        mode_dist = mf.generate_pcd(mode_dist)
    mode_dist.save(mode_name + '_' + metric + '.json', save_dir=save_dir)
def train(self, mode_name, pt_list, ref_freq_list, metric='pcd', save_dir='./', pt_dir='./'):
    """Train one mode from segmented pitch tracks and store the result as JSON.

    For every file named in pt_list the track is loaded from pt_dir; column 0
    is taken as the time stamps and column 1 as the pitch values. The track is
    cut into parts with self.slice, every part is converted to cents relative
    to the entry of ref_freq_list at the same index, and the parts are handed
    to self.train_segments. All distributions returned for all recordings are
    collected and written to '<mode_name>_<metric>.json' inside save_dir as
    ``{mode_name: [distribution, ...]}``, where each distribution is stored
    with its bins, vals, kernel_width, source, ref_freq and segmentation.

    mode_name     : name of the mode; top-level key of the saved JSON and
                    part of the file name
    pt_list       : names of the pitch track files
    ref_freq_list : reference frequencies, parallel to pt_list
    metric        : forwarded to self.train_segments and used in the file name
    save_dir      : directory the JSON file is written to
    pt_dir        : directory the pitch tracks are loaded from
    """
    # Local import so this method does not rely on the module importing os.
    import os

    save_name = mode_name + '_' + metric + '.json'

    dist_list = []
    for idx, pt_name in enumerate(pt_list):
        cur = mf.load_track(pt_name, pt_dir)
        time_track = cur[:, 0]
        pitch_track = cur[:, 1]
        pts, segs = self.slice(time_track, pitch_track, pt_name)
        ref_freq = ref_freq_list[idx]
        pts = [mf.hz_to_cent(k, ref_freq=ref_freq) for k in pts]
        dist_list.extend(
            self.train_segments(pts, segs, ref_freq, save_dir, save_name, metric))

    dist_json = [
        {
            'bins': d.bins.tolist(),
            'vals': d.vals.tolist(),
            'kernel_width': d.kernel_width,
            'source': d.source,
            'ref_freq': d.ref_freq,
            'segmentation': d.segmentation,
        }
        for d in dist_list
    ]

    # os.path.join instead of string concatenation: a save_dir without a
    # trailing separator must still yield a file *inside* that directory.
    # The with-block closes the file; no explicit close() is needed.
    with open(os.path.join(save_dir, save_name), 'w') as f:
        json.dump({mode_name: dist_json}, f, indent=2)
# load the annotations for testing data; it will only be used for # makam recognition (with annotated tonic) for i in makam_annot: for j in annot: # append the tonic of the recording from the relevant annotation if(i['mbid'] == j['mbid']): i['tonic'] = j['tonic'] break #actual estimation for recording in makam_annot: #check if test recording was used in training if (recording['mbid'] + '.pitch' in makam_recordings): raise ValueError(('Unique-check Failure. ' + recording['mbid'])) pitch_track = mf.load_track(txt_name=(recording['mbid'] + '.pitch'), txt_dir=pitch_track_dir) init_time = time.time() cur_out = estimator.estimate(pitch_track, mode_names=makam_list, est_tonic=False, est_mode=True, k_param=k_param, distance_method=distance, metric=distribution_type, mode_in=fold_dir, tonic_freq=recording['tonic']) end_time = time.time() elapsed = (round((end_time - init_time) * 100) / 100) print elapsed output[('Fold' + str(fold))].append({'mbid':recording['mbid'], 'mode_estimation':cur_out[0], 'sources': cur_out[1], 'distances':cur_out[2], 'elapsed_time':elapsed}) with open(os.path.join(distancePath, distance + '_k' + str(k_param) + '.json'), 'w') as f: json.dump(output, f, indent=2) f.close()
def run(distance_inp, training_i):
    """Run mode recognition over all ten folds for one distance measure.

    The training parameters are read from
    ./BozkurtExperiments/Training<training_i>/parameters.json and the trained
    models of each fold are expected in the matching Fold<k> directory. For
    every fold, the test recordings of ./Folds/fold_<k>.json are estimated
    with the annotated tonic (est_tonic=False, est_mode=True) and the results
    are collected per fold. The collected results are written to
    Training<training_i>/Mode/<distance_inp>.json.

    If that result file already exists the function prints a notice and
    returns without computing anything.

    distance_inp : name of the distance measure passed to the estimator as
                   distance_method; also the stem of the result file
    training_i   : index of the training experiment directory to use

    Raises ValueError when a test recording is found among the sources of the
    trained model of its own makam.
    """
    distance = distance_inp
    rank = 10
    fold_list = range(1, 11)
    makam_list = ['Acemasiran', 'Acemkurdi', 'Beyati', 'Bestenigar', 'Hicaz',
                  'Hicazkar', 'Huseyni', 'Huzzam', 'Karcigar',
                  'Kurdilihicazkar', 'Mahur', 'Muhayyer', 'Neva', 'Nihavent',
                  'Rast', 'Saba', 'Segah', 'Sultaniyegah', 'Suzinak', 'Ussak']

    # Location of the pitch tracks. Alternatives used on other machines:
    #   '../../../test_datasets/turkish_makam_recognition_dataset/data/'  (sertan desktop local)
    #   '../../../experiments/turkish_makam_recognition_dataset/data/'    (hpc cluster)
    data_folder = '../../../Makam_Dataset/Pitch_Tracks/'

    # Folder structure; the experiment directory is assumed to exist already.
    experiment_dir = './BozkurtExperiments'
    training_dir = os.path.join(experiment_dir, 'Training' + str(training_i))
    mode_path = os.path.join(training_dir, 'Mode')
    if not os.path.exists(mode_path):
        os.makedirs(mode_path)

    # Parameters of the training experiment whose models are evaluated.
    with open(os.path.join(training_dir, 'parameters.json'), 'r') as f:
        cur_params = json.load(f)

    # Skip distances that were already computed. Checking for the exact file
    # avoids false positives from unrelated files in the directory.
    result_file = os.path.join(mode_path, distance + '.json')
    if os.path.isfile(result_file):
        print('Already done ' + distance)
        return
    print('Computing ' + distance)

    distribution_type = cur_params['distribution_type']
    estimator = be.BozkurtEstimation(cent_ss=cur_params['cent_ss'],
                                     smooth_factor=cur_params['smooth_factor'],
                                     chunk_size=cur_params['chunk_size'])

    # Annotations supply the tonic of each test recording. They are indexed
    # once by mbid; setdefault keeps the first annotation of a duplicated
    # mbid, matching a first-match linear search.
    with open('annotations.json', 'r') as f:
        annot = json.load(f)
    annot_by_mbid = {}
    for entry in annot:
        annot_by_mbid.setdefault(entry['mbid'], entry)

    output = dict()
    for fold in fold_list:
        fold_key = 'Fold' + str(fold)
        output[fold_key] = []
        fold_dir = os.path.join(training_dir, fold_key)

        # Test recordings of the current fold.
        with open(os.path.join('./Folds', 'fold_' + str(fold) + '.json'), 'r') as f:
            cur_fold = json.load(f)['test']

        for makam_name in makam_list:
            # Sources of the trained model; only used to verify that no test
            # recording was part of the training data.
            with open(os.path.join(fold_dir, makam_name + '.json')) as f:
                makam_recordings = json.load(f)[0]['source']

            makam_annot = [k for k in cur_fold if k['makam'] == makam_name]
            pitch_track_dir = os.path.join(data_folder, makam_name)

            for recording in makam_annot:
                # Attach the annotated tonic; recordings without a matching
                # annotation are left untouched.
                matched = annot_by_mbid.get(recording['mbid'])
                if matched is not None:
                    recording['tonic'] = matched['tonic']

            for recording in makam_annot:
                track_name = recording['mbid'] + '.pitch'
                if track_name in makam_recordings:
                    raise ValueError(('Unique-check Failure. ' + recording['mbid']))

                pitch_track = mf.load_track(txt_name=track_name,
                                            txt_dir=pitch_track_dir)
                cur_out = estimator.estimate(pitch_track,
                                             mode_names=makam_list,
                                             est_tonic=False,
                                             est_mode=True,
                                             rank=rank,
                                             distance_method=distance,
                                             tonic_freq=recording['tonic'],
                                             metric=distribution_type,
                                             mode_in=fold_dir)
                # NOTE(review): the key says 'tonic_estimation' although the
                # call estimates the mode; kept as is because readers of the
                # result file may depend on it.
                output[fold_key].append({'mbid': recording['mbid'],
                                         'tonic_estimation': cur_out})

    # Written once, after all folds: the existence of this file is what marks
    # the distance as done on later runs.
    with open(result_file, 'w') as f:
        json.dump(output, f, indent=2)
os.makedirs(save_dir) for makam_name in makam_list: makam_annot = [k for k in cur_fold if (k['makam']==makam_name)] pt_dir = '../../../Makam_Dataset/Pitch_Tracks/' + makam_name + '/' pt_list = [(tmp['mbid'] + '.pitch') for tmp in makam_annot] tonic_list = [tmp['tonic'] for tmp in makam_annot] train(makam_name, pt_list, tonic_list, cent_ss, smooth_factor, metric, 'bozkurt', pt_dir=pt_dir, save_dir=save_dir, chunk_size=chunk_size) print 'Fold ' + str(fld) + ' Done! ' + str(datetime.now()) trial_info = {('Experiment' + str(cnt)):{'cent_ss': cent_ss, 'smooth_factor':smooth_factor, 'metric':metric, 'chunk_size':chunk_size, 'method':'bozkurt'}} with open('trial_info.json', 'a') as f: json.dump(trial_info, f, indent=2) f.close() print 'Experiment ' + str(cnt) + ' Done\tMetric: ' + metric + ', Smooth_Factor: ' + str(smooth_factor) + ', Cent_SS: ' + str(cent_ss) + ', Chunk Size: ' + str(chunk_size) + ' \n' cur_fold = fold.load_fold('./Bozkurt_Experiment/bozkurt_test_fold.json') save_dir = './Bozkurt_Experiment/' b = be.BozkurtEstimation(cent_ss=cent_ss, smooth_factor=smooth_factor, chunk_size=chunk_size) results = [] print 'Mode Estimation' for makam_name in makam_list: makam_annot = [k for k in cur_fold if (k['makam']==makam_name)] pt_dir = '../../../Makam_Dataset/Pitch_Tracks/' + makam_name + '/' pt_list = [(tmp['mbid'] + '.pitch') for tmp in makam_annot] tonic_list = [tmp['tonic'] for tmp in makam_annot] for pt in range(len(pt_list)): print 'new track' pitch_track = mf.load_track(pt_list[pt], pt_dir) cur_res = b.estimate(pitch_track, mode_names=makam_list, mode_name='', mode_in=save_dir, est_tonic=True, est_mode=True, rank=1, distance_method="euclidean", metric='pcd', tonic_freq=tonic_list[pt]) results.append({'mbid': pt_list[pt][:-6], 'makam':makam_name, 'tonic':tonic_list[pt], 'estimated':cur_res})
PitchDistribution objects, the functions and the attributes can be called/accessed the same way. The distinction between them is handled internally. ---------------------------------------------------------------------------------------""" ###--------------------------------------------------------------------------------------- ### Initializations pt_dir = 'Examples/Pitch Tracks/' pd_dir = 'Examples/PD/' pcd_dir = 'Examples/PCD/' b = be.BozkurtEstimation() ###--------------------------------------------------------------------------------------- ### Loading the pitch tracks pt1 = mf.load_track('semahat', pt_dir)[:, 1] ###--------------------------------------------------------------------------------------- ### Loading the existing pitch distributions. The JSON related issues are handled ### internally, no need to import json. pcd1 = p_d.load('semahat_pcd.json', pcd_dir) pcd2 = p_d.load('gec_kalma_pcd.json', pcd_dir) pcd3 = p_d.load('murat_derya_pcd.json', pcd_dir) ### You don't need to worry about KDE, if you just want to use the function as it is. KDE ### returns the Kernel Density Estimation, in case you want to use it in another analysis. pd = p_d.load('gec_kalma_pd.json', pd_dir) ### They can be plotted like this. #pcd1.plot() # This is Figure 1 #pd.plot() # This is Figure 2
PitchDistribution objects, the functions and the attributes can be called/accessed the same way. The distinction between them is handled internally. ---------------------------------------------------------------------------------------""" ###--------------------------------------------------------------------------------------- ### Initializations pt_dir = 'Examples/Pitch Tracks/' pd_dir = 'Examples/PD/' pcd_dir = 'Examples/PCD/' b = be.BozkurtEstimation() ###--------------------------------------------------------------------------------------- ### Loading the pitch tracks pt1 = mf.load_track('semahat', pt_dir)[:,1] ###--------------------------------------------------------------------------------------- ### Loading the existing pitch distributions. The JSON related issues are handled ### internally, no need to import json. pcd1 = p_d.load('semahat_pcd.json', pcd_dir) pcd2 = p_d.load('gec_kalma_pcd.json', pcd_dir) pcd3 = p_d.load('murat_derya_pcd.json', pcd_dir) ### You don't need to worry about KDE, if you just want to use the function as it is. KDE ### returns the Kernel Density Estimation, in case you want to use it in another analysis. pd = p_d.load('gec_kalma_pd.json', pd_dir) ### They can be plotted like this. #pcd1.plot() # This is Figure 1