def __init__(self, threshold_doa=20, threshold_sed=0.5):
    """Metric helper for SED / DOA / SELD scoring (2019 and 2020 styles).

    :param threshold_doa: maximum tolerated DOA error in degrees, in (0, 180)
    :param threshold_sed: classification threshold for SED, in (0, 1)
    """
    # Store the user thresholds first; they are independent of the config.
    self._threshold_doa = threshold_doa
    self._threshold_sed = threshold_sed
    # Configuration "4" selects the parameter set used for evaluation.
    self._params = get_params("4")
    self.feat_cls = FeatureClass(self._params)
def collect_test_labels(_data_gen_test, _data_out, classification_mode, quick_test):
    """Collect the ground-truth SED and DOA labels for the test data.

    :param _data_gen_test: generator object whose generate() yields
        (features, [sed_labels, doa_labels]) batches endlessly
    :param _data_out: output-shape spec; _data_out[0] = (batch, frames,
        nb_classes) for SED and _data_out[1][2] = DOA label width
    :param classification_mode: unused; kept for call-site compatibility
    :param quick_test: if True, collect only 'quick_test_nb_batch' batches
    :return: tuple (gt_sed as int array, gt_doa as float array)
    """
    # '1' selects the default parameter configuration.
    params = parameter.get_params('1')
    nb_batch = params['quick_test_nb_batch'] if quick_test else _data_gen_test.get_total_batches_in_data()
    batch_size = _data_out[0][0]
    gt_sed = np.zeros((nb_batch * batch_size, _data_out[0][1], _data_out[0][2]))
    gt_doa = np.zeros((nb_batch * batch_size, _data_out[0][1], _data_out[1][2]))
    print("nb_batch in test: {}".format(nb_batch))
    # FIX: removed the stray per-batch debug print(cnt) and replaced the
    # hand-maintained counter with enumerate(). generate() never terminates
    # on its own, so we break once nb_batch batches have been copied.
    for cnt, (_tmp_feat, tmp_label) in enumerate(_data_gen_test.generate()):
        gt_sed[cnt * batch_size:(cnt + 1) * batch_size, :, :] = tmp_label[0]
        gt_doa[cnt * batch_size:(cnt + 1) * batch_size, :, :] = tmp_label[1]
        if cnt + 1 == nb_batch:
            break
    return gt_sed.astype(int), gt_doa
def _get_label_filenames_sizes(self):
    """Build self._filenames_list for the current datagen mode and derive
    feature/label sizes from the first file in the list.

    Side effects: sets self._nb_frames_file, self._feat_len,
    self._label_len and self._doa_len.
    """
    # '1' stands for the default configuration.
    _params = parameter.get_params('1')
    cnt_train = 0
    cnt_test = 0
    for filename in os.listdir(self._label_dir):
        if self._datagen_mode == "train":
            # Training mode: accept any file belonging to one of the
            # configured train splits.
            for split_n in _params["train_split"]:
                if "split" + str(split_n) in filename:
                    self._filenames_list.append(filename)
                    print("TRAIN " + str(cnt_train) + ": " + filename)
                    cnt_train = cnt_train + 1
        elif self._datagen_mode == "validation":
            if "split" + str(self._split) in filename:
                self._filenames_list.append(filename)
                print("VALID " + str(cnt_test) + ": " + filename)
                cnt_test = cnt_test + 1
        else:
            # Test mode: the file must match both the split and the
            # overlap ("ov") tag.
            if ("split" + str(self._split) in filename) and ("ov" + str(self._ov_num) in filename):
                self._filenames_list.append(filename)
                print("TEST " + str(cnt_test) + ": " + filename)
                cnt_test = cnt_test + 1
    temp_feat = np.load(os.path.join(self._feat_dir, self._filenames_list[0]))
    self._nb_frames_file = temp_feat.shape[0]
    # Features of all channels are stacked along axis 1.
    self._feat_len = temp_feat.shape[1] // self._2_nb_ch
    temp_label = np.load(os.path.join(self._label_dir, self._filenames_list[0]))
    self._label_len = temp_label.shape[-1]
    # FIX: use floor division -- true division ('/') yields a float, which
    # breaks downstream integer shape arithmetic (e.g. np.zeros with a
    # float dimension raises in modern NumPy).
    self._doa_len = (self._label_len - self._nb_classes) // self._nb_classes
    return
def __init__(self,
             seq_len,
             splits=None,
             random_shuffle=False,
             len_restrict=0,
             with_conj=False,
             rotate=None,
             output_trim=0,
             nb_freq_bins_use=None,
             direction_bias=None,
             direction_bias_additional=None,
             single_source_case_only=False,
             test_mode=False):
    """Initialise the FOA data generator.

    :param seq_len: sequence length (frames) of one training example
    :param splits: dataset splits to load; defaults to [1]
    :param random_shuffle: enable non-deterministic shuffling of file order
    :param len_restrict: cap on the number of files (0 = no restriction)
    :param with_conj: stored as-is for the batch-generation code
    :param rotate: rotation-augmentation spec; stored as-is
    :param output_trim: number of output frames to trim; stored as-is
    :param nb_freq_bins_use: restrict features to this many frequency bins
    :param direction_bias: directional-bias spec; stored as-is
    :param direction_bias_additional: extra directional bias; stored as-is
    :param single_source_case_only: keep only single-source cases
    :param test_mode: unused in this initialiser; kept for interface
        compatibility
    """
    # FIX: the default used to be a mutable list literal ([1,]), which is
    # shared between calls; None is now the sentinel for "default split".
    if splits is None:
        splits = [1]
    self.params = parameter.get_params()
    self._seq_len = seq_len
    self._splits = np.array(splits)
    self._data_dir = self.params["dataset_dir"]
    self._label_dir = os.path.join(self._data_dir, 'label')
    self._feat_dir = os.path.join(self._data_dir, 'foa_norm')
    self._nb_classes = 11
    self._nb_ch = 4  # FOA: four ambisonic channels
    self._2_nb_ch = 2 * self._nb_ch
    # Attribute name (with its original spelling) kept for compatibility
    # with the rest of the class.
    self._nondeteministic_shuffle = random_shuffle
    self._filenames_list = list()
    self.gen_data_file_name_list(len_restrict)
    self._available_cases = list()
    # Must be set before gen_available_cases(), which reads it.
    self.single_source_case_only = single_source_case_only
    self.gen_available_cases()
    self.with_conj = with_conj
    self.rotate = rotate
    self.output_trim = output_trim
    self._nb_freq_bins_use = nb_freq_bins_use
    self._direction_bias = direction_bias
    self._direction_bias_additional = direction_bias_additional
def main(args):
    """Train and evaluate a PyTorch SED (sound event detection) network.

    Runs 4-fold cross validation: for each fold a CRNN/MCRNN model is
    trained with early stopping on the validation error rate (ER), then
    the best checkpoint is reloaded and scored on the held-out test split.

    :param args: parsed CLI namespace; reads args.dataset, args.mode,
        args.name (used as task_id, selecting the model) and args.job_id.
    """
    # use parameter set defined by user
    dataset, mode, task_id, job_id = args.dataset, args.mode, args.name, args.job_id
    task = 'sed'
    feat_type = 'mel'
    nb_ch = 4  # number of input audio channels
    doa_type = None
    params, model_params = parameter.get_params(dataset=dataset, mode=mode, task_id=task_id, feat_type=feat_type, doa=doa_type)
    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        # 4-fold cross-validation split assignment.
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]
    # NOTE(review): if params['mode'] != 'dev', test_splits stays None and
    # enumerate(test_splits) below raises TypeError -- confirm 'dev' is the
    # only mode this script is run with.
    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\nThis is split {}'.format(split_cnt))
        # Unique name for the run
        model_dir_prefix = os.path.join(params['model_dir'], task) if task == 'sed' else os.path.join(params['model_dir'], 'doa_reg')
        cls_feature_class.create_folder(model_dir_prefix)
        #model_id = int(job_id) + split_cnt
        unique_name = '{}{}_{}_{}_sed_dev_split{}'.format(task_id, str(job_id), params['dataset'], params['feat_type'], split_cnt + 1)
        unique_name = os.path.join(model_dir_prefix, unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print('\tmodel unique name: {}\n'.format(unique_name))
        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(dataset=params['dataset'], split=train_splits[split_cnt], batch_size=params['batch_size'], seq_len=params['seq_length'], feat_label_dir=params['feat_label_dir'], feat_type=feat_type, doa=doa_type)
        print('Loading validation dataset:')
        # Validation is per-file with full-length (3000-frame) sequences.
        data_gen_val = cls_data_generator.DataGenerator(dataset=params['dataset'], split=val_splits[split_cnt], batch_size=params['batch_size'], seq_len=3000, per_file=True, feat_label_dir=params['feat_label_dir'], shuffle=False, feat_type=feat_type, doa=doa_type)
        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))
        gt = collect_test_labels_3000(data_gen_val)
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt)  # [3000*100, 11]
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        # NOTE(review): model is left unbound if task_id is neither 'crnn'
        # nor 'mcrnn' -- model.apply() below would then raise NameError.
        if task_id == 'crnn':
            model = CUDA(CRNN_SED(data_in, data_out[0]))
        elif task_id == 'mcrnn':
            model = CUDA(MCRNN_SED(data_in, data_out[0]))
        model.apply(kaiming_init)
        total_num = sum(param.numel() for param in model.parameters())
        print('==========================================')
        print('Total parameter number for {}: {}'.format(model_params['method'], total_num))
        print('==========================================')
        # Pytorch optimizer
        optimizer = optim.Adam(params=model.parameters(), lr=0.001)
        # Pre-allocated input/label buffers, resized per batch below.
        feat_torch = CUDA(Variable(torch.FloatTensor(params['batch_size'], nb_ch, params['seq_length'], params['feat_dim'])))
        label_sed = CUDA(Variable(torch.FloatTensor(params['batch_size'], params['seq_length'], 11)))
        best_seld_metric = 99999
        best_sed_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        sed_val_loss = np.zeros(params['nb_epochs'])
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = params['nb_epochs']
        # start training
        pbar_epoch = tqdm(total=nb_epoch, desc='[Epoch]')
        for epoch_cnt in range(nb_epoch):
            # train stage
            model.train()
            iter_cnt = 0
            for feat, label in data_gen_train.generate():
                feat_torch.resize_(params['batch_size'], nb_ch, params['seq_length'], params['feat_dim'])
                feat_torch.data.copy_(torch.from_numpy(feat))
                label_sed.resize_(params['batch_size'], params['seq_length'], 11)
                label_sed.data.copy_(torch.from_numpy(label[0]))
                sed = model(feat_torch)
                sed_loss = bce_loss(sed, label_sed)
                doa_loss = 0.0  # SED-only training: no DOA term
                total_loss = sed_loss + doa_loss
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()
                if iter_cnt % params['print_iter'] == 0:
                    pbar_epoch.write('Iteration: {:3d}, sed_loss: {:.4f}, doa_loss: {:.4f}, total_loss: {:.4f}'.format(iter_cnt, sed_loss, doa_loss, total_loss))
                    #pbar_iteration.update(1)
                iter_cnt += 1
                # generate() is endless; stop after one pass over the data.
                if iter_cnt >= data_gen_train.get_total_batches_in_data():
                    break
            # validation stage
            iter_cnt = 0
            sed_validation_loss = 0
            entire_pred_sed = np.zeros((data_gen_val._batch_size * data_gen_val.get_total_batches_in_data(), 3000, 11))
            model.eval()
            with torch.no_grad():
                for feat, label in data_gen_val.generate():
                    batch_size = feat.shape[0]
                    feat_torch.resize_(batch_size, nb_ch, 3000, params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))
                    label_sed.resize_(batch_size, 3000, 11)
                    # NOTE(review): uses .copy_ here vs .data.copy_ in the
                    # train loop -- presumably equivalent under no_grad();
                    # confirm intent.
                    label_sed.copy_(torch.from_numpy(label[0]))
                    sed = model(feat_torch)
                    sed_loss = bce_loss(sed, label_sed)
                    sed_validation_loss += sed_loss
                    # concat all predictions
                    entire_pred_sed[iter_cnt * batch_size:(iter_cnt + 1) * batch_size, :] = sed.detach().cpu().numpy()
                    iter_cnt += 1
                    if iter_cnt >= data_gen_val.get_total_batches_in_data():
                        break
            sed_validation_loss = sed_validation_loss / data_gen_val.get_total_batches_in_data()
            # NOTE(review): tr_loss records only the LAST training batch's
            # loss for this epoch, not an epoch average.
            tr_loss[epoch_cnt] = total_loss
            sed_val_loss[epoch_cnt] = sed_validation_loss
            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(entire_pred_sed) > params['threshold']  # compared with threshold
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            # Early stopping bookkeeping on validation ER (column 0).
            patience_cnt += 1
            if sed_metric[epoch_cnt, 0] < best_sed_metric:
                best_sed_metric = sed_metric[epoch_cnt, 0]
                best_epoch = epoch_cnt
                save_model(model, model_name)
                patience_cnt = 0
            pbar_epoch.update(1)
            pbar_epoch.write('epoch_cnt: %d, sed_tr_loss: %.4f, sed_val_loss: %.4f, ER_overall: %.2f, F1_overall: %.2f, best_sed_ER: %.4f, best_epoch : %d\n' % (epoch_cnt, tr_loss[epoch_cnt], sed_val_loss[epoch_cnt], sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1], best_sed_metric, best_epoch))
            if patience_cnt >= params['patience']:
                break
        pbar_epoch.close()
        avg_scores_val.append([sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]])  #, doa_metric[best_epoch, 0], doa_metric[best_epoch, 1], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))
        # ------------------ Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        # NOTE(review): `params['mode'] is 'eval'` compares identity, not
        # equality -- it only works via CPython string interning and should
        # be `==`.
        data_gen_test = cls_data_generator.DataGenerator(dataset=params['dataset'], split=split, batch_size=params['batch_size'], seq_len=3000, feat_label_dir=params['feat_label_dir'], shuffle=False, per_file=True, is_eval=True if params['mode'] is 'eval' else False,  #False
                                                         feat_type=feat_type, doa=doa_type)
        test_batch_size = data_gen_test._batch_size
        print('\nLoading the best model and predicting results on the testing split')
        model = load_model(model, '{}_model.h5'.format(unique_name))
        model.eval()
        # test stage
        total_test_batches = data_gen_test.get_total_batches_in_data()
        pbar_test = tqdm(total=total_test_batches, desc='[Testing]')
        iter_cnt = 0
        # NOTE(review): shape (100, 3000, 11) hard-codes 100 test files --
        # confirm against the dataset size.
        entire_test_sed = np.zeros((100, 3000, 11))
        with torch.no_grad():
            if params['mode'] == 'dev':
                for feat, label in data_gen_test.generate():
                    batch_size = feat.shape[0]
                    feat_torch.data.resize_(batch_size, nb_ch, 3000, params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))
                    sed = model(feat_torch)
                    # concat all predictions
                    entire_test_sed[iter_cnt * test_batch_size:(iter_cnt + 1) * test_batch_size, :] = sed.detach().cpu().numpy()
                    pbar_test.update(1)
                    iter_cnt += 1
                    if iter_cnt >= data_gen_test.get_total_batches_in_data():
                        break
                print('the test batch_size is{}'.format(batch_size))
        pbar_test.close()
        test_sed_pred = evaluation_metrics.reshape_3Dto2D(entire_test_sed) > params['threshold']
        if params['mode'] == 'dev':
            _, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels_3000(data_gen_test)
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt)
            test_sed_loss = evaluation_metrics.compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            avg_scores_test.append([test_sed_loss[0], test_sed_loss[1]])
            print('Results on test split:')
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(test_sed_loss[0], test_sed_loss[1]))
    # Per-fold summary over all splits.
    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print('\t Split {} - SED ER: {} F1: {}'.format(val_splits[cnt], avg_scores_val[cnt][0], avg_scores_val[cnt][1]))
    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print('\t Split {} - SED ER: {} F1: {}'.format(test_splits[cnt], avg_scores_test[cnt][0], avg_scores_test[cnt][1]))
def main(argv):
    """Train and evaluate the DCASE2021 Keras SELD network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) chooses the system configuration
            in parameters.py. (default) 1 - uses default parameters.
        second input: job_id - (optional) all the output files will be
            uniquely represented with this. (default) 1.
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')
    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)
    job_id = 1 if len(argv) < 3 else argv[-1]
    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None
    # Fixed split assignment per mode (single fold each).
    if params['mode'] == 'dev':
        test_splits = [6]
        val_splits = [5]
        train_splits = [[1, 2, 3, 4]]
    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[6]]
        train_splits = [[1, 2, 3, 4, 5]]
    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------ SPLIT {} -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')
        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))
        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])
        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt], shuffle=False, per_file=True, is_eval=False)
        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))
        nb_classes = data_gen_train.get_nb_classes()
        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'.format(params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'], params['rnn_size'], params['fnn_size'], params['doa_objective']))
        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'], nb_cnn2d_filt=params['nb_cnn2d_filt'], f_pool_size=params['f_pool_size'], t_pool_size=params['t_pool_size'], rnn_size=params['rnn_size'], fnn_size=params['fnn_size'], weights=params['loss_weights'], doa_objective=params['doa_objective'], is_accdoa=params['is_accdoa'])
        # Dump results in DCASE output format for calculating final scores
        dcase_output_val_folder = os.path.join(params['dcase_output_dir'], '{}_{}_{}_val'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_val_folder)
        print('Dumping recording-wise val results in: {}'.format(dcase_output_val_folder))
        # Initialize evaluation metric class
        score_obj = ComputeSELDResults(params)
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss = np.zeros(nb_epoch)
        seld_metric = np.zeros((nb_epoch, 5))
        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()
            # train once per epoch
            hist = model.fit_generator(generator=data_gen_train.generate(), steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(), epochs=params['epochs_per_fit'], verbose=2,)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            # predict once per epoch
            pred = model.predict_generator(generator=data_gen_val.generate(), steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(), verbose=2)
            if params['is_accdoa']:
                # ACCDOA output: SED and DOA decoded from a single head.
                sed_pred, doa_pred = get_accdoa_labels(pred, nb_classes)
                sed_pred = reshape_3Dto2D(sed_pred)
                doa_pred = reshape_3Dto2D(doa_pred)
            else:
                sed_pred = reshape_3Dto2D(pred[0]) > 0.5
                # NOTE(review): `params['doa_objective'] is 'mse'` compares
                # identity, not equality -- relies on string interning;
                # should be `==`.
                doa_pred = reshape_3Dto2D(pred[1] if params['doa_objective'] is 'mse' else pred[1][:, :, nb_classes:])
            # Calculate the DCASE 2021 metrics - Location-aware detection and Class-aware localization scores
            dump_DCASE2021_results(data_gen_val, feat_cls, dcase_output_val_folder, sed_pred, doa_pred)
            seld_metric[epoch_cnt, :] = score_obj.get_SELD_Results(dcase_output_val_folder)
            # Early stopping on the combined SELD score (last column).
            patience_cnt += 1
            if seld_metric[epoch_cnt, -1] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt, -1]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0
            print('epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                  '\n\t\t DCASE2021 SCORES: ER: {:0.2f}, F: {:0.1f}, LE: {:0.1f}, LR:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                  'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(epoch_cnt, time.time() - start, tr_loss[epoch_cnt], seld_metric[epoch_cnt, 0], seld_metric[epoch_cnt, 1] * 100, seld_metric[epoch_cnt, 2], seld_metric[epoch_cnt, 3] * 100, seld_metric[epoch_cnt, -1], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(best_seld_metric))
        print('\n\tDCASE2021 scores')
        print('\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'.format(seld_metric[best_epoch, 2], seld_metric[best_epoch, 3] * 100))
        print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(seld_metric[best_epoch, 0], seld_metric[best_epoch, 1] * 100))
        # ------------------ Calculate metric scores for unseen test split ---------------------------------
        print('\nLoading the best model and predicting results on the testing split')
        print('\tLoading testing dataset:')
        # NOTE(review): `is 'eval'` / `is 'dev'` below compare identity, not
        # equality -- should be `==`.
        data_gen_test = cls_data_generator.DataGenerator(params=params, split=split, shuffle=False, per_file=True, is_eval=True if params['mode'] is 'eval' else False)
        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name), params['doa_objective'])
        pred_test = model.predict_generator(generator=data_gen_test.generate(), steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(), verbose=2)
        if params['is_accdoa']:
            test_sed_pred, test_doa_pred = get_accdoa_labels(pred_test, nb_classes)
            test_sed_pred = reshape_3Dto2D(test_sed_pred)
            test_doa_pred = reshape_3Dto2D(test_doa_pred)
        else:
            test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
            test_doa_pred = reshape_3Dto2D(pred_test[1] if params['doa_objective'] is 'mse' else pred_test[1][:, :, nb_classes:])
        # Dump results in DCASE output format for calculating final scores
        dcase_output_test_folder = os.path.join(params['dcase_output_dir'], '{}_{}_{}_test'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_test_folder)
        print('Dumping recording-wise test results in: {}'.format(dcase_output_test_folder))
        dump_DCASE2021_results(data_gen_test, feat_cls, dcase_output_test_folder, test_sed_pred, test_doa_pred)
        if params['mode'] is 'dev':
            # Calculate DCASE2021 scores
            test_seld_metric = score_obj.get_SELD_Results(dcase_output_test_folder)
            print('Results on test split:')
            print('\tDCASE2021 Scores')
            print('\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'.format(test_seld_metric[2], test_seld_metric[3] * 100))
            print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(test_seld_metric[0], test_seld_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(test_seld_metric[-1]))
# Calculated scores eval.update_seld_scores(pred_labels, self._ref_labels[pred_file]) # Overall SED and DOA scores ER, F, LE, LR = eval.compute_seld_scores() seld_scr = SELD_evaluation_metrics.early_stopping_metric([ER, F], [LE, LR]) print('\nAverage score for {} {} data using {} coordinates'.format(score_type, 'fold' if score_type=='all' else split_key, 'Polar' if self._use_polar_format else 'Cartesian' )) print('SELD score (early stopping metric): {:0.2f}'.format(seld_scr)) print('SED metrics: Error rate: {:0.2f}, F-score:{:0.1f}'.format(ER, 100*F)) print('DOA metrics: Localization error: {:0.1f}, Localization Recall: {:0.1f}'.format(LE, 100*LR)) def reshape_3Dto2D(A): return A.reshape(A.shape[0] * A.shape[1], A.shape[2]) if __name__ == "__main__": pred_output_format_files = 'results/4_foa_dev_test' # Path of the DCASEoutput format files # Compute just the DCASE 2021 final results score_obj = ComputeSELDResults(parameter.get_params()) ER, F, LE, LR, seld_scr = score_obj.get_SELD_Results(pred_output_format_files) print('SELD score (early stopping metric): {:0.2f}'.format(seld_scr)) print('SED metrics: Error rate: {:0.2f}, F-score:{:0.1f}'.format(ER, 100*F)) print('DOA metrics: Localization error: {:0.1f}, Localization Recall: {:0.1f}'.format(LE, 100*LR)) # Compute DCASE 2021 results along with room-wise performance score_obj.get_consolidated_SELD_results(pred_output_format_files)
def main(argv):
    """Train and evaluate the DCASE2019/2020 Keras SELD network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) chooses the system configuration
            in parameters.py. (default) 1 - uses default parameters.
        second input: job_id - (optional) all the output files will be
            uniquely represented with this. (default) 1.
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')
    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)
    job_id = 1 if len(argv) < 3 else argv[-1]
    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None
    # Fixed split assignment per mode (single fold each).
    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]
    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]
    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------ SPLIT {} -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')
        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))
        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])
        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt], shuffle=False)
        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))
        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])
        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'.format(params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'], params['rnn_size'], params['fnn_size'], params['doa_objective']))
        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'], nb_cnn2d_filt=params['nb_cnn2d_filt'], f_pool_size=params['f_pool_size'], t_pool_size=params['t_pool_size'], rnn_size=params['rnn_size'], fnn_size=params['fnn_size'], weights=params['loss_weights'], doa_objective=params['doa_objective'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))
        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()
            # train once per epoch
            hist = model.fit_generator(generator=data_gen_train.generate(), steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(), epochs=params['epochs_per_fit'], verbose=2,)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            # predict once per epoch
            pred = model.predict_generator(generator=data_gen_val.generate(), steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(), verbose=2)
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            # NOTE(review): `params['doa_objective'] is 'mse'` compares
            # identity, not equality -- relies on string interning; should
            # be `==`.
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1] if params['doa_objective'] is 'mse' else pred[1][:, :, nb_classes:])
            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])
            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(nb_classes=data_gen_val.get_nb_classes(), doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(sed_gt, doa_gt)
            pred_blocks_dict = feat_cls.segment_labels(pred_dict, sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])
            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict, gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])
            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric, seld_metric, new_metric, new_seld_metric)
            # Early stopping on the DCASE2020 combined SELD score.
            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0
            print('epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                  '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR:{:0.1f}, seld_score: {:0.2f}, '
                  '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                  'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(epoch_cnt, time.time() - start, tr_loss[epoch_cnt], sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100, doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100, seld_metric[epoch_cnt], new_metric[epoch_cnt, 0], new_metric[epoch_cnt, 1] * 100, new_metric[epoch_cnt, 2], new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break
        avg_scores_val.append([new_metric[best_epoch, 0], new_metric[best_epoch, 1], new_metric[best_epoch, 2], new_metric[best_epoch, 3], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(best_seld_metric))
        print('\n\tDCASE2020 scores')
        print('\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'.format(new_metric[best_epoch, 2], new_metric[best_epoch, 3] * 100))
        print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(new_metric[best_epoch, 0], new_metric[best_epoch, 1] * 100))
        print('\n\tDCASE2019 scores')
        print('\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'.format(doa_metric[best_epoch, 0], doa_metric[best_epoch, 1] * 100))
        print('\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))
        # ------------------ Calculate metric scores for unseen test split ---------------------------------
        print('\nLoading the best model and predicting results on the testing split')
        print('\tLoading testing dataset:')
        # NOTE(review): `is 'eval'` / `is 'dev'` below compare identity, not
        # equality -- should be `==`.
        data_gen_test = cls_data_generator.DataGenerator(params=params, split=split, shuffle=False, per_file=params['dcase_output'], is_eval=True if params['mode'] is 'eval' else False)
        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name), params['doa_objective'])
        pred_test = model.predict_generator(generator=data_gen_test.generate(), steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(), verbose=2)
        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1] if params['doa_objective'] is 'mse' else pred_test[1][:, :, nb_classes:])
        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(dcase_dump_folder))
            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()
            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()
            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(dcase_dump_folder, test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(test_sed_pred[dc:dc + max_frames_with_content, :], test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file, output_dict)
        if params['mode'] is 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out, nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(test_sed_loss, test_doa_loss)
            # Calculate DCASE2020 scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(test_sed_gt, test_doa_gt)
            test_pred_blocks_dict = feat_cls.segment_labels(test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(test_gt_dict, test_sed_gt.shape[0])
            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict, test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(test_new_metric[:2], test_new_metric[2:])
            avg_scores_test.append([test_new_metric[0], test_new_metric[1], test_new_metric[2], test_new_metric[3], test_new_seld_metric])
            print('Results on test split:')
            print('\tDCASE2020 Scores')
            print('\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'.format(test_new_metric[2], test_new_metric[3] * 100))
            print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(test_new_seld_metric))
            print('\n\tDCASE2019 Scores')
            print('\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'.format(test_doa_loss[0], test_doa_loss[1] * 100))
            print('\tDetection-only scores:Error rate: {:0.2f}, F-score: {:0.1f}'.format(test_sed_loss[0], test_sed_loss[1] * 100))
def main(argv):
    """
    Main wrapper for training or evaluating the sound event localization and
    detection (SELD) network.

    Expected command line (all arguments optional, positional):
        >> python seld.py <job-id> <task-id> <train|test> <log-dir-name>

    :param argv: argv[1] job_id - unique identifier used for output filenames
                     (models, training plots). Default: 1.
                 argv[2] task_id - chooses the parameter set from parameter.py.
                     Default: '1'.
                 argv[3] 'train' selects training mode; any other value selects
                     evaluation mode. Default: training.
                 argv[4] log_dir_name - name of a run directory under the model
                     dir; mandatory in evaluation mode (to locate the saved
                     model), optional in training mode.
    :raises ValueError: if evaluation mode is requested without a log dir name.
    :raises FileNotFoundError: if evaluation mode is requested but no saved
        model exists at the resolved model path.
    """
    job_id = 1 if len(argv) < 2 else argv[1]

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[2]
    params = parameter.get_params(task_id)

    # Training unless an explicit third argument says otherwise.
    # (Simplified from `True if len(argv) < 4 else (True if ... else False)`.)
    isTraining = len(argv) < 4 or argv[3] == 'train'
    logger.info(f"isTraining {isTraining}")

    log_dir_name = None if len(argv) < 5 else argv[4]
    if not log_dir_name and not isTraining:
        raise ValueError("Specify log_dir if evaluation mode")

    model_dir = os.path.join(os.pardir, 'models')
    if isTraining:
        utils.create_folder(model_dir)

    unique_name = '{}_ov{}_train{}_val{}_{}'.format(
        params['dataset'], list_to_string(params['overlap']),
        list_to_string(params['train_split']),
        list_to_string(params['val_split']), job_id)
    if not isTraining:
        # In evaluation mode the job id directly names the run to evaluate.
        unique_name = job_id
    logger.info(f"unique_name: {unique_name}")

    dnn_type = 'QTCN' if params['use_quaternions'] else params['recurrent_type']
    if not log_dir_name:
        log_dir_name = "-".join([dnn_type,
                                 datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
    logger.info(f"log_dir_name: {log_dir_name}")

    log_dir = os.path.join(model_dir, unique_name, log_dir_name)
    logger.info(f"log_dir: {log_dir}")
    if isTraining:
        utils.create_folder(log_dir)
        utils.setup_logger(log_dir, console_logger_level=logging.INFO)
        logger.info(f"log_dir {log_dir}")
    logger.info("unique_name: {}\n".format(unique_name))

    data_gen_train = None
    data_gen_val = None
    data_gen_test = None
    if isTraining:
        # When train_val_split < 1.0 the file list is split at a point:
        # training uses files 'before' it, validation files 'after' it.
        load_files_train_splitting_point = None if params['train_val_split'] == 1.0 else 'before'
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'],
            split=params['train_split'], db=params['db'], nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'],
            classifier_mode=params['mode'], weakness=params['weakness'],
            datagen_mode='train', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'],
            debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params,
            load_files_before_after_splitting_point=load_files_train_splitting_point
        )

        if not params['quick_test']:
            load_files_val_splitting_point = None if params['train_val_split'] == 1.0 else 'after'
            data_gen_val = cls_data_generator.DataGenerator(
                dataset=params['dataset'], ov=params['overlap'],
                split=params['val_split'], db=params['db'], nfft=params['nfft'],
                batch_size=params['batch_size'], seq_len=params['sequence_length'],
                classifier_mode=params['mode'], weakness=params['weakness'],
                datagen_mode='train', cnn3d=params['cnn_3d'],
                xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'],
                shuffle=False,
                debug_load_single_batch=params['debug_load_single_batch'],
                data_format=params['data_format'], params=params,
                load_files_before_after_splitting_point=load_files_val_splitting_point
            )
        else:
            import copy
            data_gen_val = copy.deepcopy(data_gen_train)
            logger.warning("Quick test, validation set is a deep copy of training set.")
    else:
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'],
            split=params['test_split'], db=params['db'], nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'],
            classifier_mode=params['mode'], weakness=params['weakness'],
            datagen_mode='test', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'],
            shuffle=False,
            debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params
        )

    # Input/output shapes come from whichever generator was constructed.
    data_gen_for_shapes = data_gen_train if isTraining else data_gen_test
    data_in, data_out = data_gen_for_shapes.get_data_sizes()
    logger.info(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(data_in, data_out)
    )
    logger.info(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
            params['pool_size'], params['rnn_size'], params['fnn_size']
        )
    )

    network.set_global_num_classes(params)
    keras.backend.set_image_data_format(params['data_format'])
    logger.info(f"Data format set to {params['data_format']}")

    model = None
    if isTraining:
        # The quaternion and Giusenso variants each require a specific
        # channel ordering.
        if params['use_quaternions']:
            assert (params['data_format'] == 'channels_last')
        if params['use_giusenso']:
            assert (params['data_format'] == 'channels_first')
            model = keras_model_giusenso.get_model_giusenso(
                data_in, data_out, params['dropout_rate'],
                params['nb_cnn2d_filt'], params['pool_size'],
                params['fnn_size'], params['loss_weights'])
        else:
            model = network.get_model(
                input_shape=data_in, output_shape=data_out,
                dropout_rate=params['dropout_rate'],
                pool_size=params['pool_size'], rnn_size=params['rnn_size'],
                fnn_size=params['fnn_size'], weights=params['loss_weights'],
                data_format=params['data_format'], params=params)

    # Resume from a saved model if one exists in the log dir; in evaluation
    # mode a saved model is mandatory.
    model_path = os.path.join(log_dir, 'model')
    logger.info(f"model_path {model_path}")
    if os.path.exists(model_path):
        logger.info(f"Loading pretrained model from {model_path}")
        model = network.load_seld_model(model_path, params['doa_objective'])
    else:
        if not isTraining:
            raise FileNotFoundError(f"test mode but model was not found at {os.path.abspath(model_path)}")

    try:
        dot_img_file = os.path.join(log_dir, 'model_plot.png')
        keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
    except ImportError:
        # plot_model needs pydot/graphviz; architecture plotting is optional.
        logger.warning("Failed to import pydot, skip plotting")

    if isTraining:
        utils.copy_source_code(log_dir)
        train(model, data_gen_train, data_gen_val, params,
              log_dir=log_dir, unique_name=unique_name)
    else:
        evaluate(model, data_gen_test, params,
                 log_dir=log_dir, unique_name=unique_name)
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    Runs cross-validation over the dataset splits: for each fold it trains a
    model with early stopping on the SELD metric, then evaluates the best
    checkpoint on the unseen test split (and optionally dumps DCASE-format
    output files).

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To chose the system configuration in
            parameters.py. (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely
            represented with this. (default) 1
    """
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)
    job_id = 1 if len(argv) < 3 else argv[-1]

    # Fold definitions: one entry per fold, index-aligned across the three lists.
    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finlaize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]
    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------ SPLIT {} -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=train_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=val_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))
        gt = collect_test_labels(data_gen_val, data_out, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        # rescaling the reference elevation data from [-180 180] to
        # [-def_elevation def_elevation] for scoring purpose
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        doa_gt[:, nb_classes:] = doa_gt[:, nb_classes:] / (180. / def_elevation)

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, pool_size{}\n\trnn_size: {}, fnn_size: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['pool_size'], params['rnn_size'],
                    params['fnn_size']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_metric = np.zeros((params['nb_epochs'], 6))
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                validation_data=data_gen_val.generate(),
                validation_steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

            # predict once per peoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # rescaling the elevation data from [-180 180] to
            # [-def_elevation def_elevation] for scoring purpose
            doa_pred[:, nb_classes:] = doa_pred[:, nb_classes:] / (180. / def_elevation)

            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[
                epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.compute_seld_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, val_loss, sed_metric,
                           doa_metric, seld_metric)

            # Early stopping: keep the checkpoint with the lowest SELD metric.
            patience_cnt += 1
            if seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
                'ER_overall: %.2f, F1_overall: %.2f, '
                'doa_error_pred: %.2f, good_pks_ratio:%.2f, '
                'seld_score: %.2f, best_seld_score: %.2f, best_epoch : %d\n' %
                (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                 val_loss[epoch_cnt], sed_metric[epoch_cnt, 0],
                 sed_metric[epoch_cnt, 1], doa_metric[epoch_cnt, 0],
                 doa_metric[epoch_cnt, 1], seld_metric[epoch_cnt],
                 best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1],
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1],
            best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score: {}'.format(best_seld_metric))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------ Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=split,
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            per_file=params['dcase_output'],
            # BUGFIX: was `params['mode'] is 'eval'` — identity comparison with
            # a string literal is implementation-dependent; use equality.
            is_eval=True if params['mode'] == 'eval' else False)

        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        model = load_model('{}_model.h5'.format(unique_name))
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

        # rescaling the elevation data from [-180 180] to
        # [-def_elevation def_elevation] for scoring purpose
        test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (
            180. / def_elevation)

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = evaluation_metrics.regression_label_format_to_output_format(
                    data_gen_test,
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :] * 180 / np.pi)
                evaluation_metrics.write_output_format_file(
                    output_file, output_dict)

        # BUGFIX: was `params['mode'] is 'dev'` — use equality for strings.
        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
            # rescaling the reference elevation from [-180 180] to
            # [-def_elevation def_elevation] for scoring purpose
            test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (
                180. / def_elevation)

            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.compute_seld_metric(
                test_sed_loss, test_doa_loss)

            avg_scores_test.append([
                test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
                test_doa_loss[1], test_metric_loss
            ])
            print('Results on test split:')
            print('\tSELD_score: {}, '.format(test_metric_loss))
            print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
                test_doa_loss[0], test_doa_loss[1]))
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
                test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print(
            '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
            .format(cnt, avg_scores_val[cnt][0], avg_scores_val[cnt][1],
                    avg_scores_val[cnt][2], avg_scores_val[cnt][3],
                    avg_scores_val[cnt][4]))

    # BUGFIX: was `params['mode'] is 'dev'` — use equality for strings.
    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print(
                '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
                .format(cnt, avg_scores_test[cnt][0], avg_scores_test[cnt][1],
                        avg_scores_test[cnt][2], avg_scores_test[cnt][3],
                        avg_scores_test[cnt][4]))
def main(argv):
    """
    Stripped-down debug entry point: scores predictions on the test split only.
    Training, model loading and prediction are commented out, so as written
    this function cannot run end-to-end (see NOTE(review) markers below).

    :param argv: argv[1] task_id - (optional) chooses the user-defined
        parameter set from parameter.py. Default: '1'.
    """
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        # test_splits = [1, 2, 3, 4]
        # val_splits = [2, 3, 4, 1]
        # train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

        # TODO for debug only
        test_splits = [1]
        val_splits = [1]
        train_splits = [[1, 1]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finlaize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]
    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    # ------------------ Calculate metric scores for unseen test split ---------------------------------
    print('Loading testing dataset:')
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        # NOTE(review): `split` is undefined in this function — the
        # per-fold loop was removed. Probably meant test_splits[0].
        split=split,
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        feat_label_dir=params['feat_label_dir'],
        shuffle=False,
        per_file=params['dcase_output'],
        # BUGFIX: was `params['mode'] is 'eval'` — identity comparison with a
        # string literal is implementation-dependent; use equality.
        is_eval=True if params['mode'] == 'eval' else False)

    # print('\nLoading the best model and predicting results on the testing split')
    # model = load_model('{}_model.h5'.format(unique_name))
    # pred_test = model.predict_generator(
    #     generator=data_gen_test.generate(),
    #     steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
    #     verbose=2
    # )

    # NOTE(review): `pred_test` is undefined because the prediction code above
    # is commented out; this will raise NameError at runtime.
    test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
    test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

    # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
    # NOTE(review): `nb_classes` and `def_elevation` are also undefined here —
    # in the full version they come from the training data generator.
    test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (
        180. / def_elevation)

    if params['dcase_output']:
        # Dump results in DCASE output format for calculating final scores
        dcase_dump_folder = os.path.join(
            params['dcase_dir'],
            '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.create_folder(dcase_dump_folder)
        print(
            'Dumping recording-wise results in: {}'.format(dcase_dump_folder))

        test_filelist = data_gen_test.get_filelist()
        # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
        max_frames_with_content = data_gen_test.get_nb_frames()

        # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
        # zero padding in the remaining frames
        frames_per_file = data_gen_test.get_frame_per_file()

        for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
            output_file = os.path.join(
                dcase_dump_folder,
                test_filelist[file_cnt].replace('.npy', '.csv'))
            dc = file_cnt * frames_per_file
            output_dict = evaluation_metrics.regression_label_format_to_output_format(
                data_gen_test,
                test_sed_pred[dc:dc + max_frames_with_content, :],
                test_doa_pred[dc:dc + max_frames_with_content, :] * 180 / np.pi)
            evaluation_metrics.write_output_format_file(
                output_file, output_dict)

    # BUGFIX: was `params['mode'] is 'dev'` — use equality for strings.
    if params['mode'] == 'dev':
        # NOTE(review): unpacks three values from get_data_sizes(); other
        # versions of this script unpack two — confirm the generator API.
        _, _, test_data_out = data_gen_test.get_data_sizes()
        test_gt = collect_test_labels(data_gen_test, test_data_out,
                                      params['quick_test'])
        test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
        test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

        # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (
            180. / def_elevation)

        test_sed_loss = evaluation_metrics.compute_sed_scores(
            test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
        test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
            test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
        test_metric_loss = evaluation_metrics.compute_seld_metric(
            test_sed_loss, test_doa_loss)

        # NOTE(review): `avg_scores_test` is never initialized in this
        # trimmed version — NameError if reached.
        avg_scores_test.append([
            test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
            test_doa_loss[1], test_metric_loss
        ])
        print('Results on test split:')
        print('\tSELD_score: {}, '.format(test_metric_loss))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            test_doa_loss[0], test_doa_loss[1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            test_sed_loss[0], test_sed_loss[1]))
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    Colab/TPU variant: initializes a TPU cluster, builds (or resumes) the
    model inside the TPU distribution strategy scope, then runs the epoch
    loop with SED/DOA scoring and best-checkpoint saving.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely
            represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in
            parameters.py. (default) uses default parameters
    """
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    # Run name encodes dataset, overlap, split, mode/weakness and 3D-CNN flag.
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id)
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'],
        split=params['split'], db=params['db'], nfft=params['nfft'],
        batch_size=params['batch_size'], seq_len=params['sequence_length'],
        classifier_mode=params['mode'], weakness=params['weakness'],
        datagen_mode='train', cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'])
    # NOTE(review): the test generator doubles as the validation set below.
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'],
        split=params['split'], db=params['db'], nfft=params['nfft'],
        batch_size=params['batch_size'], seq_len=params['sequence_length'],
        classifier_mode=params['mode'], weakness=params['weakness'],
        datagen_mode='test', cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'],
        shuffle=False)

    data_in, data_out = data_gen_train.get_data_sizes()
    print('FEATURES:\n'
          '\tdata_in: {}\n'
          '\tdata_out: {}\n'.format(data_in, data_out))

    gt = collect_test_labels(data_gen_test, data_out, params['mode'],
                             params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print('MODEL:\n'
          '\tdropout_rate: {}\n'
          '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
          '\trnn_size: {}, fnn_size: {}\n'.format(
              params['dropout_rate'],
              params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
              params['pool_size'], params['rnn_size'], params['fnn_size']))

    # TPU CODE FOR GOOGLE COLABORATORY
    # NOTE(review): raises KeyError when COLAB_TPU_ADDR is unset, i.e. this
    # entry point only works on a Colab TPU runtime — confirm intended.
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.config.experimental_connect_to_cluster(resolver)
    # This is the TPU initialization code that has to be at the beginning.
    tf.tpu.experimental.initialize_tpu_system(resolver)
    print("All devices: ", tf.config.list_logical_devices('TPU'))
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    with strategy.scope():
        # Load or create model
        model = utils.load_model(unique_name)
        if model is None:
            model = keras_model.get_model(
                data_in=data_in, data_out=data_out,
                dropout_rate=params['dropout_rate'],
                nb_cnn2d_filt=params['nb_cnn2d_filt'],
                pool_size=params['pool_size'], rnn_size=params['rnn_size'],
                fnn_size=params['fnn_size'],
                classification_mode=params['mode'],
                weights=params['loss_weights'])
    model.summary()

    best_metric = 99999
    conf_mat = None
    best_conf_mat = None
    best_epoch = -1
    patience_cnt = 0
    # Per-epoch history buffers, pre-sized to nb_epochs.
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))

    for epoch_cnt in range(params['nb_epochs']):
        start = time.time()
        # quick_test caps each phase at 5 batches.
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=5 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=5 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            epochs=1, verbose=1)
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=5 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            verbose=2)
        print("pred:", pred[1].shape)

        # Metrics are only computed in regression mode; in any other mode the
        # score arrays below stay zero and conf_mat stays None.
        if params['mode'] == 'regr':
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])
            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)

            # epoch_metric_loss[epoch_cnt] = np.mean([
            #     sed_loss[epoch_cnt, 0],
            #     1-sed_loss[epoch_cnt, 1],
            #     2*np.arcsin(doa_loss[epoch_cnt, 1]/2.0)/np.pi,
            #     1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))]
            # )
            # SED score combines error rate and (1 - F1).
            sed_score[epoch_cnt] = np.mean(
                [sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            # DOA score combines angular error and fraction of unmatched peaks.
            doa_score[epoch_cnt] = np.mean([
                2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))
            ])

        #plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, epoch_metric_loss)
        plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss,
                       sed_score, doa_score)
        patience_cnt += 1
        # if epoch_metric_loss[epoch_cnt] < best_metric:
        #     best_metric = epoch_metric_loss[epoch_cnt]
        #     best_conf_mat = conf_mat
        #     best_epoch = epoch_cnt
        #     model.save('{}_model.h5'.format(unique_name))
        #     patience_cnt = 0
        # Best checkpoint is selected on the SED score alone.
        if sed_score[epoch_cnt] < best_metric:
            best_metric = sed_score[epoch_cnt]
            best_conf_mat = conf_mat
            best_epoch = epoch_cnt
            model.save('{}_model.h5'.format(unique_name))
            patience_cnt = 0

        print(
            'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
            'F1_overall: %.2f, ER_overall: %.2f, '
            'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
            'sed_score: %.2f, doa_score: %.2f, best_error_metric: %.2f, best_epoch : %d' %
            (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
             val_loss[epoch_cnt], sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0],
             doa_loss[epoch_cnt, 1], doa_loss[epoch_cnt, 2],
             doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
             sed_score[epoch_cnt], doa_score[epoch_cnt], best_metric,
             best_epoch))
        #plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, sed_score, doa_score, epoch_cnt)

    print('best_conf_mat : {}'.format(best_conf_mat))
    # NOTE(review): if the mode is not 'regr', best_conf_mat stays None and
    # np.diag(None) below will raise — confirm this path is regr-only.
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {}, '.format(
        best_epoch, best_metric))
    print(
        'DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.
        format(doa_loss[best_epoch, 1], doa_loss[best_epoch, 2],
               doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(
        sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('unique_name: {} '.format(unique_name))
def main(argv):
    """
    Main wrapper for evaluating a trained sound event localization and detection network.

    Loads previously trained weights (if present), runs one prediction pass per
    overlap value on the test split, and reports SED/DOA/SELD scores plus
    confidence intervals.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py. (default) uses default parameters
    """
    # Warn (but continue with defaults) when the expected CLI arguments are missing.
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)
    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    # NOTE(review): this name must match the one produced by the training script so
    # the '{}_model.ckpt' weights saved there are found below — confirm they stay in sync.
    unique_name = '{}_train{}_validation{}_seq{}'.format(params['dataset'], params['train_split'],
                                                         params['val_split'], params['sequence_length'])
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    # Cycling over overlaps
    for ov in range(1, params['overlap']+1):
        # Test-split generator for the current overlap. shuffle=False so that
        # prediction order lines up with the ground truth collected below.
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], ov_num=ov, split=params['test_split'],
            db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
            seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'], shuffle=False
        )
        data_in, data_out = data_gen_test.get_data_sizes()
        n_classes = data_out[0][2]
        print(
            'FEATURES:\n'
            '\tdata_in: {}\n'
            '\tdata_out: {}\n'.format(
                data_in, data_out
            )
        )

        # Ground truth for the whole test set, flattened to 2D (frames x labels).
        gt = collect_test_labels(data_gen_test, data_out, params['mode'], params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        # Dump ground-truth values as text for the external plotting helpers.
        print("#### Saving DOA and SED GT Values ####")
        f = open("models/doa_gt.txt", "w+")
        for elem in doa_gt:
            f.write(str(list(elem)) + "\n")
        f.close()
        f = open("models/sed_gt.txt", "w+")
        for elem in sed_gt:
            f.write(str(elem)+"\n")
        f.close()
        print("######################################")

        print(
            'MODEL:\n'
            '\tdropout_rate: {}\n'
            '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
            '\trnn_size: {}, fnn_size: {}\n'.format(
                params['dropout_rate'],
                params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
                params['pool_size'], params['rnn_size'], params['fnn_size']
            )
        )
        model = keras_model.get_model(data_in=data_in, data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      classification_mode=params['mode'],
                                      weights=params['loss_weights'], summary=False)
        # Load previously trained weights, if a checkpoint exists for this config.
        if(os.path.exists('{}_model.ckpt'.format(unique_name))):
            print("Model found!")
            model.load_weights('{}_model.ckpt'.format(unique_name))
            for i in range(10):
                print("###")

        # Metric holders sized like the training script's; only index 0 is used
        # here since a single evaluation pass is performed (epoch_cnt stays 0).
        sed_score = np.zeros(params['nb_epochs'])
        doa_score = np.zeros(params['nb_epochs'])
        seld_score = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_loss = np.zeros((params['nb_epochs'], 6))
        sed_loss = np.zeros((params['nb_epochs'], 2))
        epoch_cnt = 0
        start = time.time()

        print("#### Prediction on validation split ####")
        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=params['quick_test_steps'] if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            workers=1,
            verbose=1,
        )
        print("##########################")
        #print("pred:", pred[1].shape)
        if params['mode'] == 'regr':
            # Binarize SED activations at 0.5; DOA regression output stays continuous.
            sed_pred = np.array(evaluation_metrics.reshape_3Dto2D(pred[0])) > .5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            print("#### Saving DOA and SED Pred Values ####")
            f = open("models/doa_pred.txt", "w+")
            for elem in doa_pred:
                f.write(str(list(elem)) + "\n")
            f.close()
            f = open("models/sed_pred.txt", "w+")
            for elem in sed_pred:
                f.write(str(elem)+"\n")
            f.close()
            print("########################################")

            # Old version of confidence intervals
            '''
            # Computing confidence intervals
            sed_err = sed_gt - sed_pred
            [sed_conf_low, sed_conf_up, sed_median] = compute_confidence(sed_err)
            # print("Condidence Interval for SED error is [" + str(sed_conf_low) + ", " + str(sed_conf_up) + "]")
            print("Confidence Interval for SED error is [ %f, %f ]" % (sed_conf_low, sed_conf_up))
            # print("\tMedian is " + str(sed_median))
            print("\tMedian is %f" % (sed_median))
            # print("\tDisplacement: +/- " + str(sed_conf_up - sed_median))
            print("\tDisplacement: +/- %f" % (sed_conf_up - sed_median))

            doa_err = doa_gt - doa_pred
            [doa_conf_low, doa_conf_up, doa_median] = compute_confidence(doa_err)
            # print("Condidence Interval for DOA is [" + str(doa_conf_low) + ", " + str(doa_conf_up) + "]")
            print("Confidence Interval for DOA is [ %f, %f ]" % (doa_conf_low, doa_conf_up))
            # print("Median is " + str(doa_median))
            print("\tMedian is %f" % (doa_median))
            # print("Displacement: +/- " + str(doa_conf_up - doa_median))
            print("\tDisplacement: +/- %f" % (doa_conf_up - doa_median))
            # ------------------------------
            '''

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            # xy scorer when only azimuth is estimated, xyz otherwise.
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            # sed_score = mean(ER, 1-F1); doa_score mixes a normalized DOA error
            # term with (1 - good-frame ratio); seld_score averages the two.
            sed_score[epoch_cnt] = np.mean([sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            doa_score[epoch_cnt] = np.mean([2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                                            1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))])
            seld_score[epoch_cnt] = (sed_score[epoch_cnt] + doa_score[epoch_cnt]) / 2

            if os.path.isdir('./models'):
                plot.imshow(conf_mat, cmap='binary', interpolation='None')
                plot.savefig('models/confusion_matrix.jpg')

            # New confidence computation, differing doa and sed errors
            sed_err = sed_loss[epoch_cnt, 0]
            [sed_conf_low, sed_conf_up] = compute_confidence(sed_err, sed_pred.shape[0])
            print("Confidence Interval for SED error is [ %f, %f ]" % (sed_conf_low, sed_conf_up))
            doa_err = doa_gt - doa_pred
            [x_err, y_err, z_err] = compute_doa_confidence(doa_err, n_classes)

            print('epoch_cnt: %d, time: %.2fs, tr_loss: %.4f, val_loss: %.4f, '
                  'F1_overall: %.2f, ER_overall: %.2f, '
                  'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
                  'sed_score: %.4f, doa_score: %.4f, seld_score: %.4f' % (
                      epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                      sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0], doa_loss[epoch_cnt, 1],
                      doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                      sed_score[epoch_cnt], doa_score[epoch_cnt], seld_score[epoch_cnt]
                  )
                  )
            simple_plotter.plot_3d("models/doa_gt.txt", "models/doa_pred.txt", 0, 11, 200)
            simple_plotter.plot_confidence(x_err, y_err, z_err, "ov"+str(ov))
# Extracts the features, labels, and normalizes the training and test split features. Make sure you update the location # of the downloaded datasets before in the cls_feature_class.py import cls_feature_class import cls_feature_extr import parameter params = parameter.get_params('1') dataset_name = params['dataset'] dataset_dir = params['dataset_dir'] feat_label_dir = params['feat_label_dir'] if(dataset_name == "foa"): # -------------- Extract features and labels for development set ----------------------------- dev_feat_cls = cls_feature_extr.FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir, feat_label_dir=feat_label_dir) # Extract features and normalize them dev_feat_cls.extract_all_feature() dev_feat_cls.preprocess_features() # Extract labels in regression mode dev_feat_cls.extract_all_labels() else: # Extracts feature and labels for all overlap and splits for ovo in [2]: # SE overlap for splito in [1]: # all splits. Use [1, 8, 9] for 'real' dataset for nffto in [512]: feat_cls = cls_feature_class.FeatureClass(ov=ovo, split=splito, nfft=nffto, dataset=dataset_name)
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    Trains for nb_epochs, evaluating on the validation split after every epoch;
    tracks the best SELD score and checkpoints weights accordingly.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py. (default) uses default parameters
    """
    # Warn (but continue with defaults) when the expected CLI arguments are missing.
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)
    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_train{}_validation{}_seq{}'.format(params['dataset'], params['train_split'],
                                                         params['val_split'], params['sequence_length'])
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    # Shuffled generator over the training splits.
    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['train_split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only']
    )
    # Validation generator; shuffle=False so predictions align with the
    # ground truth collected below.
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['val_split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='validation', cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'], shuffle=False
    )

    data_in, data_out = data_gen_train.get_data_sizes()
    #n_classes = data_out[0][2]
    print(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    # Ground truth for the validation split, flattened to 2D (frames x labels).
    gt = collect_test_labels(data_gen_test, data_out, params['mode'], params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
            params['pool_size'], params['rnn_size'], params['fnn_size']
        )
    )
    model = keras_model.get_model(data_in=data_in, data_out=data_out,
                                  dropout_rate=params['dropout_rate'],
                                  nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                  pool_size=params['pool_size'],
                                  rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                  classification_mode=params['mode'],
                                  weights=params['loss_weights'], summary=True)
    # Resume from an existing checkpoint, if present.
    if (os.path.exists('{}_model.ckpt'.format(unique_name))):
        print("Model found!")
        model.load_weights('{}_model.ckpt'.format(unique_name))
        for i in range(10):
            print("###")

    # Best-so-far trackers and per-epoch metric holders.
    best_metric = 99999
    conf_mat = None
    best_conf_mat = None
    best_epoch = -1
    patience_cnt = 0
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    seld_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))

    for epoch_cnt in range(params['nb_epochs']):
        start = time.time()
        print("##### Training the model #####")
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=params['quick_test_steps'] if params[
                'quick_test'] else data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=params['quick_test_steps'] if params[
                'quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            workers=1,
            epochs=1,
            verbose=1
        )
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]
        print("##########################")

        # Save, get model and re-load weights for the predict_generator bug
        print("##### Saving weights #####")
        model.save_weights('{}_model.ckpt'.format(unique_name))
        model = keras_model.get_model(data_in=data_in, data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      classification_mode=params['mode'],
                                      weights=params['loss_weights'], summary=False)
        model.load_weights('{}_model.ckpt'.format(unique_name))
        print("##########################")

        print("#### Prediction on validation split ####")
        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=params['quick_test_steps'] if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            workers=1,
            verbose=1
        )
        print("########################################")
        # print("pred:",pred[1].shape)
        if params['mode'] == 'regr':
            # Binarize SED activations at 0.5; DOA regression output stays continuous.
            sed_pred = np.array(evaluation_metrics.reshape_3Dto2D(pred[0])) > .5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # Old confidence intervals
            '''
            sed_err = sed_gt - sed_pred
            [sed_conf_low, sed_conf_up, sed_median] = compute_confidence(sed_err)
            # print("Condidence Interval for SED error is [" + str(sed_conf_low) + ", " + str(sed_conf_up) + "]")
            print("Confidence Interval for SED error is [ %.5f, %.5f ]" % (sed_conf_low, sed_conf_up))
            # print("\tMedian is " + str(sed_median))
            print("\tMedian is %.5f" % (sed_median))
            # print("\tDisplacement: +/- " + str(sed_conf_up - sed_median))
            print("\tDisplacement: +/- %.5f" % (sed_conf_up - sed_median))

            doa_err = doa_gt - doa_pred
            [doa_conf_low, doa_conf_up, doa_median] = compute_confidence(doa_err)
            # print("Condidence Interval for DOA is [" + str(doa_conf_low) + ", " + str(doa_conf_up) + "]")
            print("Confidence Interval for DOA is [ %.5f, %.5f ]" % (doa_conf_low, doa_conf_up))
            # print("Median is " + str(doa_median))
            print("\tMedian is %.5f" % (doa_median))
            # print("Displacement: +/- " + str(doa_conf_up - doa_median))
            print("\tDisplacement: +/- %.5f" % (doa_conf_up - doa_median))
            '''

            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt,
                                                                           data_gen_test.nb_frames_1s())
            # xy scorer when only azimuth is estimated, xyz otherwise.
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            # sed_score = mean(ER, 1-F1); doa_score mixes a normalized DOA error
            # term with (1 - good-frame ratio); seld_score averages the two.
            sed_score[epoch_cnt] = np.mean([sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            doa_score[epoch_cnt] = np.mean([2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                                            1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))])
            seld_score[epoch_cnt] = (sed_score[epoch_cnt] + doa_score[epoch_cnt]) / 2

            if os.path.isdir('./models'):
                plot.imshow(conf_mat, cmap='binary', interpolation='None')
                plot.savefig('models/confusion_matrix.jpg')

            # New confidence computation, differing doa and sed errors
            sed_err = sed_loss[epoch_cnt, 0]
            [sed_conf_low, sed_conf_up] = compute_confidence(sed_err, sed_pred.shape[0])
            print("Confidence Interval for SED error is [ %f, %f ]" % (sed_conf_low, sed_conf_up))
            #doa_err = doa_gt - doa_pred
            #[x_err, y_err, z_err] = compute_doa_confidence(doa_err, n_classes)

            # Row appended to the per-epoch CSV; index comments give column meaning.
            plot_array = [tr_loss[epoch_cnt],  # 0
                          val_loss[epoch_cnt],  # 1
                          sed_loss[epoch_cnt][0],  # 2 er
                          sed_loss[epoch_cnt][1],  # 3 f1
                          doa_loss[epoch_cnt][0],  # 4 avg_accuracy
                          doa_loss[epoch_cnt][1],  # 5 doa_loss_gt
                          doa_loss[epoch_cnt][2],  # 6 doa_loss_pred
                          doa_loss[epoch_cnt][3],  # 7 doa_loss_gt_cnt
                          doa_loss[epoch_cnt][4],  # 8 doa_loss_pred_cnt
                          doa_loss[epoch_cnt][5],  # 9 good_frame_cnt
                          sed_score[epoch_cnt],  # 10
                          doa_score[epoch_cnt],
                          seld_score[epoch_cnt],
                          #doa_conf_low, doa_median,
                          #doa_conf_up, sed_conf_low,
                          #sed_median, sed_conf_up]
                          sed_conf_low,
                          sed_conf_up]
            patience_cnt += 1
            # model.save_weights('{}_model.ckpt'.format(unique_name))
            simple_plotter.save_array_to_csv("{}_plot.csv".format(unique_name), plot_array)
            #simple_plotter.plot_confidence(x_err, y_err, z_err, "ov")
            print("##### Model and metrics saved! #####")

            # Track the best combined SELD score and checkpoint its weights.
            if seld_score[epoch_cnt] < best_metric:
                best_metric = seld_score[epoch_cnt]
                best_conf_mat = conf_mat
                best_epoch = epoch_cnt
                # Now we save the model at every iteration
                model.save_weights('{}_BEST_model.ckpt'.format(unique_name))
                patience_cnt = 0

            print('epoch_cnt: %d, time: %.2fs, tr_loss: %.4f, val_loss: %.4f, '
                  'F1_overall: %.2f, ER_overall: %.2f, '
                  'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
                  'sed_score: %.4f, doa_score: %.4f, seld_score: %.4f, best_error_metric: %.2f, best_epoch : %d' % (
                      epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                      sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0], doa_loss[epoch_cnt, 1],
                      doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                      sed_score[epoch_cnt], doa_score[epoch_cnt], seld_score[epoch_cnt],
                      best_metric, best_epoch
                  )
                  )

    # plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, sed_score, doa_score, epoch_cnt)
    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {}, '.format(best_epoch, best_metric))
    print('DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.format(
        doa_loss[best_epoch, 1], doa_loss[best_epoch, 2], doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('unique_name: {} '.format(unique_name))
def __init__(self, dataset='ansim', ov=3, split=1, nfft=1024, db=30, wav_extra_name='', desc_extra_name=''):
    """
    Feature/label extraction configuration for one dataset/overlap/split combination.

    :param dataset: dataset identifier ('ansim', 'resim', 'cansim', 'cresim', 'real', 'foa').
    :param ov: overlap count used in the input directory names.
    :param split: cross-validation split used in the input directory names.
    :param nfft: FFT size; also the analysis window length (hop is nfft/2).
    :param db: SNR in dB, used in the (non-foa) wav directory name.
    :param wav_extra_name: extra suffix for the wav directory name.
    :param desc_extra_name: extra suffix for the description directory name.
    """
    # TODO: Change the path according to your machine.
    # TODO: It should point to a folder which consists of sub-folders for audio and metada
    params = parameter.get_params('1')
    dataset_dir = params['dataset_dir']

    # Base folder per dataset flavour; 'foa' points at the configured dataset dir.
    if dataset == 'ansim':
        self._base_folder = 'ansim'
    elif dataset == 'resim':
        #self._base_folder = os.path.join('/proj/asignal/TUT_SELD/', 'doa_data_echoic/')
        self._base_folder = 'resim'
    elif dataset == 'cansim':
        self._base_folder = os.path.join('/proj/asignal/TUT_SELD/', 'doa_circdata/')
    elif dataset == 'cresim':
        self._base_folder = os.path.join('/proj/asignal/TUT_SELD/', 'doa_circdata_echoic/')
    elif dataset == 'real':
        self._base_folder = 'real'
        #self._base_folder = os.path.join('/proj/asignal/TUT_SELD/', 'tut_seld_data/')
    elif dataset == 'foa':
        self._base_folder = dataset_dir

    # Input directories
    if dataset == 'foa':
        # NOTE(review): this format string has only two placeholders, so the extra
        # db/wav_extra_name arguments are silently ignored — confirm the intended
        # foa directory name really excludes them (cf. the else branch below).
        self._aud_dir = os.path.join(
            self._base_folder,
            'foa_dev/wav_ov{}_split{}'.format(ov, split, db, wav_extra_name))
        self._desc_dir = os.path.join(
            self._base_folder,
            'metadata_dev/desc_ov{}_split{}{}'.format(ov, split, desc_extra_name))
    else:
        self._aud_dir = os.path.join(
            self._base_folder,
            'wav_ov{}_split{}_{}db{}'.format(ov, split, db, wav_extra_name))
        self._desc_dir = os.path.join(
            self._base_folder,
            'desc_ov{}_split{}{}'.format(ov, split, desc_extra_name))

    # Output directories (filled in later by the extraction methods)
    self._label_dir = None
    self._feat_dir = None
    self._feat_dir_norm = None

    # Local parameters
    self._mode = None
    self._ov = ov
    self._split = split
    self._db = db
    self._nfft = nfft
    self._win_len = self._nfft
    # NOTE(review): true division — hop length is a float in Python 3; verify
    # downstream users expect that (nfft is even, so the value itself is exact).
    self._hop_len = self._nfft / 2
    self._dataset = dataset
    # BUG FIX: np.float was a deprecated alias of the builtin float and was
    # removed in NumPy 1.24; use the explicit np.float64 instead.
    self._eps = np.spacing(np.float64(1e-16))

    # If circular-array 8 channels else 4 for Ambisonic
    if 'c' in self._dataset:
        self._nb_channels = 8
    else:
        self._nb_channels = 4

    # Sound event classes dictionary
    self._unique_classes = dict()
    if 'real' in self._dataset:
        # Urbansound8k sound events
        self._unique_classes = \
            {
                '1': 0,
                '3': 1,
                '4': 2,
                '5': 3,
                '6': 4,
                '7': 5,
                '8': 6,
                '9': 7
            }
    else:
        # DCASE 2016 Task 2 sound events
        self._unique_classes = \
            {
                'clearthroat': 2,
                'cough': 8,
                'doorslam': 9,
                'drawer': 1,
                'keyboard': 6,
                'keysDrop': 4,
                'knock': 0,
                'laughter': 10,
                'pageturn': 7,
                'phone': 3,
                'speech': 5
            }

    self._fs = 48000
    self._hop_len_s = self._nfft / 2.0 / self._fs
    self._nb_frames_1s = int(1 / self._hop_len_s)
    self._frame_res = self._fs / float(self._hop_len)
    self._resolution = 10
    self._azi_list = range(-180, 180, self._resolution)
    self._length = len(self._azi_list)
    #CNG
    self._ele_list = range(-40, 50, self._resolution)
    self._height = len(self._ele_list)
    self._weakness = None

    # For regression task only: sentinel DOA values for inactive sources;
    # they must fall outside the quantized angle grids.
    self._default_azi = 180
    #CNG
    self._default_ele = 50

    if self._default_azi in self._azi_list:
        print('ERROR: chosen default_azi value {} should not exist in azi_list'.format(self._default_azi))
        exit()
    if self._default_ele in self._ele_list:
        print('ERROR: chosen default_ele value {} should not exist in ele_list'.format(self._default_ele))
        exit()

    self._audio_max_len_samples = 60 * self._fs  # TODO: Fix the audio synthesis code to always generate 30s of
    # audio. Currently it generates audio till the last active sound event, which is not always 30s long. This is a
    # quick fix to overcome that. We need this because, for processing and training we need the length of features
    # to be fixed.
    self._max_frames = int(np.ceil((self._audio_max_len_samples - self._win_len) / float(self._hop_len)))
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    Trains for nb_epochs (2 in quick-test mode) with early stopping on the
    combined SED+DOA metric, saving the best model as '{unique_name}_model.h5'.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py. (default) uses default parameters
    """
    # Warn (but continue with defaults) when the expected CLI arguments are missing.
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two inputs')
        print('\t>> python seld.py <job-id> <task-id>')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[-1]
    params = parameter.get_params(task_id)
    job_id = 1 if len(argv) < 2 else argv[1]

    model_dir = 'models/'
    utils.create_folder(model_dir)
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id
    )
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))

    # Shuffled generator over the training split.
    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only']
    )
    # Test generator; shuffle=False so predictions align with the collected GT.
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'],
        xyz_def_zero=params['xyz_def_zero'], azi_only=params['azi_only'], shuffle=False
    )

    data_in, data_out = data_gen_train.get_data_sizes()
    print(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    # Ground truth for the test split, flattened to 2D (frames x labels).
    gt = collect_test_labels(data_gen_test, data_out, params['mode'], params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

    print(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
            params['pool_size'], params['rnn_size'], params['fnn_size']
        )
    )
    model = keras_model.get_model(data_in=data_in, data_out=data_out,
                                  dropout_rate=params['dropout_rate'],
                                  nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                  pool_size=params['pool_size'],
                                  rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                  classification_mode=params['mode'],
                                  weights=params['loss_weights'],
                                  loader=False, loader2=False)  # Change loader to True to enable transfer learning, Change loader2 to True to enable transfer learning with different labels

    # Best-so-far trackers and per-epoch metric holders.
    best_metric = 99999
    conf_mat = None
    best_conf_mat = None
    best_epoch = -1
    patience_cnt = 0
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))

    nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
    for epoch_cnt in range(nb_epoch):
        start = time.time()
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            epochs=1,
            verbose=0
        )
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2
        )
        if params['mode'] == 'regr':
            # Binarize SED activations at 0.5; DOA regression output stays continuous.
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])
            # sed_loss = [ER, F1]; doa_loss = [avg_accuracy, doa_gt_err, doa_pred_err,
            # gt_cnt, pred_cnt, good_frame_cnt] (per the scorer outputs printed below).
            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt,
                                                                           data_gen_test.nb_frames_1s())
            if params['azi_only']:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            # Combined early-stopping metric: mean of ER, 1-F1, normalized DOA
            # error and (1 - good-frame ratio) — lower is better.
            epoch_metric_loss[epoch_cnt] = np.mean([
                sed_loss[epoch_cnt, 0],
                1-sed_loss[epoch_cnt, 1],
                2*np.arcsin(doa_loss[epoch_cnt, 1]/2.0)/np.pi,
                1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))]
            )
        plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss, epoch_metric_loss)

        patience_cnt += 1
        if epoch_metric_loss[epoch_cnt] < best_metric:
            best_metric = epoch_metric_loss[epoch_cnt]
            best_conf_mat = conf_mat
            best_epoch = epoch_cnt
            model.save('{}_model.h5'.format(unique_name))
            patience_cnt = 0

        print(
            'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
            'F1_overall: %.2f, ER_overall: %.2f, '
            'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
            'error_metric: %.2f, best_error_metric: %.2f, best_epoch : %d' % (
                epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                sed_loss[epoch_cnt, 1], sed_loss[epoch_cnt, 0], doa_loss[epoch_cnt, 1],
                doa_loss[epoch_cnt, 2], doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                epoch_metric_loss[epoch_cnt], best_metric, best_epoch
            )
        )
        if patience_cnt > params['patience']:
            break

    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {}, '.format(best_epoch, best_metric))
    print('DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.format(
        doa_loss[best_epoch, 1], doa_loss[best_epoch, 2], doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    # BUG FIX: the arguments were swapped here — sed_loss[:, 0] is ER and
    # sed_loss[:, 1] is F1 (as used in the per-epoch print above and in the
    # sibling scripts), but this summary printed F1 as ER and vice versa.
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('unique_name: {} '.format(unique_name))
class_active]: error_string += '{}: {} - {} | '.format( class_active, label[batch, frame, class_active], output[batch, frame, class_active]) is_err = True if label[batch, frame, class_active]: count += 1 if is_err: count_frame_err += 1 if count >= 2: count_overlap_err += 1 return error_string, count_overlap_err, count_frame_err # parameter params = get_params('4') with open('config/evaluate_config.yaml', 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) for key, value in config.items(): print(f"{key:20} {value}") # Get data from fold 1 data_eval = get_data(kind_data='valid', params=params, fold_set=config['fold_set']) # Get model model = get_model(model_name=config['model_name'], input_shape=config['input_shape'], params=params)
# Ensure the output and checkpoint directories exist before training starts.
if not os.path.isdir(os.path.abspath(out_path)):
    os.makedirs(os.path.abspath(out_path))
if not os.path.isdir(os.path.abspath(checkpoint_path)):
    os.makedirs(os.path.abspath(checkpoint_path))

evaluate_every = FLAGS.evaluate_every
seq_len = FLAGS.seq_len

#learning schedule
# warmup for 10 epochs, then decay by decay_rate at the listed epoch milestones.
scheduler = dict(learning_rate=FLAGS.learning_rate,
                 decay_rate=FLAGS.decay_rate,
                 warmup_epoch=10,
                 schedule=[200, 600, 1000, 9000, 9500],
                 training_epoch=FLAGS.training_epoch)

params = parameter.get_params(str(FLAGS.task_id))
feat_cls = cls_feature_class.FeatureClass(params)

# Dataset split selection: 'dev' holds out splits 1 (test) and 2 (validation);
# 'eval' trains on all dev splits and tests on the evaluation splits 7 and 8.
train_splits, train_check_splits, val_splits, test_splits = None, None, None, None
if params['mode'] == 'dev':
    test_splits = [1]
    val_splits = [2]
    train_splits = [3, 4, 5, 6]
elif params['mode'] == 'eval':
    test_splits = [7, 8]
    val_splits = []
    train_splits = [1, 2, 3, 4, 5, 6]
# True when running in evaluation mode (no validation splits).
iseval = (params['mode'] == 'eval')
def main(args):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py.
                                (default) uses default parameters
    """
    # use parameter set defined by user
    task_id = args.params
    params = parameter.get_params(task_id)
    job_id = args.model_name
    # Per-author model directory; falls back to a shared 'models/' folder.
    model_dir = 'models/' + args.author + '/' if args.author != "" else 'models/'
    utils.create_folder(model_dir)
    # Unique run name encodes dataset, overlap, split, mode, weakness, 2D/3D CNN and job id.
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id)
    model_name = unique_name
    # JSON_Manager persists epoch counters / best metrics so training can resume.
    epoch_manager = JSON_Manager(args.author, unique_name)
    logdir = "logs/" + args.author + "/" + unique_name
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))
    # TF1-style session + summary writer for TensorBoard logging.
    session = tf.InteractiveSession()
    file_writer = tf.summary.FileWriter(logdir, session.graph)
    # Train and test generators share all settings except datagen_mode/shuffle.
    data_gen_train = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='train',
        cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'])
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='test',
        cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'], shuffle=False)
    data_in, data_out = data_gen_train.get_data_sizes()
    print('FEATURES:\n'
          '\tdata_in: {}\n'
          '\tdata_out: {}\n'.format(data_in, data_out))
    # Ground truth collected once (test generator is not shuffled, so order is stable).
    gt = collect_test_labels(data_gen_test, data_out, params['mode'],
                             params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])
    print('MODEL:\n'
          '\tdropout_rate: {}\n'
          '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
          '\trnn_size: {}, fnn_size: {}\n'.format(
              params['dropout_rate'],
              params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
              params['pool_size'], params['rnn_size'], params['fnn_size']))
    model = keras_model.get_model(data_in=data_in, data_out=data_out,
                                  dropout_rate=params['dropout_rate'],
                                  nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                  pool_size=params['pool_size'],
                                  rnn_size=params['rnn_size'],
                                  fnn_size=params['fnn_size'],
                                  classification_mode=params['mode'],
                                  weights=params['loss_weights'])
    # Resume support: a nonzero persisted epoch means a checkpoint exists on disk.
    initial = epoch_manager.get_epoch()
    if initial != 0:
        print(f"Resume training from epoch {initial}")
        print("Loading already trained model...")
        # In order to load custom layers we need to link the references to the custom objects
        model = load_model(os.path.join(model_dir, model_name + "_model.h5"),
                           custom_objects={
                               'QuaternionConv2D': QuaternionConv2D,
                               'QuaternionGRU': QuaternionGRU,
                               'QuaternionDense': QuaternionDense
                           })
    # Restore best-so-far bookkeeping from the JSON manager (zeros on fresh runs).
    best_metric = epoch_manager.get_best_metric()
    best_std = epoch_manager.get_best_std()
    conf_mat = None
    best_conf_mat = epoch_manager.get_best_conf_mat()
    best_epoch = epoch_manager.get_best_epoch()
    patience_cnt = epoch_manager.get_patience_cnt()
    # Per-epoch history buffers (indexed by absolute epoch number).
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    std_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    seld_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))  # [:, 0]=ER, [:, 1]=F1 (see feed_dict below)
    # One placeholder + scalar summary per logged quantity (TF1 summary API).
    time_hold = tf.placeholder(tf.float32, shape=None, name='time_summary')
    time_summ = tf.summary.scalar('time', time_hold)
    tr_loss_hold = tf.placeholder(tf.float32, shape=None, name='tr_loss_summary')
    tr_loss_summ = tf.summary.scalar('tr_loss', tr_loss_hold)
    val_loss_hold = tf.placeholder(tf.float32, shape=None, name='val_loss_summary')
    val_loss_summ = tf.summary.scalar('val_loss', val_loss_hold)
    f1_hold = tf.placeholder(tf.float32, shape=None, name='f1_summary')
    f1_summ = tf.summary.scalar('F1_overall', f1_hold)
    er_hold = tf.placeholder(tf.float32, shape=None, name='er_summary')
    er_summ = tf.summary.scalar('ER_overall', er_hold)
    doa_error_gt_hold = tf.placeholder(tf.float32, shape=None, name='doa_error_gt_summary')
    doa_error_gt_summ = tf.summary.scalar('doa_error_gt', doa_error_gt_hold)
    doa_error_pred_hold = tf.placeholder(tf.float32, shape=None, name='doa_error_pred_summary')
    doa_error_pred_summ = tf.summary.scalar('doa_error_pred', doa_error_pred_hold)
    good_pks_hold = tf.placeholder(tf.float32, shape=None, name='good_pks_summary')
    good_pks_summ = tf.summary.scalar('good_pks_ratio', good_pks_hold)
    sed_score_hold = tf.placeholder(tf.float32, shape=None, name='sed_score_summary')
    sed_score_summ = tf.summary.scalar('sed_score', sed_score_hold)
    doa_score_hold = tf.placeholder(tf.float32, shape=None, name='doa_score_summary')
    doa_score_summ = tf.summary.scalar('doa_score', doa_score_hold)
    seld_score_hold = tf.placeholder(tf.float32, shape=None, name='seld_score_summary')
    seld_score_summ = tf.summary.scalar('seld_score', seld_score_hold)
    std_score_hold = tf.placeholder(tf.float32, shape=None, name='std_score_summary')
    std_score_summ = tf.summary.scalar('std_score', std_score_hold)
    best_error_metric_hold = tf.placeholder(tf.float32, shape=None, name='best_error_metric_summary')
    best_error_metric_summ = tf.summary.scalar('best_error_metric', best_error_metric_hold)
    best_epoch_hold = tf.placeholder(tf.float32, shape=None, name='best_epoch_summary')
    best_epoch_summ = tf.summary.scalar('best_epoch', best_epoch_hold)
    best_std_hold = tf.placeholder(tf.float32, shape=None, name='best_std_summary')
    best_std_summ = tf.summary.scalar('best_std', best_std_hold)
    merged = tf.summary.merge_all()
    # Main training loop; starts at the persisted epoch when resuming.
    for epoch_cnt in range(initial, params['nb_epochs']):
        start = time.time()
        # One Keras "epoch" per loop iteration so we can evaluate/log in between.
        # quick_test caps the batch count to 5 for fast smoke runs.
        hist = model.fit_generator(
            generator=data_gen_train.generate(),
            steps_per_epoch=5 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
            validation_data=data_gen_test.generate(),
            validation_steps=5 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            epochs=1,
            verbose=1)
        tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
        val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]
        pred = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=5 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            use_multiprocessing=False,
            verbose=2)
        print("pred:", pred[1].shape)
        # Metrics are only computed in regression (DOA) mode.
        if params['mode'] == 'regr':
            # SED head: sigmoid activations thresholded at 0.5.
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])
            sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_test.nb_frames_1s())
            # xy vs xyz DOA scoring depends on whether elevation is modeled.
            if params['azi_only']:
                doa_loss[
                    epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                        doa_pred, doa_gt, sed_pred, sed_gt)
            else:
                doa_loss[
                    epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                        doa_pred, doa_gt, sed_pred, sed_gt)
            # sed_score = mean(ER, 1 - F1): lower is better.
            sed_score[epoch_cnt] = np.mean(
                [sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
            # doa_score combines angular error (chord length -> radians, normalized
            # by pi) with the fraction of frames whose peaks were missed.
            doa_score[epoch_cnt] = np.mean([
                2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
                1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))
            ])
            # seld_score is the early-stopping / model-selection metric.
            seld_score[epoch_cnt] = np.mean(
                [sed_score[epoch_cnt], doa_score[epoch_cnt]])
            # standard deviation
            std_score[epoch_cnt] = np.std(
                [sed_score[epoch_cnt], doa_score[epoch_cnt]])
        plot_functions(unique_name, tr_loss, val_loss, sed_loss, doa_loss,
                       sed_score, doa_score)
        # Patience increases every epoch and is reset whenever a new best is found;
        # both the counter and the latest weights are persisted for resume.
        patience_cnt += 1
        epoch_manager.increase_patience_cnt()
        model.save('{}_model.h5'.format(unique_name))
        if seld_score[epoch_cnt] < best_metric:
            best_metric = seld_score[epoch_cnt]
            epoch_manager.set_best_metric(best_metric)
            best_std = std_score[epoch_cnt]
            epoch_manager.set_best_std(best_std)
            best_conf_mat = conf_mat
            epoch_manager.set_best_conf_mat(conf_mat)
            best_epoch = epoch_cnt
            epoch_manager.set_best_epoch(best_epoch)
            model.save('{}_best_model.h5'.format(unique_name))
            patience_cnt = 0
            epoch_manager.reset_patience_cnt()
        # Early stopping once patience is exceeded.
        if patience_cnt > params['patience']:
            print(
                f"\n---- PATIENCE TRIGGERED AFTER {epoch_cnt} EPOCHS ----\n")
            break
        # Push all scalar metrics for this epoch to TensorBoard in one run call.
        summary = session.run(merged,
                              feed_dict={
                                  time_hold: time.time() - start,
                                  tr_loss_hold: tr_loss[epoch_cnt],
                                  val_loss_hold: val_loss[epoch_cnt],
                                  f1_hold: sed_loss[epoch_cnt, 1],
                                  er_hold: sed_loss[epoch_cnt, 0],
                                  doa_error_gt_hold: doa_loss[epoch_cnt, 1],
                                  doa_error_pred_hold: doa_loss[epoch_cnt, 2],
                                  good_pks_hold: doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
                                  sed_score_hold: sed_score[epoch_cnt],
                                  doa_score_hold: doa_score[epoch_cnt],
                                  seld_score_hold: seld_score[epoch_cnt],
                                  std_score_hold: std_score[epoch_cnt],
                                  best_error_metric_hold: best_metric,
                                  best_epoch_hold: best_epoch,
                                  best_std_hold: best_std
                              })
        file_writer.add_summary(summary, epoch_cnt)
        print(
            'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
            'F1_overall: %.2f, ER_overall: %.2f, '
            'doa_error_gt: %.2f, doa_error_pred: %.2f, good_pks_ratio:%.2f, '
            'sed_score: %.2f, doa_score: %.2f, best_error_metric: %.2f, best_epoch : %d, best_std: %.2f'
            % (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
               val_loss[epoch_cnt], sed_loss[epoch_cnt, 1],
               sed_loss[epoch_cnt, 0], doa_loss[epoch_cnt, 1],
               doa_loss[epoch_cnt, 2],
               doa_loss[epoch_cnt, 5] / float(sed_gt.shape[0]),
               sed_score[epoch_cnt], doa_score[epoch_cnt], best_metric,
               best_epoch, best_std))
        epoch_manager.increase_epoch()
    # 95% confidence interval around the best metric over the run.
    lower_confidence, upper_confidence = evaluation_metrics.compute_confidence_interval(
        best_metric, best_std, params['nb_epochs'],
        confid_coeff=1.96)  # 1.96 for a 95% CI
    print("\n---- FINISHED TRAINING ----\n")
    # Final report for the best epoch found during this run.
    print('best_conf_mat : {}'.format(best_conf_mat))
    print('best_conf_mat_diag : {}'.format(np.diag(best_conf_mat)))
    print('saved model for the best_epoch: {} with best_metric: {}, '.format(
        best_epoch, best_metric))
    print(
        'DOA Metrics: doa_loss_gt: {}, doa_loss_pred: {}, good_pks_ratio: {}'.
        format(doa_loss[best_epoch, 1], doa_loss[best_epoch, 2],
               doa_loss[best_epoch, 5] / float(sed_gt.shape[0])))
    print('SED Metrics: ER_overall: {}, F1_overall: {}'.format(
        sed_loss[best_epoch, 0], sed_loss[best_epoch, 1]))
    print('Confidence Interval: lower_interval: {}, upper_inteval: {}'.format(
        lower_confidence, upper_confidence))
    print('unique_name: {} '.format(unique_name))
# NOTE(review): fragment — the statement creating `filehandler` (a
# logging.FileHandler whose path ends in '.log') begins before this view.
'.log')
filehandler.setLevel(logging.DEBUG)
filehandler.setFormatter(file_formatter)
logger.addHandler(filehandler)
# Resume path: restore params, model weights and loss state from the checkpoint.
if args.resume:
    checkpoint = torch.load(args.resume)
    params = checkpoint['params']
    model = get_model(params['model'])
    net = model(cgnet_params=params['cgnet_params']).cuda()
    net.load_state_dict(checkpoint['model_state_dict'])
    # pos_weight rebalances BCE for the sparse positive (event-active) frames.
    criterion_sed = nn.BCEWithLogitsLoss(
        pos_weight=torch.FloatTensor([params['bce_weight']])).cuda()
    criterion_sed.load_state_dict(checkpoint['criterion_sed_state_dict'])
else:
    # Fresh run: default parameter set, freshly initialized network.
    params = parameter.get_params()
    model = get_model(params['model'])
    net = model(cgnet_params=params['cgnet_params']).cuda()
    criterion_sed = nn.BCEWithLogitsLoss(
        pos_weight=torch.FloatTensor([params['bce_weight']])).cuda()
# NOTE(review): the optimizer is created fresh in both branches, so optimizer
# state is NOT restored on resume — confirm this is intentional.
optimizer = torch.optim.Adam(net.parameters(),
                             lr=params['learning_rate'],
                             weight_decay=params['weight_decay'])
if params['learning_rate_scheduling']:
    # Decay LR by 10x every 30 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=30,
                                                gamma=0.1)
logger.info('Parameters: ' + str(params))
# Snapshot the parameter file next to the results for reproducibility.
shutil.copy2('./parameter.py',
             os.path.join('result', experiment_id + '_param.py'))
def main(args):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To chose the system configuration in parameters.py.
                                (default) uses default parameters

    NOTE(review): despite the docstring, this variant only evaluates a
    previously trained best model on the test split — it does not train.
    """
    # use parameter set defined by user
    task_id = args.params
    params = parameter.get_params(task_id)
    job_id = args.model_name
    # Per-author model directory; falls back to a shared 'models/' folder.
    model_dir = 'models/' + args.author + '/' if args.author != "" else 'models/'
    utils.create_folder(model_dir)
    # Run name must match the one used at training time so the checkpoint is found.
    unique_name = '{}_ov{}_split{}_{}{}_3d{}_{}'.format(
        params['dataset'], params['overlap'], params['split'], params['mode'],
        params['weakness'], int(params['cnn_3d']), job_id)
    model_name = unique_name
    epoch_manager = JSON_Manager(args.author, unique_name)
    logdir = "logs/" + args.author + "/" + unique_name
    unique_name = os.path.join(model_dir, unique_name)
    print("unique_name: {}\n".format(unique_name))
    # Unshuffled test generator so predictions align with collected ground truth.
    data_gen_test = cls_data_generator_seld.DataGenerator(
        dataset=params['dataset'], ov=params['overlap'], split=params['split'],
        db=params['db'], nfft=params['nfft'], batch_size=params['batch_size'],
        seq_len=params['sequence_length'], classifier_mode=params['mode'],
        weakness=params['weakness'], datagen_mode='test',
        cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
        azi_only=params['azi_only'], shuffle=False)
    data_in, data_out = data_gen_test.get_data_sizes()
    print('FEATURES:\n'
          '\tdata_in: {}\n'
          '\tdata_out: {}\n'.format(data_in, data_out))
    gt = collect_test_labels(data_gen_test, data_out, params['mode'],
                             params['quick_test'])
    sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
    doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])
    print('MODEL:\n'
          '\tdropout_rate: {}\n'
          '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
          '\trnn_size: {}, fnn_size: {}\n'.format(
              params['dropout_rate'],
              params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
              params['pool_size'],
              params['rnn_size'], params['fnn_size']))
    # Load the best checkpoint; custom quaternion layers must be registered.
    model = load_model(os.path.join(model_dir, model_name + "_best_model.h5"),
                       custom_objects={
                           'QuaternionConv2D': QuaternionConv2D,
                           'QuaternionGRU': QuaternionGRU,
                           'QuaternionDense': QuaternionDense
                       })
    model.summary()
    plot_model(model, to_file=os.path.join(model_dir, 'model.png'))
    # Best-run bookkeeping restored for reporting only.
    best_metric = epoch_manager.get_best_metric()
    conf_mat = None
    best_conf_mat = epoch_manager.get_best_conf_mat()
    best_epoch = epoch_manager.get_best_epoch()
    patience_cnt = epoch_manager.get_patience_cnt()
    # Buffers sized like the training script; only index 0 is used here.
    epoch_metric_loss = np.zeros(params['nb_epochs'])
    sed_score = np.zeros(params['nb_epochs'])
    std_score = np.zeros(params['nb_epochs'])
    doa_score = np.zeros(params['nb_epochs'])
    seld_score = np.zeros(params['nb_epochs'])
    tr_loss = np.zeros(params['nb_epochs'])
    val_loss = np.zeros(params['nb_epochs'])
    doa_loss = np.zeros((params['nb_epochs'], 6))
    sed_loss = np.zeros((params['nb_epochs'], 2))  # [:, 0]=ER, [:, 1]=F1
    epoch_cnt = 0
    # Single full pass over the test data.
    pred = model.predict_generator(
        generator=data_gen_test.generate(),
        steps=data_gen_test.get_total_batches_in_data(),
        use_multiprocessing=False,
        verbose=2)
    print("pred[1]:", pred[1].shape)
    if params['mode'] == 'regr':
        # SED head thresholded at 0.5; DOA head used as-is.
        sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
        print(f"sed_pred: {sed_pred.shape}")
        doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])
        print(f"doa_pred: {doa_pred.shape}")
        sed_loss[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
            sed_pred, sed_gt, data_gen_test.nb_frames_1s())
        if params['azi_only']:
            doa_loss[
                epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xy(
                    doa_pred, doa_gt, sed_pred, sed_gt)
        else:
            doa_loss[
                epoch_cnt, :], conf_mat = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
        # sed_score = mean(ER, 1 - F1): lower is better.
        sed_score[epoch_cnt] = np.mean(
            [sed_loss[epoch_cnt, 0], 1 - sed_loss[epoch_cnt, 1]])
        print(f"ER: {sed_loss[epoch_cnt, 0]}")
        # Normal-approximation 95% CI for the error rate over test frames.
        er = sed_loss[epoch_cnt, 0]
        interval = 1.96 * np.sqrt(((er) * (1 - er)) / sed_pred.shape[0])
        print(f"interval: {interval}")
        doa_score[epoch_cnt] = np.mean([
            2 * np.arcsin(doa_loss[epoch_cnt, 1] / 2.0) / np.pi,
            1 - (doa_loss[epoch_cnt, 5] / float(doa_gt.shape[0]))
        ])
        seld_score[epoch_cnt] = np.mean(
            [sed_score[epoch_cnt], doa_score[epoch_cnt]])
        # Per-class absolute DOA error, dumped to .npy for offline analysis.
        # NOTE(review): the reshape hard-codes 11 classes and 2 coordinates
        # (azimuth-only layout) — confirm this matches params before reuse.
        doa_error = doa_pred - doa_gt
        doa_error = np.reshape(doa_error, newshape=(doa_error.shape[0], 11, 2))
        doa_error = np.absolute(doa_error[:, :, 0])  # first coordinate (x)
        print(f"doa_error: {doa_error.shape}")
        doa_error = np.reshape(doa_error,
                               newshape=(doa_error.shape[0] * doa_error.shape[1]))
        print(f"doa_error: {doa_error.shape}")
        np.save(model_name + "_x", doa_error)
        doa_error = doa_pred - doa_gt
        doa_error = np.reshape(doa_error, newshape=(doa_error.shape[0], 11, 2))
        doa_error = np.absolute(doa_error[:, :, 1])  # second coordinate (y)
        print(f"doa_error: {doa_error.shape}")
        doa_error = np.reshape(doa_error,
                               newshape=(doa_error.shape[0] * doa_error.shape[1]))
        print(f"doa_error: {doa_error.shape}")
        np.save(model_name + "_y", doa_error)
        # standard deviation
        std_score[epoch_cnt] = np.std(
            [sed_score[epoch_cnt], doa_score[epoch_cnt]])
        print(f"{er-interval} / {er+interval}")
    #lower_confidence, upper_confidence = evaluation_metrics.compute_confidence_interval(best_metric,best_std, params['nb_epochs'], confid_coeff=1.96) # 1.96 for a 95% CI
    print("\n---- FINISHED ----\n")