def metrics(self, list_dict, submissions_dir, metadata_dir):
    '''Write out submission files and calculate metrics against the reference metadata.'''
    write_submission(list_dict, submissions_dir)

    prediction_paths = [
        os.path.join(submissions_dir, '{}.csv'.format(entry['name']))
        for entry in list_dict]

    metrics = calculate_metrics(metadata_dir, prediction_paths)

    for key in metrics.keys():
        logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))

    return metrics

def optimize_sed_with_gd(output_dict, submission_path, reference_csv_path, 
    sed_params_dict, metric_type):
    """Optimize thresholds for SED with gradient descent.

    Args:
      output_dict: {'clipwise_output': (N, classes_num), 
        'framewise_output': (N, frames_num, classes_num)}
      submission_path: str
      reference_csv_path: str
      sed_params_dict: dict
      metric_type: 'f1' | 'er'

    Returns:
      metric: float
      sed_params_dict: dict, optimized thresholds
    """
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, sed_params_dict)

    write_submission(predict_event_list, submission_path)

    results = official_evaluate(reference_csv_path, submission_path)

    metric = _get_metric(results, metric_type)
    print('Initial {}: {}'.format(metric_type, metric))
    print('Running optimization on thresholds.')

    opt = Adam()
    opt.alpha = 2e-2

    for i in range(10):
        # Numerically estimate the gradient of the metric w.r.t. the thresholds.
        grads = calculate_sed_gradient(output_dict, submission_path, 
            reference_csv_path, sed_params_dict, metric_type)

        # F1 is maximized, so negate its gradient before the descent step; 
        # error rate is minimized, so keep the gradient as is.
        if metric_type == 'f1':
            grads = [-e for e in grads]
        elif metric_type == 'er':
            pass

        params = sed_dict_to_params(sed_params_dict)
        sed_params = opt.GetNewParams(params, grads)
        sed_params_dict = sed_params_to_dict(sed_params, sed_params_dict)

        predict_event_list = frame_prediction_to_event_prediction(
            output_dict, sed_params_dict)

        write_submission(predict_event_list, submission_path)

        results = official_evaluate(reference_csv_path, submission_path)

        metric = _get_metric(results, metric_type)
        print('******')
        print('Iteration: {}, {}: {}'.format(i, metric_type, metric))

    return metric, sed_params_dict

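
# The optimizer above flattens the threshold dict into a parameter vector via
# sed_dict_to_params / sed_params_to_dict, which are defined elsewhere. The sketch
# below is an assumption of how such helpers could look (the "_sketch" names and
# the fixed key order are illustrative, not the project's actual code); it only
# relies on the dict layout used elsewhere in this file, e.g.
# {'audio_tagging_threshold': [...], 'sed_high_threshold': [...], ...}.
_SED_PARAM_KEYS = ['audio_tagging_threshold', 'sed_high_threshold', 
    'sed_low_threshold', 'n_smooth', 'n_salt']


def sed_dict_to_params_sketch(sed_params_dict):
    """Concatenate the per-class threshold lists into one flat parameter vector."""
    params = []
    for key in _SED_PARAM_KEYS:
        params += list(sed_params_dict[key])
    return params


def sed_params_to_dict_sketch(params, sed_params_dict):
    """Write a flat parameter vector back into a threshold dict of the same shape."""
    new_params_dict = {}
    pointer = 0
    for key in _SED_PARAM_KEYS:
        classes_num = len(sed_params_dict[key])
        new_params_dict[key] = list(params[pointer : pointer + classes_num])
        pointer += classes_num
    return new_params_dict
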
def calculate_sed_gradient(output_dict, submission_path, reference_csv_path, 
    sed_params_dict, metric_type):
    """Numerically estimate the gradient of the SED metric with respect to the 
    post-processing thresholds.

    Args:
      output_dict: {'clipwise_output': (N, classes_num), 
        'framewise_output': (N, frames_num, classes_num)}
      submission_path: str
      reference_csv_path: str
      sed_params_dict: dict
      metric_type: 'f1' | 'er'

    Returns:
      grads: vector
    """
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, sed_params_dict)

    write_submission(predict_event_list, submission_path)

    results = official_evaluate(reference_csv_path, submission_path)

    value = _get_metric(results, metric_type)

    grads = []
    params = sed_dict_to_params(sed_params_dict)

    for k, param in enumerate(params):
        print('Param index: {} / {}'.format(k, len(params)))

        new_params = params.copy()
        delta = 0.1
        cnt = 0

        # Increase the k-th threshold until the metric changes (at most 3 steps), 
        # then approximate the partial derivative with a finite difference.
        while cnt < 3:
            cnt += 1
            new_params[k] += delta
            new_params_dict = sed_params_to_dict(new_params, sed_params_dict)

            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, new_params_dict)

            write_submission(predict_event_list, submission_path)

            results = official_evaluate(reference_csv_path, submission_path)

            new_value = _get_metric(results, metric_type)

            if new_value != value:
                break

        grad = (new_value - value) / (delta * cnt)
        grads.append(grad)

    return grads

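
# _get_metric above reads a single scalar out of the sed_eval results returned by
# official_evaluate. A minimal sketch, assuming the result keys that are read
# elsewhere in this file (results['overall']['f_measure']['f_measure'] and
# results['overall']['error_rate']['error_rate']); the "_sketch" name is
# illustrative and not the project's actual helper.
def _get_metric_sketch(results, metric_type):
    if metric_type == 'f1':
        return results['overall']['f_measure']['f_measure']
    elif metric_type == 'er':
        return results['overall']['error_rate']['error_rate']
    else:
        raise ValueError('Unknown metric_type: {}'.format(metric_type))
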
def evaluate(self, data_type, metadata_dir, submissions_dir, max_validate_num=None):
    '''Evaluate the performance.

    Args: 
      data_type: 'train' | 'validate'
      metadata_dir: string, directory of reference meta csvs
      submissions_dir: string, directory to write out submission csvs
      max_validate_num: None | int, maximum iteration to run to speed up evaluation
    '''
    # Forward
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_validate_num=max_validate_num)

    list_dict = forward(
        model=self.model, 
        generate_func=generate_func, 
        cuda=self.cuda, 
        return_target=True)

    # Calculate loss
    (total_loss, event_loss, position_loss) = self.calculate_loss(list_dict)

    logging.info('{:<20} {}: {:.3f}, {}: {:.3f}, {}: {:.3f}'.format(
        data_type + ' statistics: ', 'total_loss', total_loss, 
        'event_loss', event_loss, 'position_loss', position_loss))

    # Write out submission and evaluate with the tool provided by the organizer
    write_submission(list_dict, submissions_dir)

    prediction_paths = [
        os.path.join(submissions_dir, '{}.csv'.format(entry['name']))
        for entry in list_dict]

    statistics = calculate_metrics(metadata_dir, prediction_paths)

    for key in statistics.keys():
        logging.info('    {:<20} {:.3f}'.format(key + ' :', statistics[key]))

    return statistics

def evaluate(self, data_loader, reference_csv_path, submission_path):
    """Evaluate AT and SED performance.

    Args:
      data_loader: object
      reference_csv_path: str, strongly labelled ground truth csv
      submission_path: str, path to write out submission file

    Returns:
      statistics: dict
      output_dict: dict
    """
    output_dict = forward(
        model=self.model, 
        data_loader=data_loader, 
        return_input=False, 
        return_target=True)

    statistics = {}

    # Clipwise statistics
    statistics['clipwise_ap'] = metrics.average_precision_score(
        output_dict['target'], output_dict['clipwise_output'], average=None)

    # Framewise statistics
    if 'strong_target' in output_dict.keys():
        statistics['framewise_ap'] = sed_average_precision(
            output_dict['strong_target'], output_dict['framewise_output'], 
            average=None)

    # Framewise predictions to eventwise predictions
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, self.sed_params_dict)

    # Write eventwise predictions to submission file
    write_submission(predict_event_list, submission_path)

    # SED with official tool
    statistics['sed_metrics'] = official_evaluate(
        reference_csv_path, submission_path)

    return statistics, output_dict

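
# Toy, self-contained example of the clipwise AP computation used above: with
# average=None, sklearn's average_precision_score returns one AP per class, and
# the mean of that vector is the clipwise mAP. The arrays below are illustrative
# dummy data, not values from this project.
import numpy as np
from sklearn import metrics as _sk_metrics

_toy_target = np.array([[1, 0], [0, 1], [1, 1]])
_toy_clipwise_output = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.6]])
_per_class_ap = _sk_metrics.average_precision_score(
    _toy_target, _toy_clipwise_output, average=None)
print('toy clipwise AP per class: {}, mAP: {:.3f}'.format(
    _per_class_ap, np.mean(_per_class_ap)))
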
def evaluate(self, reference_csv_path, submission_path):
    """Evaluate AT and SED performance.

    Args:
      reference_csv_path: str, strongly labelled ground truth csv
      submission_path: str, path to write out submission file
    """
    output_dict = forward(
        model=self.model, 
        generator=self.generator, 
        return_input=False, 
        return_target=True)

    predictions = {
        'clipwise_output': output_dict['clipwise_output'], 
        'framewise_output': output_dict['framewise_output']}

    statistics = {}

    # Weak statistics
    clipwise_ap = metrics.average_precision_score(
        output_dict['target'], output_dict['clipwise_output'], average=None)
    statistics['clipwise_ap'] = clipwise_ap
    logging.info('    clipwise mAP: {:.3f}'.format(np.mean(clipwise_ap)))

    if 'strong_target' in output_dict.keys():
        framewise_ap = sed_average_precision(
            output_dict['strong_target'], output_dict['framewise_output'], 
            average=None)
        statistics['framewise_ap'] = framewise_ap
        logging.info('    framewise mAP: {:.3f}'.format(np.mean(framewise_ap)))

    # Obtain eventwise predictions from framewise predictions using 
    # predefined thresholds
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, self.sed_params_dict)

    # Write predicted events to submission file
    write_submission(predict_event_list, submission_path)

    # SED with official tool
    results = official_evaluate(reference_csv_path, submission_path)
    logging.info('    {}'.format(results['overall']['error_rate']))
    statistics['sed_metrics'] = results

    return statistics, predictions

def __call__(self, params):
    """Use the hyper-parameters to threshold the prediction and obtain eventwise 
    output, then calculate the score between the output and the target.
    """
    params_dict = self.params_list_to_params_dict(params)
    # params_dict['n_smooth'] = 1
    # params_dict['n_salt'] = 1

    predict_event_list = frame_prediction_to_event_prediction(
        self.output_dict, params_dict)

    # Write predicted events to submission file
    write_submission(predict_event_list, self.submission_path)

    # SED with official tool
    results = official_evaluate(self.reference_csv_path, self.submission_path)

    f1 = results['overall']['f_measure']['f_measure']

    return f1

def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file.

    Args:
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type, 
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        scalar=scalar, 
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False, 
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)

def evaluate(self, data_type, metadata_path, submission_path, max_iteration=None):
    '''Write out submission file and evaluate the performance.

    Args: 
      data_type: 'train' | 'validate'
      metadata_path: string, path of reference csv
      submission_path: string, path to write out submission
      max_iteration: None | int, maximum iteration to run to speed up evaluation
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model, 
        generate_func=generate_func, 
        cuda=self.cuda, 
        return_target=True)

    # Evaluate audio tagging
    if 'weak_target' in output_dict:
        weak_target = output_dict['weak_target']
        clipwise_output = output_dict['clipwise_output']
        average_precision = metrics.average_precision_score(
            weak_target, clipwise_output, average=None)
        mAP = np.mean(average_precision)

        logging.info('{} statistics:'.format(data_type))
        logging.info('    Audio tagging mAP: {:.3f}'.format(mAP))

        statistics = {}
        statistics['average_precision'] = average_precision

    if 'strong_target' in output_dict:
        # Write out submission file
        write_submission(output_dict, self.sed_params_dict, submission_path)

        # Evaluate SED with official tools
        reference_dict = read_csv_file_for_sed_eval_tool(metadata_path)
        predict_dict = read_csv_file_for_sed_eval_tool(submission_path)

        # Event & segment based metrics
        event_based_metric = sed_eval.sound_event.EventBasedMetrics(
            event_label_list=config.labels, 
            evaluate_onset=True, 
            evaluate_offset=True, 
            t_collar=0.200, 
            percentage_of_length=0.2)

        segment_based_metric = sed_eval.sound_event.SegmentBasedMetrics(
            event_label_list=config.labels, 
            time_resolution=0.2)

        for audio_name in output_dict['audio_name']:
            if audio_name in reference_dict.keys():
                ref_list = reference_dict[audio_name]
            else:
                ref_list = []

            if audio_name in predict_dict.keys():
                pred_list = predict_dict[audio_name]
            else:
                pred_list = []

            event_based_metric.evaluate(ref_list, pred_list)
            segment_based_metric.evaluate(ref_list, pred_list)

        event_metrics = event_based_metric.results_class_wise_average_metrics()
        f_measure = event_metrics['f_measure']['f_measure']
        error_rate = event_metrics['error_rate']['error_rate']
        deletion_rate = event_metrics['error_rate']['deletion_rate']
        insertion_rate = event_metrics['error_rate']['insertion_rate']

        statistics['event_metrics'] = {
            'f_measure': f_measure, 
            'error_rate': error_rate, 
            'deletion_rate': deletion_rate, 
            'insertion_rate': insertion_rate}

        logging.info('    Event-based, classwise F score: {:.3f}, ER: '
            '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
            f_measure, error_rate, deletion_rate, insertion_rate))

        segment_metrics = segment_based_metric.results_class_wise_average_metrics()
        f_measure = segment_metrics['f_measure']['f_measure']
        error_rate = segment_metrics['error_rate']['error_rate']
        deletion_rate = segment_metrics['error_rate']['deletion_rate']
        insertion_rate = segment_metrics['error_rate']['insertion_rate']

        statistics['segment_metrics'] = {
            'f_measure': f_measure, 
            'error_rate': error_rate, 
            'deletion_rate': deletion_rate, 
            'insertion_rate': insertion_rate}

        logging.info('    Segment-based, classwise F score: {:.3f}, ER: '
            '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
            f_measure, error_rate, deletion_rate, insertion_rate))

        if self.verbose:
            logging.info(event_based_metric)
            logging.info(segment_based_metric)

    return statistics

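
# read_csv_file_for_sed_eval_tool above groups the reference / submission csv rows
# by audio file for sed_eval. A hedged sketch of such a reader, assuming a
# tab-separated strong-label format of "filename<TAB>onset<TAB>offset<TAB>label"
# rows and the event dict keys sed_eval expects; the "_sketch" name and the exact
# csv layout are assumptions, not the project's actual implementation.
import csv


def read_csv_file_for_sed_eval_tool_sketch(csv_path):
    event_dict = {}
    with open(csv_path, 'r') as f:
        for row in csv.reader(f, delimiter='\t'):
            if len(row) < 4:
                continue
            audio_name, onset, offset, event_label = row[0], row[1], row[2], row[3]
            event_dict.setdefault(audio_name, []).append({
                'file': audio_name, 
                'event_onset': float(onset), 
                'event_offset': float(offset), 
                'event_label': event_label})
    return event_dict
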
model.load_weights(checkpoint_path)

summary_path = os.path.join(SUMMARY_PATH, 'model_{}'.format(num_folds))
mkdirp(summary_path)

callbacks = [
    EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto'), 
    ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, 
        save_best_only=True, mode='auto'), 
    TensorBoard(log_dir=summary_path, histogram_freq=0)]

model.fit(X_train, y_train, 
    batch_size=BATCH_SIZE, 
    nb_epoch=NB_EPOCHS, 
    shuffle=True, 
    verbose=1, 
    validation_data=(X_valid, y_valid), 
    callbacks=callbacks)

predictions_valid = model.predict(X_valid, batch_size=100, verbose=1)
score_valid = log_loss(y_valid, predictions_valid)
scores_total.append(score_valid)
print('Score: {}'.format(score_valid))

predictions_test = model.predict(X_test, batch_size=100, verbose=1)
predictions_total.append(predictions_test)

num_folds += 1

score_geom = calc_geom(scores_total, MAX_FOLDS)
predictions_geom = calc_geom_arr(predictions_total, MAX_FOLDS)

submission_path = os.path.join(
    SUMMARY_PATH, 'submission_{}_{:.2}.csv'.format(int(time.time()), score_geom))
write_submission(predictions_geom, X_test_ids, submission_path)

wait += 1
print('Validation loss did not improve for {}/{} epochs.'.format(wait, patience))

if wait == 2:
    print('Stopping early. Validation loss did not improve for {}/{} epochs.'
        .format(wait, patience))
    break

model.summary_writer.close()
scores_total.append(score)

print('Begin evaluation...')
predictions = model.evaluate(X_test)
predictions_total.append(predictions)

num_folds += 1

score_geom = calc_geom(scores_total, num_folds)
predictions_geom = calc_geom_arr(predictions_total, num_folds)

print('Writing submission for {} folds, score: {}...'.format(num_folds, score_geom))
submission_dest = os.path.join(
    SUMMARY_PATH, 'submission_{}_{}.csv'.format(int(time.time()), score_geom))
write_submission(predictions_geom, X_test_ids, submission_dest)

print('Done.')

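
# calc_geom / calc_geom_arr above aggregate the per-fold validation scores and the
# per-fold test predictions before writing the submission. A minimal sketch,
# assuming they compute geometric means across folds (the "_sketch" names are
# illustrative, not the original helpers).
import numpy as np


def calc_geom_sketch(scores, num_folds):
    """Geometric mean of the first num_folds scalar scores."""
    scores = np.asarray(scores[:num_folds], dtype=np.float64)
    return float(np.prod(scores) ** (1.0 / num_folds))


def calc_geom_arr_sketch(predictions_list, num_folds):
    """Element-wise geometric mean of the first num_folds prediction arrays."""
    product = np.ones_like(np.asarray(predictions_list[0], dtype=np.float64))
    for fold_predictions in predictions_list[:num_folds]:
        product *= np.asarray(fold_predictions, dtype=np.float64)
    return product ** (1.0 / num_folds)
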
def inference_test(args):
    '''Inference and calculate metrics on validation data.

    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_sources: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool, visualize the logmel spectrogram of segments
    '''
    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = args.holdout_fold  # Use model trained on full data without validation

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_feature_hdf5_path = os.path.join(
        workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        'test.h5')

    scalar_path = os.path.join(
        workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        'train_noisy.h5')

    if not resume:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename, 
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 
            'train_source={}'.format(train_source), 
            'segment={}s,hop={}s,pad_type={}'.format(
                segment_seconds, hop_seconds, pad_type), 
            'holdout_fold={}'.format(holdout_fold), 
            model_type, '{}_iterations.pth'.format(iteration))

        submission_path = os.path.join(
            workspace, 'submissions', filename, 
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 
            'train_source={}'.format(train_source), 
            'segment={}s,hop={}s,pad_type={}'.format(
                segment_seconds, hop_seconds, pad_type), 
            'holdout_fold={}'.format(holdout_fold), 
            model_type, '{}_iterations_submission.csv'.format(iteration))
        create_folder(os.path.dirname(submission_path))
    else:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename, 
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 
            'train_source={}'.format(train_source), 
            'segment={}s,hop={}s,pad_type={}'.format(
                segment_seconds, hop_seconds, pad_type), 
            'holdout_fold={}'.format(holdout_fold), 
            model_type, 'resume', '{}_iterations.pth'.format(iteration))

        submission_path = os.path.join(
            workspace, 'submissions', filename, 
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 
            'train_source={}'.format(train_source), 
            'segment={}s,hop={}s,pad_type={}'.format(
                segment_seconds, hop_seconds, pad_type), 
            'holdout_fold={}'.format(holdout_fold), 
            model_type, 'resume', '{}_iterations_submission.csv'.format(iteration))
        create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)

    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path, 
        segment_seconds=segment_seconds, 
        hop_seconds=hop_seconds, 
        pad_type=pad_type, 
        scalar=scalar, 
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward_infer(
        model=model, 
        generate_func=generate_func, 
        cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(
        output_dict, average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)

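
# segment_prediction_to_clip_prediction above merges per-segment model outputs
# back into one prediction per audio recording. A hedged sketch under the
# assumption that output_dict carries parallel 'audio_name' and 'output' entries
# and that average='arithmetic' means a plain mean over the segments of each clip;
# both the key names and the "_sketch" name are illustrative assumptions.
import numpy as np


def segment_prediction_to_clip_prediction_sketch(output_dict, average='arithmetic'):
    if average != 'arithmetic':
        raise NotImplementedError('Only arithmetic averaging is sketched here.')

    clip_outputs = {}
    for name, output in zip(output_dict['audio_name'], output_dict['output']):
        clip_outputs.setdefault(name, []).append(output)

    result_dict = {'audio_name': [], 'output': []}
    for name, outputs in clip_outputs.items():
        result_dict['audio_name'].append(name)
        result_dict['output'].append(np.mean(outputs, axis=0))
    return result_dict
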
def calculate_metrics(args):
    """Calculate metrics.

    Args:
      dataset_dir: str
      workspace: str
      holdout_fold: '1'
      model_type: str, e.g., 'Cnn_9layers_Gru_FrameAtt'
      loss_type: str, e.g., 'clip_bce'
      augmentation: str, e.g., 'mixup'
      batch_size: int
      iteration: int
      data_type: 'test' | 'evaluate'
      at_thresholds: bool
      sed_thresholds: bool
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    filename = args.filename
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    augmentation = args.augmentation
    batch_size = args.batch_size
    iteration = args.iteration
    data_type = args.data_type
    at_thresholds = args.at_thresholds
    sed_thresholds = args.sed_thresholds

    classes_num = config.classes_num

    # Paths
    if data_type == 'test':
        reference_csv_path = os.path.join(dataset_dir, 'metadata', 
            'groundtruth_strong_label_testing_set.csv')
    elif data_type == 'evaluate':
        reference_csv_path = os.path.join(dataset_dir, 'metadata', 
            'groundtruth_strong_label_evaluation_set.csv')

    prediction_path = os.path.join(workspace, 'predictions', 
        '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        '{}_iterations.prediction.{}.pkl'.format(iteration, data_type))

    tmp_submission_path = os.path.join(workspace, '_tmp_submission', 
        '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        '_submission.csv')

    # Load thresholds
    if at_thresholds:
        at_thresholds_path = os.path.join(workspace, 'opt_thresholds', 
            '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
            'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
            'augmentation={}'.format(augmentation), 
            'batch_size={}'.format(batch_size), 
            '{}_iterations.at.test.pkl'.format(iteration))
        at_thresholds = pickle.load(open(at_thresholds_path, 'rb'))
    else:
        at_thresholds = [0.3] * classes_num

    if sed_thresholds:
        sed_thresholds_path = os.path.join(workspace, 'opt_thresholds', 
            '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
            'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
            'augmentation={}'.format(augmentation), 
            'batch_size={}'.format(batch_size), 
            '{}_iterations.sed.test.pkl'.format(iteration))
        sed_thresholds = pickle.load(open(sed_thresholds_path, 'rb'))
    else:
        sed_thresholds = {
            'audio_tagging_threshold': 0.5, 
            'sed_high_threshold': 0.3, 
            'sed_low_threshold': 0.1, 
            'n_smooth': 10, 
            'n_salt': 10}

    # Load predictions
    output_dict = pickle.load(open(prediction_path, 'rb'))

    print('------ Audio tagging results ------')

    # Macro mAP
    mAP = metrics.average_precision_score(output_dict['target'], 
        output_dict['clipwise_output'], average='macro')

    # Micro precision, recall, F1
    (precision, recall, f1) = calculate_precision_recall_f1(
        output_dict['target'], output_dict['clipwise_output'], 
        thresholds=at_thresholds)

    print('Macro mAP: {:.3f}'.format(mAP))
    print('Micro precision: {:.3f}'.format(precision))
    print('Micro recall: {:.3f}'.format(recall))
    print('Micro F1: {:.3f}'.format(f1))

    print('------ Sound event detection ------')

    predict_event_list = frame_prediction_to_event_prediction(output_dict, 
        sed_thresholds)

    # Write predicted events to submission file
    write_submission(predict_event_list, tmp_submission_path)

    # SED with official tool
    results = official_evaluate(reference_csv_path, tmp_submission_path)
    sed_precision = get_metric(results, 'precision')
    sed_recall = get_metric(results, 'recall')
    sed_f1 = get_metric(results, 'f1')
    sed_er = get_metric(results, 'er')

    print('Micro precision: {:.3f}'.format(sed_precision))
    print('Micro recall: {:.3f}'.format(sed_recall))
    print('Micro F1: {:.3f}'.format(sed_f1))
    print('Micro ER: {:.3f}'.format(sed_er))

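
# calculate_precision_recall_f1 above turns clipwise probabilities into binary
# decisions with per-class thresholds and reports micro-averaged scores. A minimal
# sketch of that computation (the "_sketch" name is illustrative, not the
# project's actual helper); micro averaging pools true/false positives over all
# (clip, class) pairs before computing precision, recall and F1.
import numpy as np


def calculate_precision_recall_f1_sketch(target, clipwise_output, thresholds):
    binarized = (clipwise_output > np.asarray(thresholds)[None, :]).astype(np.float32)
    tp = np.sum(binarized * target)
    fp = np.sum(binarized * (1 - target))
    fn = np.sum((1 - binarized) * target)
    precision = tp / max(tp + fp, 1e-8)
    recall = tp / max(tp + fn, 1e-8)
    f1 = 2 * precision * recall / max(precision + recall, 1e-8)
    return precision, recall, f1
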
def calculate_metrics(args):
    """Calculate metrics with optimized thresholds.
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    few_shots = args.few_shots
    random_seed = args.random_seed
    iteration = args.iteration
    filename = args.filename
    mini_data = False
    pretrain = False

    classes_num = config.classes_num

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_hdf5_path = os.path.join(workspace, 'features', 
        '{}testing.waveform.h5'.format(prefix))

    evaluate_hdf5_path = os.path.join(workspace, 'features', 
        '{}evaluation.waveform.h5'.format(prefix))

    test_reference_csv_path = os.path.join(dataset_dir, 'metadata', 
        'groundtruth_strong_label_testing_set.csv')

    evaluate_reference_csv_path = os.path.join(dataset_dir, 'metadata', 
        'groundtruth_strong_label_evaluation_set.csv')

    predictions_dir = os.path.join(workspace, 'predictions', 
        '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'few_shots={}'.format(few_shots), 
        'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base), 
        'batch_size={}'.format(batch_size))

    tmp_submission_path = os.path.join(workspace, '_tmp_submission', 
        '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 
        'freeze_base={}'.format(freeze_base), '_submission.csv')

    post_processing_params_dir = os.path.join(workspace, 'post_processing_params', 
        '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 
        'freeze_base={}'.format(freeze_base))

    t1 = time.time()

    # Calculate metrics for AT
    if True:
        print('------ AT ------')

        # Load auto thresholds
        post_processing_params_path = os.path.join(post_processing_params_dir, 
            'at_f1.npy')
        auto_thresholds = cPickle.load(open(post_processing_params_path, 'rb'))

        average = 'micro'

        # ------ Test metrics ------
        # Paths
        prediction_path = os.path.join(predictions_dir, 
            '{}_iterations.prediction.test.h5'.format(iteration))

        # Load ground truth weak target
        with h5py.File(test_hdf5_path, 'r') as hf:
            weak_target = hf['weak_target'][:].astype(np.float32)

        # Load prediction probability
        (clipwise_prediction, framewise_prediction) = load_prediction(prediction_path)

        # Macro mAP
        mAP = metrics.average_precision_score(weak_target, clipwise_prediction, 
            average='macro')
        print('test macro mAP: {:.3f}'.format(mAP))

        # Metrics without thresholds optimization
        manual_thres_f1 = calculate_f1(weak_target, clipwise_prediction, 
            thresholds=[0.3] * classes_num, average=average)

        manual_thres_prec, manual_thres_recall = calculate_precision_recall(
            weak_target, clipwise_prediction, thresholds=[0.3] * classes_num, 
            average=average)

        print('(no_opt_thres) test f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.
            format(manual_thres_f1, manual_thres_prec, manual_thres_recall))

        # Metrics with thresholds optimization
        auto_thres_f1 = calculate_f1(weak_target, clipwise_prediction, 
            thresholds=auto_thresholds, average=average)

        auto_thres_prec, auto_thres_recall = calculate_precision_recall(
            weak_target, clipwise_prediction, thresholds=auto_thresholds, 
            average=average)

        print('(opt_thres) test f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.format(
            auto_thres_f1, auto_thres_prec, auto_thres_recall))

        # ------ Evaluate metrics ------
        # Paths
        prediction_path = os.path.join(predictions_dir, 
            '{}_iterations.prediction.{}.h5'.format(iteration, 'evaluate'))

        # Load ground truth weak target
        with h5py.File(evaluate_hdf5_path, 'r') as hf:
            weak_target = hf['weak_target'][:].astype(np.float32)

        # Load prediction probability
        (clipwise_prediction, framewise_prediction) = load_prediction(prediction_path)

        # Macro mAP
        mAP = metrics.average_precision_score(weak_target, clipwise_prediction, 
            average='macro')
        print('evaluate macro mAP: {:.3f}'.format(mAP))

        # Metrics without thresholds optimization
        manual_thres_f1 = calculate_f1(weak_target, clipwise_prediction, 
            thresholds=[0.3] * classes_num, average=average)

        manual_thres_prec, manual_thres_recall = calculate_precision_recall(
            weak_target, clipwise_prediction, thresholds=[0.3] * classes_num, 
            average=average)

        print('(no_opt_thres) evaluate f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.format(
            manual_thres_f1, manual_thres_prec, manual_thres_recall))

        # Metrics with thresholds optimization
        auto_thres_f1 = calculate_f1(weak_target, clipwise_prediction, 
            auto_thresholds, average=average)

        auto_thres_prec, auto_thres_recall = calculate_precision_recall(
            weak_target, clipwise_prediction, thresholds=auto_thresholds, 
            average=average)

        print('(opt_thres) evaluate f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.
            format(auto_thres_f1, auto_thres_prec, auto_thres_recall))

        print()

    # Calculate metrics for SED
    if True:
        print('------ SED ------')

        # Initial thresholds for SED
        sed_params_dict = {
            'audio_tagging_threshold': [0.3] * classes_num, 
            'sed_high_threshold': [0.3] * classes_num, 
            'sed_low_threshold': [0.05] * classes_num, 
            'n_smooth': [1] * classes_num, 
            'n_salt': [1] * classes_num}

        for metric_idx, metric_type in enumerate(['f1', 'er']):
            print('*** Metric type: {} ***'.format(metric_type))

            # Load optimized thresholds
            post_processing_params_path = os.path.join(
                post_processing_params_dir, 'sed_{}.npy'.format(metric_type))
            auto_sed_params_dict = cPickle.load(
                open(post_processing_params_path, 'rb'))

            # ------ Test ------
            # Paths
            prediction_path = os.path.join(predictions_dir, 
                '{}_iterations.prediction.test.h5'.format(iteration))

            # Load ground truth strong target
            with h5py.File(test_hdf5_path, 'r') as hf:
                audio_name = [name.decode() for name in hf['audio_name'][:]]
                strong_target = hf['strong_target'][:].astype(np.float32)

            # Load prediction probability
            (clipwise_prediction, framewise_prediction) = load_prediction(
                prediction_path)

            output_dict = {
                'audio_name': audio_name, 
                'clipwise_output': clipwise_prediction, 
                'framewise_output': framewise_prediction}

            # Macro framewise mAP
            if metric_idx == 0:
                mAP = metrics.average_precision_score(
                    strong_target.reshape((strong_target.shape[0] * 
                        strong_target.shape[1], strong_target.shape[2])), 
                    framewise_prediction.reshape((framewise_prediction.shape[0] * 
                        framewise_prediction.shape[1], 
                        framewise_prediction.shape[2])), 
                    average='macro')
                print('test macro mAP: {:.3f}'.format(mAP))

            # Eventwise prediction without thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(test_reference_csv_path, 
                tmp_submission_path)
            metric = _get_metric(results, metric_type)
            print('(no_opt_thres) test {}: {:.3f}'.format(metric_type, metric))

            # Eventwise prediction with thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, auto_sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(test_reference_csv_path, 
                tmp_submission_path)
            metric = _get_metric(results, metric_type)
            print('(opt_thres) test {}: {:.3f}'.format(metric_type, metric))

            # ------ Evaluate ------
            # Paths
            prediction_path = os.path.join(predictions_dir, 
                '{}_iterations.prediction.evaluate.h5'.format(iteration))

            # Load ground truth strong target
            with h5py.File(evaluate_hdf5_path, 'r') as hf:
                audio_name = [name.decode() for name in hf['audio_name'][:]]
                strong_target = hf['strong_target'][:].astype(np.float32)

            # Load prediction probability
            (clipwise_prediction, framewise_prediction) = load_prediction(
                prediction_path)

            output_dict = {
                'audio_name': audio_name, 
                'clipwise_output': clipwise_prediction, 
                'framewise_output': framewise_prediction}

            # Macro framewise mAP
            if metric_idx == 0:
                mAP = metrics.average_precision_score(
                    strong_target.reshape((strong_target.shape[0] * 
                        strong_target.shape[1], strong_target.shape[2])), 
                    framewise_prediction.reshape((framewise_prediction.shape[0] * 
                        framewise_prediction.shape[1], 
                        framewise_prediction.shape[2])), 
                    average='macro')
                print('evaluate mAP: {:.3f}'.format(mAP))

            # Eventwise prediction without thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(evaluate_reference_csv_path, 
                tmp_submission_path)
            value = _get_metric(results, metric_type)
            print('(no_opt_thres) evaluate {}: {:.3f}'.format(metric_type, value))

            # Metrics with thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, auto_sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(evaluate_reference_csv_path, 
                tmp_submission_path)
            value = _get_metric(results, metric_type)
            print('(opt_thres) evaluate {}: {:.3f}'.format(metric_type, value))

            print()

    print('time: {:.3f} s'.format(time.time() - t1))