def get_f_measure_by_class(outputs, nb_tags, threshold=None):
    """Compute a per-class (macro) F-measure from binarized tag predictions.

    `outputs` is an iterable of (predictions, targets) pairs of shape
    (batch, nb_tags); predictions are binarized with a global threshold.
    """
    TP = np.zeros(nb_tags)
    TN = np.zeros(nb_tags)
    FP = np.zeros(nb_tags)
    FN = np.zeros(nb_tags)
    binarization_type = 'global_threshold'
    probability_encoder = ProbabilityEncoder()
    threshold = 0.5 if not threshold else threshold
    for predictions, utt_targets in outputs:
        predictions = probability_encoder.binarization(
            predictions,
            binarization_type=binarization_type,
            threshold=threshold,
            time_axis=0)
        # Accumulate per-class confusion counts over the whole set
        TP += (predictions + utt_targets == 2).sum(axis=0)
        FP += (predictions - utt_targets == 1).sum(axis=0)
        FN += (utt_targets - predictions == 1).sum(axis=0)
        TN += (predictions + utt_targets == 0).sum(axis=0)

    macro_f_measure = np.zeros(nb_tags)
    # Avoid division by zero for classes without any positives or predictions
    mask_f_score = 2 * TP + FP + FN != 0
    macro_f_measure[mask_f_score] = 2 * \
        TP[mask_f_score] / (2 * TP + FP + FN)[mask_f_score]
    return macro_f_measure
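
# Hedged usage sketch (not part of the original module): it assumes `outputs` is an
# iterable of (predictions, targets) pairs, each of shape (batch, nb_tags), with
# multi-hot {0, 1} targets, and that numpy and ProbabilityEncoder are importable as
# used above. The helper name `_example_get_f_measure_by_class` is hypothetical and
# only illustrates the expected call.
def _example_get_f_measure_by_class():
    import numpy as np
    nb_tags = 10
    rng = np.random.RandomState(0)
    # Two dummy batches of sigmoid-like scores and matching multi-hot targets
    outputs = [
        (rng.rand(8, nb_tags), (rng.rand(8, nb_tags) > 0.5).astype(int))
        for _ in range(2)
    ]
    per_class_f1 = get_f_measure_by_class(outputs, nb_tags, threshold=0.5)
    print("Per-class F1:", per_class_f1)
    print("Macro F1:", per_class_f1.mean())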
def evaluate_threshold(
        model_path: str,
        features: str = "features/logmel_64/test.ark",
        result_filename: str = 'dev.txt',
        test_labels: str = "metadata/test/test.csv",
        threshold=0.5,
        window=1,
        hop_size=0.02):
    """Evaluate a trained model on Kaldi test features.

    Frame probabilities are binarized (globally or per class), smoothed with a
    median filter, decoded into events and scored against the reference labels.
    `threshold` and `window` can also be paths to torch-saved per-class values.
    """
    from dcase_util.data import ProbabilityEncoder, DecisionEncoder, ManyHotEncoder
    from dcase_util.containers import MetaDataContainer
    from scipy.signal import medfilt
    modeldump = torch.load(
        model_path, map_location=lambda storage, loc: storage)
    model = modeldump['model']
    config_parameters = modeldump['config']
    scaler = modeldump['scaler']
    many_hot_encoder = modeldump['encoder']
    model_dirname = os.path.dirname(model_path)
    meta_container_resultfile = os.path.join(
        model_dirname, "pred_nowindow.txt")
    metacontainer = MetaDataContainer(filename=meta_container_resultfile)
    kaldi_string = parsecopyfeats(
        features, **config_parameters['feature_args'])
    model = model.to(device).eval()
    probability_encoder = ProbabilityEncoder()
    decision_encoder = DecisionEncoder(
        label_list=many_hot_encoder.label_list)
    binarization_type = 'global_threshold' if isinstance(
        threshold, float) else 'class_threshold'
    # If class thresholds are given (as a path), load and use those
    if isinstance(threshold, str):
        threshold = torch.load(threshold)
    # Per-class median-filter windows; default to the same window for all labels
    windows = {k: window for k in many_hot_encoder.label_list}
    if isinstance(window, str):
        windows = torch.load(window)
    with torch.no_grad():
        for k, feat in kaldi_io.read_mat_ark(kaldi_string):
            # Scale the features and add a batch dimension
            feat = torch.from_numpy(
                scaler.transform(feat)).to(device).unsqueeze(0)
            feat = model(feat)
            probabilities = torch.sigmoid(feat).cpu().numpy().squeeze(0)
            frame_decisions = probability_encoder.binarization(
                probabilities=probabilities,
                binarization_type=binarization_type,
                threshold=threshold,
                time_axis=0,
            )
            for i, label in enumerate(many_hot_encoder.label_list):
                label_frame_decisions = medfilt(
                    frame_decisions[:, i], kernel_size=windows[label])
                # Only zeros found, no activity for this label, go on
                if (label_frame_decisions == 0).all():
                    continue
                estimated_events = decision_encoder.find_contiguous_regions(
                    activity_array=label_frame_decisions)
                for onset, offset in estimated_events:
                    metacontainer.append({
                        'event_label': label,
                        'onset': onset * hop_size,
                        'offset': offset * hop_size,
                        'filename': os.path.basename(k),
                    })
    metacontainer.save()
    estimated_event_list = MetaDataContainer().load(
        filename=meta_container_resultfile)
    reference_event_list = MetaDataContainer().load(filename=test_labels)
    event_based_metric = event_based_evaluation(
        reference_event_list, estimated_event_list)
    onset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, offset=False)
    offset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, onset=False)
    onset_offset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list)
    # Utterance-wise label scores (ignoring on/offsets)
    precision_labels = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list,
        onset=False, offset=False, label=True)
    print(event_based_metric)
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format(
        "UttLabel", *precision_labels))
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format(
        "Onset", *onset_scores))
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format(
        "Offset", *offset_scores))
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format(
        "On-Offset", *onset_offset_scores))
    result_filename = os.path.join(model_dirname, result_filename)
    with open(result_filename, 'w') as wp:
        wp.write(str(event_based_metric))
        wp.write('\n')
wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format( "UttLabel", *precision_labels)) wp.write( "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Onset", *onset_scores)) wp.write( "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Offset", *offset_scores)) wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format( "On-Offset", *onset_offset_scores))