Example #1
import numpy as np
from dcase_util.data import ProbabilityEncoder


def get_f_measure_by_class(outputs, nb_tags, threshold=None):
    # Per-class confusion counts, accumulated over all batches
    TP = np.zeros(nb_tags)
    TN = np.zeros(nb_tags)
    FP = np.zeros(nb_tags)
    FN = np.zeros(nb_tags)

    binarization_type = 'global_threshold'
    probability_encoder = ProbabilityEncoder()
    # Test against None rather than truthiness, so an explicit
    # threshold of 0.0 is not silently replaced by the default
    threshold = 0.5 if threshold is None else threshold
    for predictions, utt_targets in outputs:
        predictions = probability_encoder.binarization(
            predictions,
            binarization_type=binarization_type,
            threshold=threshold,
            time_axis=0)
        # With binary {0, 1} arrays: sum == 2 -> both positive (TP),
        # difference == 1 -> predicted only (FP) or target only (FN),
        # sum == 0 -> both negative (TN)
        TP += (predictions + utt_targets == 2).sum(axis=0)
        FP += (predictions - utt_targets == 1).sum(axis=0)
        FN += (utt_targets - predictions == 1).sum(axis=0)
        TN += (predictions + utt_targets == 0).sum(axis=0)

    # F-score per class; the mask skips classes that are never active
    # and never predicted (2*TP + FP + FN == 0) to avoid division by zero
    macro_f_measure = np.zeros(nb_tags)
    mask_f_score = 2 * TP + FP + FN != 0
    macro_f_measure[mask_f_score] = (
        2 * TP[mask_f_score] / (2 * TP + FP + FN)[mask_f_score])

    return macro_f_measure
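
A minimal smoke test for get_f_measure_by_class, using random scores and binary targets; the batch structure of outputs and the (n_utterances, nb_tags) shapes are assumptions inferred from how the loop indexes its inputs:

import numpy as np

rng = np.random.default_rng(0)
nb_tags = 5
# Three batches; each element is a (scores, targets) pair of shape
# (n_utterances, nb_tags), matching what the accumulation loop expects
outputs = [(rng.random((8, nb_tags)),
            rng.integers(0, 2, (8, nb_tags)))
           for _ in range(3)]
f1_per_class = get_f_measure_by_class(outputs, nb_tags)
print(f1_per_class)         # one F-score per class, each in [0, 1]
print(f1_per_class.mean())  # macro-averaged F-score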
Example #2
import os

import torch
import kaldi_io
from scipy.signal import medfilt
from dcase_util.data import ProbabilityEncoder, DecisionEncoder
from dcase_util.containers import MetaDataContainer


def evaluate_threshold(model_path: str,
                       features: str = "features/logmel_64/test.ark",
                       result_filename: str = 'dev.txt',
                       test_labels: str = "metadata/test/test.csv",
                       threshold=0.5,
                       window=1,
                       hop_size=0.02):
    # `device`, `parsecopyfeats`, `event_based_evaluation` and
    # `precision_recall_fscore_on_offset` are assumed to be module-level
    # helpers from the surrounding project.
    # The checkpoint bundles the model with its preprocessing artifacts
    modeldump = torch.load(
        model_path,
        map_location=lambda storage, loc: storage)
    model = modeldump['model']
    config_parameters = modeldump['config']
    scaler = modeldump['scaler']
    many_hot_encoder = modeldump['encoder']
    model_dirname = os.path.dirname(model_path)
    meta_container_resultfile = os.path.join(
        model_dirname, "pred_nowindow.txt")
    metacontainer = MetaDataContainer(filename=meta_container_resultfile)

    # Read specifier for the Kaldi feature archive (project helper)
    kaldi_string = parsecopyfeats(
        features, **config_parameters['feature_args'])
    model = model.to(device).eval()

    probability_encoder = ProbabilityEncoder()
    decision_encoder = DecisionEncoder(
        label_list=many_hot_encoder.label_list
    )
    # A float threshold applies globally; a string is treated as a path
    # to torch-saved per-class thresholds
    binarization_type = 'global_threshold' if isinstance(
        threshold, float) else 'class_threshold'
    if isinstance(threshold, str):
        threshold = torch.load(threshold)
    # Same convention for the median-filter windows: one integer for
    # every class, or a path to a torch-saved per-class mapping
    windows = {k: window for k in many_hot_encoder.label_list}
    if isinstance(window, str):
        windows = torch.load(window)

    with torch.no_grad():
        for k, feat in kaldi_io.read_mat_ark(kaldi_string):
            # Add batch dim
            feat = torch.from_numpy(
                scaler.transform(feat)).to(device).unsqueeze(0)
            logits = model(feat)
            # Per-frame, per-class probabilities, shape (time, n_classes)
            probabilities = torch.sigmoid(logits).cpu().numpy().squeeze(0)
            frame_decisions = probability_encoder.binarization(
                probabilities=probabilities,
                binarization_type=binarization_type,
                threshold=threshold,
                time_axis=0,
            )
            for i, label in enumerate(many_hot_encoder.label_list):
                # Median-filter the per-class decision curve to remove
                # spurious single-frame events (kernel size must be odd)
                label_frame_decisions = medfilt(
                    frame_decisions[:, i], kernel_size=windows[label])
                # Found only zeros, no activity, go on
                if (label_frame_decisions == 0).all():
                    continue
                estimated_events = decision_encoder.find_contiguous_regions(
                    activity_array=label_frame_decisions
                )
                for onset, offset in estimated_events:
                    # Frame indices -> seconds via the frame hop
                    metacontainer.append({'event_label': label,
                                          'onset': onset * hop_size,
                                          'offset': offset * hop_size,
                                          'filename': os.path.basename(k)})
    metacontainer.save()
    estimated_event_list = MetaDataContainer().load(
        filename=meta_container_resultfile)
    reference_event_list = MetaDataContainer().load(filename=test_labels)

    event_based_metric = event_based_evaluation(
        reference_event_list, estimated_event_list)
    onset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, offset=False)
    offset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, onset=False)
    onset_offset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list)
    # Utterance-level (clip) label scores, ignoring on/offset timing
    precision_labels = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, onset=False,
        offset=False, label=True)

    print(event_based_metric)
    print("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}".format(
        "UttLabel", *precision_labels))
    print("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}".format(
        "Onset", *onset_scores))
    print("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}".format(
        "Offset", *offset_scores))
    print("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}".format(
        "On-Offset", *onset_offset_scores))

    result_filename = os.path.join(model_dirname, result_filename)

    with open(result_filename, 'w') as wp:
        wp.write(str(event_based_metric))
        wp.write('\n')
        wp.write("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}\n".format(
            "UttLabel", *precision_labels))
        wp.write("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}\n".format(
            "Onset", *onset_scores))
        wp.write("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}\n".format(
            "Offset", *offset_scores))
        wp.write("{:>10}: Precision: {:.1%} Recall: {:.1%} F-Score: {:.1%}\n".format(
            "On-Offset", *onset_offset_scores))