import numpy as np

import pmetrics  # assumed: project-local metrics helpers (exact import path may differ)


def evaluate_classification(groundtruth, predictions):
    '''
    Evaluation for the **classification mode**
      i.e. all classes in the groundtruth are 'counted'

    Parameters:
      groundtruth: numpy array-like, shape (N,) - integer label giving the class of each sample
      predictions: numpy array-like, shape (N, K) - per-class (softmax) scores for each sample

    Returns:
      dictionary with several evaluation metrics
    '''
    measures = {}
    confmat = pmetrics.softmax_to_confmat(groundtruth, predictions)
    measures['accuracy'] = pmetrics.accuracy(confmat)
    measures['BER'] = pmetrics.balanced_error_rate(confmat)
    return measures
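
# A minimal usage sketch (not part of the original code), assuming pmetrics is
# importable and that each row of `predictions` holds softmax scores over the
# K classes.  The dummy arrays are hypothetical and only illustrate the shapes.
def _demo_classification():
    gt = np.array([0, 1, 1])                 # three samples, two classes
    preds = np.array([[0.9, 0.1],            # correctly favours class 0
                      [0.2, 0.8],            # correctly favours class 1
                      [0.6, 0.4]])           # mistakenly favours class 0
    print(evaluate_classification(gt, preds))  # {'accuracy': ..., 'BER': ...}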
def evaluate_retrieval(groundtruth, predictions):
    '''
    Evaluation for the **retrieval viewpoint**
      i.e. the first class (index 0) is treated as the N/A (or '?') class
      For binary (?, yes, no)-type attributes this computes the AP

    Parameters:
      groundtruth: numpy array-like, shape (N,) - integer label giving the class of each sample
      predictions: numpy array-like, shape (N, K) - per-class (softmax) scores for each sample

    Returns:
      dictionary with several evaluation metrics
    '''
    measures = {}
    confmat = pmetrics.softmax_to_confmat(groundtruth, predictions)

    # check whether any samples were predicted as the first (N/A) class,
    # which is not supposed to happen in retrieval mode
    if np.any(confmat[:, 0] > 0):
        raise ValueError(
            'retrieval mode - but there were predictions for N/A class')

    # this computes the two-class accuracy - it does not reflect performance on
    # the examples with an N/A groundtruth label
    measures['accuracy'] = pmetrics.accuracy(confmat, ignore_na=True)

    if predictions.shape[1] == 3:
        # predictions need to be converted to a continuous score first
        pred_scores = pmetrics.softmax_prediction_to_binary(predictions)
        # convert groundtruth to {-1, 1} -- groundtruth input is in {0, 1, ..., K-1}
        gt_bin = pmetrics.labels_to_binary(groundtruth)

        measures['AP'] = pmetrics.average_precision_everingham(
            gt_bin, pred_scores)
    else:
        # non-binary case, e.g. Orientation, Orientation8, etc.
        # compute attribute-specific measures here... if you want
        pass
    return measures
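
# A minimal usage sketch for the retrieval path (not part of the original
# code), assuming class 0 is the N/A class and classes 1/2 are the yes/no
# answers of a binary attribute.  No prediction row puts its maximum on
# class 0, otherwise evaluate_retrieval raises the ValueError above.
def _demo_retrieval():
    gt = np.array([0, 1, 2, 1])                  # first sample has N/A groundtruth
    preds = np.array([[0.1, 0.7, 0.2],           # N/A sample, predicted class 1 (ignored by accuracy)
                      [0.0, 0.9, 0.1],           # correct
                      [0.1, 0.2, 0.7],           # correct
                      [0.0, 0.3, 0.7]])          # wrong: class 2 instead of 1
    print(evaluate_retrieval(gt, preds))         # includes 'accuracy' and 'AP'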