import numpy as np

# NOTE: `pmetrics` is the project's metrics helper module (confusion matrix,
# accuracy, BER, AP helpers); the plain `import pmetrics` below is an
# assumption about how that module is exposed in this repository.
import pmetrics


def evaluate_classification(groundtruth, predictions):
    '''
    Evaluation for the **classification mode**,
    i.e. all classes in the groundtruth are 'counted'.

    Parameters:
        groundtruth: numpy array-like, N (N samples - int label specifying the class)
        predictions: numpy array-like, N x K

    Returns:
        dictionary with several evaluation metrics
    '''
    measures = {}
    confmat = pmetrics.softmax_to_confmat(groundtruth, predictions)
    measures['accuracy'] = pmetrics.accuracy(confmat)
    measures['BER'] = pmetrics.balanced_error_rate(confmat)
    return measures
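
# Minimal usage sketch (assumption: not part of the original API) showing how
# `evaluate_classification` is meant to be called on a toy 3-class problem.
# The labels and scores below are purely illustrative; running this requires
# `pmetrics` to be importable.
def _demo_evaluate_classification():
    gt = np.array([0, 1, 2, 1, 0])                       # N integer class labels
    scores = np.random.rand(5, 3)                        # N x K raw scores
    preds = scores / scores.sum(axis=1, keepdims=True)   # row-normalise to softmax-like scores
    return evaluate_classification(gt, preds)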
def evaluate_retrieval(groundtruth, predictions):
    '''
    Evaluation for the **retrieval viewpoint**,
    i.e. the first class (idx 0) is treated as the N/A (or ?) class.
    For binary (?, yes, no)-type attributes this computes the AP.

    Parameters:
        groundtruth: numpy array-like, N (N samples - int label specifying the class)
        predictions: numpy array-like, N x K

    Returns:
        dictionary with several evaluation metrics
    '''
    measures = {}
    confmat = pmetrics.softmax_to_confmat(groundtruth, predictions)

    # Check whether there were predictions for the first class,
    # which is not supposed to happen in retrieval mode.
    if np.any(confmat[:, 0] > 0):
        raise ValueError(
            'retrieval mode - but there were predictions for N/A class')

    # This computes the two-class accuracy - it does not reflect performance
    # on the examples with an N/A groundtruth label.
    measures['accuracy'] = pmetrics.accuracy(confmat, ignore_na=True)

    if predictions.shape[1] == 3:
        # Predictions need to be converted to a continuous score first.
        pred_scores = pmetrics.softmax_prediction_to_binary(predictions)
        # Convert groundtruth to [-1, 1] -- groundtruth input is [0, 1, ..., N].
        gt_bin = pmetrics.labels_to_binary(groundtruth)
        measures['AP'] = pmetrics.average_precision_everingham(
            gt_bin, pred_scores)
    else:
        # Non-binary case, for example Orientation, Orientation8, etc.
        # Compute specific measures here... if you want.
        pass

    return measures
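
# Minimal usage sketch (assumption: not part of the original API) for the
# retrieval-mode evaluation of a binary (?, yes, no) attribute. Class 0 is the
# N/A class and must never be predicted, so the toy scores below put no mass
# on column 0; whether that avoids the N/A check depends on how
# `pmetrics.softmax_to_confmat` assigns predictions. Illustrative only.
def _demo_evaluate_retrieval():
    gt = np.array([0, 1, 2, 2, 1])               # 0 = N/A, 1/2 = the two real classes
    preds = np.array([[0.0, 0.7, 0.3],
                      [0.0, 0.9, 0.1],
                      [0.0, 0.2, 0.8],
                      [0.0, 0.4, 0.6],
                      [0.0, 0.8, 0.2]])          # no probability mass on the N/A column
    return evaluate_retrieval(gt, preds)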