def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.

    :param solution: ground-truth array with values in {0, 1}
    :param prediction: predicted probabilities with values in [0, 1]
    :param task: task type, e.g. BINARY_CLASSIFICATION
    :return: normalized score (0 for random, 1 for perfect)
    """
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior probability of the positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same thing except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp,
    # because exp is a nonlinear operation.
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / np.maximum(eps, 1 - base_pac)
    return score
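# Illustration only (not part of the library): a self-contained sketch of the
# binary-case computation above, inlining what the module helpers log_loss and
# prior_log_loss are assumed to compute. It shows the shape of the PAC score:
# exponentiated negative log loss, normalized against the class-prior baseline.
import numpy as np

def _pac_binary_sketch(solution, prediction, eps=1e-15):
    # solution: (n, 1) array in {0, 1}; prediction: (n, 1) array in [0, 1]
    pred = np.clip(prediction, eps, 1 - eps)
    # Mean cross-entropy of the model's predictions
    model_ll = -np.mean(solution * np.log(pred) +
                        (1 - solution) * np.log(1 - pred), axis=0)
    # Baseline: always predict the positive-class prior
    frac_pos = np.clip(solution.mean(axis=0), eps, 1 - eps)
    base_ll = -(frac_pos * np.log(frac_pos) +
                (1 - frac_pos) * np.log(1 - frac_pos))
    # Exponentiate, then normalize: 0 matches the prior baseline, 1 is perfect
    pac = np.mean(np.exp(-model_ll))
    base_pac = np.mean(np.exp(-base_ll))
    return (pac - base_pac) / np.maximum(eps, 1 - base_pac)

# e.g. _pac_binary_sketch(np.array([[1], [0], [1], [1]], dtype=float),
#                         np.array([[0.9], [0.2], [0.8], [0.6]]))  # ~0.46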
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.

    :param solution: ground-truth array with values in {0, 1}
    :param prediction: predicted probabilities with values in [0, 1]
    :param task: task type, e.g. BINARY_CLASSIFICATION
    :return: normalized score (0 for random, 1 for perfect)
    """
    if task == BINARY_CLASSIFICATION:
        if len(solution.shape) == 1:
            # Solution won't be touched - no copy
            solution = solution.reshape((-1, 1))
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
        solution = solution.copy()

        if len(prediction.shape) == 2:
            if prediction.shape[1] > 2:
                raise ValueError('A prediction array with probability values '
                                 'for %d classes is not a binary '
                                 'classification problem'
                                 % prediction.shape[1])
            # Prediction will be copied into a new binary array - no copy
            prediction = prediction[:, 1].reshape((-1, 1))
        else:
            raise ValueError('Invalid prediction shape %s' % prediction.shape)
    elif task == MULTICLASS_CLASSIFICATION:
        if len(solution.shape) == 1:
            solution = create_multiclass_solution(solution, prediction)
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
            else:
                solution = create_multiclass_solution(
                    solution.reshape((-1, 1)), prediction)
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
    elif task == MULTILABEL_CLASSIFICATION:
        solution = solution.copy()
    else:
        raise NotImplementedError('pac_metric does not support task type %s'
                                  % task)
    solution, prediction = normalize_array(solution, prediction.copy())

    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-7

    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * np.sum(solution, axis=0, dtype=float)  # float conversion!
    frac_pos = pos_num / sample_num  # prior probability of the positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    the_log_loss = log_loss(solution, prediction, task)

    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp,
    # because exp is a nonlinear operation.
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / np.maximum(eps, 1 - base_pac)
    return score
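# Hedged usage sketch for the validated variant above. It relies on the
# surrounding module for pac_metric, the BINARY_CLASSIFICATION constant, and
# the helpers pac_metric calls (normalize_array, log_loss, prior_log_loss);
# only the toy arrays below are new. Binary predictions are passed as two
# columns, with column 1 holding the positive-class probability.
if __name__ == '__main__':
    import numpy as np

    y_true = np.array([0, 1, 1, 0])  # 1-D labels; reshaped internally
    y_prob = np.array([[0.7, 0.3],
                       [0.2, 0.8],
                       [0.4, 0.6],
                       [0.9, 0.1]])
    # ~0 for prior-level predictions, 1 for perfect ones
    print(pac_metric(y_true, y_prob, task=BINARY_CLASSIFICATION))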