Ejemplo n.º 1
0
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    :param solution:
    :param prediction:
    :param task:
    :return:
    """
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    :param solution:
    :param prediction:
    :param task:
    :return:
    """
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = mv_mean(np.exp(-the_log_loss))
    base_pac = mv_mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Ejemplo n.º 3
0
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    :param solution:
    :param prediction:
    :param task:
    :return:
    """
    if task == BINARY_CLASSIFICATION:
        if len(solution.shape) == 1:
            # Solution won't be touched - no copy
            solution = solution.reshape((-1, 1))
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
        solution = solution.copy()

        if len(prediction.shape) == 2:
            if prediction.shape[1] > 2:
                raise ValueError('A prediction array with probability values '
                                 'for %d classes is not a binary '
                                 'classification problem' %
                                 prediction.shape[1])
            # Prediction will be copied into a new binary array - no copy
            prediction = prediction[:, 1].reshape((-1, 1))
        else:
            raise ValueError('Invalid prediction shape %s' % prediction.shape)

    elif task == MULTICLASS_CLASSIFICATION:
        if len(solution.shape) == 1:
            solution = create_multiclass_solution(solution, prediction)
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
            else:
                solution = create_multiclass_solution(
                    solution.reshape((-1, 1)), prediction)
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
    elif task == MULTILABEL_CLASSIFICATION:
        solution = solution.copy()
    else:
        raise NotImplementedError('auc_metric does not support task type %s' %
                                  task)
    solution, prediction = normalize_array(solution, prediction.copy())

    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-7
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * np.sum(solution, axis=0, dtype=float)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    the_log_loss = log_loss(solution, prediction, task)

    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    :param solution:
    :param prediction:
    :param task:
    :return:
    """
    if task == BINARY_CLASSIFICATION:
        if len(solution.shape) == 1:
            # Solution won't be touched - no copy
            solution = solution.reshape((-1, 1))
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
            else:
                solution = solution[:, 1]
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
        solution = solution.copy()

        if len(prediction.shape) == 2:
            if prediction.shape[1] > 2:
                raise ValueError('A prediction array with probability values '
                                 'for %d classes is not a binary '
                                 'classification problem' % prediction.shape[1])
            # Prediction will be copied into a new binary array - no copy
            prediction = prediction[:, 1].reshape((-1, 1))
        else:
            raise ValueError('Invalid prediction shape %s' % prediction.shape)

    elif task == MULTICLASS_CLASSIFICATION:
        if len(solution.shape) == 1:
            solution = create_multiclass_solution(solution, prediction)
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
            else:
                solution = create_multiclass_solution(solution.reshape((-1, 1)),
                                                      prediction)
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
    elif task == MULTILABEL_CLASSIFICATION:
        solution = solution.copy()
    else:
        raise NotImplementedError('auc_metric does not support task type %s'
                                  % task)
    solution, prediction = normalize_array(solution, prediction.copy())

    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-7
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * np.sum(solution, axis=0, dtype=float)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    the_log_loss = log_loss(solution, prediction, task)

    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score