Code example #1
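Note: the snippets in this section are shown without their import block. A minimal set of imports that matches the names used below would look roughly like the following; `utils` is assumed to be a project-local module that provides the input-validation helpers, and `calc_chisquared_sample_size` (called in examples #3 and #4) is assumed to be defined elsewhere in the same module (a sketch of it follows example #3).

from typing import Dict, Optional, Sequence

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics
from matplotlib import axes
from matplotlib import pyplot

# Assumption: project-local helpers providing the assert_* validation functions.
import utils
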
def plot_predicted_probabilities(labels: np.ndarray,
                                 probability_predictions: np.ndarray,
                                 colors: Optional[Sequence[str]] = ('b', 'g'),
                                 print_stats: bool = True,
                                 fig_width: Optional[int] = 20,
                                 fig_height: Optional[int] = 15) -> axes.Axes:
    """Plots the distributions of predicted probabilities for each class.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    colors: Colors for the probability plots.
    print_stats: Whether to annotate each class label in the plot with the
      mean, standard deviation and median of its predicted probabilities.
    fig_width: Width of the figure.
    fig_height: Height of the figure.

  Returns:
    plots: Class density plots of the predicted probabilities.
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)
    assert len(np.unique(labels)) == len(colors), (
        'Number of colors should be the same as the number of unique labels.')

    unique_labels = np.sort(np.unique(labels))

    _, plots = pyplot.subplots(figsize=(fig_width, fig_height))
    for color, label in zip(colors, unique_labels):
        index_plot = np.where(labels == label)[0]
        preds_plot = probability_predictions[index_plot]
        label_plot = 'class[%s]' % (str(label))
        if print_stats:
            label_plot += ': mean=%.4f, std=%.4f, median=%.4f' % (
                np.mean(preds_plot), np.std(preds_plot), np.median(preds_plot))
        sns.kdeplot(preds_plot,
                    fill=True,  # 'shade' is deprecated in recent seaborn; 'fill' is the replacement.
                    color=color,
                    label=label_plot,
                    ax=plots)

    pyplot.title('Distribution of predicted probabilities')
    pyplot.legend()
    pyplot.xlabel('Probability')
    pyplot.ylabel('Density')
    pyplot.xlim([0, 1])

    return plots
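
A minimal usage sketch, assuming the imports above and this function are in scope; the labels and predictions below are synthetic and purely illustrative (the later sketches reuse them).

# Synthetic data for illustration only.
rng = np.random.default_rng(seed=0)
labels = rng.integers(0, 2, size=1000).astype(float)
preds = np.clip(rng.beta(a=2, b=5, size=1000) + labels * 0.2, 0.0, 1.0)

# One density curve per class; two colors because there are two unique labels.
ax = plot_predicted_probabilities(labels, preds, colors=('b', 'g'))
pyplot.show()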
Code example #2
def plot_precision_recall_curve(
        labels: np.ndarray,
        probability_predictions: np.ndarray,
        print_stats: bool = True,
        fig_width: Optional[int] = 8,
        fig_height: Optional[int] = 8,
        curve_color: Optional[str] = 'blue') -> axes.Axes:
    """Plots the Precision-Recall curve for the predictions.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    print_stats: Whether to display the Average Precision in the plot title.
    fig_width: Width of the figure.
    fig_height: Height of the figure.
    curve_color: Color of the Precision-Recall curve.

  Returns:
    plots: Plot of the Precision-Recall curve.
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)

    precision, recall, _ = sklearn.metrics.precision_recall_curve(
        labels, probability_predictions)
    _, plots = pyplot.subplots(figsize=(fig_width, fig_height))
    pyplot.plot(recall,
                precision,
                marker='.',
                label='Precision-Recall',
                color=curve_color)

    pyplot.xlabel('Recall')
    pyplot.ylabel('Precision')

    pyplot.legend()
    if print_stats:
        pyplot.title('Average Precision=%.4f' %
                     sklearn.metrics.average_precision_score(
                         labels, probability_predictions))

    return plots
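
A usage sketch for the curve above, reusing the synthetic labels and preds from the sketch after example #1.

ax = plot_precision_recall_curve(labels, preds, print_stats=True)
pyplot.show()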
Code example #3
def calc_chisquared_sample_sizes_for_bins(
    labels: np.ndarray,
    probability_predictions: np.ndarray,
    number_bins: int = 3,
    uplift_percentages: Sequence[np.float64] = (10, 20),
    power_percentages: Sequence[np.float64] = (80, 90),
    confidence_level_percentages: Sequence[np.float64] = (90, 95)
) -> pd.DataFrame:
    """Calculates statistical sample sizes for the bins defined on predictions.

  These sample sizes for the bins defined on the predicted probabilities are
    estimated using the Chi-squared test of proportions for each combination
    of uplift_percentage, power_percentage and confidence_level_percentage.
    These sizes could be used as the minimum required size for each Test or
    Control group when designing an experiment to target users from each of
    these bins of predictions.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    number_bins: Number of bins that we want to divide the ranked predictions
      into. Default is 3 bins such that the 1st bin contains the
      highest 1/3rd of the predictions (High Propensity group), the 2nd bin
      contains the next 1/3rd of the predictions (Medium Propensity group) and
      the last bin contains the lowest 1/3rd of the predictions (Lowest
      Propensity group).
    uplift_percentages: Sequence of different expected uplift percentages.
    power_percentages: Sequence of different statistical powers for the test.
    confidence_level_percentages: Sequence of different statistical confidence
      levels for the test.

  Returns:
    bin_metrics: Following metrics calculated for each bin of the predictions.
     bin_number: Bin number starting from 1.
     bin_size: Total number of instances in the bin.
     min_probability: Minimum predicted probability within the bin.
     conv_rate_percentage: Percentage of positive instances out of all the
       instances in the bin (precision).
     uplift_percentage: Expected uplift percentage.
     power_percentage: Statistical power of the test.
     confidence_level_percentage: Statistical confidence level of the test.
     required_sample_size: Statistical sample size required.
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)

    # Separate the probability_predictions into bins of equal size.
    binned_data = pd.DataFrame(list(zip(labels, probability_predictions)),
                               columns=['label', 'prediction'])
    binned_data = binned_data.sort_values('prediction').reset_index()
    # To avoid duplicate bin edges, use the index in the qcut call below.
    binned_data['bin_number'] = pd.qcut(binned_data.index,
                                        q=number_bins,
                                        labels=False)

    # Calculate the conversion rate for each bin.
    total_instances = (binned_data[['bin_number',
                                    'label']].groupby('bin_number').count())
    total_instances.columns = ['bin_size']
    total_instances = total_instances.reset_index()
    positive_instances = (binned_data.loc[binned_data['label'] > 0][[
        'bin_number', 'label'
    ]].groupby('bin_number').count())
    positive_instances.columns = ['positive_instances']
    positive_instances = positive_instances.reset_index()

    bin_conv_rate = pd.merge(total_instances,
                             positive_instances,
                             on='bin_number',
                             how='left')
    bin_conv_rate.fillna(0, inplace=True)
    bin_conv_rate['conversion_rate'] = round(
        (bin_conv_rate['positive_instances'] / bin_conv_rate['bin_size'] *
         100), 2)

    bin_metrics_list = list()
    for bin_number in bin_conv_rate['bin_number']:
        conv_rate = bin_conv_rate['conversion_rate'][bin_number]
        bin_size = bin_conv_rate['bin_size'][bin_number]
        min_prob = min(
            binned_data[binned_data['bin_number'] == bin_number]['prediction'])
        for uplift_percentage in uplift_percentages:
            for power_percentage in power_percentages:
                for confidence_level_percentage in confidence_level_percentages:
                    sample_size = calc_chisquared_sample_size(
                        conv_rate, uplift_percentage, power_percentage,
                        confidence_level_percentage)
                    bin_metrics_list.append(
                        (bin_number, bin_size, min_prob, conv_rate,
                         uplift_percentage, power_percentage,
                         confidence_level_percentage, sample_size))

    bin_metrics = pd.DataFrame(bin_metrics_list,
                               columns=[
                                   'bin_number', 'bin_size', 'min_probability',
                                   'conv_rate_percentage', 'uplift_percentage',
                                   'power_percentage',
                                   'confidence_level_percentage',
                                   'required_sample_size'
                               ])

    # Reverse the order of bin numbers such that bin 1 has the highest
    # predicted probability.
    bin_metrics['bin_number'] = number_bins - bin_metrics['bin_number']
    bin_metrics = bin_metrics.sort_values(['bin_number'
                                           ]).reset_index(drop=True)

    return bin_metrics
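
Both this function and example #4 delegate to a calc_chisquared_sample_size helper that is not shown here. Purely as a sketch of what such a helper might do (an assumption based on its call signature, not the actual implementation), a two-proportion power calculation via statsmodels could look like this:

from statsmodels.stats import power as sm_power
from statsmodels.stats.proportion import proportion_effectsize


def calc_chisquared_sample_size(
        baseline_conversion_rate_percentage: float,
        expected_uplift_percentage: float,
        power_percentage: float = 80,
        confidence_level_percentage: float = 95) -> float:
    """Sketch only: per-group sample size needed to detect the expected uplift."""
    p_baseline = baseline_conversion_rate_percentage / 100
    p_expected = p_baseline * (1 + expected_uplift_percentage / 100)
    # Cohen's h effect size for two proportions; abs() keeps it positive.
    effect_size = abs(proportion_effectsize(p_baseline, p_expected))
    sample_size = sm_power.NormalIndPower().solve_power(
        effect_size=effect_size,
        alpha=(100 - confidence_level_percentage) / 100,
        power=power_percentage / 100,
        ratio=1.0,
        alternative='two-sided')
    return float(np.ceil(sample_size))

With a helper like this in place, calc_chisquared_sample_sizes_for_bins(labels, preds) returns one row per bin and parameter combination.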
Code example #4
def calc_chisquared_sample_sizes_for_cumulative_bins(
    labels: np.ndarray,
    probability_predictions: np.ndarray,
    number_bins: int = 10,
    uplift_percentages: Sequence[np.float64] = (10, 20),
    power_percentages: Sequence[np.float64] = (80, 90),
    confidence_level_percentages: Sequence[np.float64] = (90, 95)
) -> pd.DataFrame:
    """Calculates statistical sample sizes for the cumulative bins of predictions.

  These sample sizes for the cumulative bins of predicted probabilities are
  estimated using the Chi-squared test of proportions for each combination of
  uplift_percentage, power_percentage and confidence_level_percentage. These
  sizes could be used as the minimum required sizes for each Test or Control
  group when designing an experiment to target users having the top X% of
  predicted probabilities.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    number_bins: Number of cumulative bins that we want to divide the ranked
      predictions into. Default is deciles (10 bins) such that the 1st bin
      contains the highest 10% of the predictions, the 2nd bin contains the
      highest 20% of the predictions and so on.
    uplift_percentages: Sequence of different expected uplift percentages.
    power_percentages: Sequence of different statistical powers for the test.
    confidence_level_percentages: Sequence of different statistical confidence
      levels for the test.

  Returns:
    bin_metrics: Following metrics calculated for each cumulative bin.
      cumulative_bin_number: Bin number starting from 1.
      bin_size: Total number of instances in the bin.
      bin_size_percentage: Percentage of instances in the bin out of all the
        instances in the labels.
      min_probability: Minimum predicted probability within the bin.
      conv_rate_percentage: Percentage of positive instances out of all the
        instances in the bin (precision).
      uplift_percentage: Expected uplift percentage.
      power_percentage: Statistical power of the test.
      confidence_level_percentage: Statistical confidence level of the test.
      required_sample_size: Statistical sample size required.
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)

    # Separate the probability_predictions into bins.
    label_predictions = pd.DataFrame(list(zip(labels,
                                              probability_predictions)),
                                     columns=['label', 'prediction'])
    label_predictions = label_predictions.sort_values(by='prediction',
                                                      ascending=False)
    number_total_instances = label_predictions.shape[0]
    equal_bin_size = number_total_instances / number_bins

    cumulative_bin_metrics_list = []

    for bin_number in range(1, (number_bins + 1)):
        current_bin_size = round(equal_bin_size * bin_number)
        bin_size_percentage = round(
            current_bin_size / number_total_instances * 100, 2)
        bin_instances = label_predictions.head(current_bin_size)
        positive_instance_indices = bin_instances['label'] > 0.0
        number_bin_positive_instances = bin_instances[
            positive_instance_indices].shape[0]
        conv_rate = round(
            number_bin_positive_instances / current_bin_size * 100, 2)
        min_prob = min(bin_instances['prediction'])

        for uplift_percentage in uplift_percentages:
            for power_percentage in power_percentages:
                for confidence_level_percentage in confidence_level_percentages:
                    sample_size = calc_chisquared_sample_size(
                        conv_rate, uplift_percentage, power_percentage,
                        confidence_level_percentage)
                    cumulative_bin_metrics_list.append(
                        (bin_number, current_bin_size, bin_size_percentage,
                         min_prob, conv_rate, uplift_percentage,
                         power_percentage, confidence_level_percentage,
                         sample_size))

    return pd.DataFrame(cumulative_bin_metrics_list,
                        columns=[
                            'cumulative_bin_number', 'bin_size',
                            'bin_size_percentage', 'min_probability',
                            'conv_rate_percentage', 'uplift_percentage',
                            'power_percentage', 'confidence_level_percentage',
                            'required_sample_size'
                        ])
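
A usage sketch, reusing the synthetic labels and preds from earlier and assuming a calc_chisquared_sample_size helper such as the sketch after example #3.

cumulative_sizes = calc_chisquared_sample_sizes_for_cumulative_bins(
    labels, preds, number_bins=10,
    uplift_percentages=(10,), power_percentages=(80,),
    confidence_level_percentages=(95,))
print(cumulative_sizes.head())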
Code example #5
def calc_performance_metrics(
    labels: np.ndarray,
    probability_predictions: np.ndarray,
    binarize_threshold: Optional[float] = None,
    decimal_points: Optional[int] = 4,
) -> Dict[str, float]:
    """Calculates performance metrics related to a binary classification model.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    binarize_threshold: Probability threshold to be used to binarize the
      predicted probabilities. By default the proportion of positive instances
      in the labels is used.
    decimal_points: Number of decimal points to use when outputting the
      calculated performance metrics.

  Returns:
    metrics: Dictionary of the following performance metrics:
      {prop_positives: Proportion of instances where label = 1.0,
       auc_roc: Area under the recall vs (1-specificity) (ROC) curve,
       auc_pr: Area under the precision vs recall (PR) curve,
       binarize_threshold: Probability threshold used to binarize the
         predicted probabilities.
       Following metrics are calculated after binarizing the predicted
       probabilities based on the given binarize_threshold,
       accuracy: Total accuracy of the predictions,
       true_positive_rate (recall or sensitivity): True positive rate,
       true_negative_rate (specificity): True negative rate,
       precision: Precision (confidence) of the true positive predictions,
       f1_score: F1 score (harmonic mean of precision and recall)}
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)

    num_positives = labels.sum()
    prop_positives = float(num_positives) / len(labels)

    if binarize_threshold is None:
        binarize_threshold = prop_positives

    # Calculate the AUC metrics (rounded once, in the returned dictionary).
    auc_roc = sklearn.metrics.roc_auc_score(labels, probability_predictions)
    auc_pr = sklearn.metrics.average_precision_score(labels,
                                                     probability_predictions)

    # Binarize the predictions.
    binarized_predictions = ((probability_predictions >
                              binarize_threshold).astype(int))

    # Calculate metrics based on binarized predictions.
    accuracy = sklearn.metrics.accuracy_score(labels, binarized_predictions)
    tp_rate = sklearn.metrics.recall_score(labels,
                                           binarized_predictions,
                                           pos_label=1)
    tn_rate = sklearn.metrics.recall_score(labels,
                                           binarized_predictions,
                                           pos_label=0)
    precision = sklearn.metrics.precision_score(labels, binarized_predictions)
    f1_score = sklearn.metrics.f1_score(labels, binarized_predictions)

    return {
        'prop_positives': round(prop_positives, decimal_points),
        'auc_roc': round(auc_roc, decimal_points),
        'auc_pr': round(auc_pr, decimal_points),
        'binarize_threshold': round(binarize_threshold, decimal_points),
        'accuracy': round(accuracy, decimal_points),
        'true_positive_rate': round(tp_rate, decimal_points),
        'true_negative_rate': round(tn_rate, decimal_points),
        'precision': round(precision, decimal_points),
        'f1_score': round(f1_score, decimal_points)
    }
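
A usage sketch with the synthetic labels and preds from earlier; by default the binarization threshold falls back to the proportion of positives in the labels.

metrics = calc_performance_metrics(labels, preds)
print(metrics['auc_roc'], metrics['auc_pr'], metrics['f1_score'])

# Or binarize at a fixed cut-off instead of the positive-rate default.
metrics_at_half = calc_performance_metrics(labels, preds, binarize_threshold=0.5)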
Code example #6
def calc_cumulative_bin_metrics(
        labels: np.ndarray,
        probability_predictions: np.ndarray,
        number_bins: int = 10,
        decimal_points: Optional[int] = 4) -> pd.DataFrame:
    """Calculates performance metrics for cumulative bins of the predictions.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    number_bins: Number of cumulative bins that we want to divide the ranked
      predictions into. Default is 10 bins such that the 1st bin contains the
      highest 10% of the predictions, 2nd bin contains the highest 20% of the
      predictions and so on.
    decimal_points: Number of decimal points to use when outputting the
      calculated performance metrics.

  Returns:
    bin_metrics: Following metrics calculated for each cumulative bin.
      cumulative_bin_number: Bin number starting from 1.
      bin_size: Total number of instances in the bin.
      bin_size_proportion: Proportion of instances in the bin out of all the
        instances in the labels.
      positive_instances: Number of positive instances in the bin.
      precision: Proportion of positive instances out of all the instances
        in the bin.
      coverage (recall): Proportion of positive instances in the bin out of
        all the positive instances in the labels.
      prop_label_positives: Proportion of positive instances in the labels.
      precision_uplift: Uplift of precision of the bin compared to the
        precision of the random prediction (prop_label_positives).
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)

    # Separate the probability_predictions into bins.
    label_predictions = pd.DataFrame(list(zip(labels,
                                              probability_predictions)),
                                     columns=['label', 'prediction'])
    label_predictions = label_predictions.sort_values(by='prediction',
                                                      ascending=False)
    number_total_instances = label_predictions.shape[0]
    equal_bin_size = number_total_instances / number_bins
    number_total_positive_instances = label_predictions[
        label_predictions['label'] > 0].shape[0]
    prop_label_positives = round(
        number_total_positive_instances / number_total_instances,
        decimal_points)

    cumulative_bin_metrics_list = list()

    for i in range(1, (number_bins + 1)):
        current_bin_size = round(equal_bin_size * i)
        bin_size_proportion = round(current_bin_size / number_total_instances,
                                    decimal_points)
        bin_instances = label_predictions.head(current_bin_size)
        number_bin_positive_instances = bin_instances[
            bin_instances['label'] > 0].shape[0]
        bin_precision = round(number_bin_positive_instances / current_bin_size,
                              decimal_points)
        bin_recall = round(
            number_bin_positive_instances / number_total_positive_instances,
            decimal_points)
        bin_precision_uplift = round(bin_precision / prop_label_positives,
                                     decimal_points)

        cumulative_bin_metrics_list.append(
            (i, current_bin_size, bin_size_proportion,
             number_bin_positive_instances, bin_precision, bin_recall,
             prop_label_positives, bin_precision_uplift))

    return pd.DataFrame(cumulative_bin_metrics_list,
                        columns=[
                            'cumulative_bin_number', 'bin_size',
                            'bin_size_proportion', 'positive_instances',
                            'precision', 'coverage (recall)',
                            'prop_label_positives', 'precision_uplift'
                        ])
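
A usage sketch with the synthetic labels and preds from earlier; each row accumulates the top 10%, 20%, and so on of the ranked predictions.

cumulative_bin_metrics = calc_cumulative_bin_metrics(labels, preds, number_bins=10)
print(cumulative_bin_metrics[['cumulative_bin_number', 'precision',
                              'coverage (recall)', 'precision_uplift']])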
Code example #7
def calc_bin_metrics(labels: np.ndarray,
                     probability_predictions: np.ndarray,
                     number_bins: Optional[int] = 10,
                     decimal_points: Optional[int] = 4) -> pd.DataFrame:
    """Calculates performance metrics for each bin of the predictions.

  Args:
    labels: An array of true binary labels represented by 1.0 and 0.0.
    probability_predictions: An array of predicted probabilities between 0.0 and
      1.0.
    number_bins: Number of bins that we want to divide the ranked predictions
      into. Default is deciles (10 bins) such that the 1st bin contains the
      highest 10% of the predictions, the 2nd bin contains the next 10% of the
      predictions and so on.
    decimal_points: Number of decimal points to use when outputting the
      calculated performance metrics.

  Returns:
    bin_metrics: Following metrics calculated for each bin.
      bin_number: Bin number starting from 1.
      bin_size: Total number of instances in the bin.
      positive_instances: Number of positive instances in the bin.
      precision: Proportion of positive instances out of all the instances
        in the bin.
      coverage: Proportion of positive instances in the bin out of all the
        positive instances in the dataset.
      prop_positives: Proportion of positive instances in the labels.
      precision_uplift: Uplift of precision compared to the precision
        of the random prediction (prop_positives).
  """
    utils.assert_label_values_are_valid(labels)
    utils.assert_prediction_values_are_valid(probability_predictions)
    utils.assert_label_and_prediction_length_match(labels,
                                                   probability_predictions)

    # Separate the probability_predictions into bins.
    bins = pd.qcut(probability_predictions, q=number_bins, labels=False)
    binned_data = pd.DataFrame(list(zip(labels, probability_predictions,
                                        bins)),
                               columns=['label', 'prediction', 'bin_number'])

    # Calculate the metrics for each bin.
    total_instances = (binned_data[['bin_number',
                                    'label']].groupby('bin_number').count())
    total_instances.columns = ['bin_size']
    total_instances = total_instances.reset_index()
    positive_instances = (binned_data.loc[binned_data['label'] > 0][[
        'bin_number', 'label'
    ]].groupby('bin_number').count())
    positive_instances.columns = ['positive_instances']
    positive_instances = positive_instances.reset_index()

    bin_metrics = pd.merge(total_instances,
                           positive_instances,
                           on='bin_number',
                           how='left')
    bin_metrics.fillna(0, inplace=True)
    bin_metrics['precision'] = (bin_metrics['positive_instances'] /
                                bin_metrics['bin_size'])
    bin_metrics['precision'] = [
        round(val, decimal_points) for val in bin_metrics['precision']
    ]
    prop_positives = round(labels[labels == 1.0].shape[0] / len(labels),
                           decimal_points)
    bin_metrics['prop_positives'] = prop_positives
    # Convert bin_number from zero-based offset to 1-based offset.
    bin_metrics['bin_number'] = bin_metrics['bin_number'] + 1
    bin_metrics['precision_uplift'] = bin_metrics['precision'] / prop_positives
    bin_metrics['precision_uplift'] = [
        round(val, decimal_points) for val in bin_metrics['precision_uplift']
    ]
    bin_metrics['coverage'] = (bin_metrics['positive_instances'] /
                               sum(bin_metrics['positive_instances']))
    bin_metrics['coverage'] = [
        round(val, decimal_points) for val in bin_metrics['coverage']
    ]

    # Reverse the order of bin numbers such that bin 1 has the highest
    # predicted probability.
    bin_metrics['bin_number'] = number_bins - bin_metrics['bin_number'] + 1
    bin_metrics = bin_metrics.sort_values(['bin_number'
                                           ]).reset_index(drop=True)

    return bin_metrics
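
A usage sketch with the synthetic labels and preds from earlier; unlike example #6, each decile here is a disjoint bin, with bin 1 holding the highest predictions.

bin_metrics = calc_bin_metrics(labels, preds, number_bins=10)
print(bin_metrics[['bin_number', 'bin_size', 'precision', 'coverage',
                   'precision_uplift']])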
Code example #8
    def test_assert_prediction_values_are_valid_raises_right_error(self):
        with self.assertRaises(AssertionError):
            utils.assert_prediction_values_are_valid(
                np.array([0.0, 0.5, 0.33, 0.1, 2.0]))
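
The test expects an AssertionError when any prediction lies outside [0.0, 1.0] (here, 2.0). As a rough sketch of the behaviour it relies on, assumed from usage rather than taken from the actual utils module, the helper could be as simple as:

def assert_prediction_values_are_valid(
        probability_predictions: np.ndarray) -> None:
    """Sketch only: raises AssertionError unless all values are in [0.0, 1.0]."""
    assert np.all((probability_predictions >= 0.0)
                  & (probability_predictions <= 1.0)), (
                      'probability_predictions should be between 0.0 and 1.0.')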