def selection_rate(y_true, y_pred, *, pos_label: Any = 1, sample_weight=None) -> float:
    """Calculate the fraction of predicted labels matching the 'good' outcome.

    The argument `pos_label` specifies the 'good' outcome. For consistency
    with other metric functions, the ``y_true`` argument is required, but
    ignored.

    Parameters
    ----------
    y_true : array_like
        The true labels (ignored)

    y_pred : array_like
        The predicted labels

    pos_label : Scalar
        The label to treat as the 'good' outcome

    sample_weight : array_like
        Optional array of sample weights
    """
    # An empty prediction array has no meaningful selection rate
    if len(y_pred) == 0:
        raise ValueError(_EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE)

    # Boolean mask of predictions equal to the 'good' outcome
    is_selected = _convert_to_ndarray_and_squeeze(y_pred) == pos_label

    # Unweighted case is equivalent to all weights being one
    if sample_weight is None:
        weights = np.ones(len(is_selected))
    else:
        weights = np.squeeze(np.asarray(sample_weight))

    # Weighted fraction of selected samples
    return np.dot(is_selected, weights) / weights.sum()
def test_single_element(self):
    # A triply-nested single value should squeeze down to a 1-element 1D array
    nested = [[[1]]]

    squeezed = fmim._convert_to_ndarray_and_squeeze(nested)

    assert isinstance(squeezed, np.ndarray)
    assert squeezed.shape == (1,)
    assert squeezed[0] == 1
def test_multi_columns(self):
    # A single row with two columns should squeeze to a flat 2-element array
    row = [[0, 1]]

    squeezed = fmim._convert_to_ndarray_and_squeeze(row)

    assert isinstance(squeezed, np.ndarray)
    assert squeezed.shape == (2,)
    for position, expected in enumerate([0, 1]):
        assert squeezed[position] == expected
def __init__(self, *,
             metrics: Union[Callable, Dict[str, Callable]],
             y_true, y_pred,
             sensitive_features,
             control_features: Optional = None,
             sample_params: Optional[Union[Dict[str, Any], Dict[str, Dict[str, Any]]]] = None):
    """Read a placeholder comment."""
    check_consistent_length(y_true, y_pred)
    true_arr = _convert_to_ndarray_and_squeeze(y_true)
    pred_arr = _convert_to_ndarray_and_squeeze(y_pred)

    metric_fns = self._process_functions(metrics, sample_params)

    # Sensitive features are always present
    sensitive = self._process_features("sensitive_feature_", sensitive_features, true_arr)
    self._sf_names = [feat.name for feat in sensitive]

    # Control features are optional; both stay None when absent
    conditional = None
    self._cf_names = None
    if control_features is not None:
        conditional = self._process_features("control_feature_", control_features, true_arr)
        self._cf_names = [feat.name for feat in conditional]

    # Reject duplicate feature names across sensitive and control features
    seen = set()
    combined_names = list(self._sf_names)
    if self._cf_names:
        combined_names.extend(self._cf_names)
    for feature_name in combined_names:
        if feature_name in seen:
            raise ValueError(_DUPLICATE_FEATURE_NAME.format(feature_name))
        seen.add(feature_name)

    self._overall = self._compute_overall(metric_fns, true_arr, pred_arr, conditional)
    self._by_group = self._compute_by_group(metric_fns, true_arr, pred_arr,
                                            sensitive, conditional)
def test_simple_list(self):
    # A plain 1D list should pass through as an equal-length 1D ndarray
    values = [0, 1, 2]

    converted = fmim._convert_to_ndarray_and_squeeze(values)

    assert isinstance(converted, np.ndarray)
    assert converted.shape == (3,)
    for idx in range(3):
        assert converted[idx] == idx
def selection_rate(y_true, y_pred, *, pos_label=1, sample_weight=None):
    """Calculate the fraction of predicted labels matching the 'good' outcome.

    The argument `pos_label` specifies the 'good' outcome.
    """
    # Reject empty predictions up front
    if len(y_pred) == 0:
        raise ValueError(_EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE)

    # Mask of predictions that match the 'good' outcome
    match_mask = _convert_to_ndarray_and_squeeze(y_pred) == pos_label

    # Fall back to unit weights when none were supplied
    weights = (np.ones(len(match_mask)) if sample_weight is None
               else np.squeeze(np.asarray(sample_weight)))

    # Weighted selection rate
    return np.dot(match_mask, weights) / weights.sum()
def __init__(
    self,
    *,
    metrics: Union[Callable, Dict[str, Callable]],
    y_true, y_pred,
    sensitive_features,
    control_features=None,
    sample_params: Optional[Union[Dict[str, Any], Dict[str, Dict[str, Any]]]] = None,
):
    """Build the metric object from labels, predictions and grouping features.

    Validates that ``y_true`` and ``y_pred`` have consistent lengths,
    squeezes both to 1D ndarrays, and collects them — together with each
    sensitive and (optional) control feature column — into a single
    DataFrame. Duplicate feature names across the sensitive and control
    features raise ``ValueError``. The 'overall' and 'by group' result
    frames are then computed via the ``_build_*_frame`` helpers.

    NOTE(review): the exact contract of ``_process_functions`` /
    ``_process_features`` is defined elsewhere in this module — the
    comments below describe only what is visible here.
    """
    check_consistent_length(y_true, y_pred)
    # Squeeze inputs to 1D ndarrays for uniform handling
    y_t = _convert_to_ndarray_and_squeeze(y_true)
    y_p = _convert_to_ndarray_and_squeeze(y_pred)

    # Single DataFrame holding labels, predictions and (later) all features
    all_data = pd.DataFrame.from_dict({
        "y_true": list(y_t),
        "y_pred": list(y_p)
    })

    annotated_funcs = self._process_functions(metrics, sample_params, all_data)

    # Now, prepare the sensitive features
    sf_list = self._process_features("sensitive_feature_", sensitive_features, y_t)
    self._sf_names = [x.name_ for x in sf_list]

    # Prepare the control features
    # Adjust _sf_indices if needed
    cf_list = None
    self._cf_names = None
    if control_features is not None:
        cf_list = self._process_features("control_feature_", control_features, y_t)
        self._cf_names = [x.name_ for x in cf_list]

    # Add sensitive and conditional features to all_data
    for sf in sf_list:
        all_data[sf.name_] = list(sf.raw_feature_)
    if cf_list is not None:
        for cf in cf_list:
            all_data[cf.name_] = list(cf.raw_feature_)

    # Check for duplicate feature names across both feature groups
    nameset = set()
    namelist = self._sf_names
    if self._cf_names:
        namelist = namelist + self._cf_names
    for name in namelist:
        if name in nameset:
            raise ValueError(_DUPLICATE_FEATURE_NAME.format(name))
        nameset.add(name)

    # Create the 'overall' results
    self._overall = self._build_overall_frame(all_data, annotated_funcs,
                                              cf_list, self._cf_names)

    # Deep-copied so later mutation of the grouping list cannot affect sf_list
    grouping_features = copy.deepcopy(sf_list)
    if cf_list is not None:
        # Prepend the conditional features, so they are 'higher' in the grouping
        grouping_features = copy.deepcopy(cf_list) + grouping_features

    # Create the 'by group' results
    self._by_group = self._build_by_group_frame(all_data, annotated_funcs,
                                                grouping_features)
def __init__(
    self,
    *,
    metrics: Union[Callable, Dict[str, Callable]],
    y_true, y_pred,
    sensitive_features,
    control_features=None,
    sample_params: Optional[Union[Dict[str, Any], Dict[str, Dict[str, Any]]]] = None,
):
    """Build the metric object from labels, predictions and grouping features.

    Validates that ``y_true`` and ``y_pred`` have consistent lengths,
    squeezes both to 1D ndarrays, and collects them — together with each
    sensitive and (optional) control feature column — into a single
    DataFrame. Duplicate feature names raise ``ValueError``. The 'overall'
    results are computed per control-feature group (or once, when there are
    no control features), and the 'by group' results are computed by
    grouping on control features followed by sensitive features.

    NOTE(review): the contracts of ``_process_functions``,
    ``_process_features`` and ``apply_to_dataframe`` are defined elsewhere
    in this module — comments below describe only what is visible here.
    """
    check_consistent_length(y_true, y_pred)
    # Squeeze inputs to 1D ndarrays for uniform handling
    y_t = _convert_to_ndarray_and_squeeze(y_true)
    y_p = _convert_to_ndarray_and_squeeze(y_pred)

    # Single DataFrame holding labels, predictions and (later) all features
    all_data = pd.DataFrame.from_dict({
        "y_true": list(y_t),
        "y_pred": list(y_p)
    })

    annotated_funcs = self._process_functions(metrics, sample_params, all_data)

    # Now, prepare the sensitive features
    sf_list = self._process_features("sensitive_feature_", sensitive_features, y_t)
    self._sf_names = [x.name_ for x in sf_list]

    # Prepare the control features
    # Adjust _sf_indices if needed
    cf_list = None
    self._cf_names = None
    if control_features is not None:
        cf_list = self._process_features("control_feature_", control_features, y_t)
        self._cf_names = [x.name_ for x in cf_list]

    # Add sensitive and conditional features to all_data
    for sf in sf_list:
        all_data[sf.name_] = list(sf.raw_feature_)
    if cf_list is not None:
        for cf in cf_list:
            all_data[cf.name_] = list(cf.raw_feature_)

    # Check for duplicate feature names across both feature groups
    nameset = set()
    namelist = self._sf_names
    if self._cf_names:
        namelist = namelist + self._cf_names
    for name in namelist:
        if name in nameset:
            raise ValueError(_DUPLICATE_FEATURE_NAME.format(name))
        nameset.add(name)

    if self._cf_names is None:
        # No control features: a single 'overall' row over all data
        self._overall = apply_to_dataframe(
            all_data, metric_functions=annotated_funcs)
    else:
        # One 'overall' row per control-feature group
        temp = all_data.groupby(by=self._cf_names).apply(
            apply_to_dataframe, metric_functions=annotated_funcs)
        # If there are multiple control features, might have missing combinations
        if len(self._cf_names) > 1:
            # Reindex onto the full cartesian product so absent
            # combinations appear as missing rows rather than being dropped
            all_indices = pd.MultiIndex.from_product(
                [x.classes_ for x in cf_list],
                names=[x.name_ for x in cf_list],
            )
            self._overall = temp.reindex(index=all_indices)
        else:
            self._overall = temp

    # Deep-copied so later mutation of the grouping list cannot affect sf_list
    grouping_features = copy.deepcopy(sf_list)
    if cf_list is not None:
        # Prepend the conditional features, so they are 'higher' in the grouping
        grouping_features = copy.deepcopy(cf_list) + grouping_features

    temp = all_data.groupby([x.name_ for x in grouping_features
                             ]).apply(apply_to_dataframe,
                                      metric_functions=annotated_funcs)
    if len(grouping_features) > 1:
        # Same reindexing trick: expose missing feature combinations
        all_indices = pd.MultiIndex.from_product(
            [x.classes_ for x in grouping_features],
            names=[x.name_ for x in grouping_features],
        )
        self._by_group = temp.reindex(index=all_indices)
    else:
        self._by_group = temp