def test_numpy1d(self):
    input = np.array([0, 1, 4])
    sample_array = [2, 3, 4]
    result = convert_to_string_list_dict("Base {0}", input, sample_array)
    assert isinstance(result, dict)
    assert len(result) == 1
    assert "Base 0" in result
    arr = result["Base 0"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [0, 1, 4])
def test_named_series(self):
    input = pd.Series(data=[1, 3, 5], name="Something")
    sample_array = [4, 5, 6]
    result = convert_to_string_list_dict("Base {0}", input, sample_array)
    assert isinstance(result, dict)
    assert len(result) == 1
    assert "Something" in result
    arr = result["Something"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [1, 3, 5])
def test_unnamed_series(self):
    input = pd.Series(data=[0, 1, 2])
    sample_array = [4, 5, 6]
    result = convert_to_string_list_dict("Base {0}", input, sample_array)
    assert isinstance(result, dict)
    assert len(result) == 1
    assert "Base 0" in result
    arr = result["Base 0"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [0, 1, 2])
def test_dict(self):
    input = {"a": np.array([0, 1, 2]), "b": pd.Series(data=[3, 4, 5])}
    sample_array = [2, 3, 4]
    result = convert_to_string_list_dict("Base {0}", input, sample_array)
    assert isinstance(result, dict)
    assert len(result) == 2
    assert "a" in result
    arr = result["a"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [0, 1, 2])
    assert "b" in result
    arr = result["b"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [3, 4, 5])
def test_dataframe(self):
    input = pd.DataFrame.from_dict({"a": [0, 1, 2], "b": [4, 5, 6]})
    sample_array = [3, 6, 9]
    result = convert_to_string_list_dict("Base {0}", input, sample_array)
    assert isinstance(result, dict)
    assert len(result) == 2
    assert "a" in result
    arr = result["a"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [0, 1, 2])
    assert "b" in result
    arr = result["b"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [4, 5, 6])
def test_numpy2d(self):
    # Note transpose on the end
    input = np.array([[0, 1, 4], [2, 6, 7]]).T
    sample_array = [2, 3, 4]
    result = convert_to_string_list_dict("Base {0}", input, sample_array)
    assert isinstance(result, dict)
    assert len(result) == 2
    assert "Base 0" in result
    arr = result["Base 0"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [0, 1, 4])
    assert "Base 1" in result
    arr = result["Base 1"]
    assert isinstance(arr, list)
    assert np.array_equal(arr, [2, 6, 7])
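
# For context, a minimal sketch of the behavior the tests above pin down.
# This is an illustration, not the library's actual implementation, and the
# helper name is hypothetical; it assumes numpy/pandas are imported as in
# this module. The sample_array argument, which the real helper uses for
# length validation, is ignored here.
def _convert_to_string_list_dict_sketch(base_name_format, ys, sample_array):
    result = {}
    if isinstance(ys, pd.Series):
        # Named Series keep their name; unnamed ones get "Base 0" etc.
        name = ys.name if ys.name is not None else base_name_format.format(0)
        result[name] = ys.tolist()
    elif isinstance(ys, pd.DataFrame):
        for column in ys.columns:
            result[str(column)] = ys[column].tolist()
    elif isinstance(ys, dict):
        for key, value in ys.items():
            result[key] = np.asarray(value).tolist()
    else:
        ys = np.asarray(ys)
        if ys.ndim == 1:
            result[base_name_format.format(0)] = ys.tolist()
        else:
            # 2D input: one entry per column
            for i in range(ys.shape[1]):
                result[base_name_format.format(i)] = ys[:, i].tolist()
    return result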
def __init__(self, *,
             sensitive_features,
             y_true,
             y_pred,
             locale=None,
             public_ip=None,
             port=None,
             fairness_metric_module=None,
             fairness_metric_mapping=None):
    """Initialize the fairness dashboard."""
    metrics_module = FairnessMetricModule(
        module_name=fairness_metric_module,
        mapping=fairness_metric_mapping)

    if sensitive_features is None or y_true is None or y_pred is None:
        raise ValueError("Required parameters not provided")

    # Normalize predictions and sensitive features to {name: list} form
    model_dict = convert_to_string_list_dict("Model {0}",
                                             y_pred,
                                             y_true)
    sf_dict = convert_to_string_list_dict("Sensitive Feature {0}",
                                          sensitive_features,
                                          y_true)

    # Make sure that things are as the TS layer expects
    self._y_true = convert_to_list(y_true)
    self._y_pred = list(model_dict.values())
    # Note transpose in the following
    dataset = (np.array(list(sf_dict.values())).T).tolist()

    if np.shape(self._y_true)[0] != np.shape(self._y_pred)[1]:
        raise ValueError("Predicted y does not match true y shape")

    if np.shape(self._y_true)[0] != np.shape(dataset)[0]:
        raise ValueError("Sensitive features shape does not match true y "
                         "shape")

    fairness_input = {
        "true_y": self._y_true,
        "model_names": list(model_dict.keys()),
        "predicted_ys": self._y_pred,
        "features": list(sf_dict.keys()),
        "dataset": dataset,
        "classification_methods": metrics_module.classification_methods,
        "regression_methods": metrics_module.regression_methods,
        "probability_methods": metrics_module.probability_methods,
    }

    super(FairnessDashboard, self).__init__(
        dashboard_type="Fairness",
        model_data=fairness_input,
        public_ip=public_ip,
        port=port,
        locale=locale)

    self.fairness_metrics_module = metrics_module

    def metrics():
        """Compute the requested metric over the given bins.

        Note:
            This function always calculates the error_function, if one is
            available, so that the value is cached in the MetricsCache.

        Request attributes:
            binVector: the sensitive features binning vector
            metricKey: the metricKey that corresponds to the function
                that will be calculated
            modelIndex: the model index used to select the predicted y's
                for that model
        """
        try:
            data = request.get_json(force=True)

            if type(data["binVector"][0]) == np.int32:
                data['binVector'] = [
                    str(bin_) for bin_ in data['binVector']
                ]

            metric_name = data['metricKey']
            error_function_name = f"{metric_name} bounds"
            metric_function = \
                self.fairness_metrics_module._metric_methods.get(
                    data["metricKey"]).get("function")
            metric_method = {metric_name: metric_function}
            error_function = \
                self.fairness_metrics_module._metric_methods.get(
                    data["metricKey"]).get("error_function")
            if error_function is not None:
                metric_method.update(
                    {error_function_name: error_function})

            metric_frame = self.fairness_metrics_module.MetricFrame(
                metrics=metric_method,
                y_true=self.model_data['true_y'],
                y_pred=self.model_data['predicted_ys'][data["modelIndex"]],
                sensitive_features=data["binVector"])

            # The metric column holds scalars; the error-function column
            # holds (lower, upper) tuples. Inspect the first value of each
            # column (rather than indexing by key 0, which breaks for
            # string-valued bins) to pick out the metric column.
            result = {
                "data": {
                    "global": metric_frame.overall[metric_name],
                    "bins": list([
                        entry for entry in list(
                            metric_frame.by_group.to_dict().values())
                        if not isinstance(list(entry.values())[0], tuple)
                    ][0].values()),
                }
            }
            if error_function_name in metric_method:
                result["data"].update({
                    "bounds": {
                        "lower": metric_frame.overall[
                            error_function_name][0],
                        "upper": metric_frame.overall[
                            error_function_name][1],
                    },
                    # [(x1, y1), (x2, y2), (x3, y3)...]
                    "binBounds": [{
                        "lower": bounds[0],
                        "upper": bounds[1]
                    } for bounds in list(
                        metric_frame.by_group[error_function_name]
                        .to_dict().values())]
                })
            return jsonify(result)
        except Exception as ex:
            import sys
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            return jsonify({
                "error": str(ex),
                "stacktrace": str(
                    repr(traceback.format_exception(
                        exc_type, exc_value, exc_traceback))),
                "locals": str(locals()),
            })

    self.add_url_rule(metrics, '/metrics', methods=["POST"])