Example #1
    def test_index_to_list(self):
        input_index = pd.Index(data=[[0, 4], [1, 5], [2, 6]])
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = _convert_to_list(input_index)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #2
    def test_list_to_list(self):
        input_list = [[0, 4], [1, 5], [2, 6]]
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = _convert_to_list(input_list)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #3
    def test_series_to_list(self):
        input_series = pd.Series(data=[[0, 4], [1, 5], [2, 6]])
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = _convert_to_list(input_series)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #4
    def test_array_to_list(self):
        input_array = np.array([[0, 4], [1, 5], [2, 6]])
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = _convert_to_list(input_array)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #5
    def test_pandas_dataframe_to_list(self):
        input_dataframe = pd.DataFrame.from_dict(
            {"a": [0, 1, 2], "b": [4, 5, 6]}
        )
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = _convert_to_list(input_dataframe)

        assert input_as_list is not None
        assert input_as_list == expected_list
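
Together, Examples #1-#5 pin down the helper's contract: every supported dense container comes back as a plain nested Python list. The sketch below is a reconstruction from these tests alone, not the library's actual implementation; the function name and branch order are assumptions.

import numpy as np
import pandas as pd

def _convert_to_list_sketch(data):
    # DataFrame rows become the inner lists, matching Example #5.
    if isinstance(data, pd.DataFrame):
        return data.values.tolist()
    # Series and Index already hold the row values directly.
    if isinstance(data, (pd.Series, pd.Index)):
        return data.tolist()
    # numpy arrays convert recursively to nested lists.
    if isinstance(data, np.ndarray):
        return data.tolist()
    # Plain lists pass through unchanged.
    return data

assert _convert_to_list_sketch(np.array([[0, 4], [1, 5]])) == [[0, 4], [1, 5]]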
Example #6
    def on_predict(self, data):
        try:
            if self._dataframeColumns is not None:
                data = pd.DataFrame(data, columns=self._dataframeColumns)
                data = data.astype(dict(self._dfdtypes))
            if self._is_classifier:
                prediction = _convert_to_list(self._model.predict_proba(data),
                                              EXP_VIZ_ERR_MSG)
            else:
                prediction = _convert_to_list(self._model.predict(data),
                                              EXP_VIZ_ERR_MSG)
            return {WidgetRequestResponseConstants.DATA: prediction}
        except Exception:
            return {
                WidgetRequestResponseConstants.ERROR: "Model threw exception"
                " while predicting...",
                WidgetRequestResponseConstants.DATA: []
            }

    def predicted_y_to_list(self, predicted_y):
        try:
            predicted_y = _convert_to_list(predicted_y)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Model prediction output of unsupported type,"
                " inner error: {}".format(ex_str))
        return predicted_y

    def on_predict(self, data):
        try:
            data = pd.DataFrame(
                data, columns=self.dashboard_input.dataset.feature_names)
            if self._is_classifier:
                prediction = _convert_to_list(
                    self._analysis.model.predict_proba(data), EXP_VIZ_ERR_MSG)
            else:
                prediction = _convert_to_list(
                    self._analysis.model.predict(data), EXP_VIZ_ERR_MSG)
            return {WidgetRequestResponseConstants.data: prediction}
        except Exception as e:
            print(e)
            traceback.print_exc()
            e_str = _format_exception(e)
            return {
                WidgetRequestResponseConstants.error:
                "Model threw exception"
                " while predicting..."
                " inner error: {}".format(e_str),
                WidgetRequestResponseConstants.data: []
            }
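
Both on_predict handlers follow the same pattern: rebuild a DataFrame with the training-time column names (and, in the first variant, dtypes) before calling the model, because estimators fitted on a DataFrame often reject a bare list of rows. A minimal round trip under assumed data; the columns and model below are illustrative, not taken from the widget:

import pandas as pd
from sklearn.linear_model import LogisticRegression

# Illustrative stand-in for the widget's stored training-time state.
train = pd.DataFrame({"age": [22, 35, 58, 44],
                      "income": [20.0, 55.0, 90.0, 40.0]})
model = LogisticRegression().fit(train, [0, 1, 1, 0])
columns, dtypes = train.columns, train.dtypes

# A prediction request arrives as a plain list of rows.
payload = [[40, 60.0]]
data = pd.DataFrame(payload, columns=columns).astype(dict(dtypes))
print(model.predict_proba(data).tolist())  # [[p_class0, p_class1]]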
Example #9
    def test_csr_matrix_to_list(self):
        input_sparse_matrix = csr_matrix((3, 10000),
                                         dtype=np.int8)
        with pytest.raises(ValueError) as ve:
            _convert_to_list(input_sparse_matrix)
        assert "Exceeds maximum number of features for " + \
            "visualization (1000)" in str(ve.value)

        with pytest.raises(ValueError) as ve:
            _convert_to_list(input_sparse_matrix,
                             custom_err_msg="Error occurred")
        assert "Error occurred" in str(ve.value)

        row = np.array([0, 0, 1, 2, 2, 2])
        col = np.array([0, 2, 2, 0, 1, 2])
        data = np.array([1, 2, 3, 4, 5, 6])
        sparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))
        expected_list = [[1, 0, 2],
                         [0, 0, 3],
                         [4, 5, 6]]
        input_as_list = _convert_to_list(sparse_matrix)

        assert input_as_list is not None
        assert input_as_list == expected_list
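
Example #9 adds two more pieces of the contract: sparse input is only densified below a feature limit, and custom_err_msg overrides the default message. Below is a sketch of just that branch, with the limit inferred from the test's error text:

import numpy as np
from scipy.sparse import csr_matrix, issparse

_FEATURE_LIMIT = 1000  # assumed from the error message asserted above

def _convert_sparse_to_list(matrix, custom_err_msg=None):
    # Refuse to densify very wide matrices, as the first assertion expects.
    if issparse(matrix) and matrix.shape[1] > _FEATURE_LIMIT:
        raise ValueError(custom_err_msg or
                         "Exceeds maximum number of features for "
                         "visualization (1000)")
    return matrix.toarray().tolist()

m = csr_matrix((np.array([1, 2]), (np.array([0, 1]), np.array([0, 1]))),
               shape=(2, 2))
assert _convert_sparse_to_list(m) == [[1, 0], [0, 2]]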
Example #10
    def _save_metadata(self, path):
        """Save the metadata like target column, categorical features,
           task type and the classes (if any).

        :param path: The directory path to save the RAIInsights to.
        :type path: str
        """
        top_dir = Path(path)
        classes = _convert_to_list(self._classes)
        meta = {
            _TARGET_COLUMN: self.target_column,
            _TASK_TYPE: self.task_type,
            _CATEGORICAL_FEATURES: self.categorical_features,
            _CLASSES: classes
        }
        with open(top_dir / _META_JSON, 'w') as file:
            json.dump(meta, file)
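
For reference, _save_metadata writes a flat JSON object. The literal key strings below are assumptions standing in for the _TARGET_COLUMN, _TASK_TYPE, _CATEGORICAL_FEATURES, and _CLASSES constants, which are not shown in this snippet:

import json

# Hypothetical contents of <path>/meta.json after _save_metadata runs.
meta = {
    "target_column": "income",
    "task_type": "classification",
    "categorical_features": ["workclass", "education"],
    "classes": ["<=50K", ">50K"],
}
print(json.dumps(meta, indent=2))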
    def _get_interpret(self, explanation):
        interpretation = ModelExplanationData()

        # List of explanations, key of explanation type is "explanation_type"
        if explanation is not None:
            mli_explanations = explanation.data(-1)["mli"]
        else:
            mli_explanations = None
        local_explanation = self._find_first_explanation(
            ExplanationKeys.LOCAL_EXPLANATION_KEY, mli_explanations)
        global_explanation = self._find_first_explanation(
            ExplanationKeys.GLOBAL_EXPLANATION_KEY, mli_explanations)
        ebm_explanation = self._find_first_explanation(
            ExplanationKeys.EBM_GLOBAL_EXPLANATION_KEY, mli_explanations)

        if explanation is not None and hasattr(explanation, 'method'):
            interpretation.method = explanation.method

        local_dim = None

        if local_explanation is not None or global_explanation is not None\
                or ebm_explanation is not None:
            interpretation.precomputedExplanations = PrecomputedExplanations()

        if local_explanation is not None:
            try:
                local_feature_importance = FeatureImportance()
                local_feature_importance.scores = _convert_to_list(
                    local_explanation["scores"])
                if np.shape(local_feature_importance.scores)[-1] > 1000:
                    raise ValueError("Exceeds maximum number of features for "
                                     "visualization (1000). Please regenerate"
                                     " the explanation using fewer features.")
                local_feature_importance.intercept = _convert_to_list(
                    local_explanation["intercept"])
                # We can ignore perf explanation data.
                # Note if it is added back at any point,
                # the numpy values will need to be converted to python,
                # otherwise serialization fails.
                local_explanation["perf"] = None
                interpretation.precomputedExplanations.localFeatureImportance\
                    = local_feature_importance
            except Exception as ex:
                raise ValueError("Unsupported local explanation type") from ex
            if self._evaluation_examples is not None:

                _feature_length = self._evaluation_examples.shape[1]
                _row_length = self._evaluation_examples.shape[0]
                local_dim = np.shape(local_feature_importance.scores)
                if len(local_dim) != 2 and len(local_dim) != 3:
                    raise ValueError(
                        "Local explanation expected to be a 2D or 3D list")
                if (len(local_dim) == 2 and (local_dim[1] != _feature_length
                                             or local_dim[0] != _row_length)):
                    raise ValueError("Shape mismatch: local explanation"
                                     "length differs from dataset")
                if (len(local_dim) == 3 and (local_dim[2] != _feature_length
                                             or local_dim[1] != _row_length)):
                    raise ValueError("Shape mismatch: local explanation"
                                     " length differs from dataset")
        if global_explanation is not None:
            try:
                global_feature_importance = FeatureImportance()
                global_feature_importance.scores = _convert_to_list(
                    global_explanation["scores"])
                if 'intercept' in global_explanation:
                    global_feature_importance.intercept\
                        = _convert_to_list(
                            global_explanation["intercept"])
                interpretation.precomputedExplanations.globalFeatureImportance\
                    = global_feature_importance
            except Exception as ex:
                raise ValueError("Unsupported global explanation type") from ex
        if ebm_explanation is not None:
            try:
                ebm_feature_importance = EBMGlobalExplanation()
                ebm_feature_importance.feature_list\
                    = ebm_explanation["feature_list"]
                interpretation.precomputedExplanations.ebmGlobalExplanation\
                    = ebm_feature_importance

            except Exception as ex:
                raise ValueError("Unsupported ebm explanation type") from ex
        return interpretation
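
The 2D-vs-3D check in _get_interpret encodes a shape convention worth spelling out: regression local importances are (#rows, #features), while classification adds a leading class dimension, (#classes, #rows, #features). In both cases the row and feature counts sit at the tail of the shape tuple, which is what the validation relies on. A quick check with assumed sizes:

import numpy as np

rows, features, classes = 5, 3, 2
regression_scores = np.zeros((rows, features))
classification_scores = np.zeros((classes, rows, features))

for scores in (regression_scores, classification_scores):
    dim = np.shape(scores)
    assert len(dim) in (2, 3)
    # The row and feature counts are always the last two entries.
    assert dim[-2] == rows and dim[-1] == features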
Example #12
    def __init__(self, explanation, model, dataset, true_y, classes, features):
        """Initialize the Explanation Dashboard Input.

        :param explanation: An object that represents an explanation.
        :type explanation: ExplanationMixin
        :param model: An object that represents a model.
            It is assumed that for the classification case
            it has a method of predict_proba() returning
            the prediction probabilities for each
            class and for the regression case a method of predict()
            returning the prediction value.
        :type model: object
        :param dataset: A matrix of feature vector examples
            (# examples x # features), the same samples
            used to build the explanation.
            Will overwrite any set on explanation object already.
            Must have fewer than
            100000 rows and fewer than 1000 columns.
            Note dashboard may become slow or crash for more than 10000 rows.
        :type dataset: numpy.ndarray or list[][]
        :param true_y: The true labels for the provided dataset.
            Will overwrite any set on
            explanation object already.
        :type true_y: numpy.ndarray or list[]
        :param classes: The class names.
        :type classes: numpy.ndarray or list[]
        :param features: Feature names.
        :type features: numpy.ndarray or list[]
        """
        self._model = model
        self._is_classifier = _is_classifier(model)
        self._dataframeColumns = None
        self.dashboard_input = {}
        # List of explanations, key of explanation type is "explanation_type"
        if explanation is not None:
            self._mli_explanations = explanation.data(-1)["mli"]
        else:
            self._mli_explanations = None
        local_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
        global_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
        ebm_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)
        dataset_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_EXPLANATION_DATASET_KEY)

        if explanation is not None and hasattr(explanation, 'method'):
            self.dashboard_input[ExplanationDashboardInterface.
                                 EXPLANATION_METHOD] = explanation.method

        predicted_y = None
        feature_length = None
        if dataset_explanation is not None:
            if dataset is None:
                dataset = dataset_explanation[
                    ExplanationDashboardInterface.MLI_DATASET_X_KEY]
            if true_y is None:
                true_y = dataset_explanation[
                    ExplanationDashboardInterface.MLI_DATASET_Y_KEY]

        if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
            self._dfdtypes = dataset.dtypes
        try:
            list_dataset = _convert_to_list(dataset, EXP_VIZ_ERR_MSG)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported dataset type, inner error: {}".format(ex_str))
        if dataset is not None and model is not None:
            try:
                predicted_y = model.predict(dataset)
            except Exception as ex:
                ex_str = _format_exception(ex)
                msg = "Model does not support predict method for given"
                "dataset type, inner error: {}".format(ex_str)
                raise ValueError(msg)
            try:
                predicted_y = _convert_to_list(predicted_y, EXP_VIZ_ERR_MSG)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Model prediction output of unsupported type,"
                                 "inner error: {}".format(ex_str))
        if predicted_y is not None:
            self.dashboard_input[
                ExplanationDashboardInterface.PREDICTED_Y] = predicted_y
        row_length = 0
        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if row_length > 100000:
                raise ValueError("Exceeds maximum number of rows"
                                 "for visualization (100000)")
            if feature_length > 1000:
                warnings.warn("Exceeds maximum number of features for"
                              " visualization (1000)."
                              " Please regenerate the"
                              " explanation using fewer features or"
                              " initialize the dashboard without"
                              " passing a dataset. Dashboard will"
                              " show limited view.")
            else:
                self.dashboard_input[ExplanationDashboardInterface.
                                     TRAINING_DATA] = serialize_json_safe(
                                         list_dataset)
            self.dashboard_input[ExplanationDashboardInterface.
                                 IS_CLASSIFIER] = self._is_classifier

        local_dim = None

        if true_y is not None and len(true_y) == row_length:
            self.dashboard_input[
                ExplanationDashboardInterface.TRUE_Y] = _convert_to_list(
                    true_y, EXP_VIZ_ERR_MSG)

        if local_explanation is not None:
            try:
                local_explanation["scores"] = _convert_to_list(
                    local_explanation["scores"], EXP_VIZ_ERR_MSG)
                local_explanation["intercept"] = _convert_to_list(
                    local_explanation["intercept"], EXP_VIZ_ERR_MSG)
                # We can ignore perf explanation data.
                # Note if it is added back at any point,
                # the numpy values will need to be converted to python,
                # otherwise serialization fails.
                local_explanation["perf"] = None
                self.dashboard_input[ExplanationDashboardInterface.
                                     LOCAL_EXPLANATIONS] = local_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported local explanation type,"
                                 "inner error: {}".format(ex_str))
            if list_dataset is not None:
                local_dim = np.shape(local_explanation["scores"])
                if len(local_dim) != 2 and len(local_dim) != 3:
                    raise ValueError(
                        "Local explanation expected to be a 2D or 3D list")
                if len(local_dim) == 2 and (local_dim[1] != feature_length
                                            or local_dim[0] != row_length):
                    raise ValueError("Shape mismatch: local explanation"
                                     "length differs from dataset")
                if len(local_dim) == 3 and (local_dim[2] != feature_length
                                            or local_dim[1] != row_length):
                    raise ValueError("Shape mismatch: local explanation"
                                     " length differs from dataset")
        if local_explanation is None and global_explanation is not None:
            try:
                global_explanation["scores"] = _convert_to_list(
                    global_explanation["scores"], EXP_VIZ_ERR_MSG)
                if 'intercept' in global_explanation:
                    global_explanation["intercept"] = _convert_to_list(
                        global_explanation["intercept"], EXP_VIZ_ERR_MSG)
                self.dashboard_input[ExplanationDashboardInterface.
                                     GLOBAL_EXPLANATION] = global_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported global explanation type,"
                                 "inner error: {}".format(ex_str))
        if ebm_explanation is not None:
            try:
                self.dashboard_input[ExplanationDashboardInterface.
                                     EBM_EXPLANATION] = ebm_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Unsupported ebm explanation type: {}".format(ex_str))

        if features is None\
                and explanation is not None\
                and hasattr(explanation, 'features')\
                and explanation.features is not None:
            features = explanation.features
        if features is not None:
            features = _convert_to_list(features, EXP_VIZ_ERR_MSG)
            if feature_length is not None and len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            self.dashboard_input[
                ExplanationDashboardInterface.FEATURE_NAMES] = features
        if classes is None\
                and explanation is not None\
                and hasattr(explanation, 'classes')\
                and explanation.classes is not None:
            classes = explanation.classes
        if classes is not None:
            classes = _convert_to_list(classes, EXP_VIZ_ERR_MSG)
            if local_dim is not None and len(classes) != local_dim[0]:
                raise ValueError("Class vector length mismatch:"
                                 "class names length differs from"
                                 "local explanations dimension")
            self.dashboard_input[
                ExplanationDashboardInterface.CLASS_NAMES] = classes
        if _is_classifier(model) and dataset is not None:
            try:
                probability_y = model.predict_proba(dataset)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Model does not support predict_proba method"
                                 " for given dataset type,"
                                 " inner error: {}".format(ex_str))
            try:
                probability_y = _convert_to_list(probability_y,
                                                 EXP_VIZ_ERR_MSG)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Model predict_proba output of unsupported type,"
                    "inner error: {}".format(ex_str))
            self.dashboard_input[
                ExplanationDashboardInterface.PROBABILITY_Y] = probability_y
    def input_explanation_data(self, list_dataset, classes):
        # List of explanations, key of explanation type is "explanation_type"
        local_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
        global_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
        ebm_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)

        if local_explanation is not None:
            try:
                local_explanation["scores"] = _convert_to_list(
                    local_explanation["scores"])
                if np.shape(local_explanation["scores"])[-1] > 1000:
                    raise ValueError("Exceeds maximum number of features for "
                                     "visualization (1000). Please regenerate"
                                     " the explanation using fewer features.")
                local_explanation["intercept"] = _convert_to_list(
                    local_explanation["intercept"])
                # We can ignore perf explanation data.
                # Note if it is added back at any point,
                # the numpy values will need to be converted to python,
                # otherwise serialization fails.
                local_explanation["perf"] = None
                self.dashboard_input[
                    ExplanationDashboardInterface.LOCAL_EXPLANATIONS
                ] = local_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Unsupported local explanation type,"
                    "inner error: {}".format(ex_str))
            if list_dataset is not None:
                row_length, feature_length = np.shape(list_dataset)
                local_dim = np.shape(local_explanation["scores"])
                if len(local_dim) != 2 and len(local_dim) != 3:
                    raise ValueError(
                        "Local explanation expected to be a 2D or 3D list")
                if len(local_dim) == 2 and (local_dim[1] != feature_length or
                                            local_dim[0] != row_length):
                    raise ValueError(
                        "Shape mismatch: local explanation"
                        "length differs from dataset")
                if len(local_dim) == 3 and (local_dim[2] != feature_length or
                                            local_dim[1] != row_length):
                    raise ValueError(
                        "Shape mismatch: local explanation"
                        " length differs from dataset")
                if classes is not None and len(classes) != local_dim[0]:
                    raise ValueError("Class vector length mismatch:"
                                     "class names length differs from"
                                     "local explanations dimension")
        if local_explanation is None and global_explanation is not None:
            try:
                global_explanation["scores"] = _convert_to_list(
                    global_explanation["scores"])
                if 'intercept' in global_explanation:
                    global_explanation["intercept"] = _convert_to_list(
                        global_explanation["intercept"])
                self.dashboard_input[
                    ExplanationDashboardInterface.GLOBAL_EXPLANATION
                ] = global_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported global explanation type,"
                                 "inner error: {}".format(ex_str))
        if ebm_explanation is not None:
            try:
                self.dashboard_input[
                    ExplanationDashboardInterface.EBM_EXPLANATION
                ] = ebm_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Unsupported ebm explanation type: {}".format(ex_str))
    def __init__(self,
                 *,
                 sensitive_features,
                 y_true,
                 y_pred,
                 locale=None,
                 public_ip=None,
                 port=None,
                 fairness_metric_module=None,
                 fairness_metric_mapping=None):
        """Initialize the fairness dashboard."""

        metrics_module = FairnessMetricModule(
            module_name=fairness_metric_module,
            mapping=fairness_metric_mapping)

        if sensitive_features is None or y_true is None or y_pred is None:
            raise ValueError("Required parameters not provided")

        model_dict = _convert_to_string_list_dict("Model {0}", y_pred, y_true)
        sf_dict = _convert_to_string_list_dict("Sensitive Feature {0}",
                                               sensitive_features, y_true)

        # Make sure that things are as the TS layer expects
        self._y_true = _convert_to_list(y_true)
        self._y_pred = list(model_dict.values())
        # Note transpose in the following
        dataset = (np.array(list(sf_dict.values())).T).tolist()

        if np.shape(self._y_true)[0] != np.shape(self._y_pred)[1]:
            raise ValueError("Predicted y does not match true y shape")

        if np.shape(self._y_true)[0] != np.shape(dataset)[0]:
            raise ValueError("Sensitive features shape does not match true y "
                             "shape")

        fairness_input = {
            "true_y": self._y_true,
            "model_names": list(model_dict.keys()),
            "predicted_ys": self._y_pred,
            "features": list(sf_dict.keys()),
            "dataset": dataset,
            "classification_methods": metrics_module.classification_methods,
            "regression_methods": metrics_module.regression_methods,
            "probability_methods": metrics_module.probability_methods,
        }

        super(FairnessDashboard, self).__init__(dashboard_type="Fairness",
                                                model_data=fairness_input,
                                                public_ip=public_ip,
                                                port=port,
                                                locale=locale)

        self.fairness_metrics_module = metrics_module

        def metrics():
            """
            Note:
                This function always calculates the error_function,
                if available, so that the value is cached in the MetricsCache

            Request attributes:
                binVector: the sensitive features binning vector
                metricKey: the metricKey that corresponds to the function that
                    will be calculated
                modelIndex: the model index used to index the predicted y's
                    by that model
            """
            try:
                data = request.get_json(force=True)

                if type(data["binVector"][0]) == np.int32:
                    data['binVector'] = [
                        str(bin_) for bin_ in data['binVector']
                    ]

                metric_name = data['metricKey']
                error_function_name = f"{metric_name} bounds"
                metric_function = \
                    self.fairness_metrics_module._metric_methods.get(
                        data["metricKey"]).get("function")
                metric_method = {metric_name: metric_function}
                error_function = \
                    self.fairness_metrics_module._metric_methods.get(
                        data["metricKey"]).get("error_function")
                if error_function is not None:
                    metric_method.update({error_function_name: error_function})

                metric_frame = self.fairness_metrics_module.MetricFrame(
                    metrics=metric_method,
                    y_true=self.model_data['true_y'],
                    y_pred=self.model_data['predicted_ys'][data["modelIndex"]],
                    sensitive_features=data["binVector"])

                result = {
                    "data": {
                        "global":
                        metric_frame.overall[metric_name],
                        "bins":
                        list([
                            entry for entry in list(
                                metric_frame.by_group.to_dict().values())
                            if not isinstance(entry[0], tuple)
                        ][0].values()),
                    }
                }
                if error_function_name in metric_method:
                    result["data"].update({
                        "bounds": {
                            "lower":
                            metric_frame.overall[error_function_name][0],
                            "upper":
                            metric_frame.overall[error_function_name][1],
                        },
                        # [(x1, y1), (x2, y2), (x3, y3)...]
                        "binBounds": [{
                            "lower": bounds[0],
                            "upper": bounds[1]
                        }
                            for bounds in list(
                                metric_frame.by_group[error_function_name]\
                                .to_dict().values())]
                    })
                return jsonify(result)
            except Exception as ex:
                import sys
                import traceback
                exc_type, exc_value, exc_traceback = sys.exc_info()

                return jsonify({
                    "error":
                    str(ex),
                    "stacktrace":
                    str(
                        repr(
                            traceback.format_exception(exc_type, exc_value,
                                                       exc_traceback))),
                    "locals":
                    str(locals()),
                })

        self.add_url_rule(metrics, '/metrics', methods=["POST"])
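
The /metrics handler is essentially a thin wrapper around fairlearn's MetricFrame. A standalone sketch of the same computation on toy data; recall_score stands in for whatever metricKey the client sends, and the bin vector plays the role of the request's binVector:

import numpy as np
from fairlearn.metrics import MetricFrame
from sklearn.metrics import recall_score

y_true = np.array([0, 1, 1, 0, 1, 0])
y_pred = np.array([0, 1, 0, 0, 1, 1])
bin_vector = np.array(["a", "a", "b", "b", "a", "b"])

mf = MetricFrame(metrics={"recall_score": recall_score},
                 y_true=y_true, y_pred=y_pred,
                 sensitive_features=bin_vector)
print(mf.overall["recall_score"])         # the "global" value
print(list(mf.by_group["recall_score"]))  # the per-bin values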
Example #15
    def _get_dataset(self):
        dashboard_dataset = Dataset()
        dashboard_dataset.task_type = self.task_type
        dashboard_dataset.class_names = _convert_to_list(self._classes)

        predicted_y = None
        feature_length = None

        dataset: pd.DataFrame = self.test.drop([self.target_column], axis=1)

        if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
        try:
            list_dataset = _convert_to_list(dataset)
        except Exception as ex:
            raise ValueError("Unsupported dataset type") from ex
        if dataset is not None and self.model is not None:
            try:
                predicted_y = self.model.predict(dataset)
            except Exception as ex:
                msg = "Model does not support predict method for given"
                "dataset type"
                raise ValueError(msg) from ex
            try:
                predicted_y = _convert_to_list(predicted_y)
            except Exception as ex:
                raise ValueError(
                    "Model prediction output of unsupported type,") from ex
        if predicted_y is not None:
            if (self.task_type == "classification"
                    and dashboard_dataset.class_names is not None):
                predicted_y = [
                    dashboard_dataset.class_names.index(y) for y in predicted_y
                ]
            dashboard_dataset.predicted_y = predicted_y
        row_length = 0

        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if row_length > 100000:
                raise ValueError("Exceeds maximum number of rows"
                                 "for visualization (100000)")
            if feature_length > 1000:
                raise ValueError("Exceeds maximum number of features for"
                                 " visualization (1000). Please regenerate the"
                                 " explanation using fewer features or"
                                 " initialize the dashboard without passing a"
                                 " dataset.")
            dashboard_dataset.features = list_dataset

        true_y = self.test[self.target_column]

        if true_y is not None and len(true_y) == row_length:
            if (self.task_type == "classification"
                    and dashboard_dataset.class_names is not None):
                true_y = [
                    dashboard_dataset.class_names.index(y) for y in true_y
                ]
            dashboard_dataset.true_y = _convert_to_list(true_y)

        features = dataset.columns

        if features is not None:
            features = _convert_to_list(features)
            if feature_length is not None and len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            dashboard_dataset.feature_names = features
        dashboard_dataset.target_column = self.target_column
        if (self.model is not None
                and hasattr(self.model, SKLearn.PREDICT_PROBA)
                and self.model.predict_proba is not None
                and dataset is not None):
            try:
                probability_y = self.model.predict_proba(dataset)
            except Exception as ex:
                raise ValueError("Model does not support predict_proba method"
                                 " for given dataset type,") from ex
            try:
                probability_y = _convert_to_list(probability_y)
            except Exception as ex:
                raise ValueError(
                    "Model predict_proba output of unsupported type,") from ex
            dashboard_dataset.probability_y = probability_y

        return dashboard_dataset
    def __init__(self,
                 *,
                 sensitive_features,
                 y_true,
                 y_pred,
                 locale=None,
                 public_ip=None,
                 port=None,
                 fairness_metric_module=None,
                 fairness_metric_mapping=None):
        """Initialize the fairness dashboard."""

        metrics_module = FairnessMetricModule(
            module_name=fairness_metric_module,
            mapping=fairness_metric_mapping)

        if sensitive_features is None or y_true is None or y_pred is None:
            raise ValueError("Required parameters not provided")

        model_dict = _convert_to_string_list_dict("Model {0}", y_pred, y_true)
        sf_dict = _convert_to_string_list_dict("Sensitive Feature {0}",
                                               sensitive_features, y_true)

        # Make sure that things are as the TS layer expects
        self._y_true = _convert_to_list(y_true)
        self._y_pred = list(model_dict.values())
        # Note transpose in the following
        dataset = (np.array(list(sf_dict.values())).T).tolist()

        if np.shape(self._y_true)[0] != np.shape(self._y_pred)[1]:
            raise ValueError("Predicted y does not match true y shape")

        if np.shape(self._y_true)[0] != np.shape(dataset)[0]:
            raise ValueError("Sensitive features shape does not match true y "
                             "shape")

        fairness_input = {
            "true_y": self._y_true,
            "model_names": list(model_dict.keys()),
            "predicted_ys": self._y_pred,
            "features": list(sf_dict.keys()),
            "dataset": dataset,
            "classification_methods": metrics_module.classification_methods,
            "regression_methods": metrics_module.regression_methods,
            "probability_methods": metrics_module.probability_methods,
        }

        super(FairnessDashboard, self).__init__(dashboard_type="Fairness",
                                                model_data=fairness_input,
                                                public_ip=public_ip,
                                                port=port,
                                                locale=locale)

        self.fairness_metrics_module = metrics_module

        def metrics():
            try:
                data = request.get_json(force=True)

                if type(data["binVector"][0]) == np.int32:
                    data['binVector'] = [
                        str(bin_) for bin_ in data['binVector']
                    ]

                metric_method = self.fairness_metrics_module.\
                    _metric_methods.get(data["metricKey"]).get("function")
                metric_frame = self.fairness_metrics_module.MetricFrame(
                    metric_method,
                    self.model_data['true_y'],
                    self.model_data['predicted_ys'][data["modelIndex"]],
                    sensitive_features=data["binVector"])
                return jsonify({
                    "data": {
                        "global": metric_frame.overall,
                        "bins": list(metric_frame.by_group.to_dict().values())
                    }
                })
            except Exception as ex:
                import sys
                import traceback
                exc_type, exc_value, exc_traceback = sys.exc_info()

                return jsonify({
                    "error":
                    str(ex),
                    "stacktrace":
                    str(
                        repr(
                            traceback.format_exception(exc_type, exc_value,
                                                       exc_traceback))),
                    "locals":
                    str(locals()),
                })

        self.add_url_rule(metrics, '/metrics', methods=["POST"])
    def __init__(
            self,
            explanation,
            model,
            dataset,
            true_y,
            classes,
            features,
            categorical_features,
            true_y_dataset,
            pred_y,
            pred_y_dataset,
            model_task,
            metric,
            max_depth,
            num_leaves,
            min_child_samples,
            sample_dataset):
        """Initialize the ErrorAnalysis Dashboard Input.

        :param explanation: An object that represents an explanation.
        :type explanation: ExplanationMixin
        :param model: An object that represents a model.
            It is assumed that for the classification case
            it has a method of predict_proba() returning
            the prediction probabilities for each
            class and for the regression case a method of predict()
            returning the prediction value.
        :type model: object
        :param dataset: A matrix of feature vector examples
            (# examples x # features), the same samples
            used to build the explanation.
            Will overwrite any set on explanation object already.
        :type dataset: numpy.ndarray or list[][] or pandas.DataFrame
        :param true_y: The true labels for the provided explanation.
            Will overwrite any set on explanation object already.
        :type true_y: numpy.ndarray or list[] or pandas.Series
        :param classes: The class names.
        :type classes: numpy.ndarray or list[]
        :param features: Feature names.
        :type features: numpy.ndarray or list[]
        :param categorical_features: The categorical feature names.
        :type categorical_features: list[str]
        :param true_y_dataset: The true labels for the provided dataset.
            Only needed if the explanation has a sample of instances from the
            original dataset.  Otherwise specify true_y parameter only.
        :type true_y_dataset: numpy.ndarray or list[] or pandas.Series
        :param pred_y: The predicted y values, can be passed in as an
            alternative to the model and explanation for a more limited
            view.
        :type pred_y: numpy.ndarray or list[] or pandas.Series
        :param pred_y_dataset: The predicted labels for the provided dataset.
            Only needed if providing a sample dataset for the UI while using
            the full dataset for the tree view and heatmap. Otherwise specify
            pred_y parameter only.
        :type pred_y_dataset: numpy.ndarray or list[] or pandas.Series
        :param model_task: Optional parameter to specify whether the model
            is a classification or regression model. In most cases, the
            type of the model can be inferred based on the shape of the
            output, where a classifier has a predict_proba method and
            outputs a 2 dimensional array, while a regressor has a
            predict method and outputs a 1 dimensional array.
        :type model_task: str
        :param metric: The metric name to evaluate at each tree node or
            heatmap grid.  Currently supported classification metrics
            include 'error_rate', 'recall_score' for binary
            classification and 'micro_recall_score' or
            'macro_recall_score' for multiclass classification,
            'precision_score' for binary classification and
            'micro_precision_score' or 'macro_precision_score'
            for multiclass classification, 'f1_score' for binary
            classification and 'micro_f1_score' or 'macro_f1_score'
            for multiclass classification, and 'accuracy_score'.
            Supported regression metrics include 'mean_absolute_error',
            'mean_squared_error', 'r2_score', and 'median_absolute_error'.
        :type metric: str
        :param max_depth: The maximum depth of the surrogate tree trained
            on errors.
        :type max_depth: int
        :param num_leaves: The number of leaves of the surrogate tree
            trained on errors.
        :type num_leaves: int
        :param min_child_samples: The minimal number of data required
            to create one leaf.
        :type min_child_samples: int
        :param sample_dataset: Dataset with fewer samples than the main
            dataset. Used to improve performance only when an
            Explanation object is not provided.  Used only if
            explanation is not specified for the dataset explorer.
            Specify fewer than 10k points for optimal performance.
        :type sample_dataset: pd.DataFrame or numpy.ndarray or list[][]
        """
        self._model = model
        full_dataset = dataset
        if true_y_dataset is None:
            full_true_y = true_y
        else:
            full_true_y = true_y_dataset
        if pred_y_dataset is None:
            full_pred_y = pred_y
        else:
            full_pred_y = pred_y_dataset
        self._categorical_features = categorical_features
        self._string_ind_data = None
        self._categories = []
        self._categorical_indexes = []
        self._is_classifier = _is_classifier(model)
        self._dataframeColumns = None
        self.dashboard_input = {}
        has_explanation = explanation is not None
        feature_length = None
        probability_y = None

        if has_explanation:
            if classes is None:
                has_classes_attr = hasattr(explanation, 'classes')
                if has_classes_attr and explanation.classes is not None:
                    classes = explanation.classes
            dataset, true_y = self.input_explanation(explanation,
                                                     dataset,
                                                     true_y)
            row_length = len(dataset)
            # Only check dataset on explanation for row length bounds
            if row_length > 100000:
                raise ValueError(
                    "Exceeds maximum number of rows"
                    "for visualization (100000)")
        elif sample_dataset is not None:
            dataset = sample_dataset

        if classes is not None:
            classes = _convert_to_list(classes)
            self.dashboard_input[
                ExplanationDashboardInterface.CLASS_NAMES
            ] = classes
            class_to_index = {k: v for v, k in enumerate(classes)}

        if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
            self._dfdtypes = dataset.dtypes
        try:
            list_dataset = _convert_to_list(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported dataset type, inner error: {}".format(ex_str))

        if has_explanation:
            self.input_explanation_data(list_dataset, classes)
            if features is None and hasattr(explanation, 'features'):
                features = explanation.features

        model_available = model is not None

        if model_available and pred_y is not None:
            raise ValueError(
                'Only model or pred_y can be specified, not both')

        self.dashboard_input[ENABLE_PREDICT] = model_available

        if model_available:
            predicted_y = self.compute_predicted_y(model, dataset)
        else:
            predicted_y = self.predicted_y_to_list(pred_y)

        if predicted_y is not None:
            # If classes specified, convert predicted_y to
            # numeric representation
            if classes is not None and predicted_y[0] in class_to_index:
                for i in range(len(predicted_y)):
                    predicted_y[i] = class_to_index[predicted_y[i]]
            self.dashboard_input[
                ExplanationDashboardInterface.PREDICTED_Y
            ] = predicted_y
        row_length = 0
        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if feature_length > 1000:
                raise ValueError("Exceeds maximum number of features for"
                                 " visualization (1000). Please regenerate the"
                                 " explanation using fewer features or"
                                 " initialize the dashboard without passing a"
                                 " dataset.")
            self.dashboard_input[
                ExplanationDashboardInterface.TRAINING_DATA
            ] = serialize_json_safe(list_dataset)
            self.dashboard_input[
                ExplanationDashboardInterface.IS_CLASSIFIER
            ] = self._is_classifier

        if true_y is not None and len(true_y) == row_length:
            list_true_y = _convert_to_list(true_y)
            # If classes specified, convert true_y to numeric representation
            if classes is not None and list_true_y[0] in class_to_index:
                for i in range(len(list_true_y)):
                    list_true_y[i] = class_to_index[list_true_y[i]]
            self.dashboard_input[
                ExplanationDashboardInterface.TRUE_Y
            ] = list_true_y

        if features is not None:
            features = _convert_to_list(features)
            if feature_length is not None and len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            self.dashboard_input[FEATURE_NAMES] = features
        if model_available and _is_classifier(model) and \
                dataset is not None:
            try:
                probability_y = model.predict_proba(dataset)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Model does not support predict_proba method"
                                 " for given dataset type,"
                                 " inner error: {}".format(ex_str))
            try:
                probability_y = _convert_to_list(probability_y)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Model predict_proba output of unsupported type,"
                    "inner error: {}".format(ex_str))
            self.dashboard_input[
                ExplanationDashboardInterface.PROBABILITY_Y
            ] = probability_y
        if model_available:
            self._error_analyzer = ModelAnalyzer(model,
                                                 full_dataset,
                                                 full_true_y,
                                                 features,
                                                 categorical_features,
                                                 model_task,
                                                 metric,
                                                 classes)
        else:
            # Model task cannot be unknown when passing predictions
            # Assume classification for backwards compatibility
            if model_task == ModelTask.UNKNOWN:
                model_task = ModelTask.CLASSIFICATION
            self._error_analyzer = PredictionsAnalyzer(full_pred_y,
                                                       full_dataset,
                                                       full_true_y,
                                                       features,
                                                       categorical_features,
                                                       model_task,
                                                       metric,
                                                       classes)
        if self._categorical_features:
            self.dashboard_input[
                ExplanationDashboardInterface.CATEGORICAL_MAP
            ] = serialize_json_safe(self._error_analyzer.category_dictionary)
        # Compute metrics on all data cohort
        if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
            if self._error_analyzer.metric is None:
                metric = Metrics.ERROR_RATE
            else:
                metric = self._error_analyzer.metric
        else:
            if self._error_analyzer.metric is None:
                metric = Metrics.MEAN_SQUARED_ERROR
            else:
                metric = self._error_analyzer.metric
        if model_available:
            full_pred_y = self.compute_predicted_y(model, full_dataset)
        # If we don't have an explanation or model/probabilities specified
        # we can try to use model task to figure out the method
        if not has_explanation and probability_y is None:
            method = MethodConstants.REGRESSION
            if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
                if (len(np.unique(predicted_y)) > 2):
                    method = MethodConstants.MULTICLASS
                else:
                    method = MethodConstants.BINARY
            self.dashboard_input[
                ErrorAnalysisDashboardInterface.METHOD
            ] = method

        data = self.get_error_analysis_data(max_depth,
                                            num_leaves,
                                            min_child_samples,
                                            metric)
        self.dashboard_input[
            ExplanationDashboardInterface.ERROR_ANALYSIS_DATA
        ] = data
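
One detail of this constructor deserves a note: predicted and true labels are mapped to class indices only when the first value actually appears in class_to_index, so already-numeric labels pass through untouched. A minimal sketch with hypothetical class names:

classes = ["rejected", "approved"]  # hypothetical class names
class_to_index = {k: v for v, k in enumerate(classes)}

predicted_y = ["approved", "rejected", "approved"]
if predicted_y[0] in class_to_index:
    predicted_y = [class_to_index[y] for y in predicted_y]
assert predicted_y == [1, 0, 1]

numeric_y = [1, 0, 1]  # numeric labels are left as-is
assert numeric_y[0] not in class_to_index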