def setup_visualization_input(self, classes, predicted_y,
                              list_dataset, true_y, features):
    if classes is not None:
        classes = convert_to_list(classes)
        self.dashboard_input[
            ExplanationDashboardInterface.CLASS_NAMES] = classes
        class_to_index = {k: v for v, k in enumerate(classes)}

    if predicted_y is not None:
        # If classes specified, convert predicted_y to
        # numeric representation
        if classes is not None and predicted_y[0] in class_to_index:
            for i in range(len(predicted_y)):
                predicted_y[i] = class_to_index[predicted_y[i]]
        self.dashboard_input[
            ExplanationDashboardInterface.PREDICTED_Y] = predicted_y

    row_length = 0
    feature_length = None
    if list_dataset is not None:
        row_length, feature_length = np.shape(list_dataset)
        if feature_length > 1000:
            raise ValueError("Exceeds maximum number of features for"
                             " visualization (1000). Please regenerate the"
                             " explanation using fewer features or"
                             " initialize the dashboard without passing a"
                             " dataset.")
        self.dashboard_input[
            ExplanationDashboardInterface.TRAINING_DATA] = \
            serialize_json_safe(list_dataset)

    if true_y is not None and len(true_y) == row_length:
        list_true_y = convert_to_list(true_y)
        # If classes specified, convert true_y to numeric representation
        if classes is not None and list_true_y[0] in class_to_index:
            for i in range(len(list_true_y)):
                list_true_y[i] = class_to_index[list_true_y[i]]
        self.dashboard_input[
            ExplanationDashboardInterface.TRUE_Y] = list_true_y

    if features is not None:
        features = convert_to_list(features)
        if feature_length is not None and len(features) != feature_length:
            raise ValueError("Feature vector length mismatch:"
                             " feature names length differs"
                             " from local explanations dimension")
        self.dashboard_input[
            ExplanationDashboardInterface.FEATURE_NAMES] = features
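# Standalone illustration (not part of the dashboard code) of the
# class-to-index conversion used above: when class names are supplied,
# string labels are replaced by their positional index in the class list.
classes = ["setosa", "versicolor", "virginica"]
class_to_index = {k: v for v, k in enumerate(classes)}
predicted_y = ["virginica", "setosa"]
predicted_y = [class_to_index[y] for y in predicted_y]
assert predicted_y == [2, 0]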
def test_index_to_list(self):
    input_index = pd.Index(data=[[0, 4], [1, 5], [2, 6]])
    expected_list = [[0, 4], [1, 5], [2, 6]]
    input_as_list = convert_to_list(input_index)
    assert input_as_list is not None
    assert input_as_list == expected_list
def test_series_to_list(self):
    input_series = pd.Series(data=[[0, 4], [1, 5], [2, 6]])
    expected_list = [[0, 4], [1, 5], [2, 6]]
    input_as_list = convert_to_list(input_series)
    assert input_as_list is not None
    assert input_as_list == expected_list
def test_list_to_list(self):
    input_list = [[0, 4], [1, 5], [2, 6]]
    expected_list = [[0, 4], [1, 5], [2, 6]]
    input_as_list = convert_to_list(input_list)
    assert input_as_list is not None
    assert input_as_list == expected_list
def test_array_to_list(self):
    input_array = np.array([[0, 4], [1, 5], [2, 6]])
    expected_list = [[0, 4], [1, 5], [2, 6]]
    input_as_list = convert_to_list(input_array)
    assert input_as_list is not None
    assert input_as_list == expected_list
def predicted_y_to_list(self, predicted_y):
    try:
        predicted_y = convert_to_list(predicted_y)
    except Exception as ex:
        ex_str = _format_exception(ex)
        raise ValueError("Model prediction output of unsupported type,"
                         " inner error: {}".format(ex_str))
    return predicted_y
def on_predict(self, data):
    try:
        if self._dataframeColumns is not None:
            data = pd.DataFrame(data, columns=self._dataframeColumns)
            data = data.astype(dict(self._dfdtypes))
        if self._is_classifier:
            prediction = convert_to_list(self._model.predict_proba(data),
                                         EXP_VIZ_ERR_MSG)
        else:
            prediction = convert_to_list(self._model.predict(data),
                                         EXP_VIZ_ERR_MSG)
        return {WidgetRequestResponseConstants.DATA: prediction}
    except Exception:
        return {
            WidgetRequestResponseConstants.ERROR: "Model threw exception"
            " while predicting...",
            WidgetRequestResponseConstants.DATA: []
        }
def test_pandas_dataframe_to_list(self):
    input_dataframe = pd.DataFrame.from_dict({
        "a": [0, 1, 2],
        "b": [4, 5, 6]
    })
    expected_list = [[0, 4], [1, 5], [2, 6]]
    input_as_list = convert_to_list(input_dataframe)
    assert input_as_list is not None
    assert input_as_list == expected_list
def test_csr_matrix_to_list(self):
    input_sparse_matrix = csr_matrix((3, 10000), dtype=np.int8)
    with pytest.raises(ValueError) as ve:
        convert_to_list(input_sparse_matrix)
    assert "Exceeds maximum number of features for " + \
        "visualization (1000)" in str(ve.value)
    with pytest.raises(ValueError) as ve:
        convert_to_list(input_sparse_matrix,
                        custom_err_msg="Error occurred")
    assert "Error occurred" in str(ve.value)
    row = np.array([0, 0, 1, 2, 2, 2])
    col = np.array([0, 2, 2, 0, 1, 2])
    data = np.array([1, 2, 3, 4, 5, 6])
    sparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))
    expected_list = [[1, 0, 2], [0, 0, 3], [4, 5, 6]]
    input_as_list = convert_to_list(sparse_matrix)
    assert input_as_list is not None
    assert input_as_list == expected_list
def on_predict(self, data):
    try:
        data = pd.DataFrame(
            data, columns=self.dashboard_input.dataset.feature_names)
        if self._is_classifier:
            prediction = convert_to_list(
                self._analysis.model.predict_proba(data), EXP_VIZ_ERR_MSG)
        else:
            prediction = convert_to_list(
                self._analysis.model.predict(data), EXP_VIZ_ERR_MSG)
        return {WidgetRequestResponseConstants.data: prediction}
    except Exception as e:
        print(e)
        traceback.print_exc()
        e_str = _format_exception(e)
        return {
            WidgetRequestResponseConstants.error:
                "Model threw exception while predicting..."
                " inner error: {}".format(e_str),
            WidgetRequestResponseConstants.data: []
        }
def setup_pyspark(self, model, dataset, true_y, classes, features,
                  categorical_features, true_y_dataset, pred_y,
                  pred_y_dataset, model_task, metric, max_depth,
                  num_leaves, min_child_samples, sample_dataset,
                  model_available):
    self._error_analyzer = ModelAnalyzer(model, dataset, true_y,
                                         features, categorical_features,
                                         model_task, metric, classes)
    sample = dataset.to_spark().limit(100)
    scored_sample = model.transform(sample)
    pd_sample = scored_sample.toPandas()
    predicted_y = pd_sample["prediction"]
    predicted_y = self.predicted_y_to_list(predicted_y)
    true_y = pd_sample[true_y]
    pd_sample = pd_sample[features]
    list_dataset = convert_to_list(pd_sample)
    self.setup_visualization_input(classes, predicted_y, list_dataset,
                                   true_y, features)
def _save_metadata(self, path):
    """Save the metadata like target column, categorical features,
    task type and the classes (if any).

    :param path: The directory path to save the RAIInsights to.
    :type path: str
    """
    top_dir = Path(path)
    classes = convert_to_list(self._classes)
    feature_metadata_dict = None
    if self._feature_metadata is not None:
        feature_metadata_dict = self._feature_metadata.to_dict()
    meta = {
        _TARGET_COLUMN: self.target_column,
        _TASK_TYPE: self.task_type,
        _CATEGORICAL_FEATURES: self.categorical_features,
        _CLASSES: classes,
        _FEATURE_COLUMNS: self._feature_columns,
        _FEATURE_RANGES: self._feature_ranges,
        _FEATURE_METADATA: feature_metadata_dict
    }
    with open(top_dir / _META_JSON, 'w') as file:
        json.dump(meta, file)
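# Hedged sketch of the inverse of _save_metadata above: reading the
# metadata back with json.load. _load_metadata is a hypothetical helper
# name used only for illustration; _META_JSON is the same module-level
# constant used by _save_metadata.
def _load_metadata(path):
    """Load the metadata dictionary written by _save_metadata."""
    top_dir = Path(path)
    with open(top_dir / _META_JSON, 'r') as file:
        return json.load(file)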
def _get_dataset(self):
    dashboard_dataset = Dataset()
    dashboard_dataset.task_type = self.task_type
    dashboard_dataset.categorical_features = self.categorical_features
    dashboard_dataset.class_names = convert_to_list(self._classes)
    if self._feature_metadata is not None:
        dashboard_dataset.feature_metadata = \
            self._feature_metadata.to_dict()
    else:
        dashboard_dataset.feature_metadata = None
    dashboard_dataset.data_balance_measures = \
        self._data_balance_manager.get_data()

    predicted_y = None
    feature_length = None

    dataset: pd.DataFrame = self.test.drop([self.target_column], axis=1)

    if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
        self._dataframeColumns = dataset.columns
    try:
        list_dataset = convert_to_list(dataset)
    except Exception as ex:
        raise ValueError("Unsupported dataset type") from ex
    if dataset is not None and self.model is not None:
        try:
            predicted_y = self.model.predict(dataset)
        except Exception as ex:
            msg = ("Model does not support predict method for given"
                   " dataset type")
            raise ValueError(msg) from ex
        try:
            predicted_y = convert_to_list(predicted_y)
        except Exception as ex:
            raise ValueError(
                "Model prediction output of unsupported type,") from ex
    if predicted_y is not None:
        if (self.task_type == "classification" and
                dashboard_dataset.class_names is not None):
            predicted_y = [
                dashboard_dataset.class_names.index(y)
                for y in predicted_y
            ]
        dashboard_dataset.predicted_y = predicted_y
    row_length = 0

    if list_dataset is not None:
        row_length, feature_length = np.shape(list_dataset)
        if row_length > 100000:
            raise ValueError("Exceeds maximum number of rows"
                             " for visualization (100000)")
        if feature_length > 1000:
            raise ValueError("Exceeds maximum number of features for"
                             " visualization (1000). Please regenerate the"
                             " explanation using fewer features or"
                             " initialize the dashboard without passing a"
                             " dataset.")
        dashboard_dataset.features = list_dataset

    true_y = self.test[self.target_column]

    if true_y is not None and len(true_y) == row_length:
        if (self.task_type == "classification" and
                dashboard_dataset.class_names is not None):
            true_y = [
                dashboard_dataset.class_names.index(y)
                for y in true_y
            ]
        dashboard_dataset.true_y = convert_to_list(true_y)

    features = dataset.columns

    if features is not None:
        features = convert_to_list(features)
        if feature_length is not None and len(features) != feature_length:
            raise ValueError("Feature vector length mismatch:"
                             " feature names length differs"
                             " from local explanations dimension")
        dashboard_dataset.feature_names = features
    dashboard_dataset.target_column = self.target_column
    if is_classifier(self.model) and dataset is not None:
        try:
            probability_y = self.model.predict_proba(dataset)
        except Exception as ex:
            raise ValueError("Model does not support predict_proba method"
                             " for given dataset type,") from ex
        try:
            probability_y = convert_to_list(probability_y)
        except Exception as ex:
            raise ValueError(
                "Model predict_proba output of unsupported type,") from ex
        dashboard_dataset.probability_y = probability_y

    return dashboard_dataset
def __init__(self, *,
             sensitive_features,
             y_true,
             y_pred,
             locale=None,
             public_ip=None,
             port=None,
             fairness_metric_module=None,
             fairness_metric_mapping=None):
    """Initialize the fairness dashboard."""
    metrics_module = FairnessMetricModule(
        module_name=fairness_metric_module,
        mapping=fairness_metric_mapping)

    if sensitive_features is None or y_true is None or y_pred is None:
        raise ValueError("Required parameters not provided")

    model_dict = convert_to_string_list_dict("Model {0}",
                                             y_pred,
                                             y_true)
    sf_dict = convert_to_string_list_dict("Sensitive Feature {0}",
                                          sensitive_features,
                                          y_true)

    # Make sure that things are as the TS layer expects
    self._y_true = convert_to_list(y_true)
    self._y_pred = list(model_dict.values())
    # Note transpose in the following
    dataset = (np.array(list(sf_dict.values())).T).tolist()

    if np.shape(self._y_true)[0] != np.shape(self._y_pred)[1]:
        raise ValueError("Predicted y does not match true y shape")

    if np.shape(self._y_true)[0] != np.shape(dataset)[0]:
        raise ValueError("Sensitive features shape does not match true y "
                         "shape")

    fairness_input = {
        "true_y": self._y_true,
        "model_names": list(model_dict.keys()),
        "predicted_ys": self._y_pred,
        "features": list(sf_dict.keys()),
        "dataset": dataset,
        "classification_methods": metrics_module.classification_methods,
        "regression_methods": metrics_module.regression_methods,
        "probability_methods": metrics_module.probability_methods,
    }

    super(FairnessDashboard, self).__init__(
        dashboard_type="Fairness",
        model_data=fairness_input,
        public_ip=public_ip,
        port=port,
        locale=locale)

    self.fairness_metrics_module = metrics_module

    def metrics():
        """Compute the requested fairness metric.

        Note: This function always calculates the error_function,
        if available, so that the value is cached in the MetricsCache.

        Request attributes:
            binVector: the sensitive features binning vector
            metricKey: the metricKey that corresponds to the function
                that will be calculated
            modelIndex: the model index used to index the predicted y's
                by that model
        """
        try:
            data = request.get_json(force=True)

            if type(data["binVector"][0]) == np.int32:
                data['binVector'] = [
                    str(bin_) for bin_ in data['binVector']
                ]

            metric_name = data['metricKey']
            error_function_name = f"{metric_name} bounds"
            metric_function = \
                self.fairness_metrics_module._metric_methods.get(
                    data["metricKey"]).get("function")
            metric_method = {metric_name: metric_function}

            error_function = \
                self.fairness_metrics_module._metric_methods.get(
                    data["metricKey"]).get("error_function")
            if error_function is not None:
                metric_method.update(
                    {error_function_name: error_function})

            metric_frame = self.fairness_metrics_module.MetricFrame(
                metrics=metric_method,
                y_true=self.model_data['true_y'],
                y_pred=self.model_data['predicted_ys'][data["modelIndex"]],
                sensitive_features=data["binVector"])

            result = {
                "data": {
                    "global": metric_frame.overall[metric_name],
                    "bins": list([
                        entry for entry in list(
                            metric_frame.by_group.to_dict().values())
                        if not isinstance(entry[0], tuple)
                    ][0].values()),
                }
            }
            if error_function_name in metric_method:
                result["data"].update({
                    "bounds": {
                        "lower":
                            metric_frame.overall[error_function_name][0],
                        "upper":
                            metric_frame.overall[error_function_name][1],
                    },
                    # [(x1, y1), (x2, y2), (x3, y3)...]
                    "binBounds": [{
                        "lower": bounds[0],
                        "upper": bounds[1]
                    } for bounds in list(
                        metric_frame.by_group[error_function_name]
                        .to_dict().values())]
                })
            return jsonify(result)
        except Exception as ex:
            import sys
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            return jsonify({
                "error": str(ex),
                "stacktrace": str(
                    repr(traceback.format_exception(exc_type,
                                                    exc_value,
                                                    exc_traceback))),
                "locals": str(locals()),
            })

    self.add_url_rule(metrics, '/metrics', methods=["POST"])
def _get_interpret(self, explanation, evaluation_examples=None):
    interpretation = ModelExplanationData()

    # List of explanations, key of explanation type is "explanation_type"
    if explanation is not None:
        mli_explanations = explanation.data(-1)["mli"]
    else:
        mli_explanations = None
    local_explanation = self._find_first_explanation(
        ExplanationKeys.LOCAL_EXPLANATION_KEY,
        mli_explanations)
    global_explanation = self._find_first_explanation(
        ExplanationKeys.GLOBAL_EXPLANATION_KEY,
        mli_explanations)
    ebm_explanation = self._find_first_explanation(
        ExplanationKeys.EBM_GLOBAL_EXPLANATION_KEY,
        mli_explanations)

    if explanation is not None and hasattr(explanation, 'method'):
        interpretation.method = explanation.method

    local_dim = None

    if local_explanation is not None or global_explanation is not None\
            or ebm_explanation is not None:
        interpretation.precomputedExplanations = PrecomputedExplanations()

    if local_explanation is not None:
        try:
            local_feature_importance = FeatureImportance()
            local_feature_importance.scores = convert_to_list(
                local_explanation["scores"])
            if np.shape(local_feature_importance.scores)[-1] > 1000:
                raise ValueError("Exceeds maximum number of features for "
                                 "visualization (1000). Please regenerate"
                                 " the explanation using fewer features.")
            local_feature_importance.intercept = convert_to_list(
                local_explanation["intercept"])
            # We can ignore perf explanation data.
            # Note if it is added back at any point,
            # the numpy values will need to be converted to python,
            # otherwise serialization fails.
            local_explanation["perf"] = None
            interpretation.precomputedExplanations.localFeatureImportance\
                = local_feature_importance
        except Exception as ex:
            raise ValueError("Unsupported local explanation type") from ex
        if evaluation_examples is not None:
            _feature_length = evaluation_examples.shape[1]
            _row_length = evaluation_examples.shape[0]
            local_dim = np.shape(local_feature_importance.scores)
            if len(local_dim) != 2 and len(local_dim) != 3:
                raise ValueError(
                    "Local explanation expected to be a 2D or 3D list")
            if (len(local_dim) == 2 and
                    (local_dim[1] != _feature_length or
                     local_dim[0] != _row_length)):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")
            if (len(local_dim) == 3 and
                    (local_dim[2] != _feature_length or
                     local_dim[1] != _row_length)):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")

    if global_explanation is not None:
        try:
            global_feature_importance = FeatureImportance()
            global_feature_importance.scores = convert_to_list(
                global_explanation["scores"])
            if 'intercept' in global_explanation:
                global_feature_importance.intercept\
                    = convert_to_list(
                        global_explanation["intercept"])
            interpretation.precomputedExplanations.globalFeatureImportance\
                = global_feature_importance
        except Exception as ex:
            raise ValueError("Unsupported global explanation type") from ex
    if ebm_explanation is not None:
        try:
            ebm_feature_importance = EBMGlobalExplanation()
            ebm_feature_importance.feature_list\
                = ebm_explanation["feature_list"]
            interpretation.precomputedExplanations.ebmGlobalExplanation\
                = ebm_feature_importance
        except Exception as ex:
            raise ValueError("Unsupported ebm explanation type") from ex
    return interpretation
def __init__(
        self,
        explanation,
        model,
        dataset,
        true_y,
        classes,
        features):
    """Initialize the Explanation Dashboard Input.

    :param explanation: An object that represents an explanation.
    :type explanation: ExplanationMixin
    :param model: An object that represents a model. It is assumed that
        for the classification case it has a method of predict_proba()
        returning the prediction probabilities for each class and for
        the regression case a method of predict() returning the
        prediction value.
    :type model: object
    :param dataset: A matrix of feature vector examples
        (# examples x # features), the same samples used to build the
        explanation. Will overwrite any set on explanation object
        already. Must have fewer than 100000 rows and fewer than 1000
        columns. Note dashboard may become slow or crash for more than
        10000 rows.
    :type dataset: numpy.ndarray or list[][]
    :param true_y: The true labels for the provided dataset. Will
        overwrite any set on explanation object already.
    :type true_y: numpy.ndarray or list[]
    :param classes: The class names.
    :type classes: numpy.ndarray or list[]
    :param features: Feature names.
    :type features: numpy.ndarray or list[]
    """
    self._model = model
    self._is_classifier = is_classifier(model)
    self._dataframeColumns = None
    self.dashboard_input = {}
    # List of explanations, key of explanation type is "explanation_type"
    if explanation is not None:
        self._mli_explanations = explanation.data(-1)["mli"]
    else:
        self._mli_explanations = None
    local_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
    global_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
    ebm_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)
    dataset_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_EXPLANATION_DATASET_KEY)

    if explanation is not None and hasattr(explanation, 'method'):
        self.dashboard_input[
            ExplanationDashboardInterface.EXPLANATION_METHOD] = \
            explanation.method

    predicted_y = None
    feature_length = None
    if dataset_explanation is not None:
        if dataset is None:
            dataset = dataset_explanation[
                ExplanationDashboardInterface.MLI_DATASET_X_KEY]
        if true_y is None:
            true_y = dataset_explanation[
                ExplanationDashboardInterface.MLI_DATASET_Y_KEY]

    if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
        self._dataframeColumns = dataset.columns
        self._dfdtypes = dataset.dtypes
    try:
        list_dataset = convert_to_list(dataset, EXP_VIZ_ERR_MSG)
    except Exception as ex:
        ex_str = _format_exception(ex)
        raise ValueError(
            "Unsupported dataset type, inner error: {}".format(ex_str))
    if dataset is not None and model is not None:
        try:
            predicted_y = model.predict(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            msg = ("Model does not support predict method for given"
                   " dataset type, inner error: {}".format(ex_str))
            raise ValueError(msg)
        try:
            predicted_y = convert_to_list(predicted_y, EXP_VIZ_ERR_MSG)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Model prediction output of unsupported type,"
                             " inner error: {}".format(ex_str))
    if predicted_y is not None:
        self.dashboard_input[
            ExplanationDashboardInterface.PREDICTED_Y] = predicted_y
    row_length = 0
    if list_dataset is not None:
        row_length, feature_length = np.shape(list_dataset)
        if row_length > 100000:
            raise ValueError("Exceeds maximum number of rows"
                             " for visualization (100000)")
        if feature_length > 1000:
            warnings.warn("Exceeds maximum number of features for"
                          " visualization (1000)."
                          " Please regenerate the"
                          " explanation using fewer features or"
                          " initialize the dashboard without"
                          " passing a dataset. Dashboard will"
                          " show limited view.")
        else:
            self.dashboard_input[
                ExplanationDashboardInterface.TRAINING_DATA] = \
                serialize_json_safe(list_dataset)

    self.dashboard_input[
        ExplanationDashboardInterface.IS_CLASSIFIER] = self._is_classifier

    local_dim = None

    if true_y is not None and len(true_y) == row_length:
        self.dashboard_input[
            ExplanationDashboardInterface.TRUE_Y] = convert_to_list(
                true_y, EXP_VIZ_ERR_MSG)

    if local_explanation is not None:
        try:
            local_explanation["scores"] = convert_to_list(
                local_explanation["scores"], EXP_VIZ_ERR_MSG)
            local_explanation["intercept"] = convert_to_list(
                local_explanation["intercept"], EXP_VIZ_ERR_MSG)
            # We can ignore perf explanation data.
            # Note if it is added back at any point,
            # the numpy values will need to be converted to python,
            # otherwise serialization fails.
            local_explanation["perf"] = None
            self.dashboard_input[
                ExplanationDashboardInterface.LOCAL_EXPLANATIONS] = \
                local_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Unsupported local explanation type,"
                             " inner error: {}".format(ex_str))
        if list_dataset is not None:
            local_dim = np.shape(local_explanation["scores"])
            if len(local_dim) != 2 and len(local_dim) != 3:
                raise ValueError(
                    "Local explanation expected to be a 2D or 3D list")
            if len(local_dim) == 2 and (local_dim[1] != feature_length or
                                        local_dim[0] != row_length):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")
            if len(local_dim) == 3 and (local_dim[2] != feature_length or
                                        local_dim[1] != row_length):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")
    if local_explanation is None and global_explanation is not None:
        try:
            global_explanation["scores"] = convert_to_list(
                global_explanation["scores"], EXP_VIZ_ERR_MSG)
            if 'intercept' in global_explanation:
                global_explanation["intercept"] = convert_to_list(
                    global_explanation["intercept"], EXP_VIZ_ERR_MSG)
            self.dashboard_input[
                ExplanationDashboardInterface.GLOBAL_EXPLANATION] = \
                global_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Unsupported global explanation type,"
                             " inner error: {}".format(ex_str))
    if ebm_explanation is not None:
        try:
            self.dashboard_input[
                ExplanationDashboardInterface.EBM_EXPLANATION] = \
                ebm_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported ebm explanation type: {}".format(ex_str))

    if features is None\
            and explanation is not None\
            and hasattr(explanation, 'features')\
            and explanation.features is not None:
        features = explanation.features
    if features is not None:
        features = convert_to_list(features, EXP_VIZ_ERR_MSG)
        if feature_length is not None and len(features) != feature_length:
            raise ValueError("Feature vector length mismatch:"
                             " feature names length differs"
                             " from local explanations dimension")
        self.dashboard_input[
            ExplanationDashboardInterface.FEATURE_NAMES] = features
    if classes is None\
            and explanation is not None\
            and hasattr(explanation, 'classes')\
            and explanation.classes is not None:
        classes = explanation.classes
    if classes is not None:
        classes = convert_to_list(classes, EXP_VIZ_ERR_MSG)
        if local_dim is not None and len(classes) != local_dim[0]:
            raise ValueError("Class vector length mismatch:"
                             " class names length differs from"
                             " local explanations dimension")
        self.dashboard_input[
            ExplanationDashboardInterface.CLASS_NAMES] = classes
    if is_classifier(model) and dataset is not None:
        try:
            probability_y = model.predict_proba(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Model does not support predict_proba method"
                             " for given dataset type,"
                             " inner error: {}".format(ex_str))
        try:
            probability_y = convert_to_list(probability_y,
                                            EXP_VIZ_ERR_MSG)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Model predict_proba output of unsupported type,"
                " inner error: {}".format(ex_str))
        self.dashboard_input[
            ExplanationDashboardInterface.PROBABILITY_Y] = probability_y
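# A minimal construction sketch for the input class above; the variable
# names are illustrative and assume a fitted scikit-learn classifier plus
# an explanation object that exposes the data(-1)["mli"] format consumed
# by __init__.
dashboard_input = ExplanationDashboardInput(
    explanation,     # explanation object, or None
    model,           # must implement predict / predict_proba
    X_test,          # fewer than 100000 rows and 1000 columns
    y_test,          # true labels for X_test
    class_names,     # or None to fall back to explanation.classes
    feature_names)   # or None to fall back to explanation.features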
def input_explanation_data(self, list_dataset, classes):
    # List of explanations, key of explanation type is "explanation_type"
    local_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
    global_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
    ebm_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)

    if local_explanation is not None:
        try:
            local_explanation["scores"] = convert_to_list(
                local_explanation["scores"])
            if np.shape(local_explanation["scores"])[-1] > 1000:
                raise ValueError("Exceeds maximum number of features for "
                                 "visualization (1000). Please regenerate"
                                 " the explanation using fewer features.")
            local_explanation["intercept"] = convert_to_list(
                local_explanation["intercept"])
            # We can ignore perf explanation data.
            # Note if it is added back at any point,
            # the numpy values will need to be converted to python,
            # otherwise serialization fails.
            local_explanation["perf"] = None
            self.dashboard_input[
                ExplanationDashboardInterface.LOCAL_EXPLANATIONS] = \
                local_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Unsupported local explanation type,"
                             " inner error: {}".format(ex_str))
        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            local_dim = np.shape(local_explanation["scores"])
            if len(local_dim) != 2 and len(local_dim) != 3:
                raise ValueError(
                    "Local explanation expected to be a 2D or 3D list")
            if len(local_dim) == 2 and (local_dim[1] != feature_length or
                                        local_dim[0] != row_length):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")
            if len(local_dim) == 3 and (local_dim[2] != feature_length or
                                        local_dim[1] != row_length):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")
            if classes is not None and len(classes) != local_dim[0]:
                raise ValueError("Class vector length mismatch:"
                                 " class names length differs from"
                                 " local explanations dimension")
    if local_explanation is None and global_explanation is not None:
        try:
            global_explanation["scores"] = convert_to_list(
                global_explanation["scores"])
            if 'intercept' in global_explanation:
                global_explanation["intercept"] = convert_to_list(
                    global_explanation["intercept"])
            self.dashboard_input[
                ExplanationDashboardInterface.GLOBAL_EXPLANATION] = \
                global_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Unsupported global explanation type,"
                             " inner error: {}".format(ex_str))
    if ebm_explanation is not None:
        try:
            self.dashboard_input[
                ExplanationDashboardInterface.EBM_EXPLANATION] = \
                ebm_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported ebm explanation type: {}".format(ex_str))
def setup_local(self, explanation, model, dataset, true_y, classes,
                features, categorical_features, true_y_dataset, pred_y,
                pred_y_dataset, model_task, metric, max_depth, num_leaves,
                min_child_samples, sample_dataset, model_available):
    full_dataset = dataset
    if true_y_dataset is None:
        full_true_y = true_y
    else:
        full_true_y = true_y_dataset
    if pred_y_dataset is None:
        full_pred_y = pred_y
    else:
        full_pred_y = pred_y_dataset
    has_explanation = explanation is not None
    probability_y = None

    if has_explanation:
        if classes is None:
            has_classes_attr = hasattr(explanation, 'classes')
            if has_classes_attr and explanation.classes is not None:
                classes = explanation.classes
        dataset, true_y = self.input_explanation(explanation,
                                                 dataset,
                                                 true_y)
        row_length = len(dataset)
        # Only check dataset on explanation for row length bounds
        if row_length > 100000:
            raise ValueError("Exceeds maximum number of rows"
                             " for visualization (100000)")
    elif sample_dataset is not None:
        dataset = sample_dataset

    if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
        self._dataframeColumns = dataset.columns
        self._dfdtypes = dataset.dtypes
    try:
        list_dataset = convert_to_list(dataset)
    except Exception as ex:
        ex_str = _format_exception(ex)
        raise ValueError(
            "Unsupported dataset type, inner error: {}".format(ex_str))

    if has_explanation:
        self.input_explanation_data(list_dataset, classes)
        if features is None and hasattr(explanation, 'features'):
            features = explanation.features

    if model_available:
        predicted_y = self.compute_predicted_y(model, dataset)
    else:
        predicted_y = self.predicted_y_to_list(pred_y)

    self.setup_visualization_input(classes, predicted_y,
                                   list_dataset, true_y, features)

    if model_available and is_classifier(model) and \
            dataset is not None:
        try:
            probability_y = model.predict_proba(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Model does not support predict_proba method"
                             " for given dataset type,"
                             " inner error: {}".format(ex_str))
        try:
            probability_y = convert_to_list(probability_y)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Model predict_proba output of unsupported type,"
                " inner error: {}".format(ex_str))
        self.dashboard_input[
            ExplanationDashboardInterface.PROBABILITY_Y] = probability_y

    if model_available:
        self._error_analyzer = ModelAnalyzer(model,
                                             full_dataset,
                                             full_true_y,
                                             features,
                                             categorical_features,
                                             model_task,
                                             metric,
                                             classes)
    else:
        # Model task cannot be unknown when passing predictions
        # Assume classification for backwards compatibility
        if model_task == ModelTask.UNKNOWN:
            model_task = ModelTask.CLASSIFICATION
        self._error_analyzer = PredictionsAnalyzer(full_pred_y,
                                                   full_dataset,
                                                   full_true_y,
                                                   features,
                                                   categorical_features,
                                                   model_task,
                                                   metric,
                                                   classes)
    if self._categorical_features:
        self.dashboard_input[
            ExplanationDashboardInterface.CATEGORICAL_MAP] = \
            serialize_json_safe(self._error_analyzer.category_dictionary)
    # Compute metrics on all data cohort
    if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
        if self._error_analyzer.metric is None:
            metric = Metrics.ERROR_RATE
        else:
            metric = self._error_analyzer.metric
    else:
        if self._error_analyzer.metric is None:
            metric = Metrics.MEAN_SQUARED_ERROR
        else:
            metric = self._error_analyzer.metric
    if model_available:
        full_pred_y = self.compute_predicted_y(model, full_dataset)
    # If we don't have an explanation or model/probabilities specified
    # we can try to use model task to figure out the method
    if not has_explanation and probability_y is None:
        method = MethodConstants.REGRESSION
        if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
            if len(np.unique(predicted_y)) > 2:
                method = MethodConstants.MULTICLASS
            else:
                method = MethodConstants.BINARY
        self.dashboard_input[
            ErrorAnalysisDashboardInterface.METHOD] = method