Example #1
0
    def __init__(
        self,
        predicted: np.ndarray,
        expected: np.ndarray = None,
        featuresize: int = None,
        split_origin: np.ndarray = None,
        **kwargs,
    ):
        """
        Instantiates a QQ plot

        Parameters
        ----------
        predicted: np.ndarray
            The predicted values
        expected: np.ndarray
            Optional, the true values. If this attribute is None, the predicted array is assumed to contain the already
            standardized residuals.
        featuresize: int
            Optional, number of features. Only forwarded to the residual standardization when ``expected`` is given.
        split_origin: np.ndarray
            Optional, if the data used for the predictions includes unseen test data.
            These residuals can be marked explicitly in the plot. This attribute must have the same dimensionality
            as the predictions and expected array. Each entry in this array must be one of the strings ['train', 'test']
            to denote from which split this observation originates.
        **kwargs
            Forwarded unchanged to the parent class constructor.
        """

        if expected is None:
            # No ground truth supplied: ``predicted`` is taken as-is, i.e. it is
            # expected to already hold standardized residuals.
            std_res = predicted
        else:
            std_res = helper.calculate_standardized_residual(
                predicted, expected=expected, featuresize=featuresize
            )

        # The figure has to exist before the parent constructor runs, because
        # the parent is initialised from it.
        plot = self.__qq_plot(std_res, split_origin)
        super(QQPlotWidget, self).__init__(plot, **kwargs)
Example #2
0
    def update_values(
        self,
        predicted: np.ndarray,
        expected: np.ndarray = None,
        featuresize: int = None,
        split_origin: np.ndarray = None,
    ):
        """
        Update the QQ plot values

        Parameters
        ----------
        predicted: np.ndarray
            The predicted values
        expected: np.ndarray
            Optional, the true values. If this attribute is None, the predicted array is assumed to contain the already
            standardized residuals.
        featuresize: int
            Optional, number of features. Only forwarded to the residual standardization when ``expected`` is given.
        split_origin: np.ndarray
            Optional, if the data used for the predictions includes unseen test data.
            These residuals can be marked explicitly in the plot. This attribute must have the same dimensionality
            as the predictions and expected array. Each entry in this array must be one of the strings ['train', 'test']
            to denote from which split this observation originates.
        """
        # Same contract as the constructor: without ``expected`` the input is
        # used as-is instead of being passed through the standardization helper
        # again.
        if expected is None:
            std_res = predicted
        else:
            std_res = helper.calculate_standardized_residual(
                predicted, expected=expected, featuresize=featuresize
            )

        plot = self.__qq_plot(
            standardized_residuals=std_res,
            split_origin=split_origin,
        )
        # Replace the traces of the existing figure with those of the freshly
        # built plot, then refresh the layout.
        self.update({"data": plot.data}, overwrite=True)
        self.update_layout()
Example #3
0
    def __create_resplots(
        self,
        model,
        x: np.ndarray,
        y: np.ndarray,
        x_test: np.ndarray = None,
        y_test: np.ndarray = None,
    ) -> widgets.VBox:
        """
        Build the four residual diagnostic widgets and arrange them in a 2x2 grid.

        The widgets are created in this order and appended to ``self.figures``:
        Tukey-Anscombe, normal QQ, scale-location and residuals-vs-leverage
        (with Cook's distance). ``self.display.move_progress()`` is called once
        after the residuals are computed and once after each widget.

        Parameters
        ----------
        model
            Estimator exposing ``predict`` and ``get_params``. It is used through
            the ``fit_if_not_fitted`` context manager.
        x: np.ndarray
            Training features; ``x.shape[1]`` is used as the feature count.
        y: np.ndarray
            Training targets.
        x_test: np.ndarray
            Optional, test features. Only used when ``y_test`` is given as well.
        y_test: np.ndarray
            Optional, test targets. Only used when ``x_test`` is given as well.

        Returns
        -------
        widgets.VBox
            Two stacked rows: the first holds ``self.figures[:2]``, the second
            ``self.figures[2:]``.
        """
        logger = get_logger()

        with fit_if_not_fitted(model, x, y) as fitted_model:
            fitted = fitted_model.predict(x)
            fitted_residuals = fitted - y

            if x_test is not None and y_test is not None:
                pred = fitted_model.predict(x_test)
                prediction_residuals = pred - y_test

                # Stack train and test observations so every plot shows both;
                # split_origin labels each row with the split it came from.
                predictions = np.concatenate((fitted, pred))
                residuals = np.concatenate((fitted_residuals, prediction_residuals))
                split_origin = np.concatenate(
                    (np.repeat("train", fitted.shape[0]), np.repeat("test", pred.shape[0]))
                )

                # From here on x / y refer to the combined train + test data.
                x = np.concatenate((x, x_test))
                y = np.concatenate((y, y_test))

            else:
                predictions = fitted
                residuals = fitted_residuals
                split_origin = None

        logger.info("Calculated model residuals")
        self.display.move_progress()

        tukey_anscombe_widget = TukeyAnscombeWidget(
            predictions, residuals, split_origin=split_origin
        )
        logger.info("Calculated Tukey-Anscombe Plot")
        self.figures.append(tukey_anscombe_widget)
        self.display.move_progress()

        qq_plot_widget = QQPlotWidget(
            predictions, y, split_origin=split_origin, featuresize=x.shape[1]
        )
        logger.info("Calculated Normal QQ Plot")
        self.figures.append(qq_plot_widget)
        self.display.move_progress()

        # NOTE(review): featuresize is None here while the QQ plot above passes
        # x.shape[1]; the two sets of standardized residuals may therefore
        # differ -- confirm this is intended.
        standardized_residuals = helper.calculate_standardized_residual(
            predictions, y, None
        )
        model_norm_residuals_abs_sqrt = np.sqrt(np.abs(standardized_residuals))
        scale_location_widget = ScaleLocationWidget(
            predictions, model_norm_residuals_abs_sqrt, split_origin=split_origin
        )
        logger.info("Calculated Scale-Location Plot")
        self.figures.append(scale_location_widget)
        self.display.move_progress()

        leverage = helper.leverage_statistic(np.array(x))

        # NOTE(review): this is the number of entries returned by get_params(),
        # which for scikit-learn style estimators are constructor settings
        # rather than fitted coefficients -- confirm this is the count wanted
        # for Cook's distance.
        n_model_params = len(model.get_params())
        distance = helper.cooks_distance(
            standardized_residuals, leverage, n_model_params=n_model_params
        )
        cooks_distance_widget = CooksDistanceWidget(
            leverage,
            distance,
            standardized_residuals,
            n_model_params,
            split_origin=split_origin,
        )
        logger.info("Calculated Residual vs Leverage Plot inc. Cook's distance")
        self.figures.append(cooks_distance_widget)
        self.display.move_progress()

        # Two rows of figures, each constrained to the same fixed width.
        items_layout = Layout(width="1000px")
        h0 = widgets.HBox(self.figures[:2], layout=items_layout)
        h1 = widgets.HBox(self.figures[2:], layout=items_layout)
        return widgets.VBox([h0, h1])