Example #1
from warnings import catch_warnings

import numpy as np

# data_create, initialise_candidates2, loss_smooth and add_intercept_column are
# assumed to be imported from the SLISE package and its test utilities.


def test_initialise2():
    # The initialisation should return beta > 0 and a candidate alpha whose
    # smoothed loss is no worse than that of the all-zero model.
    with catch_warnings(record=True) as w:
        print("Testing initialisation2")
        X, Y = data_create(20, 5)
        zero = np.zeros(5)
        alpha, beta = initialise_candidates2(X, Y, 0.1)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta)
        X, Y = data_create(20, 12)
        zero = np.zeros(12)
        alpha, beta = initialise_candidates2(X, Y, 0.1)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta)
        X, Y = data_create(20, 11)
        X = add_intercept_column(X)
        zero = np.zeros(12)
        alpha, beta = initialise_candidates2(X, Y, 0.1)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta)
        X, Y = data_create(20, 8)
        # Random non-negative weights for the weighted variant (named `weights`
        # so the warning list `w` from catch_warnings is not shadowed).
        weights = np.random.uniform(size=20)
        zero = np.zeros(8)
        alpha, beta = initialise_candidates2(X, Y, 0.1, weights)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta,
                           weight=weights) <= loss_smooth(
                               zero, X, Y, 0.1, beta=beta, weight=weights)
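The asserts above spell out the contract of the initialisation: it returns a starting pair (alpha, beta) whose smoothed loss is never worse than that of the all-zero model. A hedged sketch of how such a pair is consumed downstream, following the call pattern in Example #4 and assuming the remaining keyword arguments of graduated_optimisation have defaults:

# Sketch only; argument values are illustrative, not recommendations.
X, Y = data_create(20, 5)
alpha, beta = initialise_candidates2(X, Y, 0.1)         # starting candidate
alpha = graduated_optimisation(alpha=alpha, X=X, Y=Y,   # refine the candidate
                               epsilon=0.1, beta=beta)  # other kwargs assumed to default (see Example #4)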
Example #2
    def get_impact(self,
                   normalised: bool = False,
                   x: Union[None, np.ndarray] = None) -> np.ndarray:
        """Get the "impact" of different variables on the outcome.

        The impact is the (normalised) model times the (normalised) item.

        Args:
            normalised (bool, optional): Return the normalised impact (if normalisation is used). Defaults to False.
            x (Union[None, np.ndarray], optional): The item to calculate the impact for (uses the explained item if None). Defaults to None.

        Returns:
            np.ndarray: The impact vector.
        """
        if x is None:
            x = self._x
        if normalised and self._normalise:
            # Normalised item times the model in the normalised space.
            x = add_constant_columns(self._scale.scale_x(x),
                                     self._scale.columns, False)
            return add_intercept_column(x) * self._alpha
        else:
            # Raw item times the unnormalised coefficients.
            return add_intercept_column(x) * self.coefficients
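In either branch the impact is an element-wise product, impact_j = model_j * x_j (with the intercept prepended). A hedged usage sketch, assuming `reg` is a fitted SLISE model exposing this method (e.g. the SliseRegression in Example #4) and `x_new` is one data row with the same columns as X:

# `reg` and `x_new` are placeholder names for this sketch.
impact = reg.get_impact()                      # impact for the stored/explained item
impact_norm = reg.get_impact(normalised=True)  # same, but in the normalised space
impact_new = reg.get_impact(x=x_new)           # impact for another item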
Example #3
def test_scaling():
    # Round-trip checks for the scaling helpers: applying the stored centre and
    # scale reproduces the normalised data, and the intercept/constant-column
    # helpers are inverses of each other.
    print("Testing scaling")
    for i in (4, 6, 8):
        X, Y = data_create(i * 30, i, 100000)
        X2, center, scale = normalise_robust(X)
        assert np.allclose(scale_same(X, center, scale), X2)
        assert np.allclose(X2[0, :], scale_same(X[0, :], center, scale))
        X3 = add_intercept_column(X2)
        assert np.allclose(X2, remove_intercept_column(X3))
        X4, mask = remove_constant_columns(X3)
        assert np.allclose(X2, X4)
        assert np.allclose(mask, np.array([False] + [True] * i))
        assert np.allclose(X3[:, 1:], add_constant_columns(X2, mask)[:, 1:])
        Y2, center2, scale2 = normalise_robust(Y)
        assert np.allclose(scale_same(Y, center2, scale2), Y2)
        assert np.allclose(scale_same(Y[0], center2, scale2), Y2[0])
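For intuition, a NumPy-only sketch of the constant-column round trip tested above; this is not the SLISE implementation, only the invariant the asserts encode:

import numpy as np

# Stand-in data; the helper functions are replaced by plain NumPy operations.
rng = np.random.default_rng(0)
X2 = rng.normal(size=(30, 4))                          # "normalised" data
X3 = np.concatenate((np.ones((30, 1)), X2), axis=1)    # prepend an intercept column
mask = X3.std(axis=0) > 0                              # False marks the constant column
assert np.array_equal(mask, np.array([False] + [True] * 4))
assert np.allclose(X3[:, mask], X2)                    # dropping it recovers X2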
Example #4
    def fit(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        weight: Optional[np.ndarray] = None,
        init: Union[None, np.ndarray, Tuple[np.ndarray, float]] = None,
    ) -> SliseRegression:
        """Robustly fit a linear regression to a dataset

        Args:
            X (np.ndarray): Data matrix.
            Y (np.ndarray): Response vector.
            weight (Optional[np.ndarray], optional): Weight vector for the data items. Defaults to None.
            init (Union[None, np.ndarray, Tuple[np.ndarray, float]], optional): Use this alpha (and beta) value instead of the initialisation function. Defaults to None.

        Returns:
            SliseRegression: `self` (containing the regression result).
        """
        X = np.array(X)
        Y = np.array(Y)
        if X.ndim == 1:
            X = X.reshape((-1, 1))
        assert X.shape[0] == Y.shape[0], \
            "X and Y must have the same number of items!"
        self._X = X
        self._Y = Y
        if weight is None:
            self._weight = None
        else:
            self._weight = np.array(weight)
            assert len(self._weight) == len(self._Y), \
                "Y and weight must have the same number of items!"
            assert np.all(self._weight >= 0.0), "Weights must not be negative!"
        # Preprocessing
        if self._normalise:
            X, x_cols = remove_constant_columns(X)
            if self._X.shape[1] == X.shape[1]:
                x_cols = None
            X, x_center, x_scale = normalise_robust(X)
            Y, y_center, y_scale = normalise_robust(Y)
            self._scale = DataScaling(x_center, x_scale, y_center, y_scale,
                                      x_cols)
        if self._intercept:
            X = add_intercept_column(X)
        # Initialisation
        threads = set_threads(self.num_threads)
        if init is None:
            alpha, beta = self.init_fn(X, Y, self.epsilon, self._weight)
        else:
            alpha, beta = initialise_fixed(init, X, Y, self.epsilon,
                                           self._weight)
        # Optimisation
        alpha = graduated_optimisation(
            alpha=alpha,
            X=X,
            Y=Y,
            epsilon=self.epsilon,
            beta=beta,
            lambda1=self.lambda1,
            lambda2=self.lambda2,
            weight=self._weight,
            beta_max=self.beta_max,
            max_approx=self.max_approx,
            max_iterations=self.max_iterations,
            debug=self.debug,
        )
        set_threads(threads)
        self._alpha = alpha
        if self._normalise:
            alpha2 = self._scale.unscale_model(alpha)
            if not self._intercept:
                if np.abs(alpha2[0]) > 1e-8:
                    warn(
                        "Intercept introduced due to scaling, consider setting intercept=True (or normalise=False)",
                        SliseWarning,
                    )
                    self._intercept = True
                    self._alpha = np.concatenate(([0], alpha))
                else:
                    alpha2 = alpha2[1:]
            self._coefficients = alpha2
        else:
            self._coefficients = alpha
        return self
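A hedged end-to-end sketch tying the examples together. The constructor arguments below are assumed to mirror the attributes that fit() reads (epsilon, lambda1, lambda2, normalise), and the coefficients / get_impact names are taken from Examples #2 and #4; check the actual SliseRegression signature before relying on them:

import numpy as np

# NOTE: constructor arguments are assumptions inferred from the attributes used
# in fit(); the data below is synthetic and purely illustrative.
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
Y = X @ np.array([1.0, -2.0, 0.0, 0.5, 3.0]) + rng.normal(scale=0.1, size=200)

reg = SliseRegression(epsilon=0.1, lambda1=0.0, lambda2=0.0, normalise=True)
reg.fit(X, Y)                      # robust fit, as defined in Example #4
print(reg.coefficients)            # unscaled model (intercept first, if present)
print(reg.get_impact(x=X[0]))      # per-variable impact for one item (Example #2)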