def test_initialise2():
    with catch_warnings(record=True):
        print("Testing initialisation2")
        X, Y = data_create(20, 5)
        zero = np.zeros(5)
        alpha, beta = initialise_candidates2(X, Y, 0.1)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta
        )
        X, Y = data_create(20, 12)
        zero = np.zeros(12)
        alpha, beta = initialise_candidates2(X, Y, 0.1)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta
        )
        X, Y = data_create(20, 11)
        X = add_intercept_column(X)
        zero = np.zeros(12)
        alpha, beta = initialise_candidates2(X, Y, 0.1)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta
        )
        X, Y = data_create(20, 8)
        w = np.random.uniform(size=20)
        zero = np.zeros(8)
        alpha, beta = initialise_candidates2(X, Y, 0.1, w)
        assert beta > 0
        assert loss_smooth(alpha, X, Y, 0.1, beta=beta, weight=w) <= loss_smooth(
            zero, X, Y, 0.1, beta=beta, weight=w
        )
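
# Hedged usage sketch (not part of the test suite). The test above checks that
# initialise_candidates2 returns a starting point that is at least as good as the zero
# model; the sketch below shows the intended "initialise, then optimise" pattern.
# Assumptions: the module paths in the imports, and that graduated_optimisation provides
# defaults for the arguments that SliseRegression.fit (further down) passes explicitly.
import numpy as np

from slise.initialisation import initialise_candidates2  # assumed module path
from slise.optimisation import graduated_optimisation, loss_smooth  # assumed module path


def example_initialise_then_optimise(epsilon: float = 0.1) -> np.ndarray:
    """Sketch: start from initialise_candidates2 and refine with graduated optimisation."""
    rng = np.random.default_rng(42)
    X = rng.normal(size=(20, 5))
    Y = X @ rng.normal(size=5) + rng.normal(scale=0.1, size=20)
    alpha, beta = initialise_candidates2(X, Y, epsilon)
    # The candidate should not be worse than the zero vector (this is what the test asserts).
    assert loss_smooth(alpha, X, Y, epsilon, beta=beta) <= loss_smooth(
        np.zeros(5), X, Y, epsilon, beta=beta
    )
    # Refine the candidate model; beta is annealed internally towards beta_max.
    return graduated_optimisation(alpha=alpha, X=X, Y=Y, epsilon=epsilon, beta=beta)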

def get_impact(
    self, normalised: bool = False, x: Union[None, np.ndarray] = None
) -> np.ndarray:
    """Get the "impact" of different variables on the outcome.
        The impact is the (normalised) model times the (normalised) item.

    Args:
        normalised (bool, optional): Return the normalised impact (if normalisation is used). Defaults to False.
        x (Union[None, np.ndarray], optional): The item to calculate the impact for (uses the explained item if None). Defaults to None.

    Returns:
        np.ndarray: The impact vector.
    """
    if x is None:
        x = self._x
    if normalised and self._normalise:
        x = add_constant_columns(self._scale.scale_x(x), self._scale.columns, False)
        # Multiply the normalised item by the normalised model (not the unscaled
        # coefficients), as described in the docstring.
        return add_intercept_column(x) * self.normalised()
    else:
        return add_intercept_column(x) * self.coefficients
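
# Hedged usage sketch for get_impact above. From the method body, the (unnormalised)
# impact is the elementwise product of the intercept-augmented item and the coefficients,
# so summing it recovers the linear prediction for that item. The helper below is
# illustrative only: it assumes an already-fitted explainer object exposing get_impact
# and coefficients, and that numpy is imported as np (as in the surrounding code).
def example_impact_sums_to_prediction(explainer, x: np.ndarray) -> float:
    """Sketch: the summed (unnormalised) impact equals the linear prediction for x."""
    impact = explainer.get_impact(x=x)  # one contribution per term, intercept included
    prediction = float(np.sum(impact))
    # Equivalent to the dot product of the intercept-augmented item and the coefficients.
    assert np.isclose(prediction, add_intercept_column(x) @ explainer.coefficients)
    return prediction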

def test_scaling():
    print("Testing scaling")
    for i in (4, 6, 8):
        X, Y = data_create(i * 30, i, 100000)
        X2, center, scale = normalise_robust(X)
        assert np.allclose(scale_same(X, center, scale), X2)
        assert np.allclose(X2[0, :], scale_same(X[0, :], center, scale))
        X3 = add_intercept_column(X2)
        assert np.allclose(X2, remove_intercept_column(X3))
        X4, mask = remove_constant_columns(X3)
        assert np.allclose(X2, X4)
        assert np.allclose(mask, np.array([False] + [True] * i))
        assert np.allclose(X3[:, 1:], add_constant_columns(X2, mask)[:, 1:])
        Y2, center2, scale2 = normalise_robust(Y)
        assert np.allclose(scale_same(Y, center2, scale2), Y2)
        assert np.allclose(scale_same(Y[0], center2, scale2), Y2[0])
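
# Hedged sketch of how the helpers exercised above are combined when normalisation is
# enabled, mirroring the preprocessing branch of SliseRegression.fit below. The
# DataScaling constructor signature is taken from that method; passing None as the
# column mask assumes no constant columns were removed. The helper names are assumed to
# be in scope as in the test above.
def example_build_data_scaling(X: np.ndarray, Y: np.ndarray) -> "DataScaling":
    """Sketch: robustly normalise X and Y and bundle the parameters for later unscaling."""
    X2, x_center, x_scale = normalise_robust(X)
    Y2, y_center, y_scale = normalise_robust(Y)
    # scale_same applies a stored (center, scale) pair to new data, so new items end up
    # on the same scale as X2 (this is what the assertions in test_scaling check).
    assert np.allclose(scale_same(X, x_center, x_scale), X2)
    return DataScaling(x_center, x_scale, y_center, y_scale, None)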

def fit(
    self,
    X: np.ndarray,
    Y: np.ndarray,
    weight: Optional[np.ndarray] = None,
    init: Union[None, np.ndarray, Tuple[np.ndarray, float]] = None,
) -> SliseRegression:
    """Robustly fit a linear regression to a dataset.

    Args:
        X (np.ndarray): Data matrix.
        Y (np.ndarray): Response vector.
        weight (Optional[np.ndarray], optional): Weight vector for the data items. Defaults to None.
        init (Union[None, np.ndarray, Tuple[np.ndarray, float]], optional): Use this alpha (and beta) value instead of the initialisation function. Defaults to None.

    Returns:
        SliseRegression: `self` (containing the regression result).
    """
    X = np.array(X)
    Y = np.array(Y)
    if len(X.shape) == 1:
        X.shape = X.shape + (1,)
    assert X.shape[0] == Y.shape[0], "X and Y must have the same number of items!"
    self._X = X
    self._Y = Y
    if weight is None:
        self._weight = None
    else:
        self._weight = np.array(weight)
        assert len(self._weight) == len(
            self._Y
        ), "Y and weight must have the same number of items!"
        assert np.all(self._weight >= 0.0), "Weights must not be negative!"
    # Preprocessing
    if self._normalise:
        X, x_cols = remove_constant_columns(X)
        if self._X.shape[1] == X.shape[1]:
            x_cols = None
        X, x_center, x_scale = normalise_robust(X)
        Y, y_center, y_scale = normalise_robust(Y)
        self._scale = DataScaling(x_center, x_scale, y_center, y_scale, x_cols)
    if self._intercept:
        X = add_intercept_column(X)
    # Initialisation
    threads = set_threads(self.num_threads)
    if init is None:
        alpha, beta = self.init_fn(X, Y, self.epsilon, self._weight)
    else:
        alpha, beta = initialise_fixed(init, X, Y, self.epsilon, self._weight)
    # Optimisation
    alpha = graduated_optimisation(
        alpha=alpha,
        X=X,
        Y=Y,
        epsilon=self.epsilon,
        beta=beta,
        lambda1=self.lambda1,
        lambda2=self.lambda2,
        weight=self._weight,
        beta_max=self.beta_max,
        max_approx=self.max_approx,
        max_iterations=self.max_iterations,
        debug=self.debug,
    )
    set_threads(threads)
    self._alpha = alpha
    if self._normalise:
        alpha2 = self._scale.unscale_model(alpha)
        if not self._intercept:
            if np.abs(alpha2[0]) > 1e-8:
                warn(
                    "Intercept introduced due to scaling, consider setting intercept=True (or normalise=False)",
                    SliseWarning,
                )
                self._intercept = True
                self._alpha = np.concatenate(([0], alpha))
            else:
                alpha2 = alpha2[1:]
        self._coefficients = alpha2
    else:
        self._coefficients = alpha
    return self
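
# Hedged usage sketch for the fit method above, assuming SliseRegression (whose method
# is shown here) is in scope. The constructor keyword names (epsilon, lambda1, lambda2,
# intercept) are inferred from the attributes used in fit() and are therefore
# assumptions, as is a `coefficients` property mirroring self._coefficients.
def example_fit_regression() -> np.ndarray:
    """Sketch: robust regression on data with a linear signal plus gross outliers."""
    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    Y = X @ np.array([1.0, -2.0, 0.0, 0.5, 3.0]) + rng.normal(scale=0.1, size=100)
    Y[:10] += 20.0  # a few large outliers that an ordinary least-squares fit would chase
    model = SliseRegression(epsilon=0.5, lambda1=0.0, lambda2=0.0, intercept=True).fit(X, Y)
    return model.coefficients  # intercept first, then one coefficient per column of X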