コード例 #1
0
    def predict(self, X: Union[np.ndarray, None] = None) -> np.ndarray:
        """Predict responses with the fitted model.

        Args:
            X (Union[np.ndarray, None], optional): Data matrix to predict for. When None, the dataset stored on the model (``self._X``) is used instead. Defaults to None.

        Returns:
            np.ndarray: The result of ``mat_mul_inter`` applied to the chosen data and ``self.coefficients``.
        """
        # Fall back to the stored dataset only when no data was passed in.
        data = self._X if X is None else X
        return mat_mul_inter(data, self.coefficients)
コード例 #2
0
def loss_sharp(
    alpha: np.ndarray,
    X: np.ndarray,
    Y: np.ndarray,
    epsilon: float,
    lambda1: float = 0,
    lambda2: float = 0,
    weight: Optional[np.ndarray] = None,
) -> float:
    """Compute the exact (non-smoothed) version of the loss.

    Only items whose squared residual is at most ``epsilon**2`` contribute to
    the sum; the optional L1/L2 penalties on ``alpha`` are added afterwards.

    Args:
        alpha (np.ndarray): Linear model, passed to ``mat_mul_inter`` together with X.
        X (np.ndarray): Data matrix.
        Y (np.ndarray): Response vector.
        epsilon (float): Error tolerance (squared internally).
        lambda1 (float, optional): L1 penalty factor, applied only if positive. Defaults to 0.
        lambda2 (float, optional): L2 penalty factor, applied only if positive. Defaults to 0.
        weight (Optional[np.ndarray], optional): Per-item weights, or None for the unweighted loss. Defaults to None.

    Returns:
        float: The loss value.
    """
    eps_sq = epsilon * epsilon
    sq_residuals = (Y - mat_mul_inter(X, alpha))**2
    # Items whose squared error is within the squared tolerance.
    within = sq_residuals <= eps_sq
    if weight is not None:
        total_weight = np.sum(weight)
        loss = np.sum(
            (sq_residuals[within] -
             (eps_sq * total_weight)) * weight[within]) / total_weight
    else:
        count = len(Y)
        loss = np.sum(sq_residuals[within] - (eps_sq * count)) / count
    if lambda1 > 0:
        loss = loss + lambda1 * np.sum(np.abs(alpha))
    if lambda2 > 0:
        loss = loss + lambda2 * np.sum(alpha * alpha)
    return loss
コード例 #3
0
    def predict(self, X: Union[np.ndarray, None] = None) -> np.ndarray:
        """Predict outcomes with the approximating linear model.

        Args:
            X (Union[np.ndarray, None], optional): Data matrix to predict for. When None, the dataset stored on the object (``self._X``) is used instead. Defaults to None.

        Returns:
            np.ndarray: Prediction vector; passed through ``sigmoid`` when ``self._logit`` is set.
        """
        data = self._X if X is None else X
        prediction = mat_mul_inter(data, self.coefficients)
        # With the logit flag set, the linear output is mapped through sigmoid.
        return sigmoid(prediction) if self._logit else prediction
コード例 #4
0
ファイル: test_data.py プロジェクト: edahelsinki/pyslise
def test_model_scaling():
    """Check that ``unscale_model`` is consistent with the data normalisation.

    For several dataset sizes, a random model for the normalised data is
    converted with ``unscale_model``; predictions made with it on the original
    data, once rescaled with ``scale_same``, must match the predictions made on
    the normalised data.
    """
    print("Testing model scaling")
    for dims in (4, 6, 8):
        X, Y, _ = data_create2(dims * 30, dims)
        X_norm, x_center, x_scale = normalise_robust(X)
        _, y_center, y_scale = normalise_robust(Y)
        # Two model sizes: `dims` coefficients, then `dims + 1`
        # (presumably the extra one is an intercept -- confirm against unscale_model).
        for size in (dims, dims + 1):
            scaled_model = np.random.normal(size=size)
            original_model = unscale_model(scaled_model, x_center, x_scale,
                                           y_center, y_scale)
            pred_original = mat_mul_inter(X, original_model)
            pred_scaled = mat_mul_inter(X_norm, scaled_model)
            pred_rescaled = scale_same(pred_original, y_center, y_scale)
            assert np.allclose(
                pred_scaled, pred_rescaled
            ), f"Max Diff {np.max(np.abs(pred_scaled - pred_rescaled))}"
コード例 #5
0
    def subset(self,
               X: Union[np.ndarray, None] = None,
               Y: Union[np.ndarray, None] = None) -> np.ndarray:
        """Get the subset (of non-outliers) used for the robust regression model.

        Args:
            X (Union[np.ndarray, None], optional): Data matrix, or None for using the fitted dataset. Defaults to None.
            Y (Union[np.ndarray, None], optional): Response vector, or None for using the fitted dataset. Defaults to None.

        Returns:
            np.ndarray: Boolean mask that is True where the squared residual is strictly below ``self.scaled_epsilon**2``.
        """
        # If either argument is missing, BOTH are taken from the fitted dataset.
        if X is None or Y is None:
            X, Y = self._X, self._Y
        residuals = mat_mul_inter(X, self.coefficients) - Y
        tolerance = self.scaled_epsilon**2
        return residuals**2 < tolerance
コード例 #6
0
    def subset(self,
               X: Union[np.ndarray, None] = None,
               Y: Union[np.ndarray, None] = None) -> np.ndarray:
        """Get the subset / neighbourhood used for the approximation (explanation).

        Args:
            X (Union[np.ndarray, None], optional): Data matrix, or None for using the fitted dataset. Defaults to None.
            Y (Union[np.ndarray, None], optional): Response vector, or None for using the fitted dataset. Defaults to None.

        Returns:
            np.ndarray: Boolean mask that is True where the squared residual is strictly below ``self.scaled_epsilon**2``.
        """
        # If either argument is missing, BOTH are taken from the fitted dataset.
        use_fitted = X is None or Y is None
        data = self._X if use_fitted else X
        target = self._Y if use_fitted else Y
        if self._logit:
            # Compare in logit space when the logit flag is set.
            target = limited_logit(target)
        squared_error = (mat_mul_inter(data, self.coefficients) - target)**2
        return squared_error < self.scaled_epsilon**2
コード例 #7
0
def plot_2d(
    X: np.ndarray,
    Y: np.ndarray,
    model: np.ndarray,
    epsilon: float,
    x: Optional[np.ndarray] = None,
    y: Optional[float] = None,
    logit: bool = False,
    title: str = "SLISE for Robust Regression",
    label_x: str = "x",
    label_y: str = "y",
    decimals: int = 3,
    fig: Optional[Figure] = None,
):
    """Plot the regression/explanation in a 2D scatter plot with a line for the regression model (and the explained item marked).

    Args:
        X (np.ndarray): Data matrix.
        Y (np.ndarray): Response vector.
        model (np.ndarray): Linear model.
        epsilon (float): Error tolerance.
        x (Optional[np.ndarray], optional): Explained item. Defaults to None.
        y (Optional[float], optional): Explained outcome. Defaults to None.
        logit (bool, optional): Should Y be logit-transformed. Defaults to False.
        title (str, optional): Plot title. Defaults to "SLISE for Robust Regression".
        label_x (str, optional): X-axis label. Defaults to "x".
        label_y (str, optional): Y-axis label. Defaults to "y".
        decimals (int, optional): Number of decimals when writing numbers. Defaults to 3.
        fig (Optional[Figure], optional): Pyplot figure to plot on, if None then a new plot is created and shown. Defaults to None.

    Raises:
        SliseException: If X and Y do not contain the same number of elements (i.e. the data is not one-dimensional).
    """
    # Validate before touching any figure, so that a rejected call neither
    # leaves an empty pyplot figure open nor adds axes to the caller's figure.
    if X.size != Y.size:
        raise SliseException(
            f"Can only plot 1D data, |Y| = {Y.size} != {X.size} = |X|")
    if fig is None:
        plot = True
        fig, ax = plt.subplots()
    else:
        ax = fig.subplots()
        plot = False
    x_limits = extended_limits(X, 0.03, 20 if logit else 2)
    y_limits = mat_mul_inter(x_limits[:, None], model)
    # Shaded band of width epsilon around the model line.
    if logit:
        # The band is computed in the linear space and then mapped through sigmoid.
        ax.fill_between(
            x_limits,
            sigmoid(y_limits + epsilon),
            sigmoid(y_limits - epsilon),
            color=SLISE_PURPLE + "33",
            label="Subset",
        )
        y_limits = sigmoid(y_limits)
    else:
        ax.fill_between(
            x_limits,
            y_limits + epsilon,
            y_limits - epsilon,
            color=SLISE_PURPLE + "33",
            label="Subset",
        )
    ax.plot(X.ravel(), Y, "o", color="black", label="Dataset")
    if x is not None and y is not None:
        # When an explained item is given, it takes the orange highlight colour
        # and the model line is drawn in purple.
        ax.plot(x_limits, y_limits, "-", color=SLISE_PURPLE, label="Model")
        ax.plot(x, y, "o", color=SLISE_ORANGE, label="Explained Item")
    else:
        ax.plot(x_limits, y_limits, "-", color=SLISE_ORANGE, label="Model")
    if isinstance(model, float) or len(model) == 1:
        # float() on a size-1 array with ndim > 0 is deprecated since NumPy 1.25,
        # so the single coefficient is extracted explicitly.
        slope = float(np.ravel(model)[0])
        formula = f"{slope:.{decimals}f} * {label_x}"
    elif np.abs(model[0]) > 1e-8:
        # model[0] is written as the constant term, model[1] as the slope.
        sign = "-" if model[1] < 0.0 else "+"
        formula = f"{model[0]:.{decimals}f} {sign} {abs(model[1]):.{decimals}f} $\\cdot$ {label_x}"
    else:
        # A (numerically) zero constant term is left out of the formula.
        formula = f"{model[1]:.{decimals}f} * {label_x}"
    if logit:
        formula = f"$\\sigma$({formula})"
    ax.legend()
    ax.set_xlabel(label_x)
    ax.set_ylabel(label_y)
    ax.set_title(f"{title}: {label_y} = {formula}")
    fig.tight_layout()
    # Only show the plot when this function created the figure itself.
    if plot:
        plt.show()
コード例 #8
0
ファイル: test_slise.py プロジェクト: edahelsinki/pyslise
def test_slise_reg():
    """Smoke-test ``regression`` with and without normalisation, penalties and weights.

    The normalised fit additionally checks that predicting in the original
    space and then scaling gives the same result as predicting directly in the
    scaled space. Every fit must have a non-positive score and a subset
    fraction above a per-case threshold.
    """

    def assert_good_fit(reg, min_subset):
        # The loss must not be positive and the subset must be large enough.
        assert (
            reg.score() <= 0
        ), f"SLISE loss should be negative ({reg.score():.2f}, {reg.subset().mean():.2f})"
        assert 1.0 >= reg.subset().mean() > min_subset

    print("Testing slise regression")
    X, Y, _ = data_create2(40, 5)
    w = np.random.uniform(size=40) + 0.5
    shared = dict(epsilon=0.1, intercept=True)

    # Fit with normalisation enabled.
    normalised = regression(X,
                            Y,
                            lambda1=1e-4,
                            lambda2=1e-4,
                            normalise=True,
                            **shared)
    normalised.print()
    pred = mat_mul_inter(X, normalised.coefficients)
    # Scaling the responses is exercised here, but the result is not used.
    normalised._scale.scale_y(Y)
    pred_in_scaled_space = mat_mul_inter(normalised._scale.scale_x(X),
                                         normalised._alpha)
    scaled_pred = normalised._scale.scale_y(pred)
    assert np.allclose(
        scaled_pred,
        pred_in_scaled_space,
    ), f"The predicted Y's are not the same {np.max(np.abs(pred_in_scaled_space - scaled_pred))}"
    assert_good_fit(normalised, 0.75)

    # Fits without normalisation: (extra keyword arguments, minimum subset fraction).
    unnormalised_cases = (
        (dict(lambda1=1e-4, lambda2=1e-4), 0.5),
        (dict(lambda1=0, lambda2=0), 0.5),
        (dict(lambda1=1e-4, lambda2=1e-4, weight=w), 0.4),
    )
    for options, min_subset in unnormalised_cases:
        reg = regression(X, Y, normalise=False, **shared, **options)
        reg.print()
        assert_good_fit(reg, min_subset)