Example #1
def test_bad_parameters(data_derivative_1d):
    x, x_dot = data_derivative_1d

    with pytest.raises(ValueError):
        STLSQ(threshold=-1)

    with pytest.raises(ValueError):
        STLSQ(alpha=-1)

    with pytest.raises(ValueError):
        STLSQ(max_iter=0)

    with pytest.raises(ValueError):
        SR3(threshold=-1)

    with pytest.raises(ValueError):
        SR3(nu=0)

    with pytest.raises(ValueError):
        SR3(tol=0)

    with pytest.raises(NotImplementedError):
        SR3(thresholder="l2")

    with pytest.raises(ValueError):
        SR3(max_iter=0)
Example #2
    def TrainSTLSQ(X: np.ndarray,
                   y: np.ndarray,
                   alpha: float,
                   delta_threshold: float,
                   max_iterations: int = 100,
                   test_size: float = 0.2,
                   random_state: int = 0) -> np.ndarray:
        """[summary]

        Args:
            X (np.ndarray): [description]
            y (np.ndarray): [description]
            alpha (float): [description]
            delta_threshold (float): [description]
            max_iterations (int, optional): [description]. Defaults to 100.
            test_size (float, optional): [description]. Defaults to 0.2.
            random_state (int, optional): [description]. Defaults to 0.

        Returns:
            np.ndarray: [description]
        """
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)

        # Set up the initial tolerance, l0 penalty, and estimates
        l0 = 1e-3 * np.linalg.cond(X)
        delta_t = delta_threshold  # for internal use, can be updated

        # Initial estimate
        optimizer = STLSQ(threshold=0, alpha=0.0,
                          fit_intercept=False)  # Now similar to LSTSQ
        y_predict = optimizer.fit(X_train, y_train).predict(X_test)
        min_loss = np.linalg.norm(y_predict - y_test,
                                  2) + l0 * np.count_nonzero(optimizer.coef_)

        # Setting alpha and tolerance
        best_threshold = delta_t
        threshold = delta_t

        for iteration in np.arange(max_iterations):
            optimizer.set_params(alpha=alpha, threshold=threshold)
            y_predict = optimizer.fit(X_train, y_train).predict(X_test)
            loss = np.linalg.norm(y_predict - y_test,
                                  2) + l0 * np.count_nonzero(optimizer.coef_)

            if (loss <= min_loss) and not (np.all(optimizer.coef_ == 0)):
                min_loss = loss
                best_threshold = threshold
                threshold += delta_threshold

            else:  # if the loss increases: (a) lower the current threshold and/or (b) decrease the step size
                new_lower_threshold = np.max([0, threshold - 2 * delta_t])
                delta_t = 2 * delta_t / (max_iterations - iteration)
                threshold = new_lower_threshold + delta_t

        optimizer.set_params(alpha=alpha, threshold=best_threshold)
        optimizer.fit(X_train, y_train)

        return optimizer.coef_
Example #3
def test_bad_parameters():
    with pytest.raises(ValueError):
        STLSQ(threshold=-1)

    with pytest.raises(ValueError):
        STLSQ(alpha=-1)

    with pytest.raises(ValueError):
        STLSQ(max_iter=0)

    with pytest.raises(ValueError):
        SR3(threshold=-1)

    with pytest.raises(ValueError):
        SR3(nu=0)

    with pytest.raises(ValueError):
        SR3(tol=0)

    with pytest.raises(NotImplementedError):
        SR3(thresholder="l2")

    with pytest.raises(ValueError):
        SR3(max_iter=0)

    with pytest.raises(ValueError):
        SR3(trimming_fraction=-1)

    with pytest.raises(ValueError):
        SR3(trimming_fraction=2)
Example #4
 def __init__(
     self,
     optimizer=None,
     feature_library=None,
     differentiation_method=None,
     feature_names=None,
     t_default=1,
     discrete_time=False,
     n_jobs=1,
 ):
     if optimizer is None:
         optimizer = STLSQ()
     self.optimizer = optimizer
     if feature_library is None:
         feature_library = PolynomialLibrary()
     self.feature_library = feature_library
     if differentiation_method is None:
         differentiation_method = FiniteDifference()
     self.differentiation_method = differentiation_method
     if not isinstance(t_default, float) and not isinstance(t_default, int):
         raise ValueError("t_default must be a positive number")
     elif t_default <= 0:
         raise ValueError("t_default must be a positive number")
     else:
         self.t_default = t_default
     self.feature_names = feature_names
     self.discrete_time = discrete_time
     self.n_jobs = n_jobs
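
For context, here is a minimal usage sketch of these defaults (the data below is illustrative and not part of the snippet): constructing SINDy() with no arguments falls back to STLSQ, PolynomialLibrary, and FiniteDifference exactly as the constructor above shows.

import numpy as np
from pysindy import SINDy  # assumes the standard pysindy package layout

# Illustrative data: a damped oscillator sampled on a uniform time grid
t = np.linspace(0, 10, 1000)
x = np.column_stack([np.exp(-0.1 * t) * np.cos(t),
                     np.exp(-0.1 * t) * np.sin(t)])

model = SINDy()    # defaults: STLSQ, PolynomialLibrary, FiniteDifference
model.fit(x, t=t)  # t may be a scalar step (t_default) or an array of sample times
model.print()      # prints the identified equations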
Example #5
def test_unbias(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    optimizer_biased = SINDyOptimizer(STLSQ(threshold=0.01,
                                            alpha=0.1,
                                            max_iter=1),
                                      unbias=False)
    optimizer_biased.fit(x, x_dot)

    optimizer_unbiased = SINDyOptimizer(STLSQ(threshold=0.01,
                                              alpha=0.1,
                                              max_iter=1),
                                        unbias=True)
    optimizer_unbiased.fit(x, x_dot)

    assert (norm(optimizer_biased.coef_ - optimizer_unbiased.coef_) /
            norm(optimizer_unbiased.coef_) > 1e-9)
Example #6
def test_fit_warn(data_lorenz, params, warning):
    x, t = data_lorenz
    model = SINDy(optimizer=STLSQ(**params))

    with pytest.warns(warning):
        model.fit(x, t)

    with pytest.warns(None) as warn_record:
        model.fit(x, t, quiet=True)

    assert len(warn_record) == 0
Example #7
def test_alternate_parameters(data_derivative_1d, kwargs):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    model = STLSQ(**kwargs)
    model.fit(x, x_dot)
    model.fit(x, x_dot, sample_weight=x[:, 0])

    check_is_fitted(model)
Example #8
 def __init__(
     self,
     optimizer=STLSQ(),
     feature_library=PolynomialFeatures(),
     differentiation_method=FiniteDifference(),
     feature_names=None,
     discrete_time=False,
     n_jobs=1,
 ):
     self.optimizer = optimizer
     self.feature_library = feature_library
     self.differentiation_method = differentiation_method
     self.feature_names = feature_names
     self.discrete_time = discrete_time
     self.n_jobs = n_jobs
Example #9
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    # Average complexity over multiple datasets
    n_datasets = 5
    complexities = [0] * 7

    seed(random_state)
    for rs in randint(low=0, high=2**32 - 1, size=n_datasets):

        x, y = make_regression(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_targets=1,
            bias=0,
            noise=0.1,
            random_state=rs,
        )
        y = y.reshape(-1, 1)

        opt_kwargs = dict(fit_intercept=True, normalize=False)
        optimizers = [
            SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
            SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
            Lasso(**opt_kwargs),
            STLSQ(**opt_kwargs),
            ElasticNet(**opt_kwargs),
            Ridge(**opt_kwargs),
            LinearRegression(**opt_kwargs),
        ]

        optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

        for k, opt in enumerate(optimizers):
            opt.fit(x, y)
            complexities[k] += opt.complexity

    for less_complex, more_complex in zip(complexities, complexities[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex <= more_complex + 5
Example #10
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    x, y = make_regression(n_samples,
                           n_features,
                           n_informative,
                           1,
                           0,
                           noise=0.1,
                           random_state=random_state)
    y = y.reshape(-1, 1)

    opt_kwargs = dict(fit_intercept=True, normalize=False)
    optimizers = [
        SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
        SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
        Lasso(**opt_kwargs),
        STLSQ(**opt_kwargs),
        ElasticNet(**opt_kwargs),
        Ridge(**opt_kwargs),
        LinearRegression(**opt_kwargs),
    ]

    optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

    for opt in optimizers:
        opt.fit(x, y)

    for less_complex, more_complex in zip(optimizers, optimizers[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex.complexity <= more_complex.complexity + 1
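
As a side note on what these assertions measure: for a SINDyOptimizer, complexity counts the nonzero fitted terms, so a strongly thresholded STLSQ should normally report a value no larger than an unthresholded one. A minimal sketch, assuming the imports used by these tests and illustrative threshold values:

from sklearn.datasets import make_regression
from pysindy.optimizers import STLSQ, SINDyOptimizer  # import path as used in the pysindy test suite

x, y = make_regression(n_samples=200, n_features=10, n_informative=3,
                       noise=0.1, random_state=0)
y = y.reshape(-1, 1)

sparse_opt = SINDyOptimizer(STLSQ(threshold=0.5), unbias=True)
dense_opt = SINDyOptimizer(STLSQ(threshold=0.0), unbias=True)
sparse_opt.fit(x, y)
dense_opt.fit(x, y)

# complexity counts nonzero coefficients (plus any intercept terms)
print(sparse_opt.complexity, dense_opt.complexity)  # sparse is typically <= dense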
Example #11
    # Two points in t out of order
    t[2], t[4] = t[4], t[2]
    with pytest.raises(ValueError):
        model.fit(x, t)
    t[2], t[4] = t[4], t[2]

    # Two matching times in t
    t[3] = t[5]
    with pytest.raises(ValueError):
        model.fit(x, t)


@pytest.mark.parametrize(
    "data, optimizer",
    [
        (pytest.lazy_fixture("data_1d"), STLSQ()),
        (pytest.lazy_fixture("data_lorenz"), STLSQ()),
        (pytest.lazy_fixture("data_1d"), SR3()),
        (pytest.lazy_fixture("data_lorenz"), SR3()),
        (pytest.lazy_fixture("data_1d"), Lasso(fit_intercept=False)),
        (pytest.lazy_fixture("data_lorenz"), Lasso(fit_intercept=False)),
        (pytest.lazy_fixture("data_1d"), ElasticNet(fit_intercept=False)),
        (pytest.lazy_fixture("data_lorenz"), ElasticNet(fit_intercept=False)),
    ],
)
def test_predict(data, optimizer):
    x, t = data
    model = SINDy(optimizer=optimizer)
    model.fit(x, t)
    x_dot = model.predict(x)
Example #12
    "cls, support",
    [(Lasso, True), (STLSQ, True), (SR3, True), (DummyLinearModel, False)],
)
def test_supports_multiple_targets(cls, support):
    assert supports_multiple_targets(cls()) == support


@pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"])
def data(request):
    return request.getfixturevalue(request.param)


@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
        DummyLinearModel(),
    ],
)
def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)

    check_is_fitted(opt)
    assert opt.complexity >= 0
Example #13
        (DummyLinearModel, False),
    ],
)
def test_supports_multiple_targets(cls, support):
    assert supports_multiple_targets(cls()) == support


@pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"])
def data(request):
    return request.getfixturevalue(request.param)


@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        ConstrainedSR3(),
        TrappingSR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
        DummyLinearModel(),
    ],
)
def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)
Example #14
    np.testing.assert_allclose(model.coefficients(),
                               model_t_default.coefficients())
    np.testing.assert_almost_equal(model.score(x, t=dt),
                                   model_t_default.score(x))
    np.testing.assert_almost_equal(model.differentiate(x, t=dt),
                                   model_t_default.differentiate(x))


@pytest.mark.parametrize(
    "data",
    [pytest.lazy_fixture("data_1d"),
     pytest.lazy_fixture("data_lorenz")])
@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        ConstrainedSR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
    ],
)
def test_predict(data, optimizer):
    x, t = data
    model = SINDy(optimizer=optimizer)
    model.fit(x, t)
    x_dot = model.predict(x)

    assert x.shape == x_dot.shape

Example #15
        (DummyLinearModel, False),
    ],
)
def test_supports_multiple_targets(cls, support):
    assert supports_multiple_targets(cls()) == support


@pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"])
def data(request):
    return request.getfixturevalue(request.param)


@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        ConstrainedSR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
        DummyLinearModel(),
    ],
)
def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)

    check_is_fitted(opt)
Example #16
    def TrainSTLSQ(
        X: np.ndarray,
        y: np.ndarray,
        alpha: float,
        delta_threshold: float,
        max_iterations: int = 100,
        test_size: float = 0.2,
        random_state: int = 0,
    ) -> np.ndarray:
        """PDE-FIND sparsity selection algorithm. Based on method described by Rudy et al. (10.1126/sciadv.1602614).

        Args:
            X (np.ndarray): Training input data of shape (n_samples, n_features).
            y (np.ndarray): Training target data of shape (n_samples, n_outputs).
            alpha (float): Magnitude of the L2 regularization.
            delta_threshold (float): Initial step size for the threshold search.
            max_iterations (int, optional): Maximum number of iterations. Defaults to 100.
            test_size (float, optional): Fraction of the data that is assigned to the test set. Defaults to 0.2.
            random_state (int, optional): Defaults to 0.

        Returns:
            np.ndarray: Coefficient vector.
        """
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)

        # Set up the initial tolerance, l0 penalty, and estimates
        l0 = 1e-3 * np.linalg.cond(X)
        delta_t = delta_threshold  # for internal use, can be updated

        # Initial estimate
        optimizer = STLSQ(threshold=0, alpha=0.0,
                          fit_intercept=False)  # Now similar to LSTSQ
        y_predict = optimizer.fit(X_train, y_train).predict(X_test)
        min_loss = np.linalg.norm(y_predict - y_test,
                                  2) + l0 * np.count_nonzero(optimizer.coef_)

        # Setting alpha and tolerance
        best_threshold = delta_t
        threshold = delta_t

        for iteration in np.arange(max_iterations):
            optimizer.set_params(alpha=alpha, threshold=threshold)
            y_predict = optimizer.fit(X_train, y_train).predict(X_test)
            loss = np.linalg.norm(y_predict - y_test,
                                  2) + l0 * np.count_nonzero(optimizer.coef_)

            if (loss <= min_loss) and not (np.all(optimizer.coef_ == 0)):
                min_loss = loss
                best_threshold = threshold
                threshold += delta_threshold

            else:  # if the loss increases: (a) lower the current threshold and/or (b) decrease the step size
                new_lower_threshold = np.max([0, threshold - 2 * delta_t])
                delta_t = 2 * delta_t / (max_iterations - iteration)
                threshold = new_lower_threshold + delta_t

        optimizer.set_params(alpha=alpha, threshold=best_threshold)
        optimizer.fit(X_train, y_train)

        return optimizer.coef_
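
A hedged usage sketch for the function above; the synthetic data and the alpha/delta_threshold values are illustrative assumptions, and TrainSTLSQ plus its own imports (STLSQ, train_test_split, numpy) are assumed to be in scope as a standalone function.

import numpy as np
from sklearn.datasets import make_regression

# Sparse regression problem with a few informative features
X, y = make_regression(n_samples=200, n_features=10, n_informative=3,
                       noise=0.1, random_state=0)
y = y.reshape(-1, 1)

# Illustrative hyperparameters; alpha and delta_threshold need tuning per dataset
coef = TrainSTLSQ(X, y, alpha=0.05, delta_threshold=0.05)
print("nonzero terms:", np.count_nonzero(coef))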
Example #17
    "cls, support",
    [(Lasso, True), (STLSQ, True), (SR3, True), (DummyLinearModel, False)],
)
def test_supports_multiple_targets(cls, support):
    assert supports_multiple_targets(cls()) == support


@pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"])
def data(request):
    return request.getfixturevalue(request.param)


@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
        DummyLinearModel(),
    ],
)
def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)

    check_is_fitted(opt)
    assert opt.complexity >= 0