Example #1
def test_fit(data, optimizer):
    x, x_dot = data
    # Ensure x is 2D with shape (n_samples, n_features)
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)

    check_is_fitted(opt)
    assert opt.complexity >= 0
    # coef_ follows scikit-learn's (n_targets, n_features) convention
    if len(x_dot.shape) > 1:
        assert opt.coef_.shape == (x_dot.shape[1], x.shape[1])
    else:
        assert opt.coef_.shape == (1, x.shape[1])
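For context, here is a minimal standalone sketch of the wrapper under test, assuming an older pysindy release in which SINDyOptimizer is importable from pysindy.optimizers (the toy data is invented for illustration):

import numpy as np
from pysindy.optimizers import STLSQ, SINDyOptimizer

# Toy data: x = exp(t) solves x' = x, so x_dot equals x
t = np.linspace(0, 1, 100)
x = np.exp(t).reshape(-1, 1)
x_dot = np.exp(t).reshape(-1, 1)

opt = SINDyOptimizer(STLSQ(threshold=0.01), unbias=False)
opt.fit(x, x_dot)

# coef_ has one row per target and one column per feature;
# complexity counts the nonzero terms of the fitted model
print(opt.coef_.shape, opt.complexity)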
Example #2
def test_complexity_parameter(opt_cls, reg_name, n_samples, n_features,
                              n_informative, random_state):
    """Behaviour test for complexity 2.

    We assume that a model with a bigger regularization parameter is less complex.
    """
    assume(n_informative <= n_features)

    x, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=n_informative,
                           n_targets=1,
                           bias=0,
                           noise=0.1,
                           random_state=random_state)
    y = y.reshape(-1, 1)

    # Regularization strength decreases down the list, so the fitted
    # models should go from least to most complex
    optimizers = [
        SINDyOptimizer(opt_cls(**{reg_name: reg_value}), unbias=True)
        for reg_value in [3, 1, 0.3, 0.1, 0.01]
    ]

    for opt in optimizers:
        opt.fit(x, y)

    for less_complex, more_complex in zip(optimizers, optimizers[1:]):
        assert less_complex.complexity <= more_complex.complexity
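The assume call above comes from hypothesis, so in the full test suite this function would be driven by a @given decorator (and opt_cls/reg_name by pytest parametrization). A hypothetical sketch of that wiring, with illustrative strategy bounds rather than the project's actual values:

import pytest
from hypothesis import given
from hypothesis.strategies import integers

@pytest.mark.parametrize(
    "opt_cls, reg_name",
    [(Lasso, "alpha"), (Ridge, "alpha"), (STLSQ, "threshold")],
)
@given(
    n_samples=integers(min_value=100, max_value=1000),
    n_features=integers(min_value=10, max_value=30),
    n_informative=integers(min_value=1, max_value=10),
    random_state=integers(min_value=0, max_value=2 ** 32 - 1),
)
def test_complexity_parameter(opt_cls, reg_name, n_samples, n_features,
                              n_informative, random_state):
    ...  # body as in Example #2 above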
Example #3
def test_bad_optimizers(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    # A model without a fit method is rejected at construction time
    with pytest.raises(AttributeError):
        opt = SINDyOptimizer(DummyEmptyModel())

    # A model whose fit method never sets coef_ fails when fit
    with pytest.raises(AttributeError):
        opt = SINDyOptimizer(DummyModelNoCoef())
        opt.fit(x, x_dot)
Example #4
def test_unbias_external(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    optimizer_biased = SINDyOptimizer(Lasso(alpha=0.1,
                                            fit_intercept=False,
                                            max_iter=1),
                                      unbias=False)
    optimizer_biased.fit(x, x_dot)

    optimizer_unbiased = SINDyOptimizer(Lasso(alpha=0.1,
                                              fit_intercept=False,
                                              max_iter=1),
                                        unbias=True)
    optimizer_unbiased.fit(x, x_dot)

    # The unbiasing step should measurably change the Lasso coefficients
    assert (norm(optimizer_biased.coef_ - optimizer_unbiased.coef_) /
            (norm(optimizer_unbiased.coef_) + 1e-5) > 1e-9)
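The unbiasing step being tested here is described in the fit docstring of Example #10: refit with unregularized least squares, restricted to the support the regularized optimizer identified. A self-contained sketch of that idea (data and variable names are made up for illustration):

import numpy as np
from sklearn.linear_model import Lasso, LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
true_coef = np.array([1.5, 0.0, -2.0, 0.0, 0.0])
y = X @ true_coef + 0.01 * rng.normal(size=100)

# Step 1: the regularized fit selects a support but shrinks coefficients
biased = Lasso(alpha=0.1, fit_intercept=False).fit(X, y)
support = np.abs(biased.coef_) > 1e-10

# Step 2: an unregularized refit on that support removes the shrinkage
unbiased = np.zeros_like(biased.coef_)
unbiased[support] = (
    LinearRegression(fit_intercept=False).fit(X[:, support], y).coef_
)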
Example #5
def test_unbias(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    optimizer_biased = SINDyOptimizer(STLSQ(threshold=0.01,
                                            alpha=0.1,
                                            max_iter=1),
                                      unbias=False)
    optimizer_biased.fit(x, x_dot)

    optimizer_unbiased = SINDyOptimizer(STLSQ(threshold=0.01,
                                              alpha=0.1,
                                              max_iter=1),
                                        unbias=True)
    optimizer_unbiased.fit(x, x_dot)

    # The unbiasing step should measurably change the STLSQ coefficients
    assert (norm(optimizer_biased.coef_ - optimizer_unbiased.coef_) /
            norm(optimizer_unbiased.coef_) > 1e-9)
Example #6
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    # Average complexity over multiple datasets
    n_datasets = 5
    complexities = [0] * 7  # one running total per optimizer below

    seed(random_state)
    for rs in randint(low=0, high=2**32 - 1, size=n_datasets):

        x, y = make_regression(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_targets=1,
            bias=0,
            noise=0.1,
            random_state=rs,
        )
        y = y.reshape(-1, 1)

        opt_kwargs = dict(fit_intercept=True, normalize=False)
        optimizers = [
            SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
            SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
            Lasso(**opt_kwargs),
            STLSQ(**opt_kwargs),
            ElasticNet(**opt_kwargs),
            Ridge(**opt_kwargs),
            LinearRegression(**opt_kwargs),
        ]

        optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

        for k, opt in enumerate(optimizers):
            opt.fit(x, y)
            complexities[k] += opt.complexity

    for less_complex, more_complex in zip(complexities, complexities[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex <= more_complex + 5
Example #7
def test_sr3_trimming(optimizer, data_linear_oscillator_corrupted):
    X, X_dot, trimming_array = data_linear_oscillator_corrupted

    optimizer_without_trimming = SINDyOptimizer(optimizer(), unbias=False)
    optimizer_without_trimming.fit(X, X_dot)

    optimizer_trimming = SINDyOptimizer(optimizer(trimming_fraction=0.15),
                                        unbias=False)
    optimizer_trimming.fit(X, X_dot)

    # Check that trimming found the right samples to remove
    np.testing.assert_array_equal(optimizer_trimming.optimizer.trimming_array,
                                  trimming_array)

    # Check that the coefficients found by the optimizer with trimming
    # are closer to the true coefficients than the coefficients found by the
    # optimizer without trimming
    true_coef = np.array([[-2.0, 0.0], [0.0, 1.0]])
    assert norm(true_coef - optimizer_trimming.coef_) < norm(
        true_coef - optimizer_without_trimming.coef_)
Example #8
def test_sr3_trimming(data_linear_oscillator_corrupted):
    X, X_dot, trimming_array = data_linear_oscillator_corrupted

    optimizer_without_trimming = SINDyOptimizer(SR3(), unbias=False)
    optimizer_without_trimming.fit(X, X_dot)

    optimizer_trimming = SINDyOptimizer(SR3(trimming_fraction=0.15),
                                        unbias=False)
    optimizer_trimming.fit(X, X_dot)

    # Check that trimming found the right samples to remove
    assert (np.sum(
        np.abs(optimizer_trimming.optimizer.trimming_array -
               trimming_array)) == 0.0)

    # Check that the coefficients found by the optimizer with trimming are closer to
    # the true coefficients than the coefficients found by the optimizer without
    # trimming
    true_coef = np.array([[-2.0, 0.0], [0.0, 1.0]])
    assert norm(true_coef - optimizer_trimming.coef_) < norm(
        true_coef - optimizer_without_trimming.coef_)
Example #9
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    x, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=n_informative,
                           n_targets=1,
                           bias=0,
                           noise=0.1,
                           random_state=random_state)
    y = y.reshape(-1, 1)

    opt_kwargs = dict(fit_intercept=True, normalize=False)
    optimizers = [
        SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
        SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
        Lasso(**opt_kwargs),
        STLSQ(**opt_kwargs),
        ElasticNet(**opt_kwargs),
        Ridge(**opt_kwargs),
        LinearRegression(**opt_kwargs),
    ]

    optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

    for opt in optimizers:
        opt.fit(x, y)

    for less_complex, more_complex in zip(optimizers, optimizers[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex.complexity <= more_complex.complexity + 1
Example #10
    def fit(
        self,
        x,
        t=None,
        x_dot=None,
        u=None,
        multiple_trajectories=False,
        unbias=True,
        quiet=False,
    ):
        """
        Fit the SINDy model.

        Parameters
        ----------
        x: array-like or list of array-like, shape (n_samples, n_input_features)
            Training data. If training data contains multiple trajectories,
            x should be a list containing data for each trajectory. Individual
            trajectories may contain different numbers of samples.

        t: float, numpy array of shape [n_samples], or list of numpy arrays, optional \
                (default None)
            If t is a float, it specifies the timestep between each sample.
            If array-like, it specifies the time at which each sample was
            collected.
            In this case the values in t must be strictly increasing.
            In the case of multi-trajectory training data, t may also be a list
            of arrays containing the collection times for each individual
            trajectory.
            If None, the default time step ``t_default`` will be used.

        x_dot: array-like or list of array-like, shape (n_samples, n_input_features), \
                optional (default None)
            Optional pre-computed derivatives of the training data. If not
            provided, the time derivatives of the training data will be
            computed using the specified differentiation method. If x_dot is
            provided, it must match the shape of the training data and these
            values will be used as the time derivatives.

        u: array-like or list of array-like, shape (n_samples, n_control_features), \
                optional (default None)
            Control variables/inputs. Include this variable to use sparse
            identification for nonlinear dynamical systems for control (SINDYc).
            If training data contains multiple trajectories (i.e. if x is a list of
            array-like), then u should be a list containing control variable data
            for each trajectory. Individual trajectories may contain different
            numbers of samples.

        multiple_trajectories: boolean, optional, (default False)
            Whether or not the training data includes multiple trajectories. If
            True, the training data must be a list of arrays containing data
            for each trajectory. If False, the training data must be a single
            array.

        unbias: boolean, optional (default True)
            Whether to perform an extra step of unregularized linear regression to
            unbias the coefficients for the identified support.
            If the optimizer (``SINDy.optimizer``) applies any type of regularization,
            that regularization may bias coefficients toward particular values,
            improving the conditioning of the problem but harming the quality of the
            fit. Setting ``unbias=True`` enables an extra step wherein unregularized
            linear regression is applied, but only for the coefficients in the support
            identified by the optimizer. This helps to remove the bias introduced by
            regularization.

        quiet: boolean, optional (default False)
            Whether or not to suppress warnings during model fitting.

        Returns
        -------
        self: returns an instance of self
        """
        if t is None:
            t = self.t_default
        if u is None:
            self.n_control_features_ = 0
        else:
            trim_last_point = self.discrete_time and (x_dot is None)
            u = validate_control_variables(
                x,
                u,
                multiple_trajectories=multiple_trajectories,
                trim_last_point=trim_last_point,
            )
            self.n_control_features_ = u.shape[1]

        if multiple_trajectories:
            x, x_dot = self._process_multiple_trajectories(x, t, x_dot)
        else:
            x = validate_input(x, t)

            if self.discrete_time:
                if x_dot is None:
                    x_dot = x[1:]
                    x = x[:-1]
                else:
                    x_dot = validate_input(x_dot)
            else:
                if x_dot is None:
                    x_dot = self.differentiation_method(x, t)
                else:
                    x_dot = validate_input(x_dot, t)

        # Append control variables
        if self.n_control_features_ > 0:
            x = concatenate((x, u), axis=1)

        # Drop rows where derivative isn't known
        x, x_dot = drop_nan_rows(x, x_dot)

        optimizer = SINDyOptimizer(self.optimizer, unbias=unbias)
        steps = [("features", self.feature_library), ("model", optimizer)]
        self.model = Pipeline(steps)

        action = "ignore" if quiet else "default"
        with warnings.catch_warnings():
            warnings.filterwarnings(action, category=ConvergenceWarning)
            warnings.filterwarnings(action, category=LinAlgWarning)
            warnings.filterwarnings(action, category=UserWarning)

            self.model.fit(x, x_dot)

        self.n_input_features_ = self.model.steps[0][1].n_input_features_
        self.n_output_features_ = self.model.steps[0][1].n_output_features_

        if self.feature_names is None:
            feature_names = []
            for i in range(self.n_input_features_ - self.n_control_features_):
                feature_names.append("x" + str(i))
            for i in range(self.n_control_features_):
                feature_names.append("u" + str(i))
            self.feature_names = feature_names

        return self
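Putting the fit signature above to use, a minimal end-to-end sketch (the training trajectory is invented for illustration, and ps.SINDy refers to pysindy's top-level model class):

import numpy as np
import pysindy as ps

# Toy trajectory: a 2D harmonic oscillator sampled at a fixed timestep
dt = 0.01
t = np.arange(0, 10, dt)
x = np.column_stack([np.sin(2 * t), np.cos(2 * t)])

model = ps.SINDy()
model.fit(x, t=dt, unbias=True, quiet=True)
model.print()  # display the identified equations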