def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)

    check_is_fitted(opt)
    assert opt.complexity >= 0
    if len(x_dot.shape) > 1:
        assert opt.coef_.shape == (x.shape[1], x_dot.shape[1])
    else:
        assert opt.coef_.shape == (1, x.shape[1])
def test_complexity_parameter(
    opt_cls, reg_name, n_samples, n_features, n_informative, random_state
):
    """Behaviour test for complexity 2.

    We assume that a model with a bigger regularization parameter is less complex.
    """
    assume(n_informative <= n_features)

    x, y = make_regression(
        n_samples, n_features, n_informative, 1, 0, noise=0.1, random_state=random_state
    )
    y = y.reshape(-1, 1)

    optimizers = [
        SINDyOptimizer(opt_cls(**{reg_name: reg_value}), unbias=True)
        for reg_value in [3, 1, 0.3, 0.1, 0.01]
    ]

    for opt in optimizers:
        opt.fit(x, y)

    for less_complex, more_complex in zip(optimizers, optimizers[1:]):
        assert less_complex.complexity <= more_complex.complexity
def test_bad_optimizers(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    with pytest.raises(AttributeError):
        opt = SINDyOptimizer(DummyEmptyModel())

    with pytest.raises(AttributeError):
        opt = SINDyOptimizer(DummyModelNoCoef())
        opt.fit(x, x_dot)
def test_unbias_external(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    optimizer_biased = SINDyOptimizer(
        Lasso(alpha=0.1, fit_intercept=False, max_iter=1), unbias=False
    )
    optimizer_biased.fit(x, x_dot)

    optimizer_unbiased = SINDyOptimizer(
        Lasso(alpha=0.1, fit_intercept=False, max_iter=1), unbias=True
    )
    optimizer_unbiased.fit(x, x_dot)

    assert (
        norm(optimizer_biased.coef_ - optimizer_unbiased.coef_)
        / (norm(optimizer_unbiased.coef_) + 1e-5)
        > 1e-9
    )
def test_unbias(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    optimizer_biased = SINDyOptimizer(
        STLSQ(threshold=0.01, alpha=0.1, max_iter=1), unbias=False
    )
    optimizer_biased.fit(x, x_dot)

    optimizer_unbiased = SINDyOptimizer(
        STLSQ(threshold=0.01, alpha=0.1, max_iter=1), unbias=True
    )
    optimizer_unbiased.fit(x, x_dot)

    assert (
        norm(optimizer_biased.coef_ - optimizer_unbiased.coef_)
        / norm(optimizer_unbiased.coef_)
        > 1e-9
    )
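# The two tests above exercise the "unbias" option. As a rough, hedged sketch of
# the idea (not the library's implementation): perform a regularized fit, find the
# nonzero support, then refit that support with plain least squares so the
# surviving coefficients are no longer shrunk by the penalty. The helper name
# ``unbiased_coef`` and the tolerance used below are illustrative assumptions.
import numpy as np
from sklearn.linear_model import Lasso, LinearRegression


def unbiased_coef(x, y, alpha=0.1):
    # Regularized fit shrinks (biases) the coefficients it keeps
    biased = Lasso(alpha=alpha, fit_intercept=False).fit(x, y).coef_
    support = np.abs(biased) > 1e-10
    coef = np.zeros_like(biased)
    if support.any():
        # Unregularized refit restricted to the identified support
        coef[support] = (
            LinearRegression(fit_intercept=False).fit(x[:, support], y).coef_
        )
    return coef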
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    # Average complexity over multiple datasets
    n_datasets = 5
    complexities = [0] * 7

    seed(random_state)
    for rs in randint(low=0, high=2**32 - 1, size=n_datasets):
        x, y = make_regression(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_targets=1,
            bias=0,
            noise=0.1,
            random_state=rs,
        )
        y = y.reshape(-1, 1)

        opt_kwargs = dict(fit_intercept=True, normalize=False)
        optimizers = [
            SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
            SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
            Lasso(**opt_kwargs),
            STLSQ(**opt_kwargs),
            ElasticNet(**opt_kwargs),
            Ridge(**opt_kwargs),
            LinearRegression(**opt_kwargs),
        ]

        optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

        for k, opt in enumerate(optimizers):
            opt.fit(x, y)
            complexities[k] += opt.complexity

    for less_complex, more_complex in zip(complexities, complexities[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex <= more_complex + 5
def test_sr3_trimming(optimizer, data_linear_oscillator_corrupted):
    X, X_dot, trimming_array = data_linear_oscillator_corrupted

    optimizer_without_trimming = SINDyOptimizer(optimizer(), unbias=False)
    optimizer_without_trimming.fit(X, X_dot)

    optimizer_trimming = SINDyOptimizer(optimizer(trimming_fraction=0.15), unbias=False)
    optimizer_trimming.fit(X, X_dot)

    # Check that trimming found the right samples to remove
    np.testing.assert_array_equal(
        optimizer_trimming.optimizer.trimming_array, trimming_array
    )

    # Check that the coefficients found by the optimizer with trimming
    # are closer to the true coefficients than the coefficients found by the
    # optimizer without trimming
    true_coef = np.array([[-2.0, 0.0], [0.0, 1.0]])
    assert norm(true_coef - optimizer_trimming.coef_) < norm(
        true_coef - optimizer_without_trimming.coef_
    )
def test_sr3_trimming(data_linear_oscillator_corrupted):
    X, X_dot, trimming_array = data_linear_oscillator_corrupted

    optimizer_without_trimming = SINDyOptimizer(SR3(), unbias=False)
    optimizer_without_trimming.fit(X, X_dot)

    optimizer_trimming = SINDyOptimizer(SR3(trimming_fraction=0.15), unbias=False)
    optimizer_trimming.fit(X, X_dot)

    # Check that trimming found the right samples to remove
    assert (
        np.sum(np.abs(optimizer_trimming.optimizer.trimming_array - trimming_array))
        == 0.0
    )

    # Check that the coefficients found by the optimizer with trimming are closer to
    # the true coefficients than the coefficients found by the optimizer without
    # trimming
    true_coef = np.array([[-2.0, 0.0], [0.0, 1.0]])
    assert norm(true_coef - optimizer_trimming.coef_) < norm(
        true_coef - optimizer_without_trimming.coef_
    )
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    x, y = make_regression(
        n_samples, n_features, n_informative, 1, 0, noise=0.1, random_state=random_state
    )
    y = y.reshape(-1, 1)

    opt_kwargs = dict(fit_intercept=True, normalize=False)
    optimizers = [
        SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
        SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
        Lasso(**opt_kwargs),
        STLSQ(**opt_kwargs),
        ElasticNet(**opt_kwargs),
        Ridge(**opt_kwargs),
        LinearRegression(**opt_kwargs),
    ]

    optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

    for opt in optimizers:
        opt.fit(x, y)

    for less_complex, more_complex in zip(optimizers, optimizers[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex.complexity <= more_complex.complexity + 1
def fit(
    self,
    x,
    t=None,
    x_dot=None,
    u=None,
    multiple_trajectories=False,
    unbias=True,
    quiet=False,
):
    """
    Fit the SINDy model.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Training data. If training data contains multiple trajectories,
        x should be a list containing data for each trajectory. Individual
        trajectories may contain different numbers of samples.

    t: float, numpy array of shape [n_samples], or list of numpy arrays, optional \
            (default None)
        If t is a float, it specifies the timestep between each sample.
        If array-like, it specifies the time at which each sample was
        collected. In this case the values in t must be strictly increasing.
        In the case of multi-trajectory training data, t may also be a list
        of arrays containing the collection times for each individual
        trajectory. If None, the default time step ``t_default`` will be used.

    x_dot: array-like or list of array-like, shape (n_samples, n_input_features), \
            optional (default None)
        Optional pre-computed derivatives of the training data. If not
        provided, the time derivatives of the training data will be computed
        using the specified differentiation method. If x_dot is provided, it
        must match the shape of the training data and these values will be
        used as the time derivatives.

    u: array-like or list of array-like, shape (n_samples, n_control_features), \
            optional (default None)
        Control variables/inputs. Include this variable to use sparse
        identification for nonlinear dynamical systems for control (SINDYc).
        If training data contains multiple trajectories (i.e. if x is a list
        of array-like), then u should be a list containing control variable
        data for each trajectory. Individual trajectories may contain
        different numbers of samples.

    multiple_trajectories: boolean, optional, (default False)
        Whether or not the training data includes multiple trajectories. If
        True, the training data must be a list of arrays containing data
        for each trajectory. If False, the training data must be a single
        array.

    unbias: boolean, optional (default True)
        Whether to perform an extra step of unregularized linear regression
        to unbias the coefficients for the identified support. If the
        optimizer (``SINDy.optimizer``) applies any type of regularization,
        that regularization may bias coefficients toward particular values,
        improving the conditioning of the problem but harming the quality
        of the fit. Setting ``unbias=True`` enables an extra step wherein
        unregularized linear regression is applied, but only for the
        coefficients in the support identified by the optimizer. This helps
        to remove the bias introduced by regularization.

    quiet: boolean, optional (default False)
        Whether or not to suppress warnings during model fitting.

    Returns
    -------
    self: returns an instance of self
    """
    if t is None:
        t = self.t_default

    if u is None:
        self.n_control_features_ = 0
    else:
        trim_last_point = self.discrete_time and (x_dot is None)
        u = validate_control_variables(
            x,
            u,
            multiple_trajectories=multiple_trajectories,
            trim_last_point=trim_last_point,
        )
        self.n_control_features_ = u.shape[1]

    if multiple_trajectories:
        x, x_dot = self._process_multiple_trajectories(x, t, x_dot)
    else:
        x = validate_input(x, t)

        if self.discrete_time:
            if x_dot is None:
                x_dot = x[1:]
                x = x[:-1]
            else:
                x_dot = validate_input(x_dot)
        else:
            if x_dot is None:
                x_dot = self.differentiation_method(x, t)
            else:
                x_dot = validate_input(x_dot, t)

    # Append control variables
    if self.n_control_features_ > 0:
        x = concatenate((x, u), axis=1)

    # Drop rows where derivative isn't known
    x, x_dot = drop_nan_rows(x, x_dot)

    optimizer = SINDyOptimizer(self.optimizer, unbias=unbias)
    steps = [("features", self.feature_library), ("model", optimizer)]
    self.model = Pipeline(steps)

    action = "ignore" if quiet else "default"
    with warnings.catch_warnings():
        warnings.filterwarnings(action, category=ConvergenceWarning)
        warnings.filterwarnings(action, category=LinAlgWarning)
        warnings.filterwarnings(action, category=UserWarning)

        self.model.fit(x, x_dot)

    self.n_input_features_ = self.model.steps[0][1].n_input_features_
    self.n_output_features_ = self.model.steps[0][1].n_output_features_

    if self.feature_names is None:
        feature_names = []
        for i in range(self.n_input_features_ - self.n_control_features_):
            feature_names.append("x" + str(i))
        for i in range(self.n_control_features_):
            feature_names.append("u" + str(i))
        self.feature_names = feature_names

    return self
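# A minimal usage sketch for the ``fit`` method above, assuming it is exposed as
# ``pysindy.SINDy`` with the default feature library, optimizer, and
# differentiation method. The trajectory below (a damped linear oscillator) is
# illustrative only.
import numpy as np
import pysindy as ps

t = np.linspace(0, 10, 1000)
# Closed-form solution of x' = -0.1 x + 2 y, y' = -2 x - 0.1 y
x = np.stack(
    (np.exp(-0.1 * t) * np.cos(2 * t), -np.exp(-0.1 * t) * np.sin(2 * t)), axis=-1
)

model = ps.SINDy(feature_names=["x", "y"])
model.fit(x, t=t, unbias=True)  # extra unregularized regression on the support
model.print()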