def test_bad_parameters(data_derivative_1d):
    x, x_dot = data_derivative_1d

    with pytest.raises(ValueError):
        STLSQ(threshold=-1)

    with pytest.raises(ValueError):
        STLSQ(alpha=-1)

    with pytest.raises(ValueError):
        STLSQ(max_iter=0)

    with pytest.raises(ValueError):
        SR3(threshold=-1)

    with pytest.raises(ValueError):
        SR3(nu=0)

    with pytest.raises(ValueError):
        SR3(tol=0)

    with pytest.raises(NotImplementedError):
        SR3(thresholder="l2")

    with pytest.raises(ValueError):
        SR3(max_iter=0)
def TrainSTLSQ(
    X: np.ndarray,
    y: np.ndarray,
    alpha: float,
    delta_threshold: float,
    max_iterations: int = 100,
    test_size: float = 0.2,
    random_state: int = 0,
) -> np.ndarray:
    """PDE-FIND sparsity-selection algorithm.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).
        alpha (float): Magnitude of the L2 regularization.
        delta_threshold (float): Initial step size for the threshold search.
        max_iterations (int, optional): Maximum number of iterations. Defaults to 100.
        test_size (float, optional): Fraction of the data assigned to the test set.
            Defaults to 0.2.
        random_state (int, optional): Seed for the train/test split. Defaults to 0.

    Returns:
        np.ndarray: Coefficient vector.
    """
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Set up the initial tolerance, l0 penalty, and estimates
    l0 = 1e-3 * np.linalg.cond(X)
    delta_t = delta_threshold  # for internal use; can be updated

    # Initial estimate
    optimizer = STLSQ(threshold=0, alpha=0.0, fit_intercept=False)  # now similar to LSTSQ
    y_predict = optimizer.fit(X_train, y_train).predict(X_test)
    min_loss = np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(
        optimizer.coef_
    )

    # Setting alpha and tolerance
    best_threshold = delta_t
    threshold = delta_t

    for iteration in np.arange(max_iterations):
        optimizer.set_params(alpha=alpha, threshold=threshold)
        y_predict = optimizer.fit(X_train, y_train).predict(X_test)
        loss = np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(
            optimizer.coef_
        )

        if (loss <= min_loss) and not (np.all(optimizer.coef_ == 0)):
            min_loss = loss
            best_threshold = threshold
            threshold += delta_threshold
        else:
            # If the loss increases, lower the current threshold and/or
            # decrease the step size.
            new_lower_threshold = np.max([0, threshold - 2 * delta_t])
            delta_t = 2 * delta_t / (max_iterations - iteration)
            threshold = new_lower_threshold + delta_t

    optimizer.set_params(alpha=alpha, threshold=best_threshold)
    optimizer.fit(X_train, y_train)

    return optimizer.coef_
def test_bad_parameters():
    with pytest.raises(ValueError):
        STLSQ(threshold=-1)

    with pytest.raises(ValueError):
        STLSQ(alpha=-1)

    with pytest.raises(ValueError):
        STLSQ(max_iter=0)

    with pytest.raises(ValueError):
        SR3(threshold=-1)

    with pytest.raises(ValueError):
        SR3(nu=0)

    with pytest.raises(ValueError):
        SR3(tol=0)

    with pytest.raises(NotImplementedError):
        SR3(thresholder="l2")

    with pytest.raises(ValueError):
        SR3(max_iter=0)

    with pytest.raises(ValueError):
        SR3(trimming_fraction=-1)

    with pytest.raises(ValueError):
        SR3(trimming_fraction=2)
def __init__(
    self,
    optimizer=None,
    feature_library=None,
    differentiation_method=None,
    feature_names=None,
    t_default=1,
    discrete_time=False,
    n_jobs=1,
):
    if optimizer is None:
        optimizer = STLSQ()
    self.optimizer = optimizer

    if feature_library is None:
        feature_library = PolynomialLibrary()
    self.feature_library = feature_library

    if differentiation_method is None:
        differentiation_method = FiniteDifference()
    self.differentiation_method = differentiation_method

    if not isinstance(t_default, float) and not isinstance(t_default, int):
        raise ValueError("t_default must be a positive number")
    elif t_default <= 0:
        raise ValueError("t_default must be a positive number")
    else:
        self.t_default = t_default

    self.feature_names = feature_names
    self.discrete_time = discrete_time
    self.n_jobs = n_jobs
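# Usage sketch for the constructor above. It assumes the class is pysindy's
# SINDy and that STLSQ, PolynomialLibrary, and FiniteDifference are exported
# by the same package; adjust the import to the actual layout.
import pysindy as ps

# All arguments are optional; passing nothing (or None) selects the defaults
# shown in __init__: STLSQ, PolynomialLibrary, FiniteDifference.
model = ps.SINDy(
    optimizer=ps.STLSQ(threshold=0.1),
    feature_library=ps.PolynomialLibrary(degree=2),
    t_default=0.01,
)

# Non-numeric or non-positive t_default values are rejected.
try:
    ps.SINDy(t_default=0)
except ValueError as err:
    print(err)  # t_default must be a positive number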
def test_unbias(data_derivative_1d):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    optimizer_biased = SINDyOptimizer(
        STLSQ(threshold=0.01, alpha=0.1, max_iter=1), unbias=False
    )
    optimizer_biased.fit(x, x_dot)

    optimizer_unbiased = SINDyOptimizer(
        STLSQ(threshold=0.01, alpha=0.1, max_iter=1), unbias=True
    )
    optimizer_unbiased.fit(x, x_dot)

    assert (
        norm(optimizer_biased.coef_ - optimizer_unbiased.coef_)
        / norm(optimizer_unbiased.coef_)
        > 1e-9
    )
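# The test above checks that unbias=True actually changes the fitted
# coefficients. Below is a sketch of the usual unbiasing step: refit an
# unregularized least-squares model on the nonzero support of the sparse
# solution. This illustrates the idea only; the SINDyOptimizer internals
# may differ in detail.
import numpy as np
from sklearn.linear_model import LinearRegression


def unbias_coefficients(x, y, coef):
    """Refit ordinary least squares on the support selected by `coef` (1D)."""
    unbiased = np.zeros_like(coef)
    support = np.flatnonzero(coef)
    if support.size > 0:
        lr = LinearRegression(fit_intercept=False)
        lr.fit(x[:, support], y)
        unbiased[support] = lr.coef_.ravel()
    return unbiased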
def test_fit_warn(data_lorenz, params, warning):
    x, t = data_lorenz
    model = SINDy(optimizer=STLSQ(**params))

    with pytest.warns(warning):
        model.fit(x, t)

    with pytest.warns(None) as warn_record:
        model.fit(x, t, quiet=True)

    assert len(warn_record) == 0
def test_alternate_parameters(data_derivative_1d, kwargs):
    x, x_dot = data_derivative_1d
    x = x.reshape(-1, 1)

    model = STLSQ(**kwargs)
    model.fit(x, x_dot)
    model.fit(x, x_dot, sample_weight=x[:, 0])

    check_is_fitted(model)
def __init__(
    self,
    optimizer=STLSQ(),
    feature_library=PolynomialFeatures(),
    differentiation_method=FiniteDifference(),
    feature_names=None,
    discrete_time=False,
    n_jobs=1,
):
    self.optimizer = optimizer
    self.feature_library = feature_library
    self.differentiation_method = differentiation_method
    self.feature_names = feature_names
    self.discrete_time = discrete_time
    self.n_jobs = n_jobs
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    # Average complexity over multiple datasets
    n_datasets = 5
    complexities = [0] * 7

    seed(random_state)
    for rs in randint(low=0, high=2 ** 32 - 1, size=n_datasets):
        x, y = make_regression(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_targets=1,
            bias=0,
            noise=0.1,
            random_state=rs,
        )
        y = y.reshape(-1, 1)

        opt_kwargs = dict(fit_intercept=True, normalize=False)
        optimizers = [
            SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
            SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
            Lasso(**opt_kwargs),
            STLSQ(**opt_kwargs),
            ElasticNet(**opt_kwargs),
            Ridge(**opt_kwargs),
            LinearRegression(**opt_kwargs),
        ]

        optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

        for k, opt in enumerate(optimizers):
            opt.fit(x, y)
            complexities[k] += opt.complexity

    for less_complex, more_complex in zip(complexities, complexities[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex <= more_complex + 5
def test_complexity(n_samples, n_features, n_informative, random_state):
    """Behaviour test for complexity.

    We assume that more regularized optimizers are less complex on the same dataset.
    """
    assume(n_informative < n_features)

    x, y = make_regression(
        n_samples,
        n_features,
        n_informative,
        1,
        0,
        noise=0.1,
        random_state=random_state,
    )
    y = y.reshape(-1, 1)

    opt_kwargs = dict(fit_intercept=True, normalize=False)
    optimizers = [
        SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
        SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
        Lasso(**opt_kwargs),
        STLSQ(**opt_kwargs),
        ElasticNet(**opt_kwargs),
        Ridge(**opt_kwargs),
        LinearRegression(**opt_kwargs),
    ]

    optimizers = [SINDyOptimizer(o, unbias=True) for o in optimizers]

    for opt in optimizers:
        opt.fit(x, y)

    for less_complex, more_complex in zip(optimizers, optimizers[1:]):
        # relax the condition to account for
        # noise and non-normalized threshold parameters
        assert less_complex.complexity <= more_complex.complexity + 1
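# Both complexity tests rely on SINDyOptimizer.complexity as a sparsity
# measure. A common definition, sketched here for reference only, is the
# number of nonzero fitted parameters; the exact pysindy definition may
# differ.
import numpy as np


def nonzero_complexity(estimator):
    """Count nonzero entries of coef_ (and intercept_, if present)."""
    complexity = np.count_nonzero(estimator.coef_)
    if hasattr(estimator, "intercept_"):
        complexity += np.count_nonzero(estimator.intercept_)
    return complexity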
    # Two points in t out of order
    t[2], t[4] = t[4], t[2]
    with pytest.raises(ValueError):
        model.fit(x, t)
    t[2], t[4] = t[4], t[2]

    # Two matching times in t
    t[3] = t[5]
    with pytest.raises(ValueError):
        model.fit(x, t)


@pytest.mark.parametrize(
    "data, optimizer",
    [
        (pytest.lazy_fixture("data_1d"), STLSQ()),
        (pytest.lazy_fixture("data_lorenz"), STLSQ()),
        (pytest.lazy_fixture("data_1d"), SR3()),
        (pytest.lazy_fixture("data_lorenz"), SR3()),
        (pytest.lazy_fixture("data_1d"), Lasso(fit_intercept=False)),
        (pytest.lazy_fixture("data_lorenz"), Lasso(fit_intercept=False)),
        (pytest.lazy_fixture("data_1d"), ElasticNet(fit_intercept=False)),
        (pytest.lazy_fixture("data_lorenz"), ElasticNet(fit_intercept=False)),
    ],
)
def test_predict(data, optimizer):
    x, t = data
    model = SINDy(optimizer=optimizer)
    model.fit(x, t)
    x_dot = model.predict(x)
"cls, support", [(Lasso, True), (STLSQ, True), (SR3, True), (DummyLinearModel, False)], ) def test_supports_multiple_targets(cls, support): assert supports_multiple_targets(cls()) == support @pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"]) def data(request): return request.getfixturevalue(request.param) @pytest.mark.parametrize( "optimizer", [ STLSQ(), SR3(), Lasso(fit_intercept=False), ElasticNet(fit_intercept=False), DummyLinearModel(), ], ) def test_fit(data, optimizer): x, x_dot = data if len(x.shape) == 1: x = x.reshape(-1, 1) opt = SINDyOptimizer(optimizer, unbias=False) opt.fit(x, x_dot) check_is_fitted(opt) assert opt.complexity >= 0
        (DummyLinearModel, False),
    ],
)
def test_supports_multiple_targets(cls, support):
    assert supports_multiple_targets(cls()) == support


@pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"])
def data(request):
    return request.getfixturevalue(request.param)


@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        ConstrainedSR3(),
        TrappingSR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
        DummyLinearModel(),
    ],
)
def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)

    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)
    np.testing.assert_allclose(model.coefficients(), model_t_default.coefficients())
    np.testing.assert_almost_equal(model.score(x, t=dt), model_t_default.score(x))
    np.testing.assert_almost_equal(
        model.differentiate(x, t=dt), model_t_default.differentiate(x)
    )


@pytest.mark.parametrize(
    "data", [pytest.lazy_fixture("data_1d"), pytest.lazy_fixture("data_lorenz")]
)
@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        ConstrainedSR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
    ],
)
def test_predict(data, optimizer):
    x, t = data
    model = SINDy(optimizer=optimizer)
    model.fit(x, t)
    x_dot = model.predict(x)
    assert x.shape == x_dot.shape
        (DummyLinearModel, False),
    ],
)
def test_supports_multiple_targets(cls, support):
    assert supports_multiple_targets(cls()) == support


@pytest.fixture(params=["data_derivative_1d", "data_derivative_2d"])
def data(request):
    return request.getfixturevalue(request.param)


@pytest.mark.parametrize(
    "optimizer",
    [
        STLSQ(),
        SR3(),
        ConstrainedSR3(),
        Lasso(fit_intercept=False),
        ElasticNet(fit_intercept=False),
        DummyLinearModel(),
    ],
)
def test_fit(data, optimizer):
    x, x_dot = data
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)

    opt = SINDyOptimizer(optimizer, unbias=False)
    opt.fit(x, x_dot)

    check_is_fitted(opt)
def TrainSTLSQ(
    X: np.ndarray,
    y: np.ndarray,
    alpha: float,
    delta_threshold: float,
    max_iterations: int = 100,
    test_size: float = 0.2,
    random_state: int = 0,
) -> np.ndarray:
    """PDE-FIND sparsity-selection algorithm.

    Based on the method described by Rudy et al. (10.1126/sciadv.1602614).

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).
        alpha (float): Magnitude of the L2 regularization.
        delta_threshold (float): Initial step size for the threshold search.
        max_iterations (int, optional): Maximum number of iterations. Defaults to 100.
        test_size (float, optional): Fraction of the data assigned to the test set.
            Defaults to 0.2.
        random_state (int, optional): Seed for the train/test split. Defaults to 0.

    Returns:
        np.ndarray: Coefficient vector.
    """
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Set up the initial tolerance, l0 penalty, and estimates
    l0 = 1e-3 * np.linalg.cond(X)
    delta_t = delta_threshold  # for internal use; can be updated

    # Initial estimate
    optimizer = STLSQ(threshold=0, alpha=0.0, fit_intercept=False)  # now similar to LSTSQ
    y_predict = optimizer.fit(X_train, y_train).predict(X_test)
    min_loss = np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(
        optimizer.coef_
    )

    # Setting alpha and tolerance
    best_threshold = delta_t
    threshold = delta_t

    for iteration in np.arange(max_iterations):
        optimizer.set_params(alpha=alpha, threshold=threshold)
        y_predict = optimizer.fit(X_train, y_train).predict(X_test)
        loss = np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(
            optimizer.coef_
        )

        if (loss <= min_loss) and not (np.all(optimizer.coef_ == 0)):
            min_loss = loss
            best_threshold = threshold
            threshold += delta_threshold
        else:
            # If the loss increases, lower the current threshold and/or
            # decrease the step size.
            new_lower_threshold = np.max([0, threshold - 2 * delta_t])
            delta_t = 2 * delta_t / (max_iterations - iteration)
            threshold = new_lower_threshold + delta_t

    optimizer.set_params(alpha=alpha, threshold=best_threshold)
    optimizer.fit(X_train, y_train)

    return optimizer.coef_
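# Hypothetical usage of TrainSTLSQ on synthetic data. Here X plays the role
# of a candidate-term library evaluated on measurements and y the targets
# (e.g. time derivatives); shapes, values, and hyperparameters are made up
# purely for illustration.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 10))        # (n_samples, n_candidate_terms)
true_coef = np.zeros(10)
true_coef[[1, 4]] = [2.0, -0.5]           # sparse ground truth
y = (X @ true_coef + 0.01 * rng.standard_normal(500)).reshape(-1, 1)

coef = TrainSTLSQ(X, y, alpha=1e-2, delta_threshold=0.05)
print(np.flatnonzero(coef))               # indices of the selected terms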