def predict(self, x, multiple_trajectories=False):
    """
    Evaluate the fitted SINDy model to obtain predicted time derivatives.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Samples at which to evaluate the model.

    multiple_trajectories: boolean, optional (default False)
        When True, ``x`` is treated as a list with one entry per trajectory
        and a list of predictions is returned. When False, ``x`` is treated
        as a single trajectory.

    Returns
    -------
    x_dot: array-like or list of array-like, shape (n_samples, n_input_features)
        Predicted time derivatives.
    """
    check_is_fitted(self, "model")

    if not multiple_trajectories:
        return self.model.predict(validate_input(x))

    # Validate every trajectory before any prediction is made.
    trajectories = [validate_input(trajectory) for trajectory in x]
    return [self.model.predict(trajectory) for trajectory in trajectories]
def predict(self, x, forcing_input, multiple_trajectories=False):
    """
    Predict the time derivatives using the SINDy model.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Samples.

    forcing_input: object
        Forcing data forwarded unchanged to the underlying model's
        ``predict`` call as the ``forcing_input`` keyword when a single
        trajectory is predicted.
        NOTE(review): it is not forwarded when
        ``multiple_trajectories=True``; confirm whether per-trajectory
        forcing should be supported there.

    multiple_trajectories: boolean, optional (default False)
        If True, x contains multiple trajectories and must be a list of
        data from each trajectory. If False, x is a single trajectory.

    Returns
    -------
    x_dot: array-like or list of array-like, shape (n_samples, n_input_features)
        Predicted time derivatives

    Raises
    ------
    NotFittedError
        If the model has not been fit yet (no ``model`` attribute).
    """
    # Single fitted-check up front, so no branch can fall through silently.
    if not hasattr(self, "model"):
        raise NotFittedError("SINDy model must be fit before predict can be called")

    if multiple_trajectories:
        x = [validate_input(xi) for xi in x]
        return [self.model.predict(xi) for xi in x]

    x = validate_input(x)
    return self.model.predict(x, forcing_input=forcing_input)
def fit(
    self,
    x,
    t=1,
    x_dot=None,
    forcing_input=None,
    initial_forcing_params=None,
    multiple_trajectories=False,
    unbias=True,
    quiet=False,
):
    """
    Fit the SINDy model.

    Parameters
    ----------
    x: array-like or list of array-like
        Training data. With ``multiple_trajectories=True`` it is handed to
        ``process_multiple_trajectories`` together with ``t`` and ``x_dot``.

    t: optional (default 1)
        Time information passed to ``validate_input`` and to the
        differentiation method.

    x_dot: array-like or list of array-like, optional (default None)
        Pre-computed derivatives (or, for discrete-time models, the next
        states). If None, they are computed: by shifting ``x`` one sample
        for discrete-time models, otherwise with
        ``self.differentiation_method``.

    forcing_input: optional (default None)
        Forwarded to the pipeline's ``fit`` as ``model__forcing_input``.

    initial_forcing_params: optional (default None)
        Forwarded to the pipeline's ``fit`` as
        ``model__initial_forcing_params``.

    multiple_trajectories: boolean, optional (default False)
        Whether ``x`` (and ``x_dot``) hold several trajectories.

    unbias: boolean, optional (default True)
        Accepted for interface compatibility; this method body does not
        reference it.

    quiet: boolean, optional (default False)
        If True, ConvergenceWarning, LinAlgWarning and UserWarning raised
        while fitting are ignored.

    Returns
    -------
    self: returns an instance of self
    """
    if multiple_trajectories:
        x, x_dot = self.process_multiple_trajectories(x, t, x_dot)
    else:
        x = validate_input(x, t)

        if self.discrete_time:
            if x_dot is None:
                # Next-state targets: pair each sample with its successor.
                x_dot = x[1:]
                x = x[:-1]
            else:
                # Validate the supplied targets themselves; validating ``x``
                # here would silently discard the user's ``x_dot``.
                x_dot = validate_input(x_dot)
        else:
            if x_dot is None:
                x_dot = self.differentiation_method(x, t)
            else:
                x_dot = validate_input(x_dot, t)

    # Drop rows where derivative isn't known
    x, x_dot = drop_nan_rows(x, x_dot)

    steps = [("features", self.feature_library), ("model", self.optimizer)]
    self.model = Pipeline(steps)

    action = "ignore" if quiet else "default"
    with warnings.catch_warnings():
        warnings.filterwarnings(action, category=ConvergenceWarning)
        warnings.filterwarnings(action, category=LinAlgWarning)
        warnings.filterwarnings(action, category=UserWarning)

        self.model.fit(
            x,
            x_dot,
            model__forcing_input=forcing_input,
            model__initial_forcing_params=initial_forcing_params,
        )

    self.n_input_features_ = self.model.steps[0][1].n_input_features_
    self.n_output_features_ = self.model.steps[0][1].n_output_features_

    if self.feature_names is None:
        # Default names: x0, x1, ... one per input feature.
        self.feature_names = ["x" + str(i) for i in range(self.n_input_features_)]

    return self
def predict(self, x, u=None, multiple_trajectories=False):
    """
    Evaluate the fitted SINDy model to obtain predicted time derivatives.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Samples.

    u: array-like or list of array-like, shape (n_samples, n_control_features),
        optional (default None)
        Control variables. With ``multiple_trajectories=True`` this must be
        a list holding the control data of each trajectory. Required when
        the model was fit with control variables; ignored (with a warning)
        when it was not.

    multiple_trajectories: boolean, optional (default False)
        When True, ``x`` is a list with one entry per trajectory. When
        False, ``x`` is a single trajectory.

    Returns
    -------
    x_dot: array-like or list of array-like, shape (n_samples, n_input_features)
        Predicted time derivatives.

    Raises
    ------
    TypeError
        If the model was fit with control variables and ``u`` is None.
    """
    check_is_fitted(self, "model")

    with_control = u is not None and self.n_control_features_ != 0

    if with_control:
        if not multiple_trajectories:
            states = validate_input(x)
            controls = validate_control_variables(states, u)
            return self.model.predict(concatenate((states, controls), axis=1))

        states = [validate_input(trajectory) for trajectory in x]
        controls = validate_control_variables(
            states, u, multiple_trajectories=True, return_array=False
        )
        # The model expects state and control columns side by side.
        return [
            self.model.predict(concatenate((state, control), axis=1))
            for state, control in zip(states, controls)
        ]

    # No usable control input: either it is missing or the model has none.
    if self.n_control_features_ > 0:
        raise TypeError("Model was fit using control variables, so u is required")
    if u is not None:
        warnings.warn(
            "Control variables u were ignored because control variables were"
            " not used when the model was fit"
        )

    if not multiple_trajectories:
        return self.model.predict(validate_input(x))

    states = [validate_input(trajectory) for trajectory in x]
    return [self.model.predict(state) for state in states]
def differentiate(self, x, t=1, multiple_trajectories=False):
    """
    Differentiate data with the model's differentiation method.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Data to be differentiated.

    t: int, numpy array of shape [n_samples], or list of numpy arrays,
        optional (default 1)
        Time step between samples or array of collection times.

    multiple_trajectories: boolean, optional (default False)
        When True, ``x`` is a list with one entry per trajectory and a list
        of derivatives is returned. When False, ``x`` is a single
        trajectory.

    Returns
    -------
    x_dot: array-like or list of array-like, shape (n_samples, n_input_features)
        Time derivatives computed with the model's differentiation method.

    Raises
    ------
    RuntimeError
        If the model is a discrete-time model.
    """
    if self.discrete_time:
        raise RuntimeError("No differentiation implemented for discrete time model")

    if not multiple_trajectories:
        validated = validate_input(x, t)
        return self.differentiation_method(validated, t)

    # The trajectory helper returns (x, x_dot); only the derivatives are needed.
    processed = self.process_multiple_trajectories(x, t, None, return_array=False)
    return processed[1]
def process_multiple_trajectories(self, x, t, x_dot, return_array=True):
    """
    Validate multi-trajectory input and obtain the matching derivatives.

    Each trajectory in ``x`` is validated. If ``x_dot`` is None the
    derivatives are computed: for discrete-time models by shifting every
    trajectory one sample, otherwise with ``self.differentiation_method``.
    If ``x_dot`` is given it must be a list and each entry is validated.
    When ``t`` is a list it is paired with the trajectories entry by entry;
    otherwise the same ``t`` is used for every trajectory.

    Returns the pair ``(x, x_dot)``: stacked with ``vstack`` when
    ``return_array`` is True, as lists of per-trajectory data otherwise.

    Raises TypeError if ``x`` is not a list, or if ``x_dot`` is given and is
    not a list.
    """
    if not isinstance(x, Sequence):
        raise TypeError("Input x must be a list")

    x_dot_type_message = (
        "x_dot must be a list if used with x of list type "
        "(i.e. for multiple trajectories)"
    )

    def with_times(items):
        # Pair every item with its own time data when t is a list,
        # otherwise with the one shared t.
        if isinstance(t, Sequence):
            return zip(items, t)
        return ((item, t) for item in items)

    if self.discrete_time:
        x = [validate_input(trajectory) for trajectory in x]
        if x_dot is None:
            # Next-state targets: drop the first sample from the targets
            # and the last sample from the inputs.
            x_dot = [trajectory[1:] for trajectory in x]
            x = [trajectory[:-1] for trajectory in x]
        elif isinstance(x_dot, Sequence):
            x_dot = [validate_input(derivative) for derivative in x_dot]
        else:
            raise TypeError(x_dot_type_message)
    else:
        if x_dot is not None and not isinstance(x_dot, Sequence):
            raise TypeError(x_dot_type_message)

        x = [validate_input(trajectory, time) for trajectory, time in with_times(x)]
        if x_dot is None:
            x_dot = [
                self.differentiation_method(trajectory, time)
                for trajectory, time in with_times(x)
            ]
        else:
            x_dot = [
                validate_input(derivative, time)
                for derivative, time in with_times(x_dot)
            ]

    if not return_array:
        return x, x_dot
    return vstack(x), vstack(x_dot)
def score(
    self,
    x,
    t=None,
    x_dot=None,
    u=None,
    multiple_trajectories=False,
    metric=r2_score,
    **metric_kws
):
    """
    Score the model's time derivative prediction.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Samples.

    t: float, numpy array of shape [n_samples], or list of numpy arrays,
        optional (default None)
        Time step between samples or array of collection times. Used to
        compute the time derivatives of the samples when ``x_dot`` is not
        provided. If None, the default time step ``t_default`` is used.

    x_dot: array-like or list of array-like, shape (n_samples, n_input_features),
        optional (default None)
        Pre-computed derivatives of the samples. If provided they are used
        to compute the score; otherwise the derivatives are computed with
        the model's differentiation method (or, for discrete-time models,
        by shifting ``x`` one sample).

    u: array-like or list of array-like, shape (n_samples, n_control_features),
        optional (default None)
        Control variables. With ``multiple_trajectories=True`` this must be
        a list holding the control data of each trajectory. Required when
        the model was fit with control variables; ignored (with a warning)
        when it was not.

    multiple_trajectories: boolean, optional (default False)
        When True, ``x`` is a list with one entry per trajectory. When
        False, ``x`` is a single trajectory.

    metric: metric function, optional
        Function used to score the prediction; it is called as
        ``metric(x_dot, x_dot_predict, **metric_kws)``. Default is the
        coefficient of determination R^2.

    metric_kws: dict, optional
        Extra keyword arguments passed to the metric function.

    Returns
    -------
    score: float
        Metric function value for the model prediction of x_dot.

    Raises
    ------
    TypeError
        If the model was fit with control variables and ``u`` is None.
    """
    if t is None:
        t = self.t_default

    if u is not None and self.n_control_features_ != 0:
        # For discrete-time models without x_dot, the last sample of x is
        # dropped below, so the control data is trimmed to match.
        drop_final_sample = self.discrete_time and (x_dot is None)
        u = validate_control_variables(
            x,
            u,
            multiple_trajectories=multiple_trajectories,
            trim_last_point=drop_final_sample,
        )
    elif self.n_control_features_ > 0:
        raise TypeError("Model was fit using control variables, so u is required")
    elif u is not None:
        warnings.warn(
            "Control variables u were ignored because control variables were"
            " not used when the model was fit"
        )

    if multiple_trajectories:
        x, x_dot = self._process_multiple_trajectories(
            x, t, x_dot, return_array=True
        )
    else:
        x = validate_input(x, t)
        if x_dot is None:
            if self.discrete_time:
                x_dot, x = x[1:], x[:-1]
            else:
                x_dot = self.differentiation_method(x, t)

    # A one-dimensional derivative is turned into a single column.
    if ndim(x_dot) == 1:
        x_dot = x_dot.reshape(-1, 1)

    # Append control variables
    if u is not None and self.n_control_features_ > 0:
        x = concatenate((x, u), axis=1)

    # Drop rows where derivative isn't known (usually endpoints)
    x, x_dot = drop_nan_rows(x, x_dot)

    predicted = self.model.predict(x)
    return metric(x_dot, predicted, **metric_kws)
def fit(
    self,
    x,
    t=None,
    x_dot=None,
    u=None,
    multiple_trajectories=False,
    unbias=True,
    quiet=False,
):
    """
    Fit the SINDy model.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Training data. For multi-trajectory training data, a list holding
        the data of each trajectory; trajectories may differ in their
        number of samples.

    t: float, numpy array of shape [n_samples], or list of numpy arrays,
        optional (default None)
        A float gives the time step between samples. An array gives the
        time at which each sample was collected and must be strictly
        increasing. For multi-trajectory data it may also be a list of
        arrays with the collection times of each trajectory. If None, the
        default time step ``t_default`` is used.

    x_dot: array-like or list of array-like, shape (n_samples, n_input_features),
        optional (default None)
        Pre-computed derivatives of the training data. If provided, it must
        match the shape of the training data and is used as the time
        derivatives. If not provided, the derivatives are computed with the
        model's differentiation method (or, for discrete-time models, by
        shifting ``x`` one sample).

    u: array-like or list of array-like, shape (n_samples, n_control_features),
        optional (default None)
        Control variables/inputs. Supplying them enables sparse
        identification of nonlinear dynamical systems with control
        (SINDYc). For multi-trajectory data, a list holding the control
        data of each trajectory.

    multiple_trajectories: boolean, optional, (default False)
        Whether the training data consists of multiple trajectories. If
        True, the training data must be a list of arrays, one per
        trajectory. If False, it must be a single array.

    unbias: boolean, optional (default True)
        Passed to the ``SINDyOptimizer`` wrapper. When True, an extra step
        of unregularized linear regression is applied to the coefficients
        in the support identified by the optimizer, which helps remove the
        bias introduced by any regularization the optimizer applies.

    quiet: boolean, optional (default False)
        Whether to suppress warnings during model fitting.

    Returns
    -------
    self: returns an instance of self
    """
    if t is None:
        t = self.t_default

    if u is not None:
        # For discrete-time models without x_dot, the last sample of x is
        # dropped below, so the control data is trimmed to match.
        drop_final_sample = self.discrete_time and (x_dot is None)
        u = validate_control_variables(
            x,
            u,
            multiple_trajectories=multiple_trajectories,
            trim_last_point=drop_final_sample,
        )
        self.n_control_features_ = u.shape[1]
    else:
        self.n_control_features_ = 0

    if multiple_trajectories:
        x, x_dot = self._process_multiple_trajectories(x, t, x_dot)
    else:
        x = validate_input(x, t)

        if self.discrete_time:
            if x_dot is not None:
                x_dot = validate_input(x_dot)
            else:
                x_dot, x = x[1:], x[:-1]
        elif x_dot is not None:
            x_dot = validate_input(x_dot, t)
        else:
            x_dot = self.differentiation_method(x, t)

    # Append control variables
    if self.n_control_features_ > 0:
        x = concatenate((x, u), axis=1)

    # Drop rows where derivative isn't known
    x, x_dot = drop_nan_rows(x, x_dot)

    wrapped_optimizer = SINDyOptimizer(self.optimizer, unbias=unbias)
    self.model = Pipeline(
        [("features", self.feature_library), ("model", wrapped_optimizer)]
    )

    warning_action = "ignore" if quiet else "default"
    with warnings.catch_warnings():
        for category in (ConvergenceWarning, LinAlgWarning, UserWarning):
            warnings.filterwarnings(warning_action, category=category)

        self.model.fit(x, x_dot)

    feature_step = self.model.steps[0][1]
    self.n_input_features_ = feature_step.n_input_features_
    self.n_output_features_ = feature_step.n_output_features_

    if self.feature_names is None:
        # State variables are named x0, x1, ...; control variables u0, u1, ...
        n_state_features = self.n_input_features_ - self.n_control_features_
        state_names = ["x" + str(i) for i in range(n_state_features)]
        control_names = ["u" + str(i) for i in range(self.n_control_features_)]
        self.feature_names = state_names + control_names

    return self
def __call__(self, x, t=1):
    """
    Validate ``x`` against ``t`` and return ``self._differentiate`` applied
    to the validated data and ``t``.
    """
    validated = validate_input(x, t=t)
    return self._differentiate(validated, t)
def score(
    self,
    x,
    t=1,
    x_dot=None,
    multiple_trajectories=False,
    metric=r2_score,
    **metric_kws
):
    """
    Returns a score for the time derivative prediction.

    Parameters
    ----------
    x: array-like or list of array-like, shape (n_samples, n_input_features)
        Samples

    t: float, numpy array of shape [n_samples], or list of numpy arrays,
        optional (default 1)
        Time step between samples or array of collection times. Optional,
        used to compute the time derivatives of the samples if x_dot is not
        provided.

    x_dot: array-like or list of array-like, shape (n_samples, n_input_features),
        optional
        Optional pre-computed derivatives of the samples. If provided,
        these values will be used to compute the score. If not provided,
        the time derivatives of the samples will be computed using the
        specified differentiation method (or, for discrete-time models, by
        shifting ``x`` one sample).

    multiple_trajectories: boolean, optional (default False)
        If True, x contains multiple trajectories and must be a list of
        data from each trajectory. If False, x is a single trajectory.

    metric: metric function, optional
        Metric function with which to score the prediction. It is called
        as ``metric(x_dot, x_dot_predict, **metric_kws)``, i.e. reference
        values first and predictions second. Default is the coefficient of
        determination R^2.

    metric_kws: dict, optional
        Optional keyword arguments to pass to the metric function.

    Returns
    -------
    score: float
        Metric function value for the model prediction of x_dot.
    """
    if multiple_trajectories:
        x, x_dot = self.process_multiple_trajectories(
            x, t, x_dot, return_array=True
        )
    else:
        x = validate_input(x, t)
        if x_dot is None:
            if self.discrete_time:
                # Next-state targets: pair each sample with its successor.
                x_dot = x[1:]
                x = x[:-1]
            else:
                x_dot = self.differentiation_method(x, t)

    # A one-dimensional derivative is turned into a single column.
    if ndim(x_dot) == 1:
        x_dot = x_dot.reshape(-1, 1)

    # Drop rows where derivative isn't known (usually endpoints)
    x, x_dot = drop_nan_rows(x, x_dot)

    x_dot_predict = self.model.predict(x)

    # Reference values go first, predictions second: metrics such as R^2
    # are not symmetric in their arguments.
    return metric(x_dot, x_dot_predict, **metric_kws)