# NOTE: import paths assume the scikit-fda (skfda) release this code targets
# (the one exposing the ``sample_points`` API).
import numpy as np
from sklearn.decomposition import PCA

from skfda import FDataGrid
from skfda.misc.regularization import compute_penalty_matrix


def __init__(self, grid: FDataGrid, smoothed=False):
    # keep an untouched copy of the input grid and cache its dimensions:
    # data_matrix has shape (n_series, n_observations, n_variables)
    self.init_grid = grid.copy()
    self.sample_points = self.init_grid.sample_points[0]
    self._nSeries = self.init_grid.data_matrix.shape[0]
    self._nObs = self.init_grid.data_matrix.shape[1]
    self._nVar = self.init_grid.data_matrix.shape[2]

    # one univariate FDataGrid per coordinate (variable)
    self.coordinates_grids = list(self.init_grid.coordinates)
    self.coordinate_names = self.init_grid.coordinate_names

    self._smoothed = smoothed
    if self._smoothed:
        # precompute first and second derivatives of each coordinate
        self.coordinates_grids_dx1 = [
            coord_grid.derivative(order=1)
            for coord_grid in self.coordinates_grids
        ]
        self.coordinates_grids_dx2 = [
            coord_grid.derivative(order=2)
            for coord_grid in self.coordinates_grids
        ]
    self._scaled = False
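# Construction sketch (hedged): the owning class is not shown in this excerpt,
# so ``CurveCollection`` below is a hypothetical name; the toy FDataGrid
# follows the (n_series, n_observations, n_variables) shape convention read
# in __init__ above.
#
#     import numpy as np
#     from skfda import FDataGrid
#
#     t = np.linspace(0, 1, 50)
#     data = np.stack([
#         np.stack([np.sin(2 * np.pi * t), np.cos(2 * np.pi * t)], axis=-1),
#         np.stack([t, t ** 2], axis=-1),
#     ])                                             # shape (2, 50, 2)
#     grid = FDataGrid(data_matrix=data, sample_points=t)
#     curves = CurveCollection(grid, smoothed=True)  # hypothetical class name
#     curves._nSeries, curves._nObs, curves._nVar    # -> 2, 50, 2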
def _fit_grid(self, X: FDataGrid, y=None):
    r"""Compute the first n_components principal components and save them.

    The eigenvalues associated with these principal components are also
    saved. For details on the implementation see the referenced book,
    chapter 8.

    In summary, we perform standard multivariate PCA over
    :math:`\frac{1}{\sqrt{N}} \mathbf{X} \mathbf{W}^{1/2}`, where
    :math:`N` is the number of samples in the dataset, :math:`\mathbf{X}`
    is the data matrix and :math:`\mathbf{W}` is the weight matrix (this
    matrix defines the numerical integration). By default the weight
    matrix is obtained using the trapezoidal rule.

    Args:
        X (FDataGrid): the functional data object to be analysed, in
            grid (discretized) representation
        y (None, not used): only present for consistency with the fit
            convention

    Returns:
        self (object)

    References:
        .. [RS05-8-4-1] Ramsay, J., Silverman, B. W. (2005). Discretizing
            the functions. In *Functional Data Analysis* (p. 161).
            Springer.
    """
    # check that the number of components does not exceed the sample size
    if self.n_components > X.n_samples:
        raise AttributeError("The sample size must be bigger than the "
                             "number of components")

    # check that the number of components does not exceed the number of
    # discretization points of the functional data object
    if self.n_components > X.data_matrix.shape[1]:
        raise AttributeError("The number of components should be "
                             "smaller than the number of discretization "
                             "points of the functional data object.")

    # data matrix initialization: drop the trailing codomain axis, giving
    # shape (n_samples, n_points_discretization)
    fd_data = X.data_matrix.reshape(X.data_matrix.shape[:-1])
    n_samples, n_points_discretization = fd_data.shape

    # if centering is enabled, subtract the mean function from each
    # function in the FDataGrid
    X = self._center_if_necessary(X)

    # establish weights for each point of discretization
    if not self.weights:
        # sample_points is a list with one array in the 1D case.
        # For the trapezoidal rule, with \delta x_k = x_k - x_{k-1}, the
        # weight vector is:
        # [\delta x_1 / 2, (\delta x_1 + \delta x_2) / 2, ...,
        #  (\delta x_{n-1} + \delta x_n) / 2, \delta x_n / 2]
        differences = np.diff(X.sample_points[0])
        differences = np.concatenate(((0,), differences, (0,)))
        self.weights = (differences[:-1] + differences[1:]) / 2
    elif callable(self.weights):
        self.weights = self.weights(X.sample_points[0])
        # if the result is an FDataGrid, reduce it to a 1-D array
        if isinstance(self.weights, FDataGrid):
            self.weights = np.squeeze(self.weights.data_matrix)

    weights_matrix = np.diag(self.weights)

    # identity data matrix: each "basis function" is the indicator of one
    # discretization point
    basis = FDataGrid(data_matrix=np.identity(n_points_discretization),
                      sample_points=X.sample_points)

    regularization_matrix = compute_penalty_matrix(
        basis_iterable=(basis,),
        regularization_parameter=1,
        regularization=self.regularization)

    # solve the (possibly regularized) linear system instead of inverting
    fd_data = np.transpose(
        np.linalg.solve(
            np.transpose(basis.data_matrix[..., 0]
                         + regularization_matrix),
            np.transpose(fd_data)))

    # see docstring for more information
    final_matrix = fd_data @ np.sqrt(weights_matrix) / np.sqrt(n_samples)

    pca = PCA(n_components=self.n_components)
    pca.fit(final_matrix)

    # undo the weighting to express the components on the original grid
    self.components_ = X.copy(data_matrix=np.transpose(
        np.linalg.solve(np.sqrt(weights_matrix),
                        np.transpose(pca.components_))))
    self.explained_variance_ratio_ = pca.explained_variance_ratio_
    self.explained_variance_ = pca.explained_variance_

    return self
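# Usage sketch (hedged): _fit_grid is not called directly; in scikit-fda it is
# dispatched from the public FPCA.fit when X is an FDataGrid. The import paths
# and the fetch_growth loader below follow the skfda release contemporary with
# the ``sample_points`` API.
if __name__ == "__main__":
    from skfda.datasets import fetch_growth
    from skfda.preprocessing.dim_reduction.projection import FPCA

    fd = fetch_growth()["data"]            # FDataGrid of Berkeley growth curves
    fpca = FPCA(n_components=2)
    fpca.fit(fd)                           # dispatches to _fit_grid
    print(fpca.explained_variance_ratio_)  # variance explained per component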