Ejemplo n.º 1
0
    def test_grid_fpca_transform_result(self):
        """Check grid-based FPCA scores on the weather data.

        A one-component FPCA with 365 unit weights is fitted to the first
        coordinate of the ``fetch_weather`` dataset, and the transformed
        scores are compared with stored reference values (rtol=1e-6).
        """
        weather_curves = fetch_weather()['data'].coordinates[0]

        model = FPCA(n_components=1, weights=[1] * 365)
        model.fit(weather_curves)

        # Reference scores for the single component, one value per
        # observation (the original note only says "results obtained").
        # Stored flat and reshaped to a column to match the shape returned
        # by ``transform``.
        expected_scores = np.array([
            -77.05020176, -90.56072204, -82.39565947,
            -114.45375934, -69.99735931, -64.44894047,
            135.58336775, -14.93460852, 0.75024737, -36.4781038,
            -42.35637749, -73.98910492, -67.11253749,
            -103.68269798, -104.65948079,
            -7.42817782, 7.48125036, 56.29792942, 181.00258791,
            -3.53294736, 37.94673912, 124.43819913, -7.04274676,
            -49.61134859, -136.86256785, -184.03502398,
            -181.72835749, -51.06323208, -137.85606731,
            50.10941466, 151.68118097, 159.01360046,
            217.17981302, 234.40195237, 345.39374006,
        ]).reshape(-1, 1)

        np.testing.assert_allclose(
            model.transform(weather_curves),
            expected_scores,
            rtol=1e-6,
        )
Ejemplo n.º 2
0
    def test_basis_fpca_transform_result(self):
        """Check regularized basis-FPCA scores on the weather data.

        The first coordinate of the ``fetch_weather`` dataset is re-gridded,
        converted to a 9-term Fourier basis on (0, 365), and passed through a
        3-component FPCA with a Tikhonov penalty. The resulting scores are
        compared with reference values (atol=1e-7).
        """
        raw_curves = fetch_weather()['data'].coordinates[0]

        # Rebuild the grid representation on sample points 0.5, 1.5, ..., 364.5.
        sample_points = np.arange(0.5, 365, 1)
        grid_curves = FDataGrid(
            np.squeeze(raw_curves.data_matrix),
            sample_points,
        )

        # Express the curves in a Fourier basis.
        fourier_basis = Fourier(n_basis=9, domain_range=(0, 365))
        basis_curves = grid_curves.to_basis(fourier_basis)

        # Penalty built from LinearDifferentialOperator(2) with weight 1e5.
        penalty = TikhonovRegularization(
            LinearDifferentialOperator(2),
            regularization_parameter=1e5,
        )
        model = FPCA(n_components=3, regularization=penalty)
        model.fit(basis_curves)
        computed_scores = model.transform(basis_curves)

        # Reference scores obtained using Ramsay's R package.
        expected_scores = np.array([
            [-7.68307641e+01, 5.69034443e+01, -1.22440149e+01],
            [-9.02873996e+01, 1.46262257e+01, -1.78574536e+01],
            [-8.21155683e+01, 3.19159491e+01, -2.56212328e+01],
            [-1.14163637e+02, 3.66425562e+01, -1.00810836e+01],
            [-6.97263223e+01, 1.22817168e+01, -2.39417618e+01],
            [-6.41886364e+01, -1.07261045e+01, -1.10587407e+01],
            [1.35824412e+02, 2.03484658e+01, -9.04815324e+00],
            [-1.46816399e+01, -2.66867491e+01, -1.20233465e+01],
            [1.02507511e+00, -2.29840736e+01, -9.06081296e+00],
            [-3.62936903e+01, -2.09520442e+01, -1.14799951e+01],
            [-4.20649313e+01, -1.13618094e+01, -6.24909009e+00],
            [-7.38115985e+01, -3.18423866e+01, -1.50298626e+01],
            [-6.69822456e+01, -3.35518632e+01, -1.25167352e+01],
            [-1.03534763e+02, -1.29513941e+01, -1.49103879e+01],
            [-1.04542036e+02, -1.36794907e+01, -1.41555965e+01],
            [-7.35863347e+00, -1.41171956e+01, -2.97562788e+00],
            [7.28804530e+00, -5.34421830e+01, -3.39823418e+00],
            [5.59974094e+01, -4.02154080e+01, 3.78800103e-01],
            [1.80778702e+02, 1.87798201e+01, -1.99043247e+01],
            [-3.69700617e+00, -4.19441020e+01, 6.45820740e+00],
            [3.76527216e+01, -4.23056953e+01, 1.04221757e+01],
            [1.23850646e+02, -4.24648130e+01, -2.22336786e-01],
            [-7.23588457e+00, -1.20579536e+01, 2.07502089e+01],
            [-4.96871011e+01, 8.88483448e+00, 2.02882768e+01],
            [-1.36726355e+02, -1.86472599e+01, 1.89076217e+01],
            [-1.83878661e+02, 4.12118550e+01, 1.78960356e+01],
            [-1.81568820e+02, 5.20817910e+01, 2.01078870e+01],
            [-5.08775852e+01, 1.34600555e+01, 3.18602712e+01],
            [-1.37633866e+02, 7.50809631e+01, 2.42320782e+01],
            [4.98276375e+01, 1.33401270e+00, 3.50611066e+01],
            [1.51149934e+02, -5.47417776e+01, 3.97592325e+01],
            [1.58366096e+02, -3.80762686e+01, -5.62415023e+00],
            [2.17139548e+02, 6.34055987e+01, -1.98853635e+01],
            [2.33615480e+02, -7.90787574e-02, 2.69069525e+00],
            [3.45371437e+02, 9.58703622e+01, 8.47570770e+00],
        ])

        np.testing.assert_allclose(computed_scores, expected_scores, atol=1e-7)
Ejemplo n.º 3
0
class BasisRegression:
    """Functional linear model on basis expansions of vector-valued paths.

    Each coordinate of the input paths is expanded on a basis, and a
    functional linear regression is fitted on the resulting representation.

    Parameters
    ----------
    nbasis: int
        Number of basis functions (for 'bspline' and 'fourier'), or number of
        principal components (for 'fPCA').

    basis_type: str, default='bspline'
        Type of basis used, possible values are 'bspline', 'fourier' and
        'fPCA'.

    Attributes
    ----------
    reg: object
        Instance of skfda.ml.regression.LinearRegression

    coef: array, default=None
        Regression coefficients, set by ``fit``.

    fpca_basis: object or None
        If basis_type='fPCA', instance of
        skfda.preprocessing.dim_reduction.projection.FPCA; None otherwise.
    """

    # Values of ``basis_type`` that ``data_to_basis`` knows how to handle.
    _SUPPORTED_BASIS_TYPES = ('bspline', 'fourier', 'fPCA')

    # Size of the B-spline basis each coordinate is expanded on before the
    # functional principal components are computed.
    _FPCA_SMOOTHING_NBASIS = 7

    def __init__(self, nbasis, basis_type='bspline'):
        self.nbasis = nbasis
        self.reg = LinearRegression()
        self.basis_type = basis_type
        self.coef = None
        # Always define the attribute so it can be inspected safely.
        self.fpca_basis = FPCA(self.nbasis) if basis_type == 'fPCA' else None

    def _coordinate_basis(self):
        """Return a new basis object for a single coordinate of the paths."""
        if self.basis_type == 'bspline':
            return BSpline(n_basis=self.nbasis)
        if self.basis_type == 'fourier':
            return Fourier(n_basis=self.nbasis)
        # 'fPCA': coordinates are first expanded on a fixed-size B-spline
        # basis; the principal components are computed afterwards.
        return BSpline(n_basis=self._FPCA_SMOOTHING_NBASIS)

    def data_to_basis(self, X, fit_fPCA=True):
        """Project the data to basis functions.

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of paths. It is a 3-dimensional array, containing the
            coordinates in R^d of n piecewise linear paths, each composed of
            n_points.

        fit_fPCA: boolean, default=True
            If basis_type='fPCA' and fit_fPCA=True, the functional principal
            components are fitted on X before X is transformed. Ignored for
            other basis types.

        Returns
        -------
        fd_basis: object
            For 'bspline' and 'fourier', the basis representation of X
            obtained from ``FDataGrid.to_basis``. For 'fPCA', the output of
            ``self.fpca_basis.transform`` applied to that representation.

        Raises
        ------
        ValueError
            If self.basis_type is not one of 'bspline', 'fourier', 'fPCA'.
        """
        # Fail early with a clear message instead of building an empty basis.
        if self.basis_type not in self._SUPPORTED_BASIS_TYPES:
            raise ValueError(
                f"Unknown basis_type {self.basis_type!r}; expected one of "
                f"{self._SUPPORTED_BASIS_TYPES}."
            )

        # The paths are sampled on an even grid of [0, 1].
        grid_points = np.linspace(0, 1, X.shape[1])
        fd = FDataGrid(X, grid_points)

        # One independent basis object per coordinate of the paths.
        basis = VectorValued(
            [self._coordinate_basis() for _ in range(X.shape[2])]
        )
        fd_basis = fd.to_basis(basis)

        if self.basis_type != 'fPCA':
            return fd_basis

        # basis_type may have been switched to 'fPCA' after construction.
        if self.fpca_basis is None:
            self.fpca_basis = FPCA(self.nbasis)
        if fit_fPCA:
            self.fpca_basis = self.fpca_basis.fit(fd_basis)
        return self.fpca_basis.transform(fd_basis)

    def fit(self, X, Y):
        """Fit the functional linear model to X and Y.

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of training paths. It is a 3-dimensional array, containing
            the coordinates in R^d of n piecewise linear paths, each composed
            of n_points.

        Y: array, shape (n)
            Array of target values.

        Returns
        -------
        reg: object
            The fitted instance of skfda.ml.regression.LinearRegression
            (note: the underlying regressor, not this object).
        """
        fd_basis = self.data_to_basis(X)
        self.reg.fit(fd_basis, Y)
        self.coef = self.reg.coef_
        return self.reg

    def predict(self, X):
        """Predict the output of self.reg for X.

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of paths to predict on. It is a 3-dimensional array,
            containing the coordinates in R^d of n piecewise linear paths,
            each composed of n_points.

        Returns
        -------
        Ypred: array, shape (n)
            Array of predicted values.
        """
        # Reuse the principal components fitted in ``fit``; do not refit.
        fd_basis = self.data_to_basis(X, fit_fPCA=False)
        return self.reg.predict(fd_basis)

    def get_loss(self, X, Y, plot=False):
        """Compute the empirical mean squared error of the model on (X, Y).

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of paths. It is a 3-dimensional array, containing the
            coordinates in R^d of n piecewise linear paths, each composed of
            n_points.

        Y: array, shape (n)
            Array of target values.

        plot: boolean, default=False
            If True, shows a scatter plot of the predicted values Ypred
            against the target values Y, together with a dashed reference
            line, to assess the quality of the fit.

        Returns
        -------
        hatL: float
            The mean of the squares of Y-Ypred, where Ypred are the values
            predicted by the fitted functional linear model.
        """
        Ypred = self.predict(X)
        if plot:
            # Dashed y = x reference line spanning the range of the targets.
            line_ends = [0.9 * np.min(Y), 1.1 * np.max(Y)]
            plt.scatter(Y, Ypred)
            plt.plot(line_ends, line_ends, '--', color='black')
            plt.title("Ypred against Y")
            plt.show()
        return np.mean((Y - Ypred)**2)