Example #1
0
    def test_lp_error_grid_points(self):
        """lp_distance must raise ValueError when grid points differ."""
        mismatched = FDataGrid(
            [[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]],
            grid_points=[1, 2, 4, 4.3, 5],
        )

        with np.testing.assert_raises(ValueError):
            lp_distance(self.fd, mismatched)
Example #2
0
    def test_lp_error_domain_ranges(self):
        """lp_distance must raise ValueError when domain ranges differ."""
        shifted_domain = FDataGrid(
            [[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]],
            sample_points=[2, 3, 4, 5, 6],
        )

        with np.testing.assert_raises(ValueError):
            lp_distance(self.fd, shifted_domain)
Example #3
0
    def test_lp_grid_basis(self):
        """Distance between equivalent grid and basis representations is 0."""
        # Mixed representations and self-distance should all vanish.
        zero_distance_pairs = (
            (self.fd, self.fd_basis),
            (self.fd_basis, self.fd),
        )
        for first, second in zero_distance_pairs:
            np.testing.assert_allclose(lp_distance(first, second), 0)

        # Explicit evaluation points must not change the result.
        np.testing.assert_allclose(
            lp_distance(self.fd_basis,
                        self.fd_basis,
                        eval_points=[1, 2, 3, 4, 5]), 0)
        np.testing.assert_allclose(
            lp_distance(self.fd_basis, self.fd_basis), 0)
    def test_kneighbors(self):
        """Test k neighbor searches for all k-neighbors estimators"""
        unsupervised = NearestNeighbors()
        unsupervised.fit(self.X)

        outlier_detector = LocalOutlierFactor(n_neighbors=5)
        outlier_detector.fit(self.X)

        classifier = KNeighborsClassifier()
        classifier.fit(self.X, self.y)

        regressor = KNeighborsRegressor()
        regressor.fit(self.X, self.modes_location)

        # Neighbor indices expected for the first four samples.
        expected_links = [[0, 7, 21, 23, 15],
                          [1, 12, 19, 18, 17],
                          [2, 17, 22, 27, 26],
                          [3, 4, 9, 5, 25]]

        for estimator in (unsupervised, classifier,
                          regressor, outlier_detector):

            dist, links = estimator.kneighbors(self.X[:4])
            np.testing.assert_array_equal(links, expected_links)

            graph = estimator.kneighbors_graph(self.X[:4])

            # Reported distance to the second neighbor of sample 0
            # must equal the actual lp distance between the curves.
            dist_kneigh = lp_distance(self.X[0], self.X[7])
            np.testing.assert_array_almost_equal(dist[0, 1], dist_kneigh)

            # Graph row 0 flags exactly the neighbors of sample 0.
            for i in range(30):
                self.assertEqual(graph[0, i] == 1.0, i in links[0])
                self.assertEqual(graph[0, i] == 0.0, i not in links[0])
    def test_radius_neighbors(self):
        """Test query with radius"""
        unsupervised = NearestNeighbors(radius=.1)
        unsupervised.fit(self.X)

        classifier = RadiusNeighborsClassifier(radius=.1)
        classifier.fit(self.X, self.y)

        regressor = RadiusNeighborsRegressor(radius=.1)
        regressor.fit(self.X, self.modes_location)

        # Indices expected inside the radius for each of the 4 queries.
        expected_links = ([0, 7], [1], [2, 17, 22, 27], [3, 4, 9])

        for estimator in (unsupervised, classifier, regressor):

            dist, links = estimator.radius_neighbors(self.X[:4])

            for found, wanted in zip(links, expected_links):
                np.testing.assert_array_equal(found, np.array(wanted))

            # Reported distance to the second neighbor of sample 0
            # must equal the actual lp distance between the curves.
            dist_kneigh = lp_distance(self.X[0], self.X[7])
            np.testing.assert_array_almost_equal(dist[0][1], dist_kneigh)

            graph = estimator.radius_neighbors_graph(self.X[:4])

            # Graph row 0 flags exactly the in-radius neighbors of sample 0.
            for i in range(30):
                self.assertEqual(graph[0, i] == 1.0, i in links[0])
                self.assertEqual(graph[0, i] == 0.0, i not in links[0])
Example #6
0
    def test_lp_error_dimensions(self):
        """lp_distance must reject operands of mismatched dimensions."""
        # Case internal arrays
        incompatible_pairs = (
            (self.fd, self.fd_surface),
            (self.fd, self.fd_curve),
            (self.fd_surface, self.fd_curve),
        )

        for first, second in incompatible_pairs:
            with np.testing.assert_raises(ValueError):
                lp_distance(first, second)
Example #7
0
def v_sample_stat(fd, weights, p=2):
    r"""
    Calculates a statistic that measures the variability between groups of
    samples in a :class:`skfda.representation.FData` object.

    The statistic defined as below is calculated between all the samples in a
    :class:`skfda.representation.FData` object with a given set of
    weights.

    Let :math:`\{f_i\}_{i=1}^k` be a set of samples in a FData object.
    Let :math:`\{w_j\}_{j=1}^k` be a set of weights, where :math:`w_i` is
    related to the sample :math:`f_i` for :math:`i=1,\dots,k`.
    The statistic is defined as:

    .. math::
        V_n = \sum_{i<j}^kw_i\|f_i-f_j\|^2

    This statistic is defined in Cuevas[1].

    Args:
         fd (FData): Object containing all the samples for which we want
            to calculate the statistic.
         weights (list of int): Weights related to each sample. Each
            weight is expected to appear in the same position as its
            corresponding sample in the FData object.
        p (int, optional): p of the lp norm. Must be greater or equal
            than 1. If p='inf' or p=np.inf it is used the L infinity metric.
            Defaults to 2.

    Returns:
        The value of the statistic.

    Raises:
        ValueError

    Examples:

        >>> from skfda.inference.anova import v_sample_stat
        >>> from skfda.representation.grid import FDataGrid
        >>> import numpy as np

        We create different trajectories to be applied in the statistic and a
        set of weights.

        >>> t = np.linspace(0, 1, 50)
        >>> x1 = t * (1 - t) ** 5
        >>> x2 = t ** 2 * (1 - t) ** 4
        >>> x3 = t ** 3 * (1 - t) ** 3
        >>> fd = FDataGrid([x1, x2, x3], sample_points=t)
        >>> weights = [10, 20, 30]

        Finally the value of the statistic is calculated:

        >>> v_sample_stat(fd, weights)
        0.01649448843348894

    References:
        [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An
        anova test for functional data". *Computational Statistics & Data
        Analysis*, 47:111-112, 02 2004
    """
    weights = np.asarray(weights)

    # Validate inputs before doing any pairwise work.
    if not isinstance(fd, FData):
        raise ValueError("Argument type must inherit FData.")
    if len(weights) != fd.n_samples:
        raise ValueError("Number of weights must match number of samples.")

    # Strict lower-triangle indices enumerate every unordered pair exactly
    # once: rows[r] > cols[r], so cols holds the smaller index i of each
    # pair (i < j), whose weight w_i multiplies the squared distance.
    rows, cols = np.tril_indices(fd.n_samples, -1)
    pair_distances = lp_distance(fd[rows], fd[cols], p=p)
    return np.sum(weights[cols] * pair_distances ** p)