def test_lp_error_grid_points(self):
    """Check that lp_distance raises ValueError on mismatched grid points."""
    # 4.3 makes this grid differ from self.fd's, so the two objects
    # should not be comparable by the metric.
    mismatched = FDataGrid(
        [[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]],
        grid_points=[1, 2, 4, 4.3, 5],
    )
    with np.testing.assert_raises(ValueError):
        lp_distance(self.fd, mismatched)
def test_lp_error_domain_ranges(self):
    """Check that lp_distance raises ValueError on mismatched domain ranges."""
    # Use the grid_points keyword (as the sibling grid-points test does)
    # instead of the deprecated sample_points alias, for consistency.
    grid_points = [2, 3, 4, 5, 6]
    fd2 = FDataGrid([[2, 3, 4, 5, 6], [1, 4, 9, 16, 25]],
                    grid_points=grid_points)
    with np.testing.assert_raises(ValueError):
        lp_distance(self.fd, fd2)
def test_lp_grid_basis(self):
    """The distance between equivalent representations must be zero."""
    # Grid vs. basis (both argument orders), and basis vs. itself with
    # and without explicit evaluation points.
    zero_distances = [
        lp_distance(self.fd, self.fd_basis),
        lp_distance(self.fd_basis, self.fd),
        lp_distance(self.fd_basis, self.fd_basis,
                    eval_points=[1, 2, 3, 4, 5]),
        lp_distance(self.fd_basis, self.fd_basis),
    ]
    for distance in zero_distances:
        np.testing.assert_allclose(distance, 0)
def test_kneighbors(self):
    """Test k neighbor searches for all k-neighbors estimators"""
    nn = NearestNeighbors()
    nn.fit(self.X)
    lof = LocalOutlierFactor(n_neighbors=5)
    lof.fit(self.X)
    knn = KNeighborsClassifier()
    knn.fit(self.X, self.y)
    knnr = KNeighborsRegressor()
    knnr.fit(self.X, self.modes_location)

    expected_links = [[0, 7, 21, 23, 15],
                      [1, 12, 19, 18, 17],
                      [2, 17, 22, 27, 26],
                      [3, 4, 9, 5, 25]]
    # Distance from sample 0 to its second neighbor (sample 7); the same
    # value must be reported by every estimator, so compute it once.
    dist_kneigh = lp_distance(self.X[0], self.X[7])

    for estimator in [nn, knn, knnr, lof]:
        dist, links = estimator.kneighbors(self.X[:4])
        np.testing.assert_array_equal(links, expected_links)
        np.testing.assert_array_almost_equal(dist[0, 1], dist_kneigh)

        # The connectivity graph must be 1.0 exactly on the neighbor
        # indices of sample 0 and 0.0 everywhere else.
        graph = estimator.kneighbors_graph(self.X[:4])
        for i in range(30):
            is_neighbor = i in links[0]
            self.assertEqual(graph[0, i] == 1.0, is_neighbor)
            self.assertEqual(graph[0, i] == 0.0, not is_neighbor)
def test_radius_neighbors(self):
    """Test query with radius"""
    nn = NearestNeighbors(radius=.1)
    nn.fit(self.X)
    knn = RadiusNeighborsClassifier(radius=.1)
    knn.fit(self.X, self.y)
    knnr = RadiusNeighborsRegressor(radius=.1)
    knnr.fit(self.X, self.modes_location)

    # Expected neighborhoods (radius .1) of the first four samples.
    expected_neighborhoods = [
        np.array([0, 7]),
        np.array([1]),
        np.array([2, 17, 22, 27]),
        np.array([3, 4, 9]),
    ]
    # Same pair distance for every estimator; compute it once.
    dist_kneigh = lp_distance(self.X[0], self.X[7])

    for estimator in [nn, knn, knnr]:
        dist, links = estimator.radius_neighbors(self.X[:4])
        for found, expected in zip(links, expected_neighborhoods):
            np.testing.assert_array_equal(found, expected)
        np.testing.assert_array_almost_equal(dist[0][1], dist_kneigh)

        # The connectivity graph must be 1.0 exactly on the neighbor
        # indices of sample 0 and 0.0 everywhere else.
        graph = estimator.radius_neighbors_graph(self.X[:4])
        for i in range(30):
            is_neighbor = i in links[0]
            self.assertEqual(graph[0, i] == 1.0, is_neighbor)
            self.assertEqual(graph[0, i] == 0.0, not is_neighbor)
def test_lp_error_dimensions(self):
    """lp_distance must reject operands with incompatible dimensions."""
    # Each pair mixes objects whose internal arrays have different
    # dimensions, so the metric cannot be computed.
    incompatible_pairs = [
        (self.fd, self.fd_surface),
        (self.fd, self.fd_curve),
        (self.fd_surface, self.fd_curve),
    ]
    for first, second in incompatible_pairs:
        with np.testing.assert_raises(ValueError):
            lp_distance(first, second)
def v_sample_stat(fd, weights, p=2):
    r"""
    Compute a statistic measuring the variability between groups of
    samples in a :class:`skfda.representation.FData` object.

    The statistic is evaluated over every pair of samples in the
    :class:`skfda.representation.FData` object, using the given weights.

    Let :math:`\{f_i\}_{i=1}^k` be a set of samples in a FData object.
    Let :math:`\{w_j\}_{j=1}^k` be a set of weights, where :math:`w_i`
    is related to the sample :math:`f_i` for :math:`i=1,\dots,k`.
    The statistic is defined as:

    .. math::
        V_n = \sum_{i<j}^kw_i\|f_i-f_j\|^2

    This statistic is defined in Cuevas[1].

    Args:
        fd (FData): Object containing all the samples for which we want
            to calculate the statistic.
        weights (list of int): Weights related to each sample. Each weight
            is expected to appear in the same position as its
            corresponding sample in the FData object.
        p (int, optional): p of the lp norm. Must be greater or equal
            than 1. If p='inf' or p=np.inf it is used the L infinity
            metric. Defaults to 2.

    Returns:
        The value of the statistic.

    Raises:
        ValueError

    Examples:
        >>> from skfda.inference.anova import v_sample_stat
        >>> from skfda.representation.grid import FDataGrid
        >>> import numpy as np

        We create different trajectories to be applied in the statistic
        and a set of weights.

        >>> t = np.linspace(0, 1, 50)
        >>> x1 = t * (1 - t) ** 5
        >>> x2 = t ** 2 * (1 - t) ** 4
        >>> x3 = t ** 3 * (1 - t) ** 3
        >>> fd = FDataGrid([x1, x2, x3], sample_points=t)
        >>> weights = [10, 20, 30]

        Finally the value of the statistic is calculated:

        >>> v_sample_stat(fd, weights)
        0.01649448843348894

    References:
        [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An
        anova test for functional data". *Computational Statistics Data
        Analysis*, 47:111-112, 02 2004
    """
    weights = np.asarray(weights)
    if not isinstance(fd, FData):
        raise ValueError("Argument type must inherit FData.")
    if len(weights) != fd.n_samples:
        raise ValueError("Number of weights must match number of samples.")
    # Strictly-lower-triangular index pairs enumerate each unordered pair
    # (row > col) exactly once; the weight of the lower-index sample
    # multiplies each squared (p-th power) distance.
    rows, cols = np.tril_indices(fd.n_samples, -1)
    pair_distances = lp_distance(fd[rows], fd[cols], p=p)
    return np.sum(weights[cols] * pair_distances ** p)