def test_rmh(self):
    """Check that recursive maxima hunting selects the expected points.

    Two equally sized groups of trajectories are simulated with
    ``make_gaussian_process``. The second group is generated with the
    mean function ``mean_1``, a piecewise-linear function whose kinks
    are located at 0.25, 0.5 and 0.75. After fitting, the grid points
    selected by the method must match those three locations within a
    relative tolerance of 0.1.
    """
    n_samples = 10000
    n_features = 100
    half = n_samples // 2

    def mean_1(t):
        # Sum of absolute-value terms: kinks at t = 0.25, 0.5 and 0.75.
        return (
            np.abs(t - 0.25)
            - 2 * np.abs(t - 0.5)
            + np.abs(t - 0.75)
        )

    # Class 0: first half of the samples, no explicit mean.
    X_0 = make_gaussian_process(
        n_samples=half,
        n_features=n_features,
        random_state=0,
    )
    # Class 1: second half of the samples, generated with ``mean_1``.
    X_1 = make_gaussian_process(
        n_samples=half,
        n_features=n_features,
        mean=mean_1,
        random_state=1,
    )
    X = skfda.concatenate((X_0, X_1))

    # Labels: zeros for the first half, ones for the second half.
    y = np.repeat([0.0, 1.0], half)

    rmh = vs.RecursiveMaximaHunting(
        correction=vs.recursive_maxima_hunting.GaussianSampleCorrection(),
        stopping_condition=vs.recursive_maxima_hunting.ScoreThresholdStop(
            threshold=0.05,
        ),
    )
    _ = rmh.fit(X, y)

    # Translate the boolean support mask into grid point locations.
    selected = X.grid_points[0][rmh.get_support()]
    np.testing.assert_allclose(selected, [0.25, 0.5, 0.75], rtol=1e-1)
def get_XY_gaussian_process(self, n):
    """
    Generate n Gaussian process paths with a linear trend.

    Every path is built from one call to ``make_gaussian_process``
    (``Exponential`` covariance, ``self.d`` samples observed at
    ``self.npoints`` points) to which a linear drift ``slope * t`` is
    added, with ``t`` on an evenly spaced grid of ``self.npoints`` values
    in [0, 1]. Each slope component is ``3 * (2 * u - 1)`` with ``u``
    drawn from ``np.random.random``, i.e. it lies in [-3, 3).

    Parameters
    ----------
    n: int
        Number of samples.

    Returns
    -------
    X: array, shape (n, self.npoints, self.d)
        Array of sample paths. It is a 3-dimensional array, containing
        the coordinates in R^d of n paths, each composed of npoints.

    Y: array, shape (n,)
        Euclidean norm of the slope vector of each path.
    """
    X = np.zeros((n, self.npoints, self.d))
    grid = np.linspace(0, 1, self.npoints)

    # The slopes are drawn before the noise so that the order in which
    # random numbers are requested is kept.
    slope = 3 * (2 * np.random.random((n, self.d)) - 1)
    Y = np.sqrt((slope ** 2).sum(axis=1))

    # Each row of X is a view, so writing into it fills X in place.
    for path in X:
        gp = make_gaussian_process(
            n_features=self.npoints,
            n_samples=self.d,
            cov=Exponential(),
        )
        path[...] = gp.data_matrix.T[0]

    # Broadcast (n, 1, d) slopes against (1, npoints, 1) times.
    drift = slope[:, np.newaxis, :] * grid[np.newaxis, :, np.newaxis]
    return X + drift, Y
def _anova_bootstrap(fd_grouped, n_reps, random_state=None, p=2, equal_var=True): n_groups = len(fd_grouped) if n_groups < 2: raise ValueError("At least two groups must be passed in fd_grouped.") for fd in fd_grouped[1:]: if not np.array_equal(fd.domain_range, fd_grouped[0].domain_range): raise ValueError("Domain range must match for every FData in " "fd_grouped.") start, stop = fd_grouped[0].domain_range[0] sizes = [fd.n_samples for fd in fd_grouped] # List with sizes of each group # Instance a random state object in case random_state is an int random_state = check_random_state(random_state) if equal_var: k_est = concatenate(fd_grouped).cov().data_matrix[0, ..., 0] k_est = [k_est] * len(fd_grouped) else: # Estimating covariances for each group k_est = [fd.cov().data_matrix[0, ..., 0] for fd in fd_grouped] # Number of sample points for gaussian processes have to match # the features of the covariances. n_features = k_est[0].shape[0] # Simulating n_reps observations for each of the n_groups gaussian # processes sim = [make_gaussian_process(n_reps, n_features=n_features, start=start, stop=stop, cov=k_est[i], random_state=random_state) for i in range(n_groups)] v_samples = np.empty(n_reps) for i in range(n_reps): fd = FDataGrid([s.data_matrix[i, ..., 0] for s in sim]) v_samples[i] = v_asymptotic_stat(fd, sizes, p=p) return v_samples
groups = np.full(n_samples * n_groups, 'Sample 1') groups[10:20] = 'Sample 2' groups[20:] = 'Sample 3' ############################################################################### # First simulation uses a low :math:`\sigma^2 = 0.01` value. In this case the # differences between the means of each group should be clear, and the # p-value for the test should be near to zero. sigma2 = 0.01 cov = WhiteNoise(variance=sigma2) fd1 = make_gaussian_process(n_samples, mean=m1, cov=cov, n_features=n_features, random_state=1, start=start, stop=stop) fd2 = make_gaussian_process(n_samples, mean=m2, cov=cov, n_features=n_features, random_state=2, start=start, stop=stop) fd3 = make_gaussian_process(n_samples, mean=m3, cov=cov, n_features=n_features, random_state=3,
# In order to instantiate a # :class:`~skfda.exploratory.visualization.SurfaceBoxplot`, a functional data # object with bidimensional domain must be generated. In this example, a # FDataGrid representing a function # :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}` is constructed, # using as an example a Brownian process extruded into another dimension. # # The values of the Brownian process are generated using # :func:`~skfda.datasets.make_gaussian_process`, # which returns an FDataGrid object whose ``data_matrix`` # stores the values needed. n_samples = 10 n_features = 10 fd = make_gaussian_process(n_samples=n_samples, n_features=n_features, random_state=1) fd.dataset_name = "Brownian process" ############################################################################## # Afterwards, the values generated for one dimension of the domain are extruded # along another dimension, obtaining a three-dimensional matrix or cube # (two-dimensional domain and one-dimensional image). cube = np.repeat(fd.data_matrix, n_features).reshape( (n_samples, n_features, n_features)) ############################################################################## # We can now plot the extruded trajectories. fd_2 = FDataGrid(data_matrix=cube,