def test_regression_issue_grids(self):
    """
    Regression test for Issue 15, raised by @blasern.
    https://github.com/tommyod/KDEpy/issues/15
    """
    # The original reproduction from the issue report. np.meshgrid's default
    # ordering does not produce a lexicographically sorted grid, so the
    # verification must NOT pass.
    num_points = 20
    lower = np.array([-2.6, -4.0])
    upper = np.array([3.2, 7.7])
    axes = tuple(np.linspace(lo, hi, num_points) for lo, hi in zip(lower, upper))
    grid = np.stack(np.meshgrid(*axes), -1).reshape(-1, len(axes))
    assert not grid_is_sorted(grid)

    # A more minimal example with unequal axis lengths; should also fail.
    xs = np.linspace(-2, 2, 2**5)
    ys = np.linspace(-2, 2, 2**4)
    grid = np.stack(np.meshgrid(xs, ys), -1).reshape(-1, 2)
    assert not grid_is_sorted(grid)

    # Swapping the meshgrid argument order still yields an unsorted grid.
    grid = np.stack(np.meshgrid(ys, xs), -1).reshape(-1, 2)
    assert not grid_is_sorted(grid)

    # Swapping the two columns afterwards, however, produces a sorted grid.
    grid = np.stack(np.meshgrid(ys, xs), -1).reshape(-1, 2)
    grid[:, [0, 1]] = grid[:, [1, 0]]  # Swap indices.
    assert grid_is_sorted(grid)
def test_on_bad_grids(self):
    """
    Test that grids which are NOT correctly sorted are rejected.

    Every grid below violates the lexicographic sort order that
    ``grid_is_sorted`` checks for, so each call must return False.
    (The original docstring said "grids that are good", which contradicted
    every assertion in the body.)
    """
    # 1D grid whose values run 0, 0, 2, 1, ... - not monotonically sorted.
    grid = np.array([[0], [0], [2], [1], [1], [1], [2], [2], [2]], dtype=float)
    assert not grid_is_sorted(grid)

    # 2D grid: within the first-column group "1", the second column runs
    # 0, 2, 1 - out of order.
    grid = np.array(
        [[0, 0], [0, 1], [0, 2], [1, 0], [1, 2], [1, 1], [2, 0], [2, 1], [2, 2]],
        dtype=float,
    )
    assert not grid_is_sorted(grid)

    # First column not monotone: 1, 3, 2.
    grid = np.array([[1, 1], [3, 3], [2, 2]], dtype=float)
    assert not grid_is_sorted(grid)

    # 3D grid: in the "-1.4" block the second column runs
    # -4.1, 0.0, -4.1, 0.0, ... - the lexicographic property is broken.
    grid = np.array(
        [
            [-4.1, -4.1, -4.1],
            [-4.1, -4.1, 4.1],
            [-4.1, 0.0, -4.1],
            [-4.1, 0.0, 4.1],
            [-4.1, 4.1, -4.1],
            [-4.1, 4.1, 4.1],
            [-1.4, -4.1, -4.1],
            [-1.4, 0.0, -4.1],
            [-1.4, -4.1, 4.1],
            [-1.4, 0.0, 4.1],
            [-1.4, 4.1, -4.1],
            [-1.4, 4.1, 4.1],
            [1.4, -4.1, -4.1],
            [1.4, -4.1, 4.1],
            [1.4, 0.0, -4.1],
            [1.4, 0.0, 4.1],
            [1.4, 4.1, -4.1],
            [1.4, 4.1, 4.1],
            [4.1, -4.1, -4.1],
            [4.1, -4.1, 4.1],
            [4.1, 0.0, -4.1],
            [4.1, 0.0, 4.1],
            [4.1, 4.1, -4.1],
            [4.1, 4.1, 4.1],
        ],
        dtype=float,
    )
    assert not grid_is_sorted(grid)
def evaluate(self, grid_points=None):
    """
    Evaluate on equidistant grid points.

    Parameters
    ----------
    grid_points: array-like, int, tuple or None
        A grid (mesh) to evaluate on. High dimensional grids must have
        shape (obs, dims). If an integer is passed, it's the number of grid
        points on an equidistant grid. If a tuple is passed, it's the
        number of grid points in each dimension. If None, a grid will be
        automatically created.

    Returns
    -------
    y: array-like
        If a grid is supplied, `y` is returned. If no grid is supplied,
        a tuple (`x`, `y`) is returned.

    Examples
    --------
    >>> kde = FFTKDE().fit([1, 3, 4, 7])
    >>> # Three ways to evaluate a fitted KDE object:
    >>> x, y = kde.evaluate()  # (1) Auto grid
    >>> x, y = kde.evaluate(256)  # (2) Auto grid with 256 points
    >>> # (3) Use a custom grid (make sure it's wider than the data)
    >>> x_grid = np.linspace(-10, 25, num=2**10)  # <- Must be equidistant
    >>> y = kde.evaluate(x_grid)  # Notice that only y is returned
    """
    # The parent class sets self.grid_points and performs basic verification.
    super().evaluate(grid_points)

    # FFTKDE additionally requires the grid to satisfy the sorting property.
    if not grid_is_sorted(self.grid_points):
        raise ValueError("The grid must be sorted.")

    # The bandwidth must be a concrete positive number at this point.
    bandwidth = self.bw
    if not (isinstance(bandwidth, numbers.Number) and bandwidth > 0):
        raise ValueError("The bw must be a number.")
    self.bw = bandwidth

    # Step 0 - Every data point must lie strictly inside the grid.
    grid_min = np.min(self.grid_points, axis=0)
    grid_max = np.max(self.grid_points, axis=0)
    data_min = np.min(self.data, axis=0)
    data_max = np.max(self.data, axis=0)
    data_inside = (grid_min < data_min).all() and (grid_max > data_max).all()
    if not data_inside:
        raise ValueError("Every data point must be inside of the grid.")

    # Step 1 - Obtain the grid counts via linear binning.
    # TODO: Consider moving this to the fitting phase instead
    binned = linear_binning(self.data, grid_points=self.grid_points, weights=self.weights)

    # Step 2 - Compute the kernel weights on a small kernel-centered grid.
    dims = self.grid_points.shape[1]
    pts_per_dim = np.array([len(np.unique(self.grid_points[:, d])) for d in range(dims)])
    intervals = pts_per_dim - 1
    dx = (grid_max - grid_min) / intervals

    # The real bandwidth is the kernel support times the desired bw factor.
    if self.kernel.finite_support:
        effective_support = self.kernel.support * self.bw
    else:
        # The parent class should have computed this already; if not,
        # compute it again. This optimization only dominates a little bit
        # with few data points.
        try:
            effective_support = self._kernel_practical_support
        except AttributeError:
            effective_support = self.kernel.practical_support(self.bw)

    # L is the number of dx-sized steps to move out from 0 in the kernel.
    L = np.minimum(np.floor(effective_support / dx), intervals + 1)
    assert (dx * L <= effective_support).all()

    # Evaluate the kernel once on the cartesian product of per-axis grids.
    axes = [
        np.linspace(-step * count, step * count, int(count * 2 + 1))
        for step, count in zip(dx, L)
    ]
    kernel_grid = cartesian(axes)
    kernel_weights = self.kernel(kernel_grid, bw=self.bw, norm=self.norm)

    # Reshape both arrays to per-dimension shapes in preparation for the
    # convolution.
    kernel_weights = kernel_weights.reshape(*[int(count * 2 + 1) for count in L])
    binned = binned.reshape(*tuple(pts_per_dim))

    # Step 3 - Perform the convolution.
    # The catch_warnings block suppresses a FutureWarning raised by
    # mkl_fft ("Using a non-tuple sequence for multidimensional ...")
    # in mkl_fft.rfftn_numpy.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        evaluated = convolve(binned, kernel_weights, mode="same").reshape(-1, 1)

    return self._evalate_return_logic(evaluated, self.grid_points)