コード例 #1
0
ファイル: test_sorted_grid.py プロジェクト: stuart23/KDEpy
    def test_regression_issue_grids(self):
        """
        Regression test for Issue 15, reported by @blasern.

        Grids obtained by flattening `np.meshgrid` output directly must be
        rejected by `grid_is_sorted`. Swapping the two columns of such a grid
        must make it pass.

        https://github.com/tommyod/KDEpy/issues/15
        """

        def flat_mesh(*axes):
            # Stack the meshgrid arrays along a new last axis, then flatten to
            # one row per grid point and one column per axis.
            stacked = np.stack(np.meshgrid(*axes), -1)
            return stacked.reshape(-1, len(axes))

        # The original example by @blasern. Expected to be rejected.
        num_points = 20
        lower = np.array([-2.6, -4.0])
        upper = np.array([3.2, 7.7])
        margins = tuple(
            np.linspace(lo, hi, num_points) for lo, hi in zip(lower, upper))
        assert not grid_is_sorted(flat_mesh(*margins))

        # A more minimal example, also expected to be rejected.
        axis_32 = np.linspace(-2, 2, 2**5)
        axis_16 = np.linspace(-2, 2, 2**4)
        assert not grid_is_sorted(flat_mesh(axis_32, axis_16))

        # Passing the axes in the opposite order is rejected as well.
        assert not grid_is_sorted(flat_mesh(axis_16, axis_32))

        # Same grid as above, but with its two columns swapped in place:
        # this one is expected to be accepted.
        swapped = flat_mesh(axis_16, axis_32)
        swapped[:, [0, 1]] = swapped[:, [1, 0]]
        assert grid_is_sorted(swapped)
コード例 #2
0
ファイル: test_sorted_grid.py プロジェクト: stuart23/KDEpy
    def test_on_bad_grids(self):
        """
        Test on grids that are bad, i.e. that `grid_is_sorted` must reject.
        """
        # One dimension: a 2 shows up before the run of 1s.
        bad_1d = np.array(
            [[0], [0], [2], [1], [1], [1], [2], [2], [2]], dtype=float)

        # Two dimensions: the rows [1, 2] and [1, 1] are in the wrong order.
        bad_2d = np.array(
            [
                [0, 0], [0, 1], [0, 2],
                [1, 0], [1, 2], [1, 1],
                [2, 0], [2, 1], [2, 2],
            ],
            dtype=float,
        )

        # Three points on a diagonal, the last two in the wrong order.
        bad_diagonal = np.array([[1, 1], [3, 3], [2, 2]], dtype=float)

        # Three dimensions: within the block whose first coordinate is -1.4,
        # the rows [-1.4, 0.0, -4.1] and [-1.4, -4.1, 4.1] are interchanged.
        bad_3d = np.array(
            [
                # First coordinate -4.1
                [-4.1, -4.1, -4.1],
                [-4.1, -4.1, 4.1],
                [-4.1, 0.0, -4.1],
                [-4.1, 0.0, 4.1],
                [-4.1, 4.1, -4.1],
                [-4.1, 4.1, 4.1],
                # First coordinate -1.4 (contains the misplaced rows)
                [-1.4, -4.1, -4.1],
                [-1.4, 0.0, -4.1],
                [-1.4, -4.1, 4.1],
                [-1.4, 0.0, 4.1],
                [-1.4, 4.1, -4.1],
                [-1.4, 4.1, 4.1],
                # First coordinate 1.4
                [1.4, -4.1, -4.1],
                [1.4, -4.1, 4.1],
                [1.4, 0.0, -4.1],
                [1.4, 0.0, 4.1],
                [1.4, 4.1, -4.1],
                [1.4, 4.1, 4.1],
                # First coordinate 4.1
                [4.1, -4.1, -4.1],
                [4.1, -4.1, 4.1],
                [4.1, 0.0, -4.1],
                [4.1, 0.0, 4.1],
                [4.1, 4.1, -4.1],
                [4.1, 4.1, 4.1],
            ],
            dtype=float,
        )

        # Checked in the same order as the fixtures are defined above.
        for bad_grid in (bad_1d, bad_2d, bad_diagonal, bad_3d):
            assert not grid_is_sorted(bad_grid)
コード例 #3
0
ファイル: FFTKDE.py プロジェクト: stuart23/KDEpy
    def evaluate(self, grid_points=None):
        """
        Evaluate the estimator on an equidistant grid.

        The data is binned onto the grid, the kernel is evaluated once on a
        small grid centred at zero, and the two are convolved.

        Parameters
        ----------
        grid_points: array-like, int, tuple or None
            A grid (mesh) to evaluate on. High dimensional grids must have
            shape (obs, dims). If an integer is passed, it's the number of grid
            points on an equidistant grid. If a tuple is passed, it's the
            number of grid points in each dimension. If None, a grid will be
            automatically created.

        Returns
        -------
        y: array-like
            If a grid is supplied, `y` is returned. If no grid is supplied,
            a tuple (`x`, `y`) is returned.

        Raises
        ------
        ValueError
            If the grid is not sorted, if `bw` is not a positive number, or
            if some data point is not strictly inside of the grid.

        Examples
        --------
        >>> kde = FFTKDE().fit([1, 3, 4, 7])
        >>> # Three ways to evaluate a fitted KDE object:
        >>> x, y = kde.evaluate()  # (1) Auto grid
        >>> x, y = kde.evaluate(256)  # (2) Auto grid with 256 points
        >>> # (3) Use a custom grid (make sure it's wider than the data)
        >>> x_grid = np.linspace(-10, 25, num=2**10)  # <- Must be equidistant
        >>> y = kde.evaluate(x_grid)  # Notice that only y is returned
        """

        # The parent implementation sets self.grid_points and verifies it
        super().evaluate(grid_points)

        # FFTKDE additionally requires the grid to be sorted
        if not grid_is_sorted(self.grid_points):
            raise ValueError("The grid must be sorted.")

        # Only a strictly positive numeric bandwidth is accepted here
        bandwidth = self.bw
        if not isinstance(bandwidth, numbers.Number) or not bandwidth > 0:
            raise ValueError("The bw must be a number.")
        self.bw = bandwidth

        # Step 0 - Every data point must lie strictly inside of the grid
        grid_lower = np.min(self.grid_points, axis=0)
        grid_upper = np.max(self.grid_points, axis=0)

        data_lower = np.min(self.data, axis=0)
        data_upper = np.max(self.data, axis=0)
        if (not (grid_lower < data_lower).all()
                or not (grid_upper > data_upper).all()):
            raise ValueError("Every data point must be inside of the grid.")

        # Step 1 - Obtain the grid counts by binning the data onto the grid
        # TODO: Consider moving this to the fitting phase instead
        binned = linear_binning(
            self.data,
            grid_points=self.grid_points,
            weights=self.weights,
        )

        # Step 2 - Compute the kernel weights
        # Number of distinct grid coordinates along every dimension
        n_dims = self.grid_points.shape[1]
        points_per_dim = np.array(
            [len(np.unique(self.grid_points[:, d])) for d in range(n_dims)])

        # Grid spacing along every dimension
        intervals_per_dim = points_per_dim - 1
        spacing = (grid_upper - grid_lower) / intervals_per_dim

        # The real bandwidth: the kernel support times the desired bw factor
        if self.kernel.finite_support:
            support_width = self.kernel.support * self.bw
        else:
            # The parent class is expected to have stored this value already.
            # If the attribute is missing, compute it here instead.
            try:
                support_width = self._kernel_practical_support
            except AttributeError:
                support_width = self.kernel.practical_support(self.bw)

        # Number of grid steps to move out from 0 in each direction when
        # evaluating the kernel, capped at the number of intervals plus one
        half_widths = np.minimum(np.floor(support_width / spacing),
                                 intervals_per_dim + 1)
        assert (spacing * half_widths <= support_width).all()

        # Evaluate the kernel once, on a symmetric grid around zero with the
        # same spacing as the evaluation grid
        kernel_axes = [
            np.linspace(-step * half, step * half, int(half * 2 + 1))
            for step, half in zip(spacing, half_widths)
        ]
        kernel_grid = cartesian(kernel_axes)
        kernel_weights = self.kernel(kernel_grid, bw=self.bw, norm=self.norm)

        # Reshape both arrays from flat form into one axis per dimension, in
        # preparation for the convolution
        kernel_shape = [int(half * 2 + 1) for half in half_widths]
        kernel_weights = kernel_weights.reshape(*kernel_shape)
        binned = binned.reshape(*tuple(points_per_dim))

        # Step 3 - Perform the convolution

        # Warnings are silenced inside this block. It was added to suppress:
        #        anaconda3/lib/python3.6/site-packages/mkl_fft/_numpy_fft.py:
        #            FutureWarning: Using a non-tuple sequence for multidimensional ...
        #        output = mkl_fft.rfftn_numpy(a, s, axes)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            convolved = convolve(binned, kernel_weights, mode="same")
            ans = convolved.reshape(-1, 1)

        return self._evalate_return_logic(ans, self.grid_points)