Exemple #1
0
    def test_digitize(self):
        """Check `digitize` against `np.digitize` for dense and sparse input."""
        bins = np.arange(-2, 2)
        for dense in self.data:
            sparse = csr_matrix(dense)
            # Reference result: numpy digitizes the flattened data, so the
            # original shape is restored before comparing.
            expected = np.digitize(dense.flatten(), bins).reshape(dense.shape)

            np.testing.assert_array_equal(
                expected, digitize(dense, bins),
                'Digitize fails on dense data')
            np.testing.assert_array_equal(
                expected, digitize(sparse, bins),
                'Digitize fails on sparse data')
Exemple #2
0
    def test_digitize_1d_array(self):
        """A consistent return shape must be returned for both sparse and dense."""
        dense = np.array([0, 1, 1, 0, np.nan, 0, 1])
        sparse = csr_matrix(dense)
        bins = np.arange(-2, 2)

        # The numpy reference is wrapped in a list: `digitize` is expected to
        # return one row for 1d input, for dense and sparse data alike.
        reference = np.digitize(dense.flatten(), bins).reshape(dense.shape)
        expected = [reference]

        np.testing.assert_array_equal(
            expected, digitize(dense, bins),
            'Digitize fails on 1d dense data')
        np.testing.assert_array_equal(
            expected, digitize(sparse, bins),
            'Digitize fails on 1d sparse data')
Exemple #3
0
    def test_digitize_right(self):
        """Check `digitize(..., right=True)` against `np.digitize`."""
        bins = np.arange(-2, 2)
        for dense in self.data:
            sparse = csr_matrix(dense)
            # Reference computed once with numpy on the flattened data and
            # reshaped back to the shape of the input.
            flat_reference = np.digitize(dense.flatten(), bins, right=True)
            expected = flat_reference.reshape(dense.shape)

            np.testing.assert_array_equal(
                expected,
                digitize(dense, bins, right=True),
                'Digitize fails on dense data',
            )
            np.testing.assert_array_equal(
                expected,
                digitize(sparse, bins, right=True),
                'Digitize fails on sparse data',
            )
Exemple #4
0
    def _get_colors(self):
        """Compute colors for different kinds of histograms.

        Returns a list with one entry per bin, each entry being a list of
        colors:

        - discrete target: every bin gets the same list holding one `QColor`
          per entry of the target's `colors`;
        - continuous target: every bin gets a single palette color picked by
          the mean of `self.y` in that bin divided by the maximum of `self.y`;
        - otherwise: every bin gets a single gray color.
        """
        target = self.target_var

        if target and target.is_discrete:
            class_colors = [QColor(*rgb) for rgb in target.colors]
            return [class_colors] * self.n_bins

        if target and target.is_continuous:
            palette = ContinuousPaletteGenerator(*target.colors)

            # Column vector of bin numbers; comparing it with the digitized
            # samples below yields one boolean mask row per bin.
            bin_numbers = np.arange(self.n_bins)[:, np.newaxis]
            if self.attribute.is_discrete:
                edges = self.edges
            else:
                # Drop the outermost edges for non-discrete attributes
                edges = self.edges[1:-1]
            # Need to digitize on `right` here so the samples will be assigned
            # to the correct bin for coloring
            sample_bins = ut.digitize(self.x, bins=edges, right=True)
            in_bin = sample_bins == bin_numbers

            colors = []
            for bin_idx in range(self.n_bins):
                bin_y = self.y[in_bin[bin_idx]]
                # An empty (or all non-finite) bin gets 0; its color does not
                # matter.
                mean = (ut.nanmean(bin_y) / ut.nanmax(self.y)
                        if np.isfinite(bin_y).any() else 0)
                colors.append([palette[mean]])
            return colors

        return [[QColor('#ccc')]] * self.n_bins
Exemple #5
0
    def _get_colors(self):
        """Compute colors for different kinds of histograms.

        Returns a list with one entry per bin, each entry being a list of
        colors:

        - discrete target: every bin gets the same list, holding the first
          `len(target.values)` colors of the target's palette;
        - continuous target: every bin gets a single color obtained from the
          palette's `value_to_qcolor` for the mean of `self.y` in that bin
          divided by the maximum of `self.y`;
        - otherwise: every bin gets a single gray color.
        """
        target = self.target_var

        if target and target.is_discrete:
            class_colors = list(target.palette)[:len(target.values)]
            return [class_colors] * self.n_bins

        if target and target.is_continuous:
            palette = target.palette

            # Column vector of bin numbers; comparing it with the digitized
            # samples below yields one boolean mask row per bin.
            bin_numbers = np.arange(self.n_bins)[:, np.newaxis]
            if self.attribute.is_discrete:
                edges = self.edges
            else:
                # Drop the outermost edges for non-discrete attributes
                edges = self.edges[1:-1]
            in_bin = ut.digitize(self.x, bins=edges) == bin_numbers

            def bin_color(bin_idx):
                """Return the palette color for the given bin."""
                bin_y = self.y[in_bin[bin_idx]]
                if not np.isfinite(bin_y).any():
                    # bin is empty, color does not matter
                    return palette.value_to_qcolor(0)
                return palette.value_to_qcolor(
                    ut.nanmean(bin_y) / ut.nanmax(self.y))

            return [[bin_color(bin_idx)] for bin_idx in range(self.n_bins)]

        return [[QColor('#ccc')]] * self.n_bins
Exemple #6
0
 def test_digitize_sparse_zeroth_bin(self):
     """Digitizing sparse data whose zeros fall into bin 0 stays sparse."""
     # A single edge at 1 puts every '0' entry into the '0'th bin.
     bins = np.array([1])
     rows = [[0, 0, 0, 1, 1, 0, 0, 1, 0],
             [0, 0, 1, 1, 0, 0, 1, 0, 0]]
     data = csr_matrix(rows)

     result = digitize(data, bins)

     # Then digitize should return a sparse matrix
     self.assertTrue(issparse(result))
Exemple #7
0
    def _get_colors(self):
        """Compute colors for different kinds of histograms.

        The result is a list with one entry per bin; each entry is itself a
        list of colors. A discrete target gives every bin the same list of
        class colors, a continuous target gives every bin one palette color
        based on the bin's mean target value relative to the maximum target
        value, and any other case gives every bin a single gray color.
        """
        target = self.target_var
        has_discrete_target = target and target.is_discrete
        if has_discrete_target:
            per_class = [QColor(*components) for components in target.colors]
            return [per_class] * self.n_bins

        has_continuous_target = target and target.is_continuous
        if not has_continuous_target:
            return [[QColor('#ccc')]] * self.n_bins

        palette = ContinuousPaletteGenerator(*target.colors)

        # One row per bin number, so the comparison below broadcasts into one
        # boolean mask row per bin.
        bin_ids = np.arange(self.n_bins)[:, np.newaxis]
        # Non-discrete attributes are digitized without the outermost edges
        edges = self.edges if self.attribute.is_discrete else self.edges[1:-1]
        # Need to digitize on `right` here so the samples will be assigned
        # to the correct bin for coloring
        assigned = ut.digitize(self.x, bins=edges, right=True)
        membership = assigned == bin_ids

        colors = []
        for bin_idx in range(self.n_bins):
            values = self.y[membership[bin_idx]]
            if not np.isfinite(values).any():
                # bin is empty, color does not matter
                mean = 0
            else:
                mean = ut.nanmean(values) / ut.nanmax(self.y)
            colors.append([palette[mean]])
        return colors
Exemple #8
0
    def test_digitize_1d_array(self):
        """A consistent return shape must be returned for both sparse and dense."""
        bins = np.arange(-2, 2)
        values = np.array([0, 1, 1, 0, np.nan, 0, 1])
        inputs = (
            (values, 'Digitize fails on 1d dense data'),
            (csr_matrix(values), 'Digitize fails on 1d sparse data'),
        )

        # Single-row reference: the 1d numpy result wrapped in a list, which
        # is the shape expected from `digitize` for dense and sparse input.
        expected = [np.digitize(values.flatten(), bins).reshape(values.shape)]

        for data, message in inputs:
            np.testing.assert_array_equal(
                expected, digitize(data, bins), message)
Exemple #9
0
 def test_digitize_sparse_zeroth_bin(self):
     """The output of `digitize` is sparse when zeros land in the 0th bin."""
     # Setup the data so that the '0's will fit into the '0'th bin.
     first_row = [0, 0, 0, 1, 1, 0, 0, 1, 0]
     second_row = [0, 0, 1, 1, 0, 0, 1, 0, 0]
     data = csr_matrix([first_row, second_row])
     bins = np.array([1])

     digitized = digitize(data, bins)

     # Then digitize should return a sparse matrix
     self.assertTrue(issparse(digitized))
Exemple #10
0
    def test_digitize_right(self, array):
        """Check `digitize(..., right=True)` against `np.digitize`.

        `array` converts each original array from `self.data` into the
        container type under test.
        """
        bins = np.arange(-2, 2)
        for original in self.data:
            converted = array(original)

            # numpy reference on the flattened original data, reshaped to the
            # shape of the converted input
            flat_expected = np.digitize(original.flatten(), bins, right=True)
            expected = flat_expected.reshape(converted.shape)

            actual = digitize(converted, bins, right=True)
            np.testing.assert_array_equal(expected, actual)
Exemple #11
0
    def test_digitize_right(self, array):
        """`digitize` with `right=True` must agree with `np.digitize`.

        Each array in `self.data` is passed through `array` before being
        digitized; the reference is computed from the untouched original.
        """
        bins = np.arange(-2, 2)
        for source in self.data:
            data = array(source)
            target_shape = data.shape

            reference = np.digitize(source.flatten(), bins, right=True)
            np.testing.assert_array_equal(
                reference.reshape(target_shape),
                digitize(data, bins, right=True),
            )
Exemple #12
0
    def test_digitize_1d_array(self, array):
        """A consistent return shape must be returned for both sparse and dense."""
        original = np.array([0, 1, 1, 0, np.nan, 0, 1])
        converted = array(original)
        bins = np.arange(-2, 2)

        # The 1d numpy reference is wrapped in a list, i.e. a single row is
        # expected from `digitize` whatever container `array` produced.
        reference = np.digitize(original.flatten(), bins)
        expected = [reference.reshape(original.shape)]

        np.testing.assert_array_equal(expected, digitize(converted, bins))
Exemple #13
0
    def test_digitize_1d_array(self, array):
        """A consistent return shape must be returned for both sparse and dense."""
        bins = np.arange(-2, 2)
        raw = np.array([0, 1, 1, 0, np.nan, 0, 1])
        data = array(raw)

        # Expected value: the numpy result for the raw 1d data as the only
        # row of a list.
        single_row = np.digitize(raw.flatten(), bins).reshape(raw.shape)

        actual = digitize(data, bins)
        np.testing.assert_array_equal([single_row], actual)
Exemple #14
0
    def _histogram(self):
        """Return the histogram edges and the per-bin distributions.

        For a discrete attribute the values of `self.x` are used directly as
        bin indices; for a continuous attribute they are digitized against
        the inner histogram edges.
        """
        assert self.x.size > 0, 'Cannot calculate histogram on empty array'
        edges = self._get_histogram_edges()
        attribute = self.attribute

        # NOTE(review): an attribute that is neither discrete nor continuous
        # leaves `bin_indices` unbound - confirm callers never pass one.
        if attribute.is_discrete:
            bin_indices = self.x
            # TODO It probably isn't a very good idea to convert a sparse row
            # to a dense array... Converts sparse to 1d numpy array
            if sp.issparse(bin_indices):
                dense = bin_indices.todense()
                bin_indices = np.squeeze(np.asarray(dense, dtype=np.int64))
        elif attribute.is_continuous:
            # The outermost edges are not used for digitizing
            inner_edges = edges[1:-1]
            bin_indices = ut.digitize(self.x, bins=inner_edges).flatten()

        return edges, self._get_bin_distributions(bin_indices)
    def _histogram(self):
        """Return the histogram edges and the per-bin distributions.

        Discrete attributes use the values of `self.x` as bin indices;
        continuous attributes are digitized against the inner histogram
        edges.
        """
        edges = self._get_histogram_edges()
        attribute = self.attribute

        # NOTE(review): an attribute that is neither discrete nor continuous
        # leaves `bin_indices` unbound - confirm callers never pass one.
        if attribute.is_discrete:
            bin_indices = self.x
            # TODO It probably isn't a very good idea to convert a sparse row
            # to a dense array... Converts sparse to 1d numpy array
            if sp.issparse(bin_indices):
                as_dense = np.asarray(bin_indices.todense(), dtype=np.int64)
                bin_indices = np.squeeze(as_dense)
        elif attribute.is_continuous:
            # TODO: Digitize throws nans into first bin. This is incorrect.
            digitized = ut.digitize(self.x, bins=edges[1:-1])
            bin_indices = digitized.flatten()

        return edges, self._get_bin_distributions(bin_indices)
Exemple #16
0
    def _histogram(self):
        """Compute the histogram of `self.x`.

        Returns a tuple `(edges, distributions)`, where `edges` comes from
        `_get_histogram_edges` and `distributions` from
        `_get_bin_distributions` applied to the bin index of each sample.
        """
        assert self.x.size > 0, 'Cannot calculate histogram on empty array'
        edges = self._get_histogram_edges()

        # NOTE(review): an attribute that is neither discrete nor continuous
        # leaves `bin_indices` unbound - confirm callers never pass one.
        if self.attribute.is_discrete:
            # Discrete values already serve as bin indices
            bin_indices = self.x
            if sp.issparse(bin_indices):
                # TODO It probably isn't a very good idea to convert a sparse
                # row to a dense array... Converts sparse to 1d numpy array
                densified = bin_indices.todense()
                int_indices = np.asarray(densified, dtype=np.int64)
                bin_indices = np.squeeze(int_indices)
        elif self.attribute.is_continuous:
            # Digitize against the edges without the outermost two
            per_sample = ut.digitize(self.x, bins=edges[1:-1])
            bin_indices = per_sample.flatten()

        distributions = self._get_bin_distributions(bin_indices)
        return edges, distributions
    def _get_colors(self):
        """Compute colors for different kinds of histograms.

        Returns a list with one entry per bin, each entry being a list of
        colors:

        - discrete target: every bin gets the same list holding one `QColor`
          per entry of the target's `colors`;
        - continuous target: every bin gets a single palette color picked by
          the mean of `self.y` in that bin divided by the maximum of `self.y`
          (both ignoring NaNs); bins without any finite value use 0;
        - otherwise: every bin gets a single gray color.
        """
        if self.target_var and self.target_var.is_discrete:
            colors = [[QColor(*color)
                       for color in self.target_var.colors]] * self.n_bins

        elif self.target_var and self.target_var.is_continuous:
            palette = ContinuousPaletteGenerator(*self.target_var.colors)

            # Column vector of bin numbers; comparing it with the digitized
            # samples yields one boolean mask row per bin.
            bins = np.arange(self.n_bins)[:, np.newaxis]
            edges = self.edges if self.attribute.is_discrete else self.edges[
                1:-1]
            bin_indices = ut.digitize(self.x, bins=edges)
            mask = bin_indices == bins

            colors = []
            for bin_idx in range(self.n_bins):
                biny = self.y[mask[bin_idx]]
                if np.isfinite(biny).any():
                    # Use a NaN-aware maximum: a plain `max()` turns into NaN
                    # as soon as `y` contains a single missing value, which
                    # would make every bin's palette index NaN.
                    mean = ut.nanmean(biny, axis=0) / np.nanmax(self.y)
                else:
                    # Empty (or all-NaN) bin: the mean is undefined, and the
                    # color does not matter, so avoid indexing with NaN.
                    mean = 0
                colors.append([palette[mean]])

        else:
            colors = [[QColor('#ccc')]] * self.n_bins

        return colors