예제 #1
0
def test_single_categorical_col_samples(model):
    """Check the frequency ordering of samples drawn from column 0.

    Draws 1000 samples from column 0 of ``model``, counts how often each
    integer value occurs, and asserts that value 0 is more frequent than
    value 2, which in turn is more frequent than value 1.
    """
    n_draws = 1000
    draws = mu.sample([model], [0], n=n_draws).flatten()
    counts = np.bincount([int(value) for value in draws], minlength=3)

    # Expected frequency ordering: 0 > 2 > 1.
    assert counts[2] > counts[1]
    assert counts[0] > counts[2]
예제 #2
0
def test_single_categorical_col_samples(model):
    """Verify that samples from column 0 show the expected count ordering.

    1000 samples are drawn from column 0 of ``model`` and binned by integer
    value; the test passes when count(0) > count(2) > count(1).
    """
    sample_count = 1000
    raw = mu.sample([model], [0], n=sample_count)
    as_ints = list(map(int, raw.flatten()))
    histogram = np.bincount(as_ints, minlength=3)

    # Value 2 must outnumber value 1, and value 0 must outnumber value 2.
    assert histogram[2] > histogram[1]
    assert histogram[0] > histogram[2]
예제 #3
0
    def sample(self, cols, given=None, n=1):
        """ Draw samples from cols

        Parameters
        ----------
        cols : list(index)
            List of columns from which to jointly draw.
        given : list(tuple(int, value))
            List of column-value tuples that specify conditions. Must not
            contain any column that also appears in `cols`.
        n : int
            The number of samples to draw

        Returns
        -------
        x : scalar or array
            The converted samples. When the converted result has shape
            (1, 1) the single value is returned; when it has exactly one
            row, that row is returned; otherwise the full result is returned.

        Raises
        ------
        ValueError
            If a column in `given` is also one of the columns in `cols`.

        Examples
        --------
        Draw whether an animal is fast and agile given that it is bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
        """
        if given is not None:
            # A column cannot be both sampled and conditioned on; reject the
            # request up front, before any conversion work is done.
            overlap = [pair[0] for pair in given if pair[0] in cols]
            if overlap:
                raise ValueError(
                    'given must not contain columns from cols: '
                    '{}'.format(overlap))
            given = du.convert_given(given, self._dtypes, self._converters)

        # Translate user-facing column identifiers into model column indices.
        col_idxs = [self._converters['col2idx'][col] for col in cols]

        data_out = mu.sample(self._models, col_idxs, given=given, n=n)

        x = du.convert_data(data_out,
                            cols,
                            self._dtypes,
                            self._converters,
                            to_val=True)

        # Unwrap trivially-shaped results for caller convenience.
        if x.shape == (1, 1):
            return x[0, 0]
        if x.shape[0] == 1:
            return x[0, :]
        return x
예제 #4
0
    def entropy(self, col, n_samples=500):
        """ The entropy of a column.

        Notes
        -----
        Returns differential entropy for continuous feature.

        For a categorical column the entropy is computed by enumerating the
        values ``0 .. k-1`` (``k`` is read from ``self._distargs``); for any
        other column it is approximated from samples drawn from the models.

        Parameters
        ----------
        col : indexer
            The name of the column
        n_samples : int
            The number of samples to use for the Monte Carlo approximation
            (only used if `col` is not categorical).

        Returns
        -------
        h : float
            The entropy of `col`.
        """
        col_idx = self._converters['col2idx'][col]
        target = (col_idx, )

        if self._dtypes[col_idx] != 'categorical':
            # Not enumerable: estimate h(x) as the average negative log
            # probability of samples drawn from the models themselves.
            draws = mu.sample(self._models, target, n=n_samples)
            sample_logps = mu.probability(draws, self._models, target)
            return -np.sum(sample_logps) / n_samples

        # Enumerable: evaluate every value and sum -p * log(p) directly.
        # NOTE(review): presumably mu.probability returns log probabilities
        # (the result is exponentiated below) -- confirm.
        k = self._distargs[col_idx][0]
        support = np.array([[value] for value in range(k)])
        logps = mu.probability(support, self._models, target)
        assert logps.shape == (k, )
        probs = np.exp(logps)
        return -np.sum(probs * logps)
예제 #5
0
    def entropy(self, col, n_samples=500):
        """ The entropy of a column.

        Notes
        -----
        Returns differential entropy for continuous feature.

        Parameters
        ----------
        col : indexer
            The name of the column
        n_samples : int
            The number of samples to use for the Monte Carlo approximation
            (only used if `col` is not categorical).

        Returns
        -------
        h : float
            The entropy of `col`.
        """
        idx = self._converters['col2idx'][col]
        query_cols = (idx,)
        is_categorical = self._dtypes[idx] == 'categorical'

        if is_categorical:
            # The column is enumerable, so evaluate each of the k values
            # 0 .. k-1 and accumulate -p * log(p) exactly.
            k = self._distargs[idx][0]
            values = np.array([[i] for i in range(k)])
            logps = mu.probability(values, self._models, query_cols)
            assert logps.shape == (k,)
            weighted = np.exp(logps) * logps
            h = -np.sum(weighted)
        else:
            # Otherwise approximate h(x) with a sample-based estimate: draw
            # from the models and average the negative log probabilities.
            samples = mu.sample(self._models, query_cols, n=n_samples)
            logps = mu.probability(samples, self._models, query_cols)
            total_logp = np.sum(logps)
            h = -total_logp / n_samples

        return h
예제 #6
0
    def sample(self, cols, given=None, n=1):
        """ Draw samples from cols

        Parameters
        ----------
        cols : list(index)
            List of columns from which to jointly draw.
        given : list(tuple(int, value))
            List of column-value tuples that specify conditions. Must not
            contain any column that also appears in `cols`.
        n : int
            The number of samples to draw

        Returns
        -------
        x : scalar or array
            The converted samples. A result of shape (1, 1) is returned as
            its single value, a result with exactly one row is returned as
            that row, and anything else is returned unchanged.

        Raises
        ------
        ValueError
            If a column in `given` is also one of the columns in `cols`.

        Examples
        --------
        Draw whether an animal is fast and agile given that it is bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
        """
        if given is not None:
            # Conditioning on a column that is also being sampled is not
            # meaningful, so fail early with a clear message.
            clashing = [pair[0] for pair in given if pair[0] in cols]
            if clashing:
                raise ValueError(
                    'given must not contain columns from cols: '
                    '{}'.format(clashing))
            given = du.convert_given(given, self._dtypes, self._converters)

        # Map the requested columns to their model column indices.
        col_idxs = [self._converters['col2idx'][col] for col in cols]

        data_out = mu.sample(self._models, col_idxs, given=given, n=n)

        x = du.convert_data(data_out, cols, self._dtypes, self._converters,
                            to_val=True)

        # Collapse single-value and single-row results.
        if x.shape == (1, 1):
            return x[0, 0]
        if x.shape[0] == 1:
            return x[0, :]
        return x