Beispiel #1
0
    def probability(self, x, cols, given=None):
        """ Predictive probability of x_1, ..., x_n given y_1, ..., y_n

        Parameters
        ----------
        x : numpy.ndarray
            2-D numpy array where each row is a set of observations and each
            column corresponds to a feature.
        cols : list
            The names of each column/feature of `x`.
        given : list(tuple), optional
            List of (name, value,) conditional constraints for the
            probability. Must not name a column that also appears in `cols`.

        Returns
        -------
        logps : numpy.ndarray
            The value returned by `mu.probability` for the converted query.

        Raises
        ------
        ValueError
            If a column appears in both `cols` and `given`.

        Examples
        --------
        The probability that an animal is fast and agile given that it is
        bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
        ...                    given=[('bulbous', 1)])
        """
        # A column cannot be both queried and conditioned on; reject the
        # request before doing any data conversion.
        if given is not None:
            query_cols = set(cols)
            overlap = [item[0] for item in given if item[0] in query_cols]
            if overlap:
                raise ValueError(
                    'Columns cannot appear in both `cols` and `given`: '
                    '{}'.format(overlap))

        x = du.format_query_data(x)
        col_idxs = [self._converters['col2idx'][col] for col in cols]
        x_cnv = du.convert_data(x, cols, self._dtypes, self._converters)

        if given is not None:
            given = du.convert_given(given, self._dtypes, self._converters)

        return mu.probability(x_cnv, self._models, col_idxs, given=given)
Beispiel #2
0
    def probability(self, x, cols, given=None):
        """ Predictive probability of x_1, ..., x_n given y_1, ..., y_n

        Parameters
        ----------
        x : numpy.ndarray
            2-D numpy array where each row is a set of observations and each
            column corresponds to a feature.
        cols : list
            The names of each column/feature of `x`.
        given : list(tuple), optional
            List of (name, value,) conditional constraints for the
            probability. Must not name a column that also appears in `cols`.

        Returns
        -------
        logps : numpy.ndarray
            The value returned by `mu.probability` for the converted query.

        Raises
        ------
        ValueError
            If a column appears in both `cols` and `given`.

        Examples
        --------
        The probability that an animal is fast and agile given that it is
        bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
        ...                    given=[('bulbous', 1)])
        """
        # A column cannot be both queried and conditioned on; reject the
        # request before doing any data conversion.
        if given is not None:
            query_cols = set(cols)
            overlap = [item[0] for item in given if item[0] in query_cols]
            if overlap:
                raise ValueError(
                    'Columns cannot appear in both `cols` and `given`: '
                    '{}'.format(overlap))

        x = du.format_query_data(x)
        col_idxs = [self._converters['col2idx'][col] for col in cols]
        x_cnv = du.convert_data(x, cols, self._dtypes, self._converters)

        if given is not None:
            given = du.convert_given(given, self._dtypes, self._converters)

        return mu.probability(x_cnv, self._models, col_idxs, given=given)
Beispiel #3
0
    def entropy(self, col, n_samples=500):
        """ The entropy of a column.

        Notes
        -----
        Returns differential entropy for continuous features. Categorical
        columns are computed exactly by enumerating every category; all
        other columns use a Monte Carlo estimate.

        Parameters
        ----------
        col : indexer
            The name of the column
        n_samples : int
            The number of samples to use for the Monte Carlo approximation
            (only used if `col` is not categorical).

        Returns
        -------
        h : float
            The entropy of `col`.

        Raises
        ------
        ValueError
            If `col` is not categorical and `n_samples` is less than 1.
        """

        col_idx = self._converters['col2idx'][col]
        dtype = self._dtypes[col_idx]

        # Unless x is enumerable (is categorical), we approximate h(x) using
        # an importance sampling estimate of h(x) using p(x) as the importance
        # function.
        if dtype == 'categorical':
            k = self._distargs[col_idx][0]
            x = np.array([[i] for i in range(k)])
            logps = mu.probability(x, self._models, (col_idx, ))
            assert logps.shape == (k, )
            ps = np.exp(logps)
            # Zero-probability categories contribute nothing (0 * log 0 is
            # taken to be 0); multiplying 0 by -inf would otherwise give NaN.
            support = ps > 0
            h = -np.sum(ps[support] * logps[support])
        else:
            if n_samples < 1:
                raise ValueError('n_samples must be at least 1.')
            x = mu.sample(self._models, (col_idx, ), n=n_samples)
            logps = mu.probability(x, self._models, (col_idx, ))

            h = -np.sum(logps) / n_samples

        return h
Beispiel #4
0
    def entropy(self, col, n_samples=500):
        """ The entropy of a column.

        Notes
        -----
        Returns differential entropy for continuous features. Categorical
        columns are computed exactly by enumerating every category; all
        other columns use a Monte Carlo estimate.

        Parameters
        ----------
        col : indexer
            The name of the column
        n_samples : int
            The number of samples to use for the Monte Carlo approximation
            (only used if `col` is not categorical).

        Returns
        -------
        h : float
            The entropy of `col`.

        Raises
        ------
        ValueError
            If `col` is not categorical and `n_samples` is less than 1.
        """

        col_idx = self._converters['col2idx'][col]
        dtype = self._dtypes[col_idx]

        # Unless x is enumerable (is categorical), we approximate h(x) using
        # an importance sampling estimate of h(x) using p(x) as the importance
        # function.
        if dtype == 'categorical':
            k = self._distargs[col_idx][0]
            x = np.array([[i] for i in range(k)])
            logps = mu.probability(x, self._models, (col_idx,))
            assert logps.shape == (k,)
            ps = np.exp(logps)
            # Zero-probability categories contribute nothing (0 * log 0 is
            # taken to be 0); multiplying 0 by -inf would otherwise give NaN.
            support = ps > 0
            h = -np.sum(ps[support] * logps[support])
        else:
            if n_samples < 1:
                raise ValueError('n_samples must be at least 1.')
            x = mu.sample(self._models, (col_idx,), n=n_samples)
            logps = mu.probability(x, self._models, (col_idx,))

            h = -np.sum(logps) / n_samples

        return h
Beispiel #5
0
def test_double_mixed_single_view_values(model):
    """Check the joint value for one row queried over columns 0 and 1."""
    columns = [0, 1]
    query = np.array([[0, 2.1]])

    result = mu.probability(query, [model], columns)

    # Reference value the single-model computation is expected to reproduce.
    assert result == approx(-2.93134971834475)
Beispiel #6
0
def test_single_categorical_col_probability_values_2(model):
    """Check the value returned for the integer 2 queried in column 0."""
    query = np.array([[2]], dtype=int)
    columns = [0]

    result = mu.probability(query, [model], columns)

    assert result == approx(-1.15710849534972)
Beispiel #7
0
def test_single_categorical_col_probability_values_1(model):
    """Check the value returned for the integer 0 queried in column 0."""
    # Single-row, single-column query: value 0 in column index 0.
    query = np.array([[0]], dtype=int)
    columns = [0]

    result = mu.probability(query, [model], columns)

    assert result == approx(-0.848561284433976)
Beispiel #8
0
def test_single_continuous_col_probability_values_2(model):
    """Check the value returned for the float 2.0 queried in column 1."""
    query = np.array([[2]], dtype=float)
    columns = [1]

    result = mu.probability(query, [model], columns)

    assert result == approx(-2.01262102403666)
Beispiel #9
0
def test_single_continuous_col_probability_values_1(model):
    """Check the value returned for the float 1.0 queried in column 1."""
    # Single-row, single-column query: value 1.0 in column index 1.
    query = np.array([[1]], dtype=float)
    columns = [1]

    result = mu.probability(query, [model], columns)

    assert result == approx(-1.58025784097797)
Beispiel #10
0
def test_double_mixed_single_view_values(model):
    """Check the joint value for one row queried over columns 0 and 1."""
    columns = [0, 1]
    query = np.array([[0, 2.1]])

    result = mu.probability(query, [model], columns)

    # Reference value the single-model computation is expected to reproduce.
    assert result == approx(-2.93134971834475)
Beispiel #11
0
def test_single_categorical_col_probability_values_2(model):
    """Check the value returned for the integer 2 queried in column 0."""
    query = np.array([[2]], dtype=int)
    columns = [0]

    result = mu.probability(query, [model], columns)

    assert result == approx(-1.15710849534972)
Beispiel #12
0
def test_single_categorical_col_probability_values_1(model):
    """Check the value returned for the integer 0 queried in column 0."""
    # Single-row, single-column query: value 0 in column index 0.
    query = np.array([[0]], dtype=int)
    columns = [0]

    result = mu.probability(query, [model], columns)

    assert result == approx(-0.848561284433976)
Beispiel #13
0
def test_single_continuous_col_probability_values_2(model):
    """Check the value returned for the float 2.0 queried in column 1."""
    query = np.array([[2]], dtype=float)
    columns = [1]

    result = mu.probability(query, [model], columns)

    assert result == approx(-2.01262102403666)
Beispiel #14
0
def test_single_continuous_col_probability_values_1(model):
    """Check the value returned for the float 1.0 queried in column 1."""
    # Single-row, single-column query: value 1.0 in column index 1.
    query = np.array([[1]], dtype=float)
    columns = [1]

    result = mu.probability(query, [model], columns)

    assert result == approx(-1.58025784097797)