Ejemplo n.º 1
0
def test_convert_categorical_data_single_column():
    """Round-trip a table with one categorical and one continuous column.

    Column ``x`` is categorical and is expected to be replaced by the indices
    in its ``val2idx`` map; column ``y`` is continuous and is expected to
    come back unchanged. Converting the result again with ``to_val=True``
    must reproduce the original table entry by entry.
    """
    dtypes = ['categorical', 'continuous']
    data_in = np.array(
        [['a', 3], ['b', 2], ['c', 2], ['a', 1], ['b', 1]], dtype=object)
    converters = {
        'col2idx': {'x': 0, 'y': 1},
        'valmaps': {
            'x': {
                'val2idx': {'a': 0, 'b': 1, 'c': 2},
                'idx2val': {0: 'a', 1: 'b', 2: 'c'},
            },
        },
    }

    # Forward direction: values -> indices.
    encoded = du.convert_data(data_in, ['x', 'y'], dtypes, converters)
    assert encoded.shape == data_in.shape

    # Expected contents, one list per column (checked column by column).
    expected_columns = (
        [0, 1, 2, 0, 1],  # 'x': categorical values mapped to indices
        [3, 2, 2, 1, 1],  # 'y': continuous values left as they were
    )
    for col, expected in enumerate(expected_columns):
        for row, want in enumerate(expected):
            assert encoded[row, col] == want

    # Reverse direction: indices -> values.
    decoded = du.convert_data(
        encoded, ['x', 'y'], dtypes, converters, to_val=True)
    assert decoded.shape == data_in.shape

    n_rows, n_cols = data_in.shape
    for row in range(n_rows):
        for col in range(n_cols):
            assert data_in[row, col] == decoded[row, col]
Ejemplo n.º 2
0
    def probability(self, x, cols, given=None):
        """ Predictive probability of x_1, ..., x_n given y_1, ..., y_n

        The query is normalized with `du.format_query_data`, converted with
        `du.convert_data`, and then handed to `mu.probability` together with
        the integer indices of `cols`.

        Parameters
        ----------
        x : numpy.ndarray
            2-D numpy array where each row is a set of observations and each
            column corresponds to a feature.
        cols : list
            The names of each column/feature of `x`. Every name is looked up
            in the engine's `col2idx` converter map.
        given : list(tuple), optional
            List of (name, value,) conditional constraints for the
            probability. When not None it is passed through
            `du.convert_given` before use.

        Returns
        -------
        logps : numpy.ndarray
            Whatever `mu.probability` returns for the converted query.
            NOTE(review): the name suggests log probabilities -- confirm
            against `mu.probability`.

        Examples
        --------
        The probability that an animal is fast and agile given that it is
        bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
        ...                    given=[('bulbous', 1)])
        """
        # TODO: make sure that given does not contain columns from cols
        query = du.format_query_data(x)

        # Translate column names into their integer positions.
        col_idxs = []
        for name in cols:
            col_idxs.append(self._converters['col2idx'][name])

        query_cnv = du.convert_data(
            query, cols, self._dtypes, self._converters)

        conditions = (
            None if given is None
            else du.convert_given(given, self._dtypes, self._converters))

        return mu.probability(
            query_cnv, self._models, col_idxs, given=conditions)
Ejemplo n.º 3
0
    def probability(self, x, cols, given=None):
        """ Predictive probability of x_1, ..., x_n given y_1, ..., y_n

        Parameters
        ----------
        x : numpy.ndarray
            2-D numpy array where each row is a set of observations and each
            column corresponds to a feature. It is first normalized by
            `du.format_query_data` and then converted by `du.convert_data`.
        cols : list
            The names of each column/feature of `x`; each one must be a key
            of the engine's `col2idx` converter map.
        given : list(tuple), optional
            List of (name, value,) conditional constraints for the
            probability. Converted with `du.convert_given` unless it is
            None.

        Returns
        -------
        logps : numpy.ndarray
            The result of `mu.probability` for the converted query.
            NOTE(review): presumably log probabilities, judging by the name
            -- verify against `mu.probability`.

        Examples
        --------
        The probability that an animal is fast and agile given that it is
        bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
        ...                    given=[('bulbous', 1)])
        """
        # TODO: make sure that given does not contain columns from cols
        formatted = du.format_query_data(x)

        # Integer position of every requested column, in the caller's order.
        col_idxs = []
        for col_name in cols:
            col_idxs.append(self._converters['col2idx'][col_name])

        converted = du.convert_data(
            formatted, cols, self._dtypes, self._converters)

        # Unconditional query: nothing to convert.
        if given is None:
            return mu.probability(
                converted, self._models, col_idxs, given=None)

        given_cnv = du.convert_given(given, self._dtypes, self._converters)
        return mu.probability(
            converted, self._models, col_idxs, given=given_cnv)
Ejemplo n.º 4
0
def test_convert_categorical_data_should_change_everything():
    """Round-trip a table in which both columns are categorical.

    Every entry of both columns is expected to be replaced by its index from
    the matching ``val2idx`` map, and converting back with ``to_val=True``
    must restore the original table.
    """
    x_val2idx = {'a': 0, 'b': 1, 'c': 2}
    y_val2idx = {1: 0, 2: 1, 3: 2}
    converters = {
        'col2idx': {'x': 0, 'y': 1},
        'valmaps': {
            'x': {
                'val2idx': x_val2idx,
                # Inverse of val2idx: {0: 'a', 1: 'b', 2: 'c'}
                'idx2val': {idx: val for val, idx in x_val2idx.items()},
            },
            'y': {
                'val2idx': y_val2idx,
                # Inverse of val2idx: {0: 1, 1: 2, 2: 3}
                'idx2val': {idx: val for val, idx in y_val2idx.items()},
            },
        },
    }
    dtypes = ['categorical'] * 2
    data_in = np.array(
        [['a', 3], ['b', 2], ['c', 2], ['a', 1], ['b', 1]], dtype=object)

    encoded = du.convert_data(data_in, ['x', 'y'], dtypes, converters)
    assert encoded.shape == data_in.shape

    # Expected index table, row by row: (index of x, index of y).
    expected = [(0, 2), (1, 1), (2, 1), (0, 0), (1, 0)]
    for col in range(2):
        for row, wanted in enumerate(expected):
            assert encoded[row, col] == wanted[col]

    decoded = du.convert_data(
        encoded, ['x', 'y'], dtypes, converters, to_val=True)
    assert decoded.shape == data_in.shape

    n_rows, n_cols = data_in.shape
    for row, col in it.product(range(n_rows), range(n_cols)):
        assert data_in[row, col] == decoded[row, col]
Ejemplo n.º 5
0
def test_convert_continuous_data_should_do_nothing():
    """Continuous-only data must pass through ``convert_data`` unchanged.

    The check is done for the default direction and again with
    ``to_val=True``; in both cases shape and every entry must match the
    input.
    """
    def check_identical(expected, actual):
        # Same shape, then entry-by-entry equality in row-major order.
        assert expected.shape == actual.shape
        n_rows, n_cols = expected.shape
        for i, j in it.product(range(n_rows), range(n_cols)):
            assert expected[i, j] == actual[i, j]

    dtypes = ['continuous'] * 2
    converters = {'col2idx': {'x': 0, 'y': 1}, 'valmaps': {}}
    data_in = np.random.rand(10, 2)

    forward = du.convert_data(data_in, ['x', 'y'], dtypes, converters)
    check_identical(data_in, forward)

    backward = du.convert_data(
        data_in, ['x', 'y'], dtypes, converters, to_val=True)
    check_identical(data_in, backward)
Ejemplo n.º 6
0
def test_convert_continuous_data_should_do_nothing():
    """Check that all-continuous data is returned unchanged by convert_data.

    No value maps are supplied, so both the default conversion and the
    ``to_val=True`` conversion are expected to leave every entry as it was.
    """
    col_types = ['continuous', 'continuous']
    converters = {
        'col2idx': {'x': 0, 'y': 1},
        'valmaps': {},
    }
    original = np.random.rand(10, 2)

    # Default direction.
    result = du.convert_data(original, ['x', 'y'], col_types, converters)
    assert original.shape == result.shape

    n_rows, n_cols = original.shape
    for row in range(n_rows):
        for col in range(n_cols):
            assert original[row, col] == result[row, col]

    # Reverse direction (indices back to values).
    result = du.convert_data(
        original, ['x', 'y'], col_types, converters, to_val=True)
    assert original.shape == result.shape

    for row in range(n_rows):
        for col in range(n_cols):
            assert original[row, col] == result[row, col]
Ejemplo n.º 7
0
    def sample(self, cols, given=None, n=1):
        """ Draw samples from cols

        Parameters
        ----------
        cols : list
            List of columns from which to jointly draw. Each entry is looked
            up in the engine's `col2idx` converter map.
        given : list(tuple), optional
            List of column-value tuples that specify conditions. Converted
            with `du.convert_given` unless it is None.
        n : int
            The number of samples to draw

        Returns
        -------
        The drawn samples after conversion with `du.convert_data(...,
        to_val=True)`. A result of shape (1, 1) is returned as its single
        element, a result with exactly one row is returned as that row, and
        anything else is returned as is.

        Examples
        --------
        Draw whether an animal is fast and agile given that it is bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
        """
        # FIXME: make sure that given does not contain columns from cols
        conditions = given
        if conditions is not None:
            conditions = du.convert_given(
                conditions, self._dtypes, self._converters)

        # Translate column names into their integer positions.
        col_idxs = []
        for name in cols:
            col_idxs.append(self._converters['col2idx'][name])

        drawn = mu.sample(self._models, col_idxs, given=conditions, n=n)
        values = du.convert_data(
            drawn, cols, self._dtypes, self._converters, to_val=True)

        # Unwrap degenerate shapes so callers get a scalar or a single row.
        if values.shape == (1, 1):
            return values[0, 0]
        if values.shape[0] == 1:
            return values[0, :]
        return values
Ejemplo n.º 8
0
    def sample(self, cols, given=None, n=1):
        """ Draw samples from cols

        Parameters
        ----------
        cols : list
            List of columns from which to jointly draw; every entry must be
            a key of the engine's `col2idx` converter map.
        given : list(tuple), optional
            List of column-value tuples that specify conditions. When not
            None it is passed through `du.convert_given` first.
        n : int
            The number of samples to draw

        Returns
        -------
        The output of `mu.sample` converted with `du.convert_data(...,
        to_val=True)`: the single element when the converted result has
        shape (1, 1), the single row when it has exactly one row, and the
        whole result otherwise.

        Examples
        --------
        Draw whether an animal is fast and agile given that it is bulbous.

        >>> engine = Engine.load('examples/zoo.bcmodels')
        >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
        """
        # FIXME: make sure that given does not contain columns from cols
        given_cnv = (
            du.convert_given(given, self._dtypes, self._converters)
            if given is not None else None)

        # Integer position of every requested column, in the caller's order.
        col_idxs = []
        for col_name in cols:
            col_idxs.append(self._converters['col2idx'][col_name])

        raw_samples = mu.sample(
            self._models, col_idxs, given=given_cnv, n=n)
        samples = du.convert_data(
            raw_samples, cols, self._dtypes, self._converters, to_val=True)

        # Collapse a 1x1 result to a scalar and a one-row result to that row.
        shape = samples.shape
        if shape == (1, 1):
            return samples[0, 0]
        if shape[0] == 1:
            return samples[0, :]
        return samples