def test_convert_categorical_data_single_column():
    """Convert a mixed table in which only column 'x' is categorical.

    Column 'x' must come back as the indices given by its 'val2idx' map,
    column 'y' (continuous) must keep its values, and converting back with
    ``to_val=True`` must reproduce the input cell for cell.
    """
    dtypes = ['categorical', 'continuous']
    data_in = np.array(
        [['a', 3], ['b', 2], ['c', 2], ['a', 1], ['b', 1]], dtype=object)

    x_valmap = {
        'val2idx': {'a': 0, 'b': 1, 'c': 2},
        'idx2val': {0: 'a', 1: 'b', 2: 'c'},
    }
    converters = {
        'col2idx': {'x': 0, 'y': 1},
        'valmaps': {'x': x_valmap},
    }

    data_out = du.convert_data(data_in, ['x', 'y'], dtypes, converters)
    assert data_out.shape == data_in.shape

    # Expected cell values listed per column: first 'x' (indices), then 'y'.
    expected_columns = ([0, 1, 2, 0, 1], [3, 2, 2, 1, 1])
    for col, expected in enumerate(expected_columns):
        for row, want in enumerate(expected):
            assert data_out[row, col] == want

    # Round trip: indices back to the original values.
    data_in_2 = du.convert_data(data_out, ['x', 'y'], dtypes, converters,
                                to_val=True)
    assert data_in_2.shape == data_in.shape

    n_rows, n_cols = data_in.shape
    for i in range(n_rows):
        for j in range(n_cols):
            assert data_in[i, j] == data_in_2[i, j]
def probability(self, x, cols, given=None):
    """ Predictive probability of x_1, ..., x_n given y_1, ..., y_n

    Parameters
    ----------
    x : numpy.ndarray(2,)
        2-D numpy array where each row is a set of observations and each
        column corresponds to a feature.
    cols : list
        The names of each column/feature of `x`.
    given : list(tuple)
        List of (name, value,) conditional constraints for the probability.
        Must not name a column that also appears in `cols`.

    Returns
    -------
    logps : numpy.ndarray
        The result of `mu.probability` for the converted query (presumably
        one log probability per row of `x`; confirm against
        `mu.probability`).

    Raises
    ------
    ValueError
        If a column named in `given` is also listed in `cols`.

    Examples
    --------
    The probability that an animal is fast and agile given that it is
    bulbous.

    >>> engine = Engine.load('examples/zoo.bcmodels')
    >>> engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
    ...                    given=[('bulbous', 1)])
    """
    # Fail fast, before any conversion work: a column cannot be both queried
    # and conditioned on.
    if given is not None:
        conditioned = {cond[0] for cond in given}
        clashes = [col for col in cols if col in conditioned]
        if clashes:
            raise ValueError(
                'given must not contain columns from cols: {}'.format(clashes))

    x = du.format_query_data(x)
    col_idxs = [self._converters['col2idx'][col] for col in cols]
    x_cnv = du.convert_data(x, cols, self._dtypes, self._converters)

    if given is not None:
        given = du.convert_given(given, self._dtypes, self._converters)

    return mu.probability(x_cnv, self._models, col_idxs, given=given)
def test_convert_categorical_data_should_change_everything():
    """Convert a table in which both columns are categorical.

    Every cell must be replaced by the index from its column's 'val2idx'
    map, and converting back with ``to_val=True`` must reproduce the input
    cell for cell.
    """
    dtypes = ['categorical'] * 2
    data_in = np.array(
        [['a', 3], ['b', 2], ['c', 2], ['a', 1], ['b', 1]], dtype=object)

    valmaps = {
        'x': {
            'val2idx': {'a': 0, 'b': 1, 'c': 2},
            'idx2val': {0: 'a', 1: 'b', 2: 'c'},
        },
        'y': {
            'val2idx': {1: 0, 2: 1, 3: 2},
            'idx2val': {0: 1, 1: 2, 2: 3},
        },
    }
    converters = {'col2idx': {'x': 0, 'y': 1}, 'valmaps': valmaps}

    data_out = du.convert_data(data_in, ['x', 'y'], dtypes, converters)
    assert data_out.shape == data_in.shape

    # Expected indices listed per column: first 'x', then 'y'.
    expected_columns = ([0, 1, 2, 0, 1], [2, 1, 1, 0, 0])
    for col, expected in enumerate(expected_columns):
        for row, want in enumerate(expected):
            assert data_out[row, col] == want

    # Round trip: indices back to the original values.
    data_in_2 = du.convert_data(data_out, ['x', 'y'], dtypes, converters,
                                to_val=True)
    assert data_in_2.shape == data_in.shape

    n_rows, n_cols = data_in.shape
    for i in range(n_rows):
        for j in range(n_cols):
            assert data_in[i, j] == data_in_2[i, j]
def test_convert_continuous_data_should_do_nothing():
    """All-continuous data must pass through `du.convert_data` unchanged.

    Checked in both directions: the default conversion and the
    ``to_val=True`` conversion.
    """
    data_in = np.random.rand(10, 2)
    dtypes = ['continuous'] * 2
    converters = {'col2idx': {'x': 0, 'y': 1}, 'valmaps': {}}
    n_rows, n_cols = data_in.shape

    # First pass uses the default direction, second pass asks for values.
    for extra_kwargs in ({}, {'to_val': True}):
        data_out = du.convert_data(data_in, ['x', 'y'], dtypes, converters,
                                   **extra_kwargs)
        assert data_in.shape == data_out.shape
        for i in range(n_rows):
            for j in range(n_cols):
                assert data_in[i, j] == data_out[i, j]
def test_convert_continuous_data_should_do_nothing():
    """Continuous-only input is returned cell-for-cell identical.

    Exercises `du.convert_data` once with its default direction and once
    with ``to_val=True``.
    """
    data_in = np.random.rand(10, 2)
    n_rows, n_cols = data_in.shape
    dtypes = ['continuous'] * 2
    converters = {
        'col2idx': {'x': 0, 'y': 1},
        'valmaps': {},
    }

    def check_unchanged(converted):
        # Same shape, then exact equality of every cell.
        assert data_in.shape == converted.shape
        for i, j in it.product(range(n_rows), range(n_cols)):
            assert data_in[i, j] == converted[i, j]

    check_unchanged(
        du.convert_data(data_in, ['x', 'y'], dtypes, converters))
    check_unchanged(
        du.convert_data(data_in, ['x', 'y'], dtypes, converters, to_val=True))
def sample(self, cols, given=None, n=1):
    """ Draw samples from cols

    Parameters
    ----------
    cols : list
        List of column names from which to jointly draw. Each name is
        looked up in the engine's 'col2idx' converter.
    given : list(tuple)
        List of (name, value) tuples that specify conditions. Must not name
        a column that also appears in `cols`.
    n : int
        The number of samples to draw

    Returns
    -------
    x : scalar or numpy.ndarray
        The converted draws, squeezed according to their shape: a single
        value when the result has shape (1, 1), a 1-D row when it has
        exactly one row, and the full 2-D array otherwise.

    Raises
    ------
    ValueError
        If a column named in `given` is also listed in `cols`.

    Examples
    --------
    Draw whether an animal is fast and agile given that it is bulbous.

    >>> engine = Engine.load('examples/zoo.bcmodels')
    >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
    """
    if given is not None:
        # A column cannot be both drawn and conditioned on; reject the query
        # before any conversion work.
        conditioned = {cond[0] for cond in given}
        clashes = [col for col in cols if col in conditioned]
        if clashes:
            raise ValueError(
                'given must not contain columns from cols: {}'.format(clashes))
        given = du.convert_given(given, self._dtypes, self._converters)

    col_idxs = [self._converters['col2idx'][col] for col in cols]

    data_out = mu.sample(self._models, col_idxs, given=given, n=n)

    # Map the drawn values back to the user's original values.
    x = du.convert_data(data_out, cols, self._dtypes, self._converters,
                        to_val=True)

    if x.shape == (1, 1):
        return x[0, 0]
    if x.shape[0] == 1:
        return x[0, :]
    return x
def sample(self, cols, given=None, n=1):
    """ Draw samples from cols

    Parameters
    ----------
    cols : list
        List of column names from which to jointly draw. Each name is
        looked up in the engine's 'col2idx' converter.
    given : list(tuple)
        List of (name, value) tuples that specify conditions. Must not name
        a column that also appears in `cols`.
    n : int
        The number of samples to draw

    Returns
    -------
    x : scalar or numpy.ndarray
        The converted draws, squeezed according to their shape: a single
        value when the result has shape (1, 1), a 1-D row when it has
        exactly one row, and the full 2-D array otherwise.

    Raises
    ------
    ValueError
        If a column named in `given` is also listed in `cols`.

    Examples
    --------
    Draw whether an animal is fast and agile given that it is bulbous.

    >>> engine = Engine.load('examples/zoo.bcmodels')
    >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
    """
    if given is not None:
        # A column cannot be both drawn and conditioned on; reject the query
        # before any conversion work.
        conditioned = {cond[0] for cond in given}
        clashes = [col for col in cols if col in conditioned]
        if clashes:
            raise ValueError(
                'given must not contain columns from cols: {}'.format(clashes))
        given = du.convert_given(given, self._dtypes, self._converters)

    col_idxs = [self._converters['col2idx'][col] for col in cols]

    data_out = mu.sample(self._models, col_idxs, given=given, n=n)

    # Map the drawn values back to the user's original values.
    x = du.convert_data(data_out, cols, self._dtypes, self._converters,
                        to_val=True)

    if x.shape == (1, 1):
        return x[0, 0]
    if x.shape[0] == 1:
        return x[0, :]
    return x