def probability(self, x, cols, given=None):
    """ Predictive probability of x_1, ..., x_n given y_1, ..., y_n

    Parameters
    ----------
    x : numpy.ndarray
        2-D numpy array where each row is a set of observations and each
        column corresponds to a feature.
    cols : list
        The names of each column/feature of `x`.
    given : list(tuple)
        List of (name, value,) conditional constraints for the probability

    Returns
    -------
    logps : numpy.ndarray
        The log probability of each row of `x`.

    Examples
    --------
    The probability that an animal is fast and agile given that it is
    bulbous.

    >>> engine = Engine.load('examples/zoo.bcmodels')
    >>> engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
    ...                    given=[('bulbous', 1)])
    """
    # TODO: make sure that given does not contain columns from cols
    x = du.format_query_data(x)
    col_idxs = [self._converters['col2idx'][col] for col in cols]
    x_cnv = du.convert_data(x, cols, self._dtypes, self._converters)

    if given is not None:
        given = du.convert_given(given, self._dtypes, self._converters)

    return mu.probability(x_cnv, self._models, col_idxs, given=given)
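# Hypothetical usage sketch for `probability`, expanding on the docstring
# example above (the engine path and column names come from that example
# and are assumptions about the surrounding package, not guarantees):
#
#     engine = Engine.load('examples/zoo.bcmodels')
#     logp = engine.probability(np.array([[1, 1]]), ['fast', 'agile'],
#                               given=[('bulbous', 1)])
#     p = np.exp(logp)  # values are returned on a log scale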
def entropy(self, col, n_samples=500):
    """ The entropy of a column.

    Notes
    -----
    Returns differential entropy for continuous features.

    Parameters
    ----------
    col : indexer
        The name of the column
    n_samples : int
        The number of samples to use for the Monte Carlo approximation
        (if `col` is not categorical).

    Returns
    -------
    h : float
        The entropy of `col`.
    """
    col_idx = self._converters['col2idx'][col]
    dtype = self._dtypes[col_idx]

    # If x is enumerable (categorical), compute h(x) exactly; otherwise
    # approximate h(x) with an importance sampling estimate that uses
    # p(x) itself as the importance function.
    if dtype == 'categorical':
        k = self._distargs[col_idx][0]
        x = np.array([[i] for i in range(k)])
        logps = mu.probability(x, self._models, (col_idx,))

        assert logps.shape == (k,)

        h = -np.sum(np.exp(logps) * logps)
    else:
        x = mu.sample(self._models, (col_idx,), n=n_samples)
        logps = mu.probability(x, self._models, (col_idx,))

        h = -np.sum(logps) / n_samples

    return h
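# A minimal, self-contained sketch of the Monte Carlo estimate used above
# for non-categorical columns: h(x) = -E[log p(x)] is approximated by
# drawing samples from p and averaging -log p(x_i). Everything below
# (`mc_entropy`, the scipy.stats.norm stand-in density) is illustrative
# and not part of this module.
import numpy as np
from scipy.stats import norm


def mc_entropy(dist, n_samples=500, seed=0):
    # Draw n_samples from the distribution and average the negative
    # log density at those points.
    rng = np.random.default_rng(seed)
    x = dist.rvs(size=n_samples, random_state=rng)
    return -np.mean(dist.logpdf(x))


# The standard normal has analytic differential entropy
# 0.5 * log(2 * pi * e) ~ 1.4189, so the estimate should land near it.
print(mc_entropy(norm(0, 1)))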
def test_double_mixed_single_view_values(model):
    x = np.array([[0, 2.1]])
    logp = mu.probability(x, [model], [0, 1])

    assert logp == approx(-2.93134971834475)
def test_single_categorical_col_probability_values_2(model):
    x = np.array([[2]], dtype=int)
    logp = mu.probability(x, [model], [0])

    assert logp == approx(-1.15710849534972)
def test_single_categorical_col_probability_values_1(model):
    # logp of 0 in column 0
    x = np.array([[0]], dtype=int)
    logp = mu.probability(x, [model], [0])

    assert logp == approx(-0.848561284433976)
def test_single_continuous_col_probability_values_2(model):
    x = np.array([[2]], dtype=float)
    logp = mu.probability(x, [model], [1])

    assert logp == approx(-2.01262102403666)
def test_single_continuous_col_probability_values_1(model):
    # logp of 1 in column 1
    x = np.array([[1]], dtype=float)
    logp = mu.probability(x, [model], [1])

    assert logp == approx(-1.58025784097797)