def parametric_likelihood(node, data, dtype=np.float64):
    """Evaluate the (linear-domain) likelihood of a univariate parametric leaf.

    Args:
        node: a parametric leaf node with a single-variable scope.
        data: 2D array of observations; NaN entries mark variables to
            marginalize over (their likelihood contribution is 1).
        dtype: dtype of the returned probability column.

    Returns:
        (n_rows, 1) array of per-row likelihoods.

    Raises:
        ValueError: for NegativeBinomial / Hypergeometric (known mismatches
            between the node parameterization and scipy / the wiki formula).
        Exception: for unknown node types.
    """
    assert len(node.scope) == 1, node.scope

    probs = np.ones((data.shape[0], 1), dtype=dtype)
    # Restrict to the single random variable in this node's scope.
    if data.shape[1] > 1:
        data = data[:, node.scope]
    assert data.shape[1] == 1, data.shape

    # NaN entries are marginalized: their probability stays 1.
    marg_ids = np.isnan(data)

    if isinstance(node, (Gaussian, LogNormal, Exponential)):
        scipy_obj, params = get_scipy_obj_params(node)
        probs[~marg_ids] = scipy_obj.pdf(data[~marg_ids], **params)
    elif isinstance(node, Gamma):
        scipy_obj, params = get_scipy_obj_params(node)
        # Boolean-mask indexing yields a copy, so the nudge below does not
        # mutate the caller's data.
        data_m = data[~marg_ids]
        data_m[data_m == 0] += POS_EPS  # gamma pdf is undefined at exactly 0
        probs[~marg_ids] = scipy_obj.pdf(data_m, **params)
    elif isinstance(node, (Poisson, Bernoulli, Geometric)):
        scipy_obj, params = get_scipy_obj_params(node)
        probs[~marg_ids] = scipy_obj.pmf(data[~marg_ids], **params)
    elif isinstance(node, NegativeBinomial):
        raise ValueError('Mismatch with scipy')
    elif isinstance(node, Hypergeometric):
        raise ValueError('Mismatch with wiki')
    elif isinstance(node, Categorical):
        # Force integer casting; values must be whole numbers.
        cat_data = data.astype(np.int64)
        assert np.all(np.equal(np.mod(cat_data[~marg_ids], 1), 0))
        # BUGFIX: values < 0 are outside the domain too. Previously only
        # cat_data >= node.k was flagged, so negative values silently indexed
        # node.p from the end (numpy negative indexing) and returned a wrong
        # probability instead of 0.
        out_domain_ids = (cat_data >= node.k) | (cat_data < 0)
        probs[~marg_ids & out_domain_ids] = 0
        probs[~marg_ids & ~out_domain_ids] = np.array(
            node.p)[cat_data[~marg_ids & ~out_domain_ids]]
    elif isinstance(node, CategoricalDictionary):
        # Unseen values default to probability 0.
        dict_probs = [node.p.get(val, 0.0) for val in data[~marg_ids]]
        probs[~marg_ids] = dict_probs
    elif isinstance(node, Uniform):
        probs[~marg_ids] = node.density
    else:
        raise Exception("Unknown parametric " + str(type(node)))

    return probs
def discrete_likelihood(node, data=None, dtype=np.float64):
    """Likelihood of a discrete leaf via its scipy pmf, clamped away from {0, 1}.

    NaN observations are treated as marginalized (probability 1) by
    ``leaf_marginalized_likelihood``.
    """
    probs, marg_ids, observations = leaf_marginalized_likelihood(node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    pmf_vals = scipy_obj.pmf(observations, **params)
    probs[~marg_ids] = pmf_vals
    # Clamp exact 0/1 so downstream log transforms stay finite.
    probs[probs == 1.0] = 0.999999999
    probs[probs == 0.0] = 0.000000001
    return probs
def continuous_multivariate_likelihood(node, data=None, dtype=np.float64):
    """Joint pdf of a multivariate continuous leaf over its full scope.

    Unlike the univariate helpers, no marginalization is supported here:
    the data must contain no NaNs.
    """
    n_rows = data.shape[0]
    probs = np.ones((n_rows, 1), dtype=dtype)
    observations = data[:, node.scope]
    assert not np.any(np.isnan(data))
    scipy_obj, params = get_scipy_obj_params(node)
    # allow_singular tolerates degenerate covariance matrices.
    probs[:, 0] = scipy_obj.pdf(observations, allow_singular=True, **params)
    return probs
def sample_parametric_node(node, n_samples, data, rand_gen):
    """Draw ``n_samples`` i.i.d. samples from a parametric leaf distribution.

    Args:
        node: a ``Parametric`` leaf.
        n_samples: number of samples to draw (must be positive).
        data: unused here; kept for interface compatibility.
        rand_gen: numpy RandomState used for reproducible sampling.

    Returns:
        1D array of samples.

    Raises:
        Exception: for unknown node types.
    """
    assert isinstance(node, Parametric)
    assert n_samples > 0

    scipy_backed = (Gaussian, Gamma, LogNormal, Poisson,
                    Geometric, Exponential, Bernoulli)
    if isinstance(node, scipy_backed):
        scipy_obj, params = get_scipy_obj_params(node)
        return scipy_obj.rvs(size=n_samples, random_state=rand_gen, **params)

    if isinstance(node, Categorical):
        return rand_gen.choice(np.arange(node.k), p=node.p, size=n_samples)

    if isinstance(node, CategoricalDictionary):
        # dict preserves insertion order, so values align with keys.
        vals = list(node.p.keys())
        ps = list(node.p.values())
        return rand_gen.choice(vals, p=ps, size=n_samples)

    raise Exception("Node type unknown: " + str(type(node)))
def assert_correct_node_sampling_continuous(self, node, samples, plot):
    """Check (via a two-sided KS test) that both the given samples and freshly
    generated samples match the node's scipy distribution.

    Args:
        node: parametric leaf under test; its scope is forced to [0].
        samples: externally produced samples to validate.
        plot: if truthy, overlay the node pdf on a histogram of ``samples``.
    """
    node.scope = [0]
    # Fixed seed so the generated-sample KS test is reproducible.
    rand_gen = np.random.RandomState(1234)
    # NOTE(review): called with 3 args here, but the sample_parametric_node
    # variants visible elsewhere take (node, n_samples, data, rand_gen) —
    # confirm which signature this test targets.
    samples_gen = sample_parametric_node(node, 1000000, rand_gen)
    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1)
        x = np.linspace(np.min(samples), np.max(samples), 1000)
        ax.plot(x, likelihood(node, x.reshape(-1, 1)), 'r-',
                lw=2, alpha=0.6, label=node.__class__.__name__ + ' pdf')
        # NOTE(review): `normed` was deprecated and removed in matplotlib 3.x
        # (replaced by `density`) — this branch breaks on recent matplotlib.
        ax.hist(samples, normed=True, histtype='stepfilled',
                alpha=0.7, bins=1000)
        ax.legend(loc='best', frameon=False)
        plt.show()
    scipy_obj, params = get_scipy_obj_params(node)
    # H_0: the sample and the node's distribution are identical.
    test_outside_samples = kstest(samples, lambda x: scipy_obj.cdf(x, **params))
    # reject H_0 (dist are identical) if p < 0.05
    # we pass the test if they are identical, pass if p >= 0.05
    self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)
    test_generated_samples = kstest(samples_gen,
                                    lambda x: scipy_obj.cdf(x, **params))
    # reject H_0 (dist are identical) if p < 0.05
    # we pass the test if they are identical, pass if p >= 0.05
    self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
def gamma_likelihood(node, data=None, dtype=np.float64):
    """Likelihood of a Gamma leaf; zero observations are nudged by POS_EPS
    because the gamma pdf is not defined at exactly 0.

    NaN observations are treated as marginalized (probability 1) by
    ``leaf_marginalized_likelihood``.
    """
    probs, marg_ids, observations = leaf_marginalized_likelihood(node, data, dtype)
    zero_mask = observations == 0
    observations[zero_mask] += POS_EPS
    scipy_obj, params = get_scipy_obj_params(node)
    probs[~marg_ids] = scipy_obj.pdf(observations, **params)
    return probs
def discrete_log_likelihood(node, data=None, dtype=np.float64, **kwargs):
    """Log-likelihood of a discrete leaf via its scipy logpmf.

    Zero-probability outcomes (logpmf == -inf) are floored at MIN_NEG so the
    result stays finite for downstream accumulation.
    """
    probs, marg_ids, observations = leaf_marginalized_log_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    log_pmf = scipy_obj.logpmf(observations, **params)
    probs[~marg_ids] = log_pmf
    probs[np.isinf(probs)] = MIN_NEG
    return probs
def sample_parametric_node(node, n_samples, data, rand_gen):
    """Draw ``n_samples`` i.i.d. samples from a parametric leaf distribution.

    Args:
        node: a ``Parametric`` leaf.
        n_samples: number of samples to draw (must be positive).
        data: unused here; kept for interface compatibility.
        rand_gen: numpy RandomState used for reproducible sampling.

    Returns:
        1D array of samples.

    Raises:
        Exception: for unknown node types.
    """
    assert isinstance(node, Parametric)
    assert n_samples > 0

    scipy_backed = (Gaussian, Gamma, LogNormal, Poisson,
                    Geometric, Exponential, Bernoulli)
    if isinstance(node, scipy_backed):
        scipy_obj, params = get_scipy_obj_params(node)
        return scipy_obj.rvs(size=n_samples, random_state=rand_gen, **params)

    if isinstance(node, Categorical):
        return rand_gen.choice(np.arange(node.k), p=node.p, size=n_samples)

    raise Exception('Node type unknown: ' + str(type(node)))
def gaussian_likelihood(node, data=None, dtype=np.float64, bmarg=None, ibm=None):
    """Likelihood of a Gaussian leaf, with an optional reliability-mask mode.

    When ``bmarg`` is truthy, ``ibm`` selects per row between the pdf
    (reliable entries) and the normal cdf (unreliable entries).
    NOTE(review): in that mode the whole probs column is rebuilt, so the
    NaN-marginalization handling of the default path is bypassed — confirm
    this is intended.
    """
    probs, marg_ids, observations = leaf_marginalized_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    if bmarg:
        reliability = ibm[:, node.scope]
        pdf_col = np.expand_dims(scipy_obj.pdf(observations, **params), axis=1)
        cdf_col = np.expand_dims(
            scipy.stats.norm.cdf(observations,
                                 loc=params['loc'], scale=params['scale']),
            axis=1)
        probs = np.where(reliability, pdf_col, cdf_col)
    else:
        probs[~marg_ids] = scipy_obj.pdf(observations, **params)
    return probs
def continuous_likelihood(node, data=None, dtype=np.float64, **kwargs):
    """Likelihood of a univariate continuous leaf via its scipy pdf.

    NaN observations are treated as marginalized (probability 1) by
    ``leaf_marginalized_likelihood``.
    """
    probs, marg_ids, observations = leaf_marginalized_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    pdf_vals = scipy_obj.pdf(observations, **params)
    probs[~marg_ids] = pdf_vals
    return probs
def poisson_likelihood(node, data=None, dtype=np.float64):
    """Likelihood of a Poisson leaf via its scipy pmf.

    NaN observations are treated as marginalized (probability 1) by
    ``leaf_marginalized_likelihood``.
    """
    probs, marg_ids, observations = leaf_marginalized_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    pmf_vals = scipy_obj.pmf(observations, **params)
    probs[~marg_ids] = pmf_vals
    return probs
def parametric_log_likelihood(node, data, dtype=np.float64, context=None, node_log_likelihood=None):
    """Evaluate the log-likelihood of a univariate parametric leaf.

    Args:
        node: a parametric leaf node with a single-variable scope.
        data: 2D array of observations; NaN entries mark variables to
            marginalize over (their log-likelihood contribution is 0).
        dtype: dtype of the returned column.
        context: unused here; kept for interface compatibility.
        node_log_likelihood: unused here; kept for interface compatibility.

    Returns:
        (n_rows, 1) array of per-row log-likelihoods.

    Raises:
        ValueError: for NegativeBinomial / Hypergeometric (known mismatches
            between the node parameterization and scipy / the wiki formula).
        Exception: for unknown node types.
    """
    assert len(node.scope) == 1, node.scope

    log_probs = np.zeros((data.shape[0], 1), dtype=dtype)
    # Restrict to the single random variable in this node's scope.
    if data.shape[1] > 1:
        data = data[:, node.scope]
    assert data.shape[1] == 1, data.shape

    # NaN entries are marginalized: their log-probability stays 0.
    marg_ids = np.isnan(data)

    if isinstance(node, (Gaussian, LogNormal, Exponential, Beta,
                         Gumbel, Laplace, Wald, Weibull)):
        scipy_obj, params = get_scipy_obj_params(node)
        log_probs[~marg_ids] = scipy_obj.logpdf(data[~marg_ids], **params)
    elif isinstance(node, Gamma):
        scipy_obj, params = get_scipy_obj_params(node)
        # Boolean-mask indexing yields a copy, so the nudge below does not
        # mutate the caller's data.
        data_m = data[~marg_ids]
        data_m[data_m == 0] += POS_EPS  # gamma pdf is undefined at exactly 0
        log_probs[~marg_ids] = scipy_obj.logpdf(data_m, **params)
    elif isinstance(node, (Poisson, Bernoulli, Geometric)):
        scipy_obj, params = get_scipy_obj_params(node)
        log_probs[~marg_ids] = scipy_obj.logpmf(data[~marg_ids], **params)
    elif isinstance(node, NegativeBinomial):
        raise ValueError('Mismatch with scipy')
    elif isinstance(node, Hypergeometric):
        raise ValueError('Mismatch with wiki')
    elif isinstance(node, Categorical):
        # Force integer casting; values must be whole numbers.
        cat_data = data.astype(np.int64)
        assert np.all(np.equal(np.mod(cat_data[~marg_ids], 1), 0))
        # BUGFIX: values < 0 are outside the domain too (the original
        # commented-out assert documented the intended 0 <= v < k check).
        # Previously negative values silently indexed node.p from the end
        # (numpy negative indexing) and returned a wrong log-probability
        # instead of LOG_ZERO.
        out_domain_ids = (cat_data >= node.k) | (cat_data < 0)
        log_probs[~marg_ids & out_domain_ids] = LOG_ZERO
        log_probs[~marg_ids & ~out_domain_ids] = np.array(np.log(
            node.p))[cat_data[~marg_ids & ~out_domain_ids]]
    elif isinstance(node, Uniform):
        log_probs[~marg_ids] = np.log(node.density)
    else:
        raise Exception("Unknown parametric " + str(type(node)))

    return log_probs