def test_Histogram_discrete_inference(self):
    """Check per-row likelihoods of a discrete histogram leaf.

    The same six samples (values 1, 1, 2, 3, 3, 3) are fitted twice, once
    with ``alpha=False`` and once with ``alpha=True``.  The expected
    likelihoods are ``count / 6`` in the first case and ``(count + 1) / 9``
    in the second.
    """
    # (alpha flag, expected likelihood for each of the six rows, in order)
    scenarios = (
        (False, (2 / 6, 2 / 6, 1 / 6, 3 / 6, 3 / 6, 3 / 6)),
        (True, (3 / 9, 3 / 9, 2 / 9, 4 / 9, 4 / 9, 4 / 9)),
    )

    for use_alpha, expected_probs in scenarios:
        # Rebuild the data and context for every scenario so the two fits
        # are fully independent of each other.
        samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
        context = Context([MetaType.DISCRETE])
        context.add_domains(samples)

        leaf = create_histogram_leaf(samples, context, [0], alpha=use_alpha)
        likelihoods = np.exp(log_likelihood(leaf, samples))

        for row, expected in enumerate(expected_probs):
            self.assertAlmostEqual(float(likelihoods[row]), expected)
def create_piecewise_leaf(data, ds_context, scope, isotonic=False, prior_weight=0.1, hist_source="numpy"):
    """Build a univariate piecewise-linear leaf from a histogram of ``data``.

    A histogram leaf is fitted first (``alpha=False``); its densities become
    the y-values of the piecewise function and its breaks determine the
    x-values.  Zero-density points are added on both sides, and the curve is
    normalised so that its trapezoidal area is 1.

    :param data: 2-D array with exactly one column (checked by assertion)
    :param ds_context: context object; ``ds_context.meta_types[scope[0]]``
        selects how x-values are derived from the histogram breaks
    :param scope: list with exactly one variable index (checked by assertion)
    :param isotonic: if true, ``(x, y)`` are passed through
        ``isotonic_unimodal_regression_R`` before normalisation
    :param prior_weight: if ``None`` the ``PiecewiseLinear`` node is returned
        as is; otherwise the result is
        ``prior_weight * uniform_hist + (1 - prior_weight) * node`` where
        ``uniform_hist`` is a histogram leaf fitted on an all-NaN array
    :param hist_source: forwarded to ``create_histogram_leaf`` for the data
        histogram (not for the all-NaN prior histogram)
    :return: the ``PiecewiseLinear`` node, or the weighted combination above
    :raises Exception: if the meta type is neither REAL nor DISCRETE
    """
    assert len(scope) == 1, "scope of univariate Piecewise for more than one variable?"
    assert data.shape[1] == 1, "data has more than one feature?"

    idx = scope[0]
    meta_type = ds_context.meta_types[idx]

    hist = create_histogram_leaf(data, ds_context, scope, alpha=False, hist_source=hist_source)
    densities = hist.densities
    bins = hist.breaks
    repr_points = hist.bin_repr_points

    if meta_type == MetaType.REAL:
        # Small offset so the zero-density end points sit just outside the
        # outermost breaks.
        EPS = 1e-8
        if len(densities) > 1:

            def pairwise(iterable):
                "s -> (s0,s1), (s1,s2), (s2, s3), ..."
                a, b = itertools.tee(iterable)
                next(b, None)
                return zip(a, b)

            # One x-value per bin, placed at the bin midpoint.
            x = [bins[0] - EPS] + [b0 + (b1 - b0) / 2 for (b0, b1) in pairwise(bins)] + [bins[-1] + EPS]
        else:
            assert len(bins) == 2
            x = [bins[0] - EPS] + [(bins[0] + (bins[1] - bins[0]) / 2)] + [bins[-1] + EPS]
    elif meta_type == MetaType.DISCRETE:
        # Use the left break of every bin and pad one unit on either side.
        tail_width = 1
        x = [b for b in bins[:-1]]
        x = [x[0] - tail_width] + x + [x[-1] + tail_width]
    else:
        # str() is required: concatenating a non-string meta type directly
        # would raise TypeError and hide the intended message.
        raise Exception("Invalid statistical type: " + str(meta_type))

    y = [0.0] + [d for d in densities] + [0.0]

    assert len(densities) == len(bins) - 1
    assert len(x) == len(y), (len(x), len(y))

    x, y = np.array(x), np.array(y)

    if isotonic:
        x, y = isotonic_unimodal_regression_R(x, y)

    # Normalise so the piecewise-linear curve integrates to one.
    auc = np.trapz(y, x)
    y = y / auc

    node = PiecewiseLinear(x.tolist(), y.tolist(), repr_points, scope=scope)

    if prior_weight is None:
        return node

    # All-NaN array with the shape of ``data``.  NaN cannot be stored in an
    # integer array, so fall back to float unless the input dtype is already
    # floating point or complex.
    nan_dtype = data.dtype if data.dtype.kind in "fc" else float
    uniform_data = np.full_like(data, np.nan, dtype=nan_dtype)
    uniform_hist = create_histogram_leaf(uniform_data, ds_context, scope, alpha=False)

    return prior_weight * uniform_hist + (1 - prior_weight) * node
def test_Histogram_expectations(self):
    """Compare the expectation of a histogram leaf with the sample mean.

    Runs once on standard-normal samples (REAL) and once on random integers
    in [0, 100) (DISCRETE); both must agree to 3 decimal places.
    """

    def check_mean_matches(samples, meta_type):
        # Fit a histogram on the single column and compare its expectation
        # with the empirical mean of that column.
        context = Context(meta_types=[meta_type])
        context.add_domains(samples)
        leaf = create_histogram_leaf(samples, context, scope=[0])
        expected_value = Expectation(leaf, {0})
        self.assertAlmostEqual(np.mean(samples[:, 0]), expected_value[0, 0], 3)

    real_samples = np.random.randn(20000).reshape(-1, 1)
    check_mean_matches(real_samples, MetaType.REAL)

    discrete_samples = np.random.randint(0, high=100, size=20000).reshape(-1, 1)
    check_mean_matches(discrete_samples, MetaType.DISCRETE)
def test_histogram_to_str_and_back(self):
    """Fit a small discrete histogram leaf and run the reconstruction check on it."""
    samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)

    context = Context([MetaType.DISCRETE])
    context.add_domains(samples)

    leaf = create_histogram_leaf(samples, context, [0], alpha=False)

    # The actual comparison is delegated to the test class helper.
    self.check_obj_and_reconstruction(leaf)
def test_histogram_leaf(self):
    """MPE of a single missing value must be 3, the most frequent sample."""
    samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)

    context = Context([MetaType.DISCRETE])
    context.add_domains(samples)

    leaf = create_histogram_leaf(samples, context, [0], alpha=False)

    # One row, one column, value missing: mpe should fill it in.
    query = np.array([[np.nan]])
    completed = mpe(leaf, query)
    expected = np.array([[3]])

    self.assertTrue(np.array_equal(completed, expected), "mpe should be 3")
def test_valid_histogram(self):
    """A kde-sourced histogram on unevenly repeated values has several bins."""
    np.random.seed(17)

    # (value, number of repetitions), in the order the samples appear.
    value_counts = ((1, 1), (5, 20), (7, 1), (10, 50), (20, 1), (30, 10))
    samples = [value for value, count in value_counts for _ in range(count)]
    samples = np.array(samples).reshape((-1, 1))

    context = Context([MetaType.REAL])
    context.add_domains(samples)

    leaf = create_histogram_leaf(samples, context, [0], alpha=False, hist_source="kde")

    self.assertGreater(len(leaf.bin_repr_points), 1)
def create_spmn_leaf(data, ds_context, scope):
    """Create the leaf for a single variable of an SPMN.

    A histogram leaf is always fitted on ``data``.  When the variable's meta
    type is ``MetaType.UTILITY`` the histogram's breaks, densities and bin
    representative points are repackaged into a ``Utility`` node; otherwise
    the histogram leaf itself is returned.

    :param data: 2-D array with exactly one column (checked by assertion)
    :param ds_context: context providing ``meta_types``
    :param scope: list with exactly one variable index (checked by assertion)
    :return: a ``Utility`` node or the histogram leaf
    """
    assert len(scope) == 1, "scope of univariate histogram for more than one variable?"
    assert data.shape[1] == 1, "data has more than one feature?"

    var_index = scope[0]
    var_meta_type = ds_context.meta_types[var_index]

    # Both outcomes start from the same histogram fit.
    histogram = create_histogram_leaf(data, ds_context, scope)

    if var_meta_type == MetaType.UTILITY:
        return Utility(
            histogram.breaks,
            histogram.densities,
            histogram.bin_repr_points,
            scope=var_index,
        )

    return histogram
def test_mixture_gaussians(self):
    """A kde-sourced histogram should approximate a two-Gaussian mixture.

    Samples are drawn from N(10, 1) and N(30, 1), 200 each.  The histogram
    likelihood is compared with the true mixture density on a grid over
    [0, 60] plus the samples themselves; the summed absolute error must not
    exceed 7.
    """
    np.random.seed(17)

    # Draw order matters for reproducibility: first component, then second.
    first_component = np.random.normal(10, 1, size=200)
    second_component = np.random.normal(30, 1, size=200)
    samples = np.concatenate([first_component, second_component]).reshape((-1, 1))

    context = Context([MetaType.REAL])
    context.add_domains(samples)

    leaf = create_histogram_leaf(samples, context, [0], alpha=False, hist_source="kde")

    grid = np.linspace(0, 60, 1000)
    eval_points = np.sort(np.concatenate([grid, samples[:, 0]]))

    from scipy.stats import norm

    true_density = 0.5 * norm.pdf(eval_points, 10, 1) + 0.5 * norm.pdf(eval_points, 30, 1)
    estimated = likelihood(leaf, eval_points.reshape((-1, 1)))

    total_abs_error = np.sum(np.abs(estimated[:, 0] - true_density))

    self.assertLessEqual(total_abs_error, 7)
def learn_leaf_from_context(data, ds_context, scope):
    """
    Pick and build the leaf whose type is declared in the context.

    The leaf class registered in ``ds_context.parametric_types`` for the
    single scope variable is tested, in this order, against ``Parametric``,
    ``Conditional``, ``Histogram`` and ``PiecewiseLinear``; the first match
    decides which ``create_*_leaf`` function is called.

    :param data: np.array: the data slice
    :param ds_context: Context: the context object for the data/spn
    :param scope: List: the scope of the variables (exactly one entry)
    :return: the leaf built by the matching creator function
    :raises Exception: if the declared type matches none of the four classes
    """
    assert len(scope) == 1, "scope for more than one variable?"

    leaf_type = ds_context.parametric_types[scope[0]]
    assert issubclass(leaf_type, Leaf), 'no instance of leaf '

    # Select the creator first, call it once at the end.
    if issubclass(leaf_type, Parametric):
        build_leaf = create_parametric_leaf
    elif issubclass(leaf_type, Conditional):
        build_leaf = create_conditional_leaf
    elif issubclass(leaf_type, Histogram):
        build_leaf = create_histogram_leaf
    elif issubclass(leaf_type, PiecewiseLinear):
        build_leaf = create_piecewise_leaf
    else:
        raise Exception('No fitting leaf type found')

    return build_leaf(data, ds_context, scope)
def create_leaf(data, ds_context, scope):
    """Create a histogram leaf for ``scope`` with ``alpha=0.005``.

    A piecewise-linear alternative
    (``create_piecewise_leaf(..., isotonic=False, prior_weight=0.01)``) was
    left commented out in the original code and is not used.
    """
    histogram_leaf = create_histogram_leaf(data, ds_context, scope, alpha=0.005)
    return histogram_leaf
if __name__ == '__main__':
    # Demo script: build small SPNs from histogram leaves and compare the
    # Python log-likelihood with the TensorFlow graph evaluation.
    add_histogram_inference_support()

    np.random.seed(17)

    # 4000 samples from two Gaussians, arranged as rows of 10 features.
    narrow_samples = np.random.normal(10, 0.01, size=2000).tolist()
    wide_samples = np.random.normal(30, 10, size=2000).tolist()
    data = np.array(narrow_samples + wide_samples).reshape((-1, 10))

    # Clip negatives and truncate to integers.
    data[data < 0] = 0
    data = (data * 1).astype(int)

    ds_context = Context(meta_types=[MetaType.DISCRETE] * data.shape[1])
    ds_context.add_domains(data)

    # Overwrite the first two columns with constants.
    data[:, 0] = 0
    data[:, 1] = 1

    spn = learn(data, ds_context)

    # Product of two kde histogram leaves over columns 0 and 1.
    left_leaf = create_histogram_leaf(
        data[:, 0].reshape((-1, 1)), ds_context, [0], alpha=False, hist_source="kde")
    right_leaf = create_histogram_leaf(
        data[:, 1].reshape((-1, 1)), ds_context, [1], alpha=False, hist_source="kde")
    spn = left_leaf * right_leaf

    # Weighted sum of two kde histogram leaves, both over column 0.  Each
    # leaf is weighted right after it is created, as in a single expression.
    first_term = 0.3 * create_histogram_leaf(
        data[:, 0].reshape((-1, 1)), ds_context, [0], alpha=False, hist_source="kde")
    second_term = 0.7 * create_histogram_leaf(
        data[:, 0].reshape((-1, 1)), ds_context, [0], alpha=False, hist_source="kde")
    spn = first_term + second_term

    py_ll = log_likelihood(spn, data)

    tf_graph, placeholder = spn_to_tf_graph(spn, data)
    log_tf_out = eval_tf(tf_graph, placeholder, data)

    outputs_match = np.all(np.isclose(py_ll, log_tf_out))
    print("results are similar for Log TF and Python?", outputs_match)