def Expectation(spn, feature_scope, evidence_scope, evidence, node_expectation=_node_expectation):
    """Compute the Expectation:

        E[X_feature_scope | X_evidence_scope]

    given the spn and the evidence data

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the expectation from
    evidence_scope -- set() of integers, the scope of the evidence features
    evidence -- numpy 2d array of the evidence data
    """
    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)
    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_expectation:
                exps = np.zeros((data.shape[0], 1), dtype=dtype)
                exps[:] = node_expectation[t_node](node)
                return exps
            raise Exception('Node type unknown: ' + str(t_node))

        return likelihood(node, evidence)

    node_expectations = {type(leaf): leaf_expectation for leaf in get_nodes_by_type(marg_spn, Leaf)}
    node_expectations.update({Sum: sum_likelihood, Product: prod_likelihood})

    if evidence is None:
        # the values in fake_evidence are never read; only its shape matters
        fake_evidence = np.zeros((1, len(spn.scope))).reshape(1, -1)
        expectation = likelihood(marg_spn, fake_evidence, node_likelihood=node_expectations)
        return expectation

    # if we have evidence, we want to compute the conditional expectation
    expectation = likelihood(marg_spn, evidence, node_likelihood=node_expectations)
    expectation = expectation / likelihood(marginalize(marg_spn, keep=evidence_scope), evidence)

    return expectation
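# Hedged usage sketch (not part of the module): conditional expectation
# E[X_0 | X_1 = 1.0] on a hand-built two-component Gaussian mixture.
# Assumes Gaussian leaves are covered by the default _node_expectation map.
import numpy as np
from spn.structure.Base import Sum, Product, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

toy_spn = Sum(weights=[0.5, 0.5],
              children=[Product(children=[Gaussian(mean=0.0, stdev=1.0, scope=0),
                                          Gaussian(mean=1.0, stdev=1.0, scope=1)]),
                        Product(children=[Gaussian(mean=5.0, stdev=1.0, scope=0),
                                          Gaussian(mean=6.0, stdev=1.0, scope=1)])])
assign_ids(toy_spn)
rebuild_scopes_bottom_up(toy_spn)

evidence = np.array([[np.nan, 1.0]])  # X_1 observed at 1.0, X_0 queried
print(Expectation(toy_spn, feature_scope={0}, evidence_scope={1}, evidence=evidence))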
def get_mutual_information_correlation(spn, context):
    """Compute the normalized mutual information, I(X;Y) / sqrt(H(X) * H(Y)),
    between all pairs of categorical features; non-categorical rows are NaN."""
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)

    correlation_matrix = []
    for x in range(num_features):
        if x not in categoricals:
            correlation_matrix.append(np.full((num_features), np.nan))
        else:
            x_correlation = [np.nan] * num_features
            x_range = context.get_domains_by_scope([x])[0]
            spn_x = marginalize(spn, [x])
            query_x = np.array([[np.nan] * num_features] * len(x_range))
            query_x[:, x] = x_range

            for y in categoricals:
                if x == y:
                    x_correlation[x] = 1
                    continue
                spn_y = marginalize(spn, [y])
                spn_xy = marginalize(spn, [x, y])
                y_range = context.get_domains_by_scope([y])[0]
                query_y = np.array([[np.nan] * num_features] * len(y_range))
                query_y[:, y] = y_range
                # one query row per (x, y) domain combination
                query_xy = np.array([[np.nan] * num_features] * (len(x_range) * len(y_range)))

                xy = np.mgrid[x_range[0]:x_range[-1]:len(x_range) * 1j,
                              y_range[0]:y_range[-1]:len(y_range) * 1j]
                xy = xy.reshape(2, -1)
                query_xy[:, x] = xy[0, :]
                query_xy[:, y] = xy[1, :]

                results_xy = likelihood(spn_xy, query_xy)
                results_xy = results_xy.reshape(len(x_range), len(y_range))
                results_x = likelihood(spn_x, query_x)
                results_y = likelihood(spn_y, query_y)

                xx, yy = np.mgrid[0:len(x_range) - 1:len(x_range) * 1j,
                                  0:len(y_range) - 1:len(y_range) * 1j]
                xx = xx.astype(int)
                yy = yy.astype(int)

                grid_results_x = results_x[xx]
                grid_results_y = results_y[yy]
                grid_results_xy = results_xy

                log = np.log(grid_results_xy / (np.multiply(grid_results_x, grid_results_y).squeeze()))
                prod = np.prod(np.array([log, grid_results_xy]), axis=0)

                log_x = np.log(results_x)
                log_y = np.log(results_y)

                entropy_x = -1 * np.sum(np.multiply(log_x, results_x))
                entropy_y = -1 * np.sum(np.multiply(log_y, results_y))

                x_correlation[y] = np.sum(prod) / np.sqrt(entropy_x * entropy_y)

            correlation_matrix.append(np.array(x_correlation))

    return np.array(correlation_matrix)
def to_str():
    spn = create_SPN()
    spn_marg = marginalize()

    from spn.io.Text import spn_to_str_equation
    print(spn_to_str_equation(spn))
    print(spn_to_str_equation(spn_marg))

    spn = create_SPN2()
    spn_marg = marginalize()
    print(spn_to_str_equation(spn))
    print(spn_to_str_equation(spn_marg))
def valid():
    spn = create_SPN()
    spn_marg = marginalize()

    from spn.algorithms.Validity import is_valid
    print(is_valid(spn))
    print(is_valid(spn_marg))
def Moment(spn, feature_scope, evidence_scope, evidence, node_moment=_node_moment, order=1):
    """Compute the moment:

        E[X_feature_scope | X_evidence_scope]

    given the spn and the evidence data

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the moment from
    evidence_scope -- set() of integers, the scope of the evidence features
    evidence -- numpy 2d array of the evidence data
    """
    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)
    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    # copy the map so the module-level default dict is not mutated
    node_moments = dict(node_moment)
    node_moments.update({Sum: sum_moment, Product: prod_moment})

    if evidence is None:
        moment = eval_spn_bottom_up(marg_spn, node_moments, order=order)
        return moment

    # if we have evidence, we want to compute the conditional moment
    raise NotImplementedError('Please use a conditional SPN to calculate conditional moments')
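# Hedged usage sketch (not part of the module): unconditional first moment of
# a toy product of Gaussians; assumes the default _node_moment map covers
# Gaussian leaves.
import numpy as np
from spn.structure.Base import Product, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

toy = Product(children=[Gaussian(mean=2.0, stdev=1.0, scope=0),
                        Gaussian(mean=-1.0, stdev=0.5, scope=1)])
assign_ids(toy)
rebuild_scopes_bottom_up(toy)

# evidence=None takes the unconditional branch above
print(Moment(toy, feature_scope={0, 1}, evidence_scope=None, evidence=None, order=1))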
def plot_density(spn, data):
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib import cm
    from matplotlib.colors import PowerNorm

    x_max = data[:, 0].max()
    x_min = data[:, 0].min()
    y_max = data[:, 1].max()
    y_min = data[:, 1].min()

    nbinsx = int(x_max - x_min)
    nbinsy = int(y_max - y_min)
    xi, yi = np.mgrid[x_min:x_max:nbinsx * 1j, y_min:y_max:nbinsy * 1j]

    spn_input = np.vstack([xi.flatten(), yi.flatten()]).T

    marg_spn = marginalize(spn, set([0, 1]))

    zill = likelihood(marg_spn, spn_input)
    z = zill.reshape(xi.shape)

    # Make the plot
    # plt.pcolormesh(xi, yi, z)
    plt.imshow(z + 1,
               extent=(x_min, x_max, y_min, y_max),
               cmap=cm.hot,
               norm=PowerNorm(gamma=1. / 5.))
    # plt.pcolormesh(xi, yi, z)
    plt.colorbar()
    plt.show()
def marginalize():
    spn = create_SPN()

    from spn.algorithms.Marginalization import marginalize
    spn_marg = marginalize(spn, [1, 2])
    return spn_marg
def plot():
    spn = create_SPN()
    spn_marg = marginalize()

    from spn.io.Graphics import plot_spn
    plot_spn(spn, "basicspn.png")
    plot_spn(spn_marg, "marginalspn.png")
def categorical_nodes_description(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)
    total_analysis = {}
    for cat in categoricals:
        marg_total = marginalize(spn, [cat])
        categorical_probabilities = []
        for i, n in enumerate(spn.children):
            node_weight = np.log(spn.weights[i])
            node_probabilities = []
            for cat_instance in context.get_domains_by_scope([cat])[0]:
                marg = marginalize(n, [cat])
                query = np.zeros((1, num_features))
                query[:, :] = np.nan
                query[:, cat] = cat_instance
                proba = np.exp(
                    log_likelihood(marg, query) + node_weight -
                    log_likelihood(marg_total, query)).reshape(-1)
                node_probabilities.append(proba)
            categorical_probabilities.append(np.array(node_probabilities))
        total_analysis[cat] = np.sum(np.array(categorical_probabilities), axis=2)

    node_categoricals = {}
    for cat in categoricals:
        node_categoricals[cat] = {}
        node_categoricals[cat]['contrib'] = []
        node_categoricals[cat]['explained'] = []
        for cat_instance in [int(c) for c in context.get_domains_by_scope([cat])[0]]:
            probs = total_analysis[cat]
            # TODO: That threshold needs some evidence or theoretical grounding
            contrib_nodes = np.where(probs[:, cat_instance] / np.sum(probs, axis=1) > 0.4)
            explained_probs = np.sum(probs[contrib_nodes], axis=0)
            node_categoricals[cat]['contrib'].append(contrib_nodes)
            node_categoricals[cat]['explained'].append(explained_probs)

    return node_categoricals, total_analysis
def Moment(spn, feature_scope=None, node_moment=_node_moment, node_likelihoods=_node_likelihood, order=1):
    """
    Computes moments from an spn

    :param spn: a valid spn
    :param feature_scope: optional list of features on which to compute the moments
    :param node_moment: optional list of node moment functions
    :param node_likelihoods: optional list of node likelihood functions
    :param order: the order of the moment to compute
    :return: an np array of computed moments
    """
    if feature_scope is None:
        feature_scope = spn.scope

    feature_scope = list(feature_scope)
    assert len(feature_scope) == len(set(feature_scope)), "Found double entries in feature list"

    marg_spn = marginalize(spn, feature_scope)

    node_moments = {Sum: sum_moment, Product: prod_moment}

    for node in get_node_types(marg_spn, Leaf):
        try:
            moment = node_moment[node]
            node_ll = node_likelihoods[node]
        except KeyError:
            raise AssertionError(
                "Node type {} does not have associated moment and likelihoods".format(node))
        node_moments[node] = leaf_moment(moment, node_ll)

    results = np.full((1, max(spn.scope) + 1), np.nan)
    moment = eval_spn_bottom_up(marg_spn, node_moments, order=order, result_array=results)
    return moment[:, feature_scope]
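# Hedged usage sketch: restricting the moment computation to one feature and
# recovering the variance from the first two raw moments. Assumes `spn` is a
# valid SPN whose leaf types appear in the default moment/likelihood maps.
m1 = Moment(spn, feature_scope=[0], order=1)  # E[X_0]
m2 = Moment(spn, feature_scope=[0], order=2)  # E[X_0^2]
variance = m2 - m1 ** 2                       # Var[X_0] = E[X_0^2] - E[X_0]^2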
def inference():
    import numpy as np

    spn = create_SPN()
    spn_marg = marginalize()

    test_data = np.array([1.0, 0.0, 1.0]).reshape(-1, 3)

    from spn.algorithms.Inference import log_likelihood
    ll = log_likelihood(spn, test_data)
    print("python ll", ll, np.exp(ll))

    llm = log_likelihood(spn_marg, test_data)
    print("python ll spn_marg", llm, np.exp(llm))

    test_data2 = np.array([np.nan, 0.0, 1.0]).reshape(-1, 3)
    llom = log_likelihood(spn, test_data2)
    print("python ll spn with nan", llom, np.exp(llom))
plt.subplots(figsize=(5, 5))
plt.scatter(correct_preds[:, 0], correct_preds[:, 1],
            label="Correctly predicted", c="darkgray", s=10)
plt.scatter(wrong_preds[:, 0], wrong_preds[:, 1],
            label="Wrongly predicted", c="darkred", s=10)
plt.xlabel('x')
plt.ylabel('y')
axes = plt.gca()
axes.set_xlim([0, 128])
axes.set_ylim([0, 128])
plt.legend()
plt.axis('equal')
plt.title('Test Data Prediction')
plt.savefig(plot_path + "/test-pred.pdf")
plt.show()

# Plot decision boundaries
spn = marginalize(spn, [0, 1])
likelihoods = likelihood(spn, test_data).reshape(
    (num_test_samples_sqrt, num_test_samples_sqrt)) * 100000
plot_decision_boundaries(likelihoods, pred_test_labels, num_test_samples_sqrt, plot_path)

# Convert the model
spn_tensor, data_placeholder, variable_dict = convert_spn_to_tf_graph(
    spn, test_data, batch_size=batch_size, dtype=np.float32)

# Export the model
root = tf.identity(spn_tensor, name="Root")
export_dir = export_model(root_dir=output_path,
                          export_dir="/spns/tf_" + spn_name,
                          force_overwrite=True)
print("Successfully exported SPN tensor to \"%s\"." % export_dir)
# We will start with the Sum-Product Network structure from the
# :ref:`composing_spn_object_hierarchy` example.

p0 = Product(children=[Categorical(p=[0.3, 0.7], scope=1), Categorical(p=[0.4, 0.6], scope=2)])
p1 = Product(children=[Categorical(p=[0.5, 0.5], scope=1), Categorical(p=[0.6, 0.4], scope=2)])
s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
p3 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), Categorical(p=[0.3, 0.7], scope=1)])
p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

assign_ids(spn)
rebuild_scopes_bottom_up(spn)

ax = draw_spn(spn)

# %%
# If we want to marginalize this SPN by summing out all other variables
# to leave only variables 1 and 2, we can do so as follows:

from spn.algorithms.Marginalization import marginalize

spn_marg = marginalize(spn, [1, 2])

# %%
# This marginalizes all the variables *not* in :math:`[1, 2]`, and creates a
# *new* structure that knows nothing about the previous one nor about
# variable 0.

draw_spn(spn_marg)
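# %%
# As a quick sanity check (a sketch added here, not part of the original
# example), the structurally marginalized SPN should assign the same
# likelihood to an assignment of variables 1 and 2 as the full SPN queried
# with variable 0 left unobserved (NaN):

import numpy as np
from spn.algorithms.Inference import likelihood

query = np.array([[np.nan, 0.0, 1.0]])
print(likelihood(spn, query))       # variable 0 marginalized at query time
print(likelihood(spn_marg, query))  # variable 0 marginalized structurally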
def _marginalizeout(self, keep, remove):
    self._marginalized = self._marginalized.union(remove)
    self._spn = marginalize(self._spn, [self._initial_names_to_index[x] for x in keep])
    return self._unbound_updater,
# Plot likelihoods
plot_likelihoods(spn=spn,
                 classes=np.sort(np.unique(test_labels)),
                 res=plot_res,
                 plot_pdf=pdf)

# Plot likelihoods with test sample
plot_likelihoods(spn=spn,
                 classes=np.sort(np.unique(test_labels)),
                 res=plot_res,
                 plot_pdf=pdf,
                 test_sample=test_samples[t])

# Marginalize SPN
spn_marg = marginalize(spn, [0, 1])

# Plot decision boundaries
plot_decision_boundaries(spn=spn,
                         marg_spn=spn_marg,
                         classes=np.sort(np.unique(test_labels)),
                         res=plot_res,
                         plot_pdf=pdf)

# Plot decision boundaries with test sample
plot_decision_boundaries(spn=spn,
                         marg_spn=spn_marg,
                         classes=np.sort(np.unique(test_labels)),
                         res=plot_res,
                         plot_pdf=pdf,
                         test_sample=test_samples[t])
if __name__ == "__main__":
    ds_name, data, train, test, words, statistical_type, distribution_family = get_RL_data()

    ds_context = Context()
    ds_context.statistical_type = statistical_type
    ds_context.distribution_family = distribution_family
    add_domains(data, ds_context)

    spn = learn(train, ds_context, min_instances_slice=100, linear=True)
    print(get_structure_stats(spn))
    # print(to_str_ref_graph(spn, histogram_to_str))

    spn_marg = marginalize(spn, set([0]))
    # print(to_str_equation(spn_marg, histogram_to_str))

    def eval_conditional(data):
        return conditional_log_likelihood(spn, spn_marg, data, histogram_likelihood)

    print(eval_conditional(train[0, :].reshape(1, -1)))

    import dill
    dill.settings["recurse"] = True
    g = dill.dump(eval_conditional, open("conditional.bin", "w+b"))
metadata = "\nSeed: %d\n" % seed + \
           "Test sample ID: %d\n" % t + \
           "Minimum instances per slice: %d\n" % min_instances_slice + \
           "Alpha (threshold): %f\n" % threshold + \
           "Type of loss: %s\n" % type_of_loss + \
           "Weights ignored: %r\n" % ignore_weights + \
           "Means ignored: %r\n" % ignore_means + \
           "Variances ignored: %r\n" % ignore_variances + \
           "Lissa parameters:\n" + \
           "  - Scale: %f\n" % scale + \
           "  - Damping: %.1e\n" % damping + \
           "  - Recursion depth: %d\n" % recursion_depth

stats_file.write(metadata)

# Marginalize SPN
spn_marg = marginalize(spn, list(range(res ** 2)))

# SPN sanity check
ll = log_likelihood(spn, np.array([test_data[t]]))
ll_marg = log_likelihood(spn_marg, np.array([test_data[t]]))
print("Let t be test sample no. %d, which is:" % t)
print(test_data[t])
print("Log-likelihood of t:", ll)
print("Likelihood of t:", np.exp(ll))
print("Marginal log-likelihood of t:", ll_marg)
print("Marginal likelihood of t:", np.exp(ll_marg))

# Convert the model
spn_tensor, _, _ = convert_spn_to_tf_graph(spn,
                                           test_data,
                                           batch_size=batch_size,
features = ["birthyear", "gender", "party"]
co_keys = [
    "corona", "covid", "pandem", "vaccin", "Corona", "Covid", "Pandem",
    "Vaccin", "impf", "Impf", "Maske", "mask", "Lockdown", "infiz", "Infektio"
]
fl_keys = [
    "Migrat", "Asyl", "Flücht", "Schlepper", "Seenot", "Einwanderung",
    "asyl", "flücht", "schlepp", "seenot", "einwander"
]
is_keys = ["Islamis", "islamis", "Terror", "terror"]
keywords = [co_keys]

train_data = get_features(memberlist, features, tweet_list, keywords)
spn = build_spn(train_data)
print(cross_validate(train_data, 5, label=2))
# print(sample_instances(spn, np.array([0, np.nan] * 50).reshape(-1, 2), RandomState(123)))
# tweet_scraping(tweet_list, api)

ex = np.array([1976., 1., 4., 0.3]).reshape(-1, 4)
ex2 = np.array([4., 0.2]).reshape(-1, 2)

ds_context = Context(
    parametric_types=[Gaussian, Categorical, Categorical, Gaussian]).add_domains(train_data)
spn2 = learn_parametric(train_data, ds_context, min_instances_slice=20)

spn_marg = marginalize(spn, [2, 3])

ll = log_likelihood(spn, ex)
ll2 = log_likelihood(spn2, ex)
llm = log_likelihood(spn_marg, ex)
print(ll, np.exp(ll))
print(ll2, np.exp(ll2))
print(llm, np.exp(llm))
from spn.io.Graphics import plot_spn
from spn.io.Text import to_JSON
from spn.algorithms.Marginalization import marginalize
from spn.structure.leaves.parametric.Parametric import Categorical
from spn.structure.Base import Sum, Product
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up

spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0) *
             (0.3 * (Categorical(p=[0.3, 0.7], scope=1) *
                     Categorical(p=[0.4, 0.6], scope=2))
              + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) *
                       Categorical(p=[0.6, 0.4], scope=2)))) \
    + 0.6 * (Categorical(p=[0.2, 0.8], scope=0) *
             Categorical(p=[0.3, 0.7], scope=1) *
             Categorical(p=[0.4, 0.6], scope=2))

spn = marginalize(spn, [1, 2])

print(to_JSON(spn) + "\n\n\n")
plot_spn(spn, 'spn1.png')

spn2 = Product(children=[Categorical(p=[0.5, 0.5], scope=0),
                         Categorical(p=[0.2, 0.8], scope=2)])
print(to_JSON(spn2))

assign_ids(spn2)
rebuild_scopes_bottom_up(spn2)
plot_spn(spn2, 'basicspn.png')
print(to_JSON(spn2))


def getSpn1():
    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0) *
                 (0.3 * (Categorical(p=[0.3, 0.7], scope=1) *
                         Categorical(p=[0.4, 0.6], scope=2))
                  + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) *