Example #1
def Expectation(spn,
                feature_scope,
                evidence_scope,
                evidence,
                node_expectation=_node_expectation):
    """Compute the Expectation:

        E[X_feature_scope | X_evidence_scope] given the spn and the evidence data

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the expectation from
    evidence_scope -- set() of integers, the scope of the evidence features
    evidence -- numpy 2d array of the evidence data
    """

    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)

    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_expectation:
                exps = np.zeros((data.shape[0], 1), dtype=dtype)
                exps[:] = node_expectation[t_node](node)
                return exps
            else:
                raise Exception('Node type unknown: ' + str(t_node))

        # leaves outside feature_scope contribute their likelihood on the evidence
        return likelihood(node, evidence)

    node_expectations = {
        type(leaf): leaf_expectation
        for leaf in get_nodes_by_type(marg_spn, Leaf)
    }
    node_expectations.update({Sum: sum_likelihood, Product: prod_likelihood})

    if evidence is None:
        # fake_evidence only fixes the output shape; its values are never read,
        # since every remaining leaf returns its unconditional expectation
        fake_evidence = np.zeros((1, len(spn.scope))).reshape(1, -1)
        expectation = likelihood(marg_spn,
                                 fake_evidence,
                                 node_likelihood=node_expectations)
        return expectation

    #if we have evidence, we want to compute the conditional expectation
    expectation = likelihood(marg_spn,
                             evidence,
                             node_likelihood=node_expectations)
    expectation = expectation / likelihood(
        marginalize(marg_spn, keep=evidence_scope), evidence)

    return expectation
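
# A minimal usage sketch, assuming Gaussian leaves and a node_expectation
# registry that maps Gaussian to a function returning the leaf's mean
# (both hypothetical here, since _node_expectation is not shown above):
import numpy as np
from spn.structure.Base import Product, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

g0 = Gaussian(mean=0.0, stdev=1.0, scope=0)
g1 = Gaussian(mean=5.0, stdev=1.0, scope=1)
toy_spn = Product(children=[g0, g1])
assign_ids(toy_spn)
rebuild_scopes_bottom_up(toy_spn)

exp_registry = {Gaussian: lambda node: node.mean}
# Unconditional expectation of feature 0 (no evidence), expected to be ~0.0:
e = Expectation(toy_spn, feature_scope={0}, evidence_scope=None,
                evidence=None, node_expectation=exp_registry)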
Example #2
def get_mutual_information_correlation(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)

    correlation_matrix = []

    for x in range(num_features):
        if x not in categoricals:
            correlation_matrix.append(np.full(num_features, np.nan))
        else:
            x_correlation = [np.nan] * num_features
            x_range = context.get_domains_by_scope([x])[0]
            spn_x = marginalize(spn, [x])
            query_x = np.array([[np.nan] * num_features] * len(x_range))
            query_x[:, x] = x_range
            for y in categoricals:
                if x == y:
                    x_correlation[x] = 1
                    continue
                spn_y = marginalize(spn, [y])
                spn_xy = marginalize(spn, [x, y])
                y_range = context.get_domains_by_scope([y])[0]
                query_y = np.array([[np.nan] * num_features] * len(y_range))
                query_y[:, y] = y_range
                query_xy = np.array([[np.nan] * num_features] *
                                    (len(x_range) * len(y_range)))
                xy = np.mgrid[x_range[0]:x_range[-1]:len(x_range) * 1j,
                              y_range[0]:y_range[-1]:len(y_range) * 1j]
                xy = xy.reshape(2, -1)
                query_xy[:, x] = xy[0, :]
                query_xy[:, y] = xy[1, :]
                results_xy = likelihood(spn_xy, query_xy)
                results_xy = results_xy.reshape(len(x_range), len(y_range))
                results_x = likelihood(spn_x, query_x)
                results_y = likelihood(spn_y, query_y)
                xx, yy = np.mgrid[0:len(x_range) - 1:len(x_range) * 1j,
                                  0:len(y_range) - 1:len(y_range) * 1j]
                xx = xx.astype(int)
                yy = yy.astype(int)

                grid_results_x = results_x[xx]
                grid_results_y = results_y[yy]
                grid_results_xy = results_xy

                log = np.log(
                    grid_results_xy /
                    (np.multiply(grid_results_x, grid_results_y).squeeze()))
                prod = np.prod(np.array([log, grid_results_xy]), axis=0)

                log_x = np.log(results_x)
                log_y = np.log(results_y)

                entropy_x = -1 * np.sum(np.multiply(log_x, results_x))
                entropy_y = -1 * np.sum(np.multiply(log_y, results_y))

                x_correlation[y] = (np.sum(prod) /
                                    np.sqrt(entropy_x * entropy_y))
            correlation_matrix.append(np.array(x_correlation))
    return np.array(correlation_matrix)
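
# A hedged usage sketch: learn an SPN over two correlated categorical
# features on synthetic data and read off their normalized mutual
# information (assumes get_categoricals, used above, comes from the same
# module; learn_parametric and Context are used the same way in the
# examples below):
import numpy as np
from spn.structure.Base import Context
from spn.structure.leaves.parametric.Parametric import Categorical
from spn.algorithms.LearningWrappers import learn_parametric

rng = np.random.RandomState(0)
col0 = rng.randint(0, 2, (500, 1))
col1 = (col0 + rng.randint(0, 2, (500, 1))) % 2  # correlated with col0
train_data = np.hstack([col0, col1]).astype(float)

ds_context = Context(parametric_types=[Categorical, Categorical]).add_domains(train_data)
spn_mi = learn_parametric(train_data, ds_context, min_instances_slice=50)

corr = get_mutual_information_correlation(spn_mi, ds_context)  # 2x2 matrix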
Example #3
def to_str():
    spn = create_SPN()
    spn_marg = marginalize()

    from spn.io.Text import spn_to_str_equation

    print(spn_to_str_equation(spn))
    print(spn_to_str_equation(spn_marg))

    spn = create_SPN2()
    spn_marg = marginalize()

    print(spn_to_str_equation(spn))
    print(spn_to_str_equation(spn_marg))
Example #4
def valid():
    spn = create_SPN()
    spn_marg = marginalize()
    from spn.algorithms.Validity import is_valid

    print(is_valid(spn))
    print(is_valid(spn_marg))
Example #5
def Moment(spn, feature_scope, evidence_scope, evidence, node_moment=_node_moment, order=1):
    """Compute the moment:

        E[X_feature_scope | X_evidence_scope] given the spn and the evidence data

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the moment from
    evidence_scope -- set() of integers, the scope of the evidence features
    evidence -- numpy 2d array of the evidence data
    """
    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)

    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    node_moments = dict(_node_moment)  # copy, so the module-level default is not mutated
    node_moments.update({Sum: sum_moment, Product: prod_moment})

    if evidence is None:
        # fake_evidence is a placeholder; the bottom-up pass never reads it
        fake_evidence = np.zeros((1, len(spn.scope))).reshape(1, -1)
        moment = eval_spn_bottom_up(marg_spn, node_moments, order=order)
        return moment

    # with evidence we would need a conditional moment, which is not supported here
    raise NotImplementedError('Please use a conditional SPN to calculate conditional moments')
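
# A minimal sketch of the unconditional path (the only one implemented),
# assuming `spn` is a valid SPN and _node_moment covers its leaf types:
mean_of_x0 = Moment(spn, feature_scope={0}, evidence_scope=None,
                    evidence=None, order=1)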
Example #6
def plot_density(spn, data):
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib import cm
    from matplotlib.colors import PowerNorm

    x_max = data[:, 0].max()
    x_min = data[:, 0].min()
    y_max = data[:, 1].max()
    y_min = data[:, 1].min()

    nbinsx = int(x_max - x_min)
    nbinsy = int(y_max - y_min)
    xi, yi = np.mgrid[x_min:x_max:nbinsx * 1j, y_min:y_max:nbinsy * 1j]

    spn_input = np.vstack([xi.flatten(), yi.flatten()]).T

    marg_spn = marginalize(spn, set([0, 1]))

    zill = likelihood(marg_spn, spn_input)

    z = zill.reshape(xi.shape)

    # Make the plot
    # plt.pcolormesh(xi, yi, z)

    plt.imshow(z + 1,
               extent=(x_min, x_max, y_min, y_max),
               cmap=cm.hot,
               norm=PowerNorm(gamma=1. / 5.))
    # plt.pcolormesh(xi, yi, z)
    plt.colorbar()
    plt.show()
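
# A usage sketch on synthetic 2-D data; the SPN is learned with
# learn_parametric as in the other examples (all names here are
# illustrative):
import numpy as np
from spn.structure.Base import Context
from spn.structure.leaves.parametric.Parametric import Gaussian
from spn.algorithms.LearningWrappers import learn_parametric

rng = np.random.RandomState(0)
data = np.vstack([rng.normal(0, 1.5, (500, 2)),
                  rng.normal(8, 1.5, (500, 2))])
ds_context = Context(parametric_types=[Gaussian, Gaussian]).add_domains(data)
spn2d = learn_parametric(data, ds_context, min_instances_slice=100)
plot_density(spn2d, data)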
Example #7
def marginalize():
    spn = create_SPN()

    from spn.algorithms.Marginalization import marginalize

    spn_marg = marginalize(spn, [1, 2])

    return spn_marg
Example #8
def plot():
    spn = create_SPN()
    spn_marg = marginalize()

    from spn.io.Graphics import plot_spn

    plot_spn(spn, "basicspn.png")
    plot_spn(spn_marg, "marginalspn.png")
Example #9
def categorical_nodes_description(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)
    total_analysis = {}
    for cat in categoricals:
        marg_total = marginalize(spn, [cat])
        categorical_probabilities = []
        for i, n in enumerate(spn.children):
            node_weight = np.log(spn.weights[i])
            node_probabilities = []
            for cat_instance in context.get_domains_by_scope([cat])[0]:
                marg = marginalize(n, [cat])
                query = np.zeros((1, num_features))
                query[:, :] = np.nan
                query[:, cat] = cat_instance
                proba = np.exp(
                    log_likelihood(marg, query) + node_weight -
                    log_likelihood(marg_total, query)).reshape(-1)
                node_probabilities.append(proba)
            categorical_probabilities.append(np.array(node_probabilities))
        total_analysis[cat] = np.sum(np.array(categorical_probabilities),
                                     axis=2)

    node_categoricals = {}
    for cat in categoricals:
        node_categoricals[cat] = {}
        node_categoricals[cat]['contrib'] = []
        node_categoricals[cat]['explained'] = []
        for cat_instance in [
                int(c) for c in context.get_domains_by_scope([cat])[0]
        ]:
            probs = total_analysis[cat]
            # TODO: That threshold needs some evidence or theoretical grounding
            contrib_nodes = np.where(probs[:, cat_instance] /
                                     (np.sum(probs, axis=1)) > 0.4)
            explained_probs = np.sum(probs[contrib_nodes], axis=0)
            node_categoricals[cat]['contrib'].append(contrib_nodes)
            node_categoricals[cat]['explained'].append(explained_probs)
    return node_categoricals, total_analysis
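
# Sketch: with a Sum-rooted SPN and a Context carrying the categorical
# domains (both assumed here), summarize which children explain each
# categorical value:
node_cats, totals = categorical_nodes_description(spn, ds_context)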
Example #10
File: Moments.py Project: hedgefair/SPFlow
def Moment(spn,
           feature_scope=None,
           node_moment=_node_moment,
           node_likelihoods=_node_likelihood,
           order=1):
    """
    Computes moments from an spn
    :param spn: a valid spn
    :param feature_scope: optional list of features on which to compute the moments
    :param node_moment: optional list of node moment functions
    :param node_likelihoods: optional list of node likelihood functions
    :param order: the order of the moment to compute
    :return: an np array of computed moments
    """

    if feature_scope is None:
        feature_scope = spn.scope
    feature_scope = list(feature_scope)

    assert len(feature_scope) == len(
        set(feature_scope)), "Found double entries in feature list"

    marg_spn = marginalize(spn, feature_scope)

    node_moments = {Sum: sum_moment, Product: prod_moment}

    for node in get_node_types(marg_spn, Leaf):
        try:
            moment = node_moment[node]
            node_ll = node_likelihoods[node]
        except KeyError:
            raise AssertionError(
                "Node type {} does not have an associated moment and likelihood".
                format(node))
        node_moments[node] = leaf_moment(moment, node_ll)

    results = np.full((1, max(spn.scope) + 1), np.nan)

    moment = eval_spn_bottom_up(marg_spn,
                                node_moments,
                                order=order,
                                result_array=results)
    return moment[:, feature_scope]
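
# A common use of this function: the variance of every feature from the
# first two moments, Var[X] = E[X^2] - E[X]^2 (sketch; assumes the default
# registries cover the SPN's leaf types):
m1 = Moment(spn, order=1)
m2 = Moment(spn, order=2)
variance = m2 - m1 ** 2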
Example #11
def inference():
    import numpy as np

    spn = create_SPN()
    spn_marg = marginalize()

    test_data = np.array([1.0, 0.0, 1.0]).reshape(-1, 3)

    from spn.algorithms.Inference import log_likelihood

    ll = log_likelihood(spn, test_data)
    print("python ll", ll, np.exp(ll))

    llm = log_likelihood(spn_marg, test_data)
    print("python ll spn_marg", llm, np.exp(llm))

    test_data2 = np.array([np.nan, 0.0, 1.0]).reshape(-1, 3)
    llom = log_likelihood(spn, test_data2)
    print("python ll spn with nan", llom, np.exp(llom))
Example #12
    plt.subplots(figsize=(5, 5))
    plt.scatter(correct_preds[:, 0], correct_preds[:, 1], label="Correctly predicted", c="darkgray", s=10)
    plt.scatter(wrong_preds[:, 0], wrong_preds[:, 1], label="Wrongly predicted", c="darkred", s=10)
    plt.xlabel('x')
    plt.ylabel('y')
    axes = plt.gca()
    axes.set_xlim([0, 128])
    axes.set_ylim([0, 128])
    plt.legend()
    plt.axis('equal')
    plt.title('Test Data Prediction')
    plt.savefig(plot_path + "/test-pred.pdf")
    plt.show()

    # Plot decision boundaries
    spn = marginalize(spn, [0, 1])
    likelihoods = likelihood(spn, test_data).reshape((num_test_samples_sqrt, num_test_samples_sqrt)) * 100000
    plot_decision_boundaries(likelihoods, pred_test_labels, num_test_samples_sqrt, plot_path)

    # Convert the model
    spn_tensor, data_placeholder, variable_dict = convert_spn_to_tf_graph(
        spn,
        test_data,
        batch_size=batch_size,
        dtype=np.float32
    )

    # Export the model
    root = tf.identity(spn_tensor, name="Root")
    export_dir = export_model(root_dir=output_path, export_dir="/spns/tf_" + spn_name, force_overwrite=True)
    print("Successfully exported SPN tensor to \"%s\"." % export_dir)
Example #13
# We will start with the Sum-Product Network structure from the
# :ref:`composing_spn_object_hierarchy` example.

p0 = Product(children=[Categorical(p=[0.3, 0.7], scope=1), Categorical(p=[0.4, 0.6], scope=2)])
p1 = Product(children=[Categorical(p=[0.5, 0.5], scope=1), Categorical(p=[0.6, 0.4], scope=2)])
s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
p3 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), Categorical(p=[0.3, 0.7], scope=1)])
p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

assign_ids(spn)
rebuild_scopes_bottom_up(spn)

ax = draw_spn(spn)

# %%
# If we want to marginalize this SPN by summing out all other variables
# to leave variables 1 and 2, we can do this as follows:

from spn.algorithms.Marginalization import marginalize

spn_marg = marginalize(spn, [1, 2])

# %%
# This marginalizes all the variables *not* in :math:`[1, 2]` and creates a
# *new* structure that knows nothing about the previous one, nor about
# variable 0.

draw_spn(spn_marg)
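
# %%
# As a sanity check, evaluating the full SPN with variable 0 unobserved
# (encoded as NaN) should give the same likelihood as the marginal SPN:

import numpy as np
from spn.algorithms.Inference import log_likelihood

test = np.array([[np.nan, 0.0, 1.0]])
print(log_likelihood(spn, test), log_likelihood(spn_marg, test))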
Example #14
 def _marginalizeout(self, keep, remove):
     self._marginalized = self._marginalized.union(remove)
     self._spn = marginalize(
         self._spn, [self._initial_names_to_index[x] for x in keep])
     return self._unbound_updater,
Example #15
    # Plot likelihoods
    plot_likelihoods(spn=spn,
                     classes=np.sort(np.unique(test_labels)),
                     res=plot_res,
                     plot_pdf=pdf)

    # Plot likelihoods with test sample
    plot_likelihoods(spn=spn,
                     classes=np.sort(np.unique(test_labels)),
                     res=plot_res,
                     plot_pdf=pdf,
                     test_sample=test_samples[t])

    # Marginalize SPN
    spn_marg = marginalize(spn, [0, 1])

    # Plot decision boundaries
    plot_decision_boundaries(spn=spn,
                             marg_spn=spn_marg,
                             classes=np.sort(np.unique(test_labels)),
                             res=plot_res,
                             plot_pdf=pdf)

    # Plot decision boundaries with test sample
    plot_decision_boundaries(spn=spn,
                             marg_spn=spn_marg,
                             classes=np.sort(np.unique(test_labels)),
                             res=plot_res,
                             plot_pdf=pdf,
                             test_sample=test_samples[t])
Example #16
if __name__ == "__main__":
    ds_name, data, train, test, words, statistical_type, distribution_family = get_RL_data()

    ds_context = Context()
    ds_context.statistical_type = statistical_type
    ds_context.distribution_family = distribution_family
    add_domains(data, ds_context)

    spn = learn(train, ds_context, min_instances_slice=100, linear=True)

    print(get_structure_stats(spn))

    # print(to_str_ref_graph(spn, histogram_to_str))

    spn_marg = marginalize(spn, set([0]))

    # print(to_str_equation(spn_marg, histogram_to_str))


    def eval_conditional(data):
        return conditional_log_likelihood(spn, spn_marg, data,
                                          histogram_likelihood)

    print(eval_conditional(train[0, :].reshape(1, -1)))

    import dill

    dill.settings["recurse"] = True

    g = dill.dump(eval_conditional, open("conditional.bin", "w+b"))
Example #17
        metadata = "\nSeed: %d\n" % seed + \
                   "Test sample ID: %d\n" % t + \
                   "Minimum instances per slice: %d\n" % min_instances_slice + \
                   "Alpha (threshold): %f\n" % threshold + \
                   "Type of loss: %s\n" % type_of_loss + \
                   "Weights ignored: %r\n" % ignore_weights + \
                   "Means ignored: %r\n" % ignore_means + \
                   "Variances ignored: %r\n" % ignore_variances + \
                   "Lissa parameters:\n" + \
                   "   - Scale: %f\n" % scale + \
                   "   - Damping: %.1e\n" % damping + \
                   "   - Recursion depth: %d\n" % recursion_depth
        stats_file.write(metadata)

        # Marginalize SPN
        spn_marg = marginalize(spn, list(range(res**2)))

        # SPN sanity check
        ll = log_likelihood(spn, np.array([test_data[t]]))
        ll_marg = log_likelihood(spn_marg, np.array([test_data[t]]))
        print("Let t be train sample no. %d, which is:" % t)
        print(test_data[t])
        print("Log-likelihood of t:", ll)
        print("Likelihood of t:", np.exp(ll))
        print("Marginal log-likelihood of t:", ll_marg)
        print("Marginal likelihood of t:", np.exp(ll_marg))

        # Convert the model
        spn_tensor, _, _ = convert_spn_to_tf_graph(spn,
                                                   test_data,
                                                   batch_size=batch_size,
Example #18
    features = ["birthyear", "gender", "party"]
    co_keys = [
        "corona", "covid", "pandem", "vaccin", "Corona", "Covid", "Pandem",
        "Vaccin", "impf", "Impf", "Maske", "mask", "Lockdown", "infiz",
        "Infektio"
    ]
    fl_keys = [
        "Migrat", "Asyl", "Flücht", "Schlepper", "Seenot", "Einwanderung",
        "asyl", "flücht", "schlepp", "seenot", "einwander"
    ]
    is_keys = ["Islamis", "islamis", "Terror", "terror"]
    keywords = [co_keys]
    train_data = get_features(memberlist, features, tweet_list, keywords)
    spn = build_spn(train_data)
    print(cross_validate(train_data, 5, label=2))
    #print(sample_instances(spn, np.array([0, np.nan] * 50).reshape(-1, 2), RandomState(123)))
    # tweet_scraping(tweet_list, api)
    ex = np.array([1976., 1., 4., 0.3]).reshape(-1, 4)
    ex2 = np.array([4., 0.2]).reshape(-1, 2)
    ds_context = Context(
        parametric_types=[Gaussian, Categorical, Categorical, Gaussian
                          ]).add_domains(train_data)
    spn2 = learn_parametric(train_data, ds_context, min_instances_slice=20)

    spn_marg = marginalize(spn, [2, 3])
    ll = log_likelihood(spn, ex)
    ll2 = log_likelihood(spn2, ex)
    llm = log_likelihood(spn_marg, ex)
    print(ll, np.exp(ll))
    print(ll2, np.exp(ll2))
    print(llm, np.exp(llm))
Example #19
from spn.io.Graphics import plot_spn
from spn.io.Text import to_JSON
from spn.algorithms.Marginalization import marginalize
from spn.structure.leaves.parametric.Parametric import Categorical
from spn.structure.Base import Sum, Product
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up

spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0) *
             (0.3 * (Categorical(p=[0.3, 0.7], scope=1) *
                     Categorical(p=[0.4, 0.6], scope=2))
            + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) *
                     Categorical(p=[0.6, 0.4], scope=2)))) \
    + 0.6 * (Categorical(p=[0.2, 0.8], scope=0) *
             Categorical(p=[0.3, 0.7], scope=1) *
             Categorical(p=[0.4, 0.6], scope=2))
spn = marginalize(spn, [1,2])
print(to_JSON(spn) + "\n\n\n")
plot_spn(spn, 'spn1.png')

spn2 = Product(children=[Categorical(p=[0.5, 0.5], scope=0), Categorical(p=[0.2, 0.8], scope=2)])
print(to_JSON(spn2))
assign_ids(spn2)
rebuild_scopes_bottom_up(spn2)
plot_spn(spn2, 'basicspn.png')
print(to_JSON(spn2))

def getSpn1():
    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0) *
             (0.3 * (Categorical(p=[0.3, 0.7], scope=1) *
                     Categorical(p=[0.4, 0.6], scope=2))
            + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) *