Example #1
def likelihood(node,
               data,
               dtype=np.float64,
               node_likelihood=_node_likelihood,
               lls_matrix=None,
               debug=False):
    assert len(data.shape) == 2, "data must be 2D, found: {}".format(
        data.shape)

    all_results = {}

    vf = None
    if debug:

        def val_funct(node, ll):
            assert ll.shape == (
                data.shape[0],
                1), "node %s result has to match dimensions (N,1)" % (node.id)
            assert not np.all(np.isnan(ll)), "ll is nan %s " % (node.id)

        vf = val_funct

    result = eval_spn_bottom_up(node,
                                node_likelihood,
                                all_results=all_results,
                                input_vals=data,
                                after_eval_function=vf,
                                debug=debug,
                                dtype=dtype)

    if lls_matrix is not None:
        for n, ll in all_results.items():
            lls_matrix[:, n.id] = ll[:, 0]

    return result
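
A minimal usage sketch for the function above, assuming an SPFlow-style network `spn` built elsewhere; `get_number_of_nodes` is SPFlow's node counter and the toy data is illustrative.

import numpy as np
from spn.structure.Base import get_number_of_nodes  # assumed SPFlow helper

data = np.array([[0.5, 1.2],
                 [1.0, -0.3]])                         # one row per instance
lls = np.zeros((data.shape[0], get_number_of_nodes(spn)))
root_ll = likelihood(spn, data, lls_matrix=lls)        # fills lls per node id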
Example #2
def Moment(spn, feature_scope, evidence_scope, evidence, node_moment=_node_moment, order=1):
    """Compute the moment:

        E[X_feature_scope | X_evidence_scope] given the spn and the evidence data

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the moment from
    evidence_scope -- set() of integers, the scope of the evidence features
    evidence -- numpy 2d array of the evidence data
    """
    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)

    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    # copy so the passed-in defaults are not mutated in place
    node_moments = dict(node_moment)
    node_moments.update({Sum: sum_moment, Product: prod_moment})

    if evidence is None:
        moment = eval_spn_bottom_up(marg_spn, node_moments, order=order)
        return moment

    # with evidence we would need a conditional moment, which is not supported here
    raise NotImplementedError('Please use a conditional SPN to calculate conditional moments')
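
A hedged usage sketch: with no evidence the function computes unconditional moments, so a feature's variance can be assembled from orders 1 and 2 (`spn` is assumed to be a valid SPFlow network).

mean = Moment(spn, feature_scope={0}, evidence_scope=None, evidence=None, order=1)
second = Moment(spn, feature_scope={0}, evidence_scope=None, evidence=None, order=2)
variance = second - mean ** 2   # Var[X] = E[X^2] - E[X]^2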
Example #3
def likelihood(node, data, dtype=np.float64, node_likelihood=_node_likelihood, lls_matrix=None, debug=False, **kwargs):
    all_results = {}

    if debug:
        assert len(data.shape) == 2, "data must be 2D, found: {}".format(data.shape)
        original_node_likelihood = node_likelihood

        def exec_funct(node, *args, **kwargs):
            assert node is not None, "node is nan "
            funct = original_node_likelihood[type(node)]
            ll = funct(node, *args, **kwargs)
            assert ll.shape == (data.shape[0], 1), "node %s result has to match dimensions (N,1)" % node.id
            assert not np.any(np.isnan(ll)), "ll is nan %s " % node.id
            return ll

        node_likelihood = {k: exec_funct for k in node_likelihood.keys()}

    result = eval_spn_bottom_up(node, node_likelihood, all_results=all_results, debug=debug, dtype=dtype, data=data,
                                **kwargs)

    if lls_matrix is not None:
        for n, ll in all_results.items():
            lls_matrix[:, n.id] = ll[:, 0]

    return result
Example #4
def spn_to_sympy(spn, node_to_sympy=_node_to_sympy, log=False):
    input_vars = sp.symbols("x:%s" % len(spn.scope))

    sympy_ecc = eval_spn_bottom_up(spn,
                                   node_to_sympy,
                                   input_vars=input_vars,
                                   log=log)

    return sympy_ecc
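
A sketch of inspecting the resulting expression on a toy mixture, assuming SPFlow's operator-overloading constructors for leaves and sums.

import sympy as sp
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

spn = 0.3 * Gaussian(mean=0.0, stdev=1.0, scope=0) \
      + 0.7 * Gaussian(mean=2.0, stdev=0.5, scope=0)
assign_ids(spn)
rebuild_scopes_bottom_up(spn)

expr = spn_to_sympy(spn)
print(sp.simplify(expr))   # symbolic mixture density in x0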
Example #5
def condition(spn, evidence):
    # compute the NaN mask once instead of once per scope index
    missing = np.isnan(evidence)
    scope = {i for i in range(len(spn.scope)) if not missing[0][i]}
    node_conditions = {
        type(leaf): leaf_condition
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_conditions.update({Sum: sum_condition, Product: prod_condition})

    new_root, val = eval_spn_bottom_up(spn,
                                       node_conditions,
                                       input_vals=evidence,
                                       scope=scope)
    assign_ids(new_root)
    return Prune(new_root)
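
Usage sketch: np.nan marks the variables that stay free, concrete values the conditioned ones (a two-variable `spn` is assumed).

import numpy as np

evidence = np.array([[np.nan, 0.7]])     # keep X0 free, condition on X1 = 0.7
conditioned_spn = condition(spn, evidence)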
Example #6
def spn_to_tf_graph(node,
                    data,
                    node_tf_graph=_node_log_tf_graph,
                    log_space=True):
    tf.reset_default_graph()
    # data is a placeholder, with shape same as numpy data
    data_placeholder = tf.placeholder(data.dtype, (None, data.shape[1]))
    variable_dict = {}
    tf_graph = eval_spn_bottom_up(node,
                                  node_tf_graph,
                                  input_vals=data_placeholder,
                                  log_space=log_space,
                                  variable_dict=variable_dict,
                                  dtype=data.dtype)
    return tf_graph, data_placeholder, variable_dict
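
A run sketch for the compiled graph; the example targets TensorFlow 1.x (tf.placeholder and tf.Session were removed in TF 2), and `spn` and `data` are assumed from the surrounding context.

import numpy as np
import tensorflow as tf

tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn, data)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    log_ll = sess.run(tf_graph, feed_dict={data_placeholder: data})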
Example #7
def gradient(spn, evidence):
    """
    Computes a forward propagated gradient through the spn. This function
    currently assumes a tree structured SPN!

    :param spn: the (tree-structured) SPN to propagate the gradient through
    :param evidence: numpy 2d array of evidence data, one row per instance
    :return: the forward-propagated gradients evaluated at the evidence
    """
    # copy the defaults so the module-level dict is not mutated in place
    node_gradients = dict(_node_gradients)
    node_gradients[Sum] = sum_gradient_forward
    node_gradients[Product] = prod_gradient_forward

    gradients = eval_spn_bottom_up(spn, node_gradients, input_vals=evidence)
    return gradients
Example #8
def group_by_combinations(spn, ds_context, feature_scope, ranges, node_distinct_vals=None, node_likelihoods=None):
    """
    Computes the distinct value combinations for features given the range conditions.
    """
    evidence_scope = set([i for i, r in enumerate(ranges[0]) if r is not None])
    evidence = ranges

    # make feature scope sorted
    feature_scope_unsorted = copy.copy(feature_scope)
    feature_scope.sort()
    # add range conditions to feature scope (makes checking with bloom filters easier)
    feature_scope = list(set(feature_scope)
                         .union(evidence_scope.intersection(np.where(ds_context.no_unique_values <= 1200)[0])))
    feature_scope.sort()
    inverted_order = [feature_scope.index(scope) for scope in feature_scope_unsorted]

    assert not (len(evidence_scope) > 0 and evidence is None)

    relevant_scope = set()
    relevant_scope.update(evidence_scope)
    relevant_scope.update(feature_scope)
    marg_spn = marginalize(spn, relevant_scope)

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):

        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_distinct_vals:
                vals = node_distinct_vals[t_node](node, evidence)
                return vals
            else:
                raise Exception('Node type unknown: ' + str(t_node))

        return likelihood(node, evidence, node_likelihood=node_likelihoods)

    node_expectations = {type(leaf): leaf_expectation for leaf in get_nodes_by_type(marg_spn, Leaf)}
    node_expectations.update({Sum: sum_group_by, Product: prod_group_by})

    result = eval_spn_bottom_up(marg_spn, node_expectations, all_results={}, data=evidence, dtype=np.float64)
    if feature_scope_unsorted == feature_scope:
        return result
    scope, grouped_tuples = result
    return feature_scope_unsorted, set(
        [tuple(group_tuple[i] for i in inverted_order) for group_tuple in grouped_tuples])
Example #9
def Moment(spn,
           feature_scope=None,
           node_moment=_node_moment,
           node_likelihoods=_node_likelihood,
           order=1):
    """
    Computes moments from an spn
    :param spn: a valid spn
    :param feature_scope: optional list of features on which to compute the moments
    :param node_moment: optional dict mapping node types to moment functions
    :param node_likelihoods: optional dict mapping node types to likelihood functions
    :param order: the order of the moment to compute
    :return: an np array of computed moments
    """

    if feature_scope is None:
        feature_scope = spn.scope
    feature_scope = list(feature_scope)

    assert len(feature_scope) == len(
        set(feature_scope)), "Found double entries in feature list"

    marg_spn = marginalize(spn, feature_scope)

    node_moments = {Sum: sum_moment, Product: prod_moment}

    for node in get_node_types(marg_spn, Leaf):
        try:
            moment = node_moment[node]
            node_ll = node_likelihoods[node]
        except KeyError:
            raise AssertionError(
                "Node type {} does not have associated moment and likelihoods".
                format(node))
        node_moments[node] = leaf_moment(moment, node_ll)

    results = np.full((1, max(spn.scope) + 1), np.nan)

    moment = eval_spn_bottom_up(marg_spn,
                                node_moments,
                                order=order,
                                result_array=results)
    return moment[:, feature_scope]
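
Sketch: the first two moments over the full scope give per-feature variances via Var[X] = E[X^2] - E[X]^2 (`spn` is assumed to be a valid SPFlow network).

first = Moment(spn, order=1)    # shape (1, len(feature_scope))
second = Moment(spn, order=2)
variance = second - first ** 2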
Example #10
def spn_to_tf_graph(node,
                    data,
                    batch_size=None,
                    node_tf_graph=_node_log_tf_graph,
                    log_space=True,
                    dtype=None):
    tf.reset_default_graph()
    if not dtype:
        dtype = data.dtype
    # data is a placeholder, with shape same as numpy data
    data_placeholder = tf.placeholder(dtype, (batch_size, data.shape[1]))
    variable_dict = {}
    tf_graph = eval_spn_bottom_up(node,
                                  node_tf_graph,
                                  data_placeholder=data_placeholder,
                                  log_space=log_space,
                                  variable_dict=variable_dict,
                                  dtype=dtype)
    return tf_graph, data_placeholder, variable_dict
Example #11
def likelihood(node,
               data,
               dtype=np.float64,
               node_likelihood=_node_likelihood,
               lls_matrix=None,
               debug=False,
               bmarg=None,
               ibm=None):
    assert len(data.shape) == 2, "data must be 2D, found: {}".format(
        data.shape)

    all_results = {}

    if debug:
        node_likelihood_with_validation = {}
        for k, funct in node_likelihood.items():

            # bind funct via a default argument; a plain closure would see
            # only the last value of the loop variable after the loop ends
            def exec_funct(node, children, data=None, dtype=np.float64,
                           funct=funct):
                ll = funct(node, children, data=data, dtype=dtype)
                assert ll.shape == (
                    data.shape[0], 1
                ), "node %s result has to match dimensions (N,1)" % (node.id)
                assert not np.all(np.isnan(ll)), "ll is nan %s " % (node.id)
                return ll

            node_likelihood_with_validation[k] = exec_funct

        node_likelihood = node_likelihood_with_validation

    result = eval_spn_bottom_up(node,
                                node_likelihood,
                                all_results=all_results,
                                debug=debug,
                                dtype=dtype,
                                data=data,
                                bmarg=bmarg,
                                ibm=ibm)

    if lls_matrix is not None:
        for n, ll in all_results.items():
            lls_matrix[:, n.id] = ll[:, 0]

    return result
Example #12
def joined_means(spn):
    """Compute the joined mean:

        E[XY]

    TODO: Currently, only unconditional correlatios are implemented

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    """

    node_functions = {
        type(leaf): node_correlation
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_functions.update({Sum: sum_correlation, Product: prod_correlation})

    expectation = eval_spn_bottom_up(spn,
                                     node_functions,
                                     full_scope=len(spn.scope))
    return expectation
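
Sketch: if the returned joint-mean table is the full matrix of E[X_i X_j] (an assumption here), a covariance matrix follows from Cov(X_i, X_j) = E[X_i X_j] - E[X_i] E[X_j], with per-feature means from a Moment() routine like Example #9's.

import numpy as np

exy = joined_means(spn)                  # assumed: full matrix of E[X_i X_j]
means = Moment(spn, order=1).ravel()     # E[X_i], via Example #9's Moment
cov = exy - np.outer(means, means)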