Example #1
0
    def _compute_class_balance(self, class_balance=None, Y_dev=None):
        """Build the joint class-balance distribution over the Y variables.

        Preference order: an explicit class_balance vector (normalized to
        sum to 1), then empirical counts from Y_dev, then uniform.
        """
        y_names = [f'Y_{idx}' for idx in range(self.v)]
        y_cards = [2] * self.v

        if class_balance is not None:
            # Normalize the user-supplied balance so it forms a distribution.
            normalized = class_balance / sum(class_balance)
            return JointProbabilityDistribution(y_names, y_cards, normalized)

        if Y_dev is not None:
            # Estimate the balance empirically from the dev-set labels.
            choices = {name: (-1, 1) for name in y_names}
            label_vecs = sorted([[assignment[name] for name in y_names]
                                 for assignment in dict_product(choices)])
            tally = {tuple(vec): 0 for vec in label_vecs}
            for row in Y_dev:
                tally[tuple(row)] += 1
            frequencies = [float(tally[tuple(vec)]) / len(Y_dev)
                           for vec in label_vecs]
            return JointProbabilityDistribution(y_names, y_cards, frequencies)

        # No information given: fall back to a uniform joint distribution.
        total = 2**self.v
        return JointProbabilityDistribution(
            y_names, y_cards, [1. / total] * total)
Example #2
0
 def test_is_imap(self):
     # is_imap should accept a JointProbabilityDistribution built from this
     # table, but raise TypeError for a plain DiscreteFactor of the same data.
     probs = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
              0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
     names = ['diff', 'intel', 'grade']
     cards = [2, 3, 3]
     joint = JointProbabilityDistribution(names, cards, probs)
     factor = DiscreteFactor(names, cards, probs)
     self.assertTrue(self.G1.is_imap(joint))
     self.assertRaises(TypeError, self.G1.is_imap, factor)
Example #3
0
def jointDistribution(model: BayesianModel) -> JointProbabilityDistribution:
    '''Return the joint probability distribution over the entire network.

    Each CPD in the model is converted to a DiscreteFactor and the factors
    are multiplied together; the product's variables, cardinality, and
    values define the joint distribution.
    '''
    # Converting CPDs to DiscreteFactors mirrors the mini-example in the API
    # docs (imap() implementation); the CPDs expose variables, values, and
    # cardinality the same way, so the conversion is not strictly required.
    product: DiscreteFactor = reduce(
        mul, (cpd.to_factor() for cpd in model.get_cpds()))

    # TODO need to assert that probabilities sum to 1? Always true? or to normalize here?

    return JointProbabilityDistribution(
        variables=product.variables,
        cardinality=product.cardinality,
        values=product.values)
Example #4
0
from pgmpy.factors.discrete import JointProbabilityDistribution as JPD
import numpy as np

# Joint distribution P(X, Y) over two binary variables.
joint_xy = JPD(['X', 'Y'], [2, 2], np.array([3, 9, 7, 1]) / 20)
# Marginal P(X), returned as a new distribution rather than modified in place.
marginal_x = joint_xy.marginal_distribution(['X'], inplace=False)

print(joint_xy)
print(marginal_x)
Example #5
0
# Install pgmpy if it is not already installed (run this in a shell, not in
# Python — as a bare statement it is a syntax error):
#   conda install -c ankurankan pgmpy

# import TabularCPD
from pgmpy.factors.discrete import TabularCPD

# This example models the performance of top MBA-graduate fund managers across
# different programs, where MarketPerformance_0 and Program_0 denote poor
# performance and MarketPerformance_1 and Program_1 denote high performance.

# Create a Conditional Probability Distribution over MarketPerformance given
# the Program.
cpd = TabularCPD('MarketPerformance',2,[[0.11,0.06],[0.29,0.54]],evidence=['Program'],evidence_card=[2])
print(cpd)

# Column sums of the table give marginal probabilities, while individual cells
# give joint probabilities. We can also inspect the CPD object's metadata.

cpd = TabularCPD('grade', 2,
                        [[0.7, 0.6, 0.6, 0.2],[0.3, 0.4, 0.4, 0.8]],
                        evidence = ['intel', 'diff'], evidence_card = [2, 2])
print(cpd.variables)
print(cpd.cardinality)

# Understanding the JointProbabilityDistribution object of pgmpy
from pgmpy.factors.discrete import JointProbabilityDistribution as JPD
prob = JPD(['I','D','G'],[2,2,3],
               [0.126,0.168,0.126,0.009,0.045,0.126,0.252,0.0224,0.0056,0.06,0.036,0.024])
prob.check_independence(['I'], ['D'])
prob.check_independence(['I'], ['D'], [('G', 1)])
prob.get_independencies()
Example #6
0
    def _lambda_pass(self,
                     L_train,
                     lambda_marginals,
                     lambda_moment_vals,
                     lambda_equals_one,
                     lambda_zeros,
                     abstention_probabilities,
                     verbose=False):
        '''
        Make the pass over L_train.

        In this pass, we need to:
        * Compute all the joint marginal distributions over multiple lambda's (lambda_marginals)
        * Compute the probabilities that some set of lambda's are all equal to zero (lambda_zeros)
        * Compute all the lambda moments, including conditional moments (lambda_moment_vals)
        * Compute the probability that the product of some lambdas is zero (abstention_probabilities)

        Args:
            L_train: 2-D array of labeling-function outputs, one row per data
                point and one column per LF. Values are compared against
                (-1, 0, 1) when ``self.allow_abstentions`` else (-1, 1).
            lambda_marginals: dict keyed by tuples of LF indices; filled in
                with JointProbabilityDistribution objects over those LFs.
            lambda_moment_vals: dict keyed by index tuples, or by a pair
                (pos_indices, zero_indices) of tuples for conditional moments;
                filled in with empirical product moments.
            lambda_equals_one: dict keyed like lambda_moment_vals; filled in
                with the probability that the product of the LFs equals 1
                (conditioned on the zero_indices LFs all being nonzero, for
                the pair-keyed entries).
            lambda_zeros: dict keyed by index tuples; filled in with the
                probability that all of those LFs are simultaneously 0.
            abstention_probabilities: dict keyed by index tuples; filled in
                with the probability that the product of those LFs is 0
                (i.e., at least one of them is 0).
            verbose: if True, wrap the per-row pass in a tqdm progress bar.

        Returns:
            (lambda_marginals, lambda_moment_vals, lambda_equals_one,
            lambda_zeros, abstention_probabilities), all filled in.
        '''

        # do the fast cases first
        # Anything that involves only a single LF (or an unconditional moment
        # of at most two LFs) can be answered from column statistics without
        # a row-by-row pass; collect those cases here with placeholder None.
        easy_marginals = {
            marginal: None
            for marginal in lambda_marginals if len(marginal) == 1
        }
        # A tuple-typed first element marks a conditional moment key — those
        # are never "easy" because they need a per-row basis count.
        easy_moments = {
            moment: None
            for moment in lambda_moment_vals
            if type(moment[0]) != type(()) and len(moment) <= 2
        }
        easy_equals_one = {
            factor: None
            for factor in lambda_equals_one
            if type(factor[0]) != type(()) and len(factor) == 1
        }
        easy_zeros = {
            condition: None
            for condition in lambda_zeros if len(condition) == 1
        }
        easy_abstention_probs = {
            factor: None
            for factor in abstention_probabilities if len(factor) == 1
        }

        # Empirical first moments and (uncentered) second-moment matrix of
        # the LF columns, computed in one vectorized shot.
        means = np.einsum('ij->j', L_train) / L_train.shape[0]
        covariance = np.einsum('ij,ik->jk', L_train,
                               L_train) / L_train.shape[0]

        lf_cardinality = 3 if self.allow_abstentions else 2
        lf_values = (-1, 0, 1) if self.allow_abstentions else (-1, 1)
        for marginal in easy_marginals:
            idx = marginal[0]
            # counts[k] = empirical P(lambda_idx == lf_values[k]).
            counts = [
                np.sum(L_train[:, idx] == val) / L_train.shape[0]
                for val in lf_values
            ]
            easy_marginals[marginal] = JointProbabilityDistribution(
                ['lambda_{}'.format(idx)], [lf_cardinality], counts)

            # counts[-1] is P(lambda == 1) under either value ordering.
            if marginal in easy_equals_one:
                easy_equals_one[marginal] = counts[-1]
            # counts[1] is P(lambda == 0) under the (-1, 0, 1) ordering.
            # NOTE(review): if abstentions are disallowed counts[1] would be
            # P(lambda == 1); presumably zero/abstention queries only arise
            # when allow_abstentions is True — confirm with callers.
            if marginal in easy_zeros:
                easy_zeros[marginal] = counts[1]
            if marginal in easy_abstention_probs:
                easy_abstention_probs[marginal] = counts[1]
        for moment in easy_moments:
            if len(moment) == 1:
                easy_moments[moment] = means[moment[0]]
            else:
                easy_moments[moment] = covariance[moment[0]][moment[1]]
        # Fill in any single-LF quantities not already covered by a marginal
        # computed above (their placeholders are still None).
        for factor in easy_equals_one:
            if easy_equals_one[factor] is None:
                easy_equals_one[factor] = np.sum(
                    L_train[:, factor[0]] == 1) / L_train.shape[0]
        for condition in easy_zeros:
            if easy_zeros[condition] is None:
                idx = condition[0]
                easy_zeros[condition] = np.sum(
                    L_train[:, idx] == 0) / L_train.shape[0]
        for factor in easy_abstention_probs:
            if easy_abstention_probs[factor] is None:
                idx = factor[0]
                easy_abstention_probs[factor] = np.sum(
                    L_train[:, idx] == 0) / L_train.shape[0]

        # time for the remaining cases
        # Strip the easy cases out of each request dict; what remains needs
        # the explicit row-by-row pass below.
        lambda_marginals = {
            key: lambda_marginals[key]
            for key in lambda_marginals if key not in easy_marginals
        }
        lambda_moment_vals = {
            key: lambda_moment_vals[key]
            for key in lambda_moment_vals if key not in easy_moments
        }
        lambda_equals_one = {
            key: lambda_equals_one[key]
            for key in lambda_equals_one if key not in easy_equals_one
        }
        lambda_zeros = {
            key: lambda_zeros[key]
            for key in lambda_zeros if key not in easy_zeros
        }
        abstention_probabilities = {
            key: abstention_probabilities[key]
            for key in abstention_probabilities
            if key not in easy_abstention_probs
        }

        # for the rest, loop through L_train
        if (len(lambda_marginals) > 0 or len(lambda_moment_vals) > 0
                or len(lambda_equals_one) > 0 or len(lambda_zeros) > 0
                or len(abstention_probabilities) > 0):

            # figure out which lambda states we need to keep track of
            lambda_marginal_counts = {}
            lambda_marginal_vecs = {}
            lf_values = (-1, 0, 1) if self.allow_abstentions else (-1, 1)
            for lambda_marginal in lambda_marginals:
                # Enumerate every joint assignment of the LFs in this
                # marginal (sorted so it matches the distribution's ordering)
                # and zero-initialize a counter for each.
                nodes = ['lambda_{}'.format(idx) for idx in lambda_marginal]
                vals = {lf: lf_values for lf in nodes}
                lf_vecs = sorted([[vec_dict[lf] for lf in nodes]
                                  for vec_dict in dict_product(vals)])
                counts = {tuple(lf_vec): 0 for lf_vec in lf_vecs}
                lambda_marginal_vecs[lambda_marginal] = lf_vecs
                lambda_marginal_counts[lambda_marginal] = counts

            # *_counts accumulate numerators; *_basis accumulate the number
            # of rows that qualify for the denominator (for conditional
            # quantities the basis is smaller than len(L_train)).
            lambda_moment_counts = {moment: 0 for moment in lambda_moment_vals}
            lambda_moment_basis = {moment: 0 for moment in lambda_moment_vals}
            lambda_equals_one_counts = {
                factor: 0
                for factor in lambda_equals_one
            }
            lambda_equals_one_basis = {
                factor: 0
                for factor in lambda_equals_one
            }
            lambda_zero_counts = {condition: 0 for condition in lambda_zeros}
            abstention_probability_counts = {
                factor: 0
                for factor in abstention_probabilities
            }

            for data_point in tqdm(L_train) if verbose else L_train:
                for marginal in lambda_marginals:
                    mask = [data_point[idx] for idx in marginal]
                    lambda_marginal_counts[marginal][tuple(mask)] += 1
                for moment in lambda_moment_vals:
                    if type(moment[0]) == type(()):
                        # Conditional moment: product over moment[0] indices,
                        # conditioned on all moment[1] LFs being nonzero.
                        pos_mask = [data_point[idx] for idx in moment[0]]
                        zero_mask = [data_point[idx] for idx in moment[1]]

                        if np.count_nonzero(zero_mask) == 0:
                            lambda_moment_basis[moment] += 1
                        lambda_moment_counts[moment] += np.prod(pos_mask)
                    else:
                        # Unconditional moment: every row counts.
                        mask = [data_point[idx] for idx in moment]
                        lambda_moment_counts[moment] += np.prod(mask)
                        lambda_moment_basis[moment] += 1
                for factor in lambda_equals_one:
                    if type(factor[0]) == type(()):
                        pos_mask = [data_point[idx] for idx in factor[0]]
                        zero_mask = [data_point[idx] for idx in factor[1]]

                        if np.count_nonzero(zero_mask) == 0:
                            lambda_equals_one_basis[factor] += 1
                            if np.prod(pos_mask) == 1:
                                lambda_equals_one_counts[factor] += 1
                    else:
                        mask = [data_point[idx] for idx in factor]
                        if np.prod(mask) == 1:
                            lambda_equals_one_counts[factor] += 1
                        lambda_equals_one_basis[factor] += 1
                for zero_condition in lambda_zeros:
                    # All listed LFs abstained (== 0) on this row.
                    zero_mask = [data_point[idx] for idx in zero_condition]
                    if np.count_nonzero(zero_mask) == 0:
                        lambda_zero_counts[zero_condition] += 1
                for factor in abstention_probability_counts:
                    # Product is zero iff at least one listed LF abstained.
                    zero_mask = [data_point[idx] for idx in factor]
                    if np.prod(zero_mask) == 0:
                        abstention_probability_counts[factor] += 1

            # Normalize the accumulated counts into probabilities/moments.
            lf_cardinality = 3 if self.allow_abstentions else 2
            for marginal in lambda_marginals:
                nodes = ['lambda_{}'.format(idx) for idx in marginal]
                lf_vecs = lambda_marginal_vecs[marginal]
                counts = lambda_marginal_counts[marginal]

                lambda_marginals[marginal] = JointProbabilityDistribution(
                    nodes, [lf_cardinality for node in nodes], [
                        float(counts[tuple(lf_vec)]) / len(L_train)
                        for lf_vec in lf_vecs
                    ])

            for moment in lambda_moment_vals:
                # Guard against an empty basis (no qualifying rows).
                if lambda_moment_basis[moment] == 0:
                    moment_val = 0
                else:
                    moment_val = lambda_moment_counts[
                        moment] / lambda_moment_basis[moment]
                lambda_moment_vals[moment] = moment_val

            for factor in lambda_equals_one:
                if lambda_equals_one_basis[factor] == 0:
                    prob = 0
                else:
                    prob = lambda_equals_one_counts[
                        factor] / lambda_equals_one_basis[factor]
                lambda_equals_one[factor] = prob

            for zero_condition in lambda_zeros:
                lambda_zeros[zero_condition] = lambda_zero_counts[
                    zero_condition] / len(L_train)

            for factor in abstention_probabilities:
                abstention_probabilities[
                    factor] = abstention_probability_counts[factor] / len(
                        L_train)

        # update with the easy values
        lambda_marginals.update(easy_marginals)
        lambda_moment_vals.update(easy_moments)
        lambda_equals_one.update(easy_equals_one)
        lambda_zeros.update(easy_zeros)
        abstention_probabilities.update(easy_abstention_probs)

        return lambda_marginals, lambda_moment_vals, lambda_equals_one, lambda_zeros, abstention_probabilities