Esempio n. 1
0
def build_genetic_network(parents, allele_freqs, prob_trait_genotype):
    """Assemble the Bayesian network of a trait inherited through a pedigree.

    Each person contributes three random variables: two allele variables
    (one state per entry of `allele_freqs`) and a binary trait variable.

    Parameters
    ----------
    parents : dict
        Maps a person's name to a pair of parent names. A falsy value marks
        a person without recorded parents, whose alleles are distributed
        according to `allele_freqs`.
    allele_freqs : sequence of float
        Distribution over alleles used for people without parents.
    prob_trait_genotype : array-like
        Converted to a numpy array and forwarded to
        `phenotype_given_genotype`.

    Returns
    -------
    BayesianNetwork
        Network built from the two allele factors and the phenotype factor
        of every person, in the iteration order of `parents`.
    """
    prob_trait_genotype = np.array(prob_trait_genotype)
    n_alleles = len(allele_freqs)

    # person -> [allele_1, allele_2, trait]
    variables = {
        person: [
            RandomVar(person + '_allele_1', n_alleles),
            RandomVar(person + '_allele_2', n_alleles),
            RandomVar(person + '_trait', 2),
        ]
        for person in parents
    }

    factors = []
    for person, person_parents in parents.items():
        person_vars = variables[person]
        allele_1, allele_2 = person_vars[0], person_vars[1]

        if person_parents:
            # The first allele is inherited from the first parent, the
            # second allele from the second parent.
            first_parent_vars = variables[person_parents[0]]
            second_parent_vars = variables[person_parents[1]]

            f_allele1 = allele_given_parent_alleles(allele_1,
                                                    first_parent_vars)
            f_allele2 = allele_given_parent_alleles(allele_2,
                                                    second_parent_vars)
        else:
            f_allele1 = CPD([allele_1], allele_freqs)
            f_allele2 = CPD([allele_2], allele_freqs)

        f_phenotype = phenotype_given_genotype(person_vars,
                                               prob_trait_genotype)

        factors.extend([f_allele1, f_allele2, f_phenotype])

    return BayesianNetwork(factors)
Esempio n. 2
0
    def init(self, graph):
        """Set up a randomly parameterised starting network for `graph`.

        Variables that appear as the first scope entry of a CPD in
        `self.known_cpds` keep (a copy of) that CPD. Every other variable of
        `self.scope` gets a CPD whose distribution, for each assignment of
        its parents, is drawn from a symmetric Dirichlet.

        Sets `self.unknown`, `self.parents` and `self.bn`; also normalises
        `self.known_cpds` to a list and `self.alpha` to a float.

        Parameters
        ----------
        graph : passed unchanged to `find_parents` together with
            `self.scope` to obtain each variable's parents.
        """
        if self.known_cpds is None:
            self.known_cpds = []
        self.alpha = float(self.alpha)

        fixed_vars = {cpd.scope[0] for cpd in self.known_cpds}
        self.unknown = set(self.scope) - fixed_vars

        # Start from copies of the known CPDs; random ones are appended after.
        cpds = [CPD(cpd.scope, cpd.values) for cpd in self.known_cpds]

        self.parents = find_parents(self.scope, graph)
        for v in self.unknown:
            pa_v = sorted(self.parents[v])
            f = Factor([v] + pa_v)

            # Same concentration for every state of v: alpha spread over all
            # entries of the table.
            concentration = [self.alpha / len(f.values)] * v.k

            parent_ranges = (range(pa.k) for pa in pa_v)
            for parent_assg in product(*parent_ranges):
                dist = np.random.dirichlet(concentration)

                tail = list(parent_assg)
                for state in range(v.k):
                    f.values[f.atoi([state] + tail)] = dist[state]

            cpds.append(CPD(f.scope, f.values))

        self.bn = BayesianNetwork(cpds)
Esempio n. 3
0
def one_predictive_var():
    """Fit and evaluate NaiveBayes on data where only X1 follows the label.

    The sampled network has a uniform binary `Y`, a feature `X1` that always
    equals `Y`, and two features `X2` and `X3` that are uniform whatever `Y`
    is. The classifier is trained on 1000 forward samples; its score and
    predicted probabilities on 10 fresh samples are printed.
    """
    label = RandomVar('Y', 2)

    feat_1 = RandomVar('X1', 2)
    feat_2 = RandomVar('X2', 2)
    feat_3 = RandomVar('X3', 2)

    cpd_feat_1 = CPD([feat_1, label], [1.0, 0.0, 0.0, 1.0])
    cpd_feat_2 = CPD([feat_2, label], [0.5, 0.5, 0.5, 0.5])
    cpd_feat_3 = CPD([feat_3, label], [0.5, 0.5, 0.5, 0.5])

    cpd_label = CPD([label], [0.5, 0.5])

    bn = BayesianNetwork([cpd_label, cpd_feat_1, cpd_feat_2, cpd_feat_3])

    # --- Training ---
    train_sampler = ForwardSampler(bn)
    train_sampler.sample(1000)
    _, train = train_sampler.samples_to_matrix()

    # The last column is used as the target, the others as features.
    # NOTE(review): presumably the last column is Y - confirm the column
    # order returned by samples_to_matrix.
    nb = NaiveBayes()
    nb.fit(train[:, :-1], train[:, -1])

    # --- Evaluation on a fresh, small sample ---
    test_sampler = ForwardSampler(bn)
    test_sampler.sample(10)
    _, test = test_sampler.samples_to_matrix()

    print(nb.score(test[:, :-1], test[:, -1]))
    print(nb.predict_proba(test[:, :-1]))
Esempio n. 4
0
    def optimal_decision_rule(self, scope):
        """Build a deterministic decision rule over `scope`.

        The chance factors of `self.id` and the sum of its utility factors
        are combined, reduced to an unnormalised factor over `scope`, and for
        every assignment of `scope[1:]` the value of `scope[0]` selected by
        that factor's `argmax` receives probability 1.

        Parameters
        ----------
        scope : list of RandomVar
            `scope[0]` is the decision variable; the remaining entries are
            the variables the rule is conditioned on.

        Returns
        -------
        CPD
            A 0/1 table over `scope`.
        """
        chance_factors = self.id.chance_factors

        # Accumulate all utility factors into one, starting from a constant 0.
        total_utility = Factor([], [0.0])
        for utility in self.id.utility_factors:
            total_utility += utility

        gibbs = GibbsDistribution(chance_factors + [total_utility])
        mu = VariableElimination(gibbs).posterior(scope, normalize=False)

        # mu may order its variables differently from `scope`:
        # to_scope[j] is the position in `scope` of mu.scope[j].
        to_scope = [scope.index(v) for v in mu.scope]
        decision_pos = mu.scope.index(scope[0])

        cards = [v.k for v in scope]
        rule = Factor(scope, np.zeros(np.prod(cards)))
        n_contexts = int(np.prod(cards[1:]))

        # NOTE(review): only assg[1:] of each decoded index is used; this
        # presumably relies on the first n_contexts indices enumerating every
        # assignment of scope[1:] - confirm against Factor.itoa.
        for flat in range(n_contexts):
            assg = rule.itoa(flat)

            query = np.array(assg)[to_scope]
            # -1 presumably marks the dimension mu.argmax maximises over -
            # confirm against Factor.argmax.
            query[decision_pos] = -1

            chosen = [mu.argmax(query)] + list(assg[1:])

            rule.values[rule.atoi(chosen)] = 1.0

        return CPD(rule.scope, rule.values)
Esempio n. 5
0
def main():
    """Compare exact and sampling-based inference on a three-variable chain.

    Builds the chain X1 -> X2 -> X3, then prints:

    * posteriors and MAP assignments from variable elimination and joint
      marginalization;
    * forward-sampling estimates of the joint and of P(X1, X2 | X3 = 0);
    * Gibbs-sampling estimates of the same two quantities.
    """
    x1 = RandomVar('X1', 2)
    x2 = RandomVar('X2', 2)
    x3 = RandomVar('X3', 2)

    fx1 = CPD([x1], [0.11, 0.89])
    fx2_x1 = CPD([x2, x1], [0.59, 0.22, 0.41, 0.78])
    fx3_x2 = CPD([x3, x2], [0.39, 0.06, 0.61, 0.94])

    bn = BayesianNetwork([fx1, fx2_x1, fx3_x2])
    #    mn = MarkovNetwork([fx1, fx2_x1, fx3_x2])

    ve = VariableElimination(bn)
    jm = JointMarginalization(bn)

    print(ve.posterior([x1, x2], [(x3, 0)]))
    print(jm.posterior([x1, x2], [(x3, 0)]))

    print(ve.posterior([x1, x2, x3]))
    print(jm.posterior([x1, x2, x3]))

    print(ve.maximum_a_posteriori(evidence=[(x3, 0)]))
    print(jm.maximum_a_posteriori([x1, x2], [(x3, 0)]))

    fs = ForwardSampler(bn)
    fs.sample(10000)

    # Assignments are materialised with list(): a bare zip() is a one-shot
    # iterator in Python 3 and would be empty on any second pass over it.
    for c in itertools.product(range(2), repeat=3):
        assg = list(zip([x1, x2, x3], c))
        print('{0}: {1}'.format(c, fs.posterior(assg)))

    # Conditional estimate: P(x1, x2, x3=0) / P(x3=0).
    px3_0 = fs.posterior([(x3, 0)])
    for c in itertools.product(range(2), repeat=2):
        assg = list(zip([x1, x2], c)) + [(x3, 0)]

        print('{0}: {1}'.format(c, fs.posterior(assg) / px3_0))

    gs = GibbsSampler(bn)
    gs.sample(burn_in=1000, n=2000)

    for c in itertools.product(range(2), repeat=3):
        assg = list(zip([x1, x2, x3], c))
        print('{0}: {1}'.format(c, gs.posterior(assg)))

    # Restart the chain, this time sampling with X3 fixed to 0.
    gs.reset()
    gs.sample(burn_in=1000, n=1000, evidence=[(x3, 0)])

    for c in itertools.product(range(2), repeat=2):
        assg = list(zip([x1, x2], c))
        print('{0}: {1}'.format(c, gs.posterior(assg)))
Esempio n. 6
0
def three_variables():
    """Evaluate two fixed decision rules and the optimal one on a tiny diagram.

    The influence diagram has one chance variable (`Market`, three states),
    one decision variable (`Found`, two states) and a single utility factor
    over both. Prints the expected utility of each hand-written rule for
    `Found`, then the rule returned by `optimal_decision_rule`.
    """
    market = RandomVar('Market', 3)
    found = RandomVar('Found', 2)

    utility = Factor([market, found], [0, -7, 0, 5, 0, 20])

    market_prior = CPD([market], [0.5, 0.3, 0.2])

    # Alternative decision rules for Found
    always_first = CPD([found], [1.0, 0])
    always_second = CPD([found], [0, 1.0])  # Optimal

    diagram = InfluenceDiagram([market_prior], [utility])
    eu = ExpectedUtility(diagram)

    for rule in (always_first, always_second):
        print(eu.expected_utility([rule]))

    print(eu.optimal_decision_rule([found]))
Esempio n. 7
0
def earthquake():
    """Score the generating graph of the B/E/A/R network and search for a graph.

    Draws 1000 forward samples, prints the likelihood, BIC and Bayesian
    scores of the network's own graph, then runs `restarting_local_search`
    with a Bayesian scorer and prints the best score and graph it returns.
    """
    var_b = RandomVar('B', 2)
    var_e = RandomVar('E', 2)
    var_a = RandomVar('A', 2)
    var_r = RandomVar('R', 2)

    cpd_a = CPD([var_a, var_b, var_e],
                [0.999, 0.01, 0.01, 0.0001, 0.001, 0.99, 0.99, 0.9999])
    cpd_r = CPD([var_r, var_e], [1.0, 0.0, 0.0, 1.0])
    cpd_b = CPD([var_b], [0.99, 0.01])
    cpd_e = CPD([var_e], [0.999, 0.001])

    bn = BayesianNetwork([cpd_a, cpd_r, cpd_b, cpd_e])

    sampler = ForwardSampler(bn)
    sampler.sample(1000)
    scope, data = sampler.samples_to_matrix()

    # Alternative for comparison: a graph with no edges, i.e.
    # {var_b: set(), var_e: set(), var_a: set(), var_r: set()}.
    graph = bn.graph()

    for score_cls in (LikelihoodScore, BICScore, BayesianScore):
        print(score_cls(scope).fit(data, graph).score)

    # LikelihoodScore(scope) or BICScore(scope) can be used here instead.
    scorer = BayesianScore(scope)
    search_options = dict(restarts=1, iterations=100, epsilon=0.2, verbose=1)
    best_graph, best_score = restarting_local_search(data, scope, scorer,
                                                     **search_options)
    print('Best:')
    print(best_score)
    print(best_graph)
Esempio n. 8
0
def main():
    """Compare the full joint from two exact inference engines.

    Builds the four-variable B/E/A/R network, prints the result of comparing
    the joint posterior from variable elimination with the one from joint
    marginalization, and finally draws 1000 forward samples.
    """
    var_b = RandomVar('B', 2)
    var_e = RandomVar('E', 2)
    var_a = RandomVar('A', 2)
    var_r = RandomVar('R', 2)

    cpd_a = CPD([var_a, var_b, var_e],
                [0.999, 0.01, 0.01, 0.0001, 0.001, 0.99, 0.99, 0.9999])
    cpd_r = CPD([var_r, var_e], [1.0, 0.0, 0.0, 1.0])
    cpd_b = CPD([var_b], [0.99, 0.01])
    cpd_e = CPD([var_e], [0.999, 0.001])

    bn = BayesianNetwork([cpd_a, cpd_r, cpd_b, cpd_e])

    ve = VariableElimination(bn)
    jm = JointMarginalization(bn)

    query = [var_b, var_e, var_a, var_r]
    joint_ve = ve.posterior(query)
    joint_jm = jm.posterior(query)
    print(joint_ve == joint_jm)

    # The samples are drawn but not used any further here.
    sampler = ForwardSampler(bn)
    sampler.sample(1000)
Esempio n. 9
0
def traffic():
    """Re-learn the parameters of the A/T/P network from its own samples.

    Prints the network, draws 1000 forward samples, then prints the result
    of `fit_predict` for a maximum-likelihood estimator and for a
    uniform-Dirichlet estimator (alpha = 1.0), both given the true graph.
    """
    var_a = RandomVar('A', 2)
    var_t = RandomVar('T', 2)
    var_p = RandomVar('P', 2)

    cpd_p = CPD([var_p], [0.99, 0.01])
    cpd_a = CPD([var_a], [0.9, 0.1])

    # T depends on both P and A.
    cpd_t = CPD([var_t, var_p, var_a],
                [0.9, 0.5, 0.4, 0.1, 0.1, 0.5, 0.6, 0.9])

    bn = BayesianNetwork([cpd_p, cpd_a, cpd_t])
    print(bn)

    sampler = ForwardSampler(bn)
    sampler.sample(1000)
    scope, data = sampler.samples_to_matrix()

    print(MaximumLikelihood(scope).fit_predict(data, bn.graph()))

    print(UniformDirichlet(scope, alpha=1.0).fit_predict(data, bn.graph()))
Esempio n. 10
0
def simple_chain():
    """Re-learn the parameters of the chain X1 -> X2 -> X3 from samples.

    Prints the network, draws 1000 forward samples, then prints the result
    of `fit_predict` for a maximum-likelihood estimator and for a
    uniform-Dirichlet estimator (alpha = 1.0), both given the true graph.
    """
    first = RandomVar('X1', 2)
    second = RandomVar('X2', 2)
    third = RandomVar('X3', 2)

    cpd_first = CPD([first], [0.11, 0.89])
    cpd_second = CPD([second, first], [0.59, 0.22, 0.41, 0.78])
    cpd_third = CPD([third, second], [0.39, 0.06, 0.61, 0.94])

    bn = BayesianNetwork([cpd_first, cpd_second, cpd_third])
    graph = bn.graph()
    print(bn)

    sampler = ForwardSampler(bn)
    sampler.sample(1000)
    scope, data = sampler.samples_to_matrix()

    print(MaximumLikelihood(scope).fit_predict(data, graph))

    print(UniformDirichlet(scope, alpha=1.0).fit_predict(data, graph))
Esempio n. 11
0
def simple_chain():
    """Score the chain X1 -> X2 -> X3 and search for a graph from samples.

    Draws 2000 forward samples, prints the likelihood, BIC and Bayesian
    scores of the generating graph, then runs `restarting_local_search`
    with a BIC scorer and prints the best score and graph it returns.
    """
    first = RandomVar('X1', 2)
    second = RandomVar('X2', 2)
    third = RandomVar('X3', 2)

    cpd_first = CPD([first], [0.11, 0.89])
    cpd_second = CPD([second, first], [0.59, 0.22, 0.41, 0.78])
    cpd_third = CPD([third, second], [0.39, 0.06, 0.61, 0.94])

    bn = BayesianNetwork([cpd_first, cpd_second, cpd_third])

    sampler = ForwardSampler(bn)
    sampler.sample(2000)

    scope, data = sampler.samples_to_matrix()

    # Alternative for comparison: a graph with no edges, i.e.
    # {first: set(), second: set(), third: set()}.
    graph = bn.graph()

    for score_cls in (LikelihoodScore, BICScore, BayesianScore):
        print(score_cls(scope).fit(data, graph).score)

    # LikelihoodScore(scope) or BayesianScore(scope) can be used here instead.
    scorer = BICScore(scope)
    search_options = dict(restarts=5, iterations=50, epsilon=0.2, verbose=1)
    best_graph, best_score = restarting_local_search(data, scope, scorer,
                                                     **search_options)
    print('Best:')
    print(best_score)
    print(best_graph)
Esempio n. 12
0
def traffic():
    """Score the A/T/P network's graph and search for a graph from samples.

    Draws 2000 forward samples, prints the likelihood, BIC and Bayesian
    scores of the generating graph, then runs `restarting_local_search`
    with a BIC scorer and prints the best score and graph it returns.
    """
    var_a = RandomVar('A', 2)
    var_t = RandomVar('T', 2)
    var_p = RandomVar('P', 2)

    cpd_p = CPD([var_p], [0.99, 0.01])
    cpd_a = CPD([var_a], [0.9, 0.1])

    # T depends on both P and A.
    cpd_t = CPD([var_t, var_p, var_a],
                [0.9, 0.5, 0.4, 0.1, 0.1, 0.5, 0.6, 0.9])

    bn = BayesianNetwork([cpd_p, cpd_a, cpd_t])
    # print(bn) can be enabled here to inspect the generating network.

    sampler = ForwardSampler(bn)
    sampler.sample(2000)
    scope, data = sampler.samples_to_matrix()

    graph = bn.graph()

    for score_cls in (LikelihoodScore, BICScore, BayesianScore):
        print(score_cls(scope).fit(data, graph).score)

    # LikelihoodScore(scope) or BayesianScore(scope) can be used here instead.
    scorer = BICScore(scope)
    search_options = dict(restarts=5, iterations=50, epsilon=0.2, verbose=1)
    best_graph, best_score = restarting_local_search(data, scope, scorer,
                                                     **search_options)
    print('Best:')
    print(best_score)
    print(best_graph)
Esempio n. 13
0
def main():
    """Run exact inference queries on the A/T/P network and print the results.

    Prints the MAP assignment of A given T = 1 (joint marginalization), the
    posterior of A given T = 1 from both engines, and the full joint from
    both engines.
    """
    var_a = RandomVar('A', 2)
    var_t = RandomVar('T', 2)
    var_p = RandomVar('P', 2)

    cpd_p = CPD([var_p], [0.99, 0.01])
    cpd_a = CPD([var_a], [0.9, 0.1])

    # T depends on both P and A.
    cpd_t = CPD([var_t, var_p, var_a],
                [0.9, 0.5, 0.4, 0.1, 0.1, 0.5, 0.6, 0.9])

    bn = BayesianNetwork([cpd_p, cpd_a, cpd_t])

    ve = VariableElimination(bn)
    jm = JointMarginalization(bn)

    print(jm.maximum_a_posteriori([var_a], [(var_t, 1)]))

    # Same query on both engines: P(A | T = 1).
    for engine in (ve, jm):
        print(engine.posterior([var_a], [(var_t, 1)]))

    # Same query on both engines: the full joint.
    for engine in (ve, jm):
        print(engine.posterior([var_a, var_t, var_p]))
Esempio n. 14
0
def phenotype_given_genotype(variables, prob_trait_genotype):
    """Build the CPD of one person's trait given their two alleles.

    Parameters
    ----------
    variables : sequence of three RandomVar
        `[allele_1, allele_2, trait]` for a single person.
    prob_trait_genotype : two-dimensional np.array
        `prob_trait_genotype[a1, a2]` is the probability given to a non-zero
        trait value when the alleles take the values `a1` and `a2`; trait
        value 0 gets the complement.

    Returns
    -------
    CPD
        Table with scope `[trait, allele_1, allele_2]`.
    """
    v1, v2, v3 = variables

    # The flat index is decoded as (trait, allele_1, allele_2), so the
    # dimensions must be listed in the same order as the CPD scope below.
    # Listing the allele sizes first only works when both alleles have
    # exactly as many states as the trait.
    dims = [v3.k, v1.k, v2.k]
    values = np.zeros(np.prod(dims))

    for i in range(len(values)):
        trait, allele_1, allele_2 = np.unravel_index(i, dims)

        prob = prob_trait_genotype[allele_1, allele_2]
        values[i] = prob if trait else 1 - prob

    return CPD([v3, v1, v2], values)
Esempio n. 15
0
def allele_given_parent_alleles(allele, p_alleles):
    """Build the CPD of a child's allele given one parent's two alleles.

    Each of the parent's two alleles contributes probability 0.5 to the
    child allele value it equals, so a parent carrying the same allele twice
    passes it on with probability 1.

    Parameters
    ----------
    allele : RandomVar
        The child's allele variable.
    p_alleles : sequence of RandomVar
        The parent's variables; only the first two entries (the parent's
        alleles) are used.

    Returns
    -------
    CPD
        Table with scope `[allele, p_alleles[0], p_alleles[1]]`.
    """
    parent_first, parent_second = p_alleles[0], p_alleles[1]
    dims = [allele.k, parent_first.k, parent_second.k]

    values = np.zeros(np.prod(dims))

    for flat in range(len(values)):
        child, from_first, from_second = np.unravel_index(flat, dims)

        # 0.5 for every parental allele that matches the child's value.
        share = 0.0
        for parental in (from_first, from_second):
            if child == parental:
                share += 0.5
        values[flat] += share

    return CPD([allele, parent_first, parent_second], values)
Esempio n. 16
0
def earthquake():
    """Re-learn the parameters of the B/E/A/R network from its own samples.

    Prints the network, draws 1000 forward samples, then prints the result
    of `fit_predict` for a maximum-likelihood estimator and for a
    uniform-Dirichlet estimator (alpha = 1.0), both given the true graph.
    """
    var_b = RandomVar('B', 2)
    var_e = RandomVar('E', 2)
    var_a = RandomVar('A', 2)
    var_r = RandomVar('R', 2)

    cpd_a = CPD([var_a, var_b, var_e],
                [0.999, 0.01, 0.01, 0.0001, 0.001, 0.99, 0.99, 0.9999])
    cpd_r = CPD([var_r, var_e], [1.0, 0.0, 0.0, 1.0])
    cpd_b = CPD([var_b], [0.99, 0.01])
    cpd_e = CPD([var_e], [0.999, 0.001])

    bn = BayesianNetwork([cpd_a, cpd_r, cpd_b, cpd_e])
    print(bn)

    sampler = ForwardSampler(bn)
    sampler.sample(1000)
    scope, data = sampler.samples_to_matrix()

    print(MaximumLikelihood(scope).fit_predict(data, bn.graph()))

    print(UniformDirichlet(scope, alpha=1.0).fit_predict(data, bn.graph()))
Esempio n. 17
0
def six_variables():
    """Compare decision rules on an influence diagram with two decisions.

    The diagram has chance variables `Market` and `Survey`, decision
    variables `Test` and `Found`, and two utility factors. Prints the
    expected utility of every combination of two hand-written rules for
    `Found` given `Survey` and two for `Test`. Then fixes the `Test` rule,
    computes the optimal rule for `Found` given `Survey`, and prints its
    expected utility.
    """
    market = RandomVar('Market', 3)
    survey = RandomVar('Survey', 4)  # Survey = 3 means no survey

    test = RandomVar('Test', 2)
    found = RandomVar('Found', 2)

    utility_found = Factor([market, found], [0, -7, 0, 5, 0, 20])
    utility_test = Factor([test], [0, -1])

    cpd_market = CPD([market], [0.5, 0.3, 0.2])

    cpd_survey = CPD([survey, market, test], [
        0.0, 0.6, 0.0, 0.3, 0.0, 0.1, 0.0, 0.3, 0.0, 0.4, 0.0, 0.4, 0.0, 0.1,
        0.0, 0.3, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0
    ])

    # Alternative decision rules for Found given Survey
    found_rule_a = CPD([found, survey], [0, 0, 0, 1, 1, 1, 1, 0])
    found_rule_b = CPD([found, survey], [1, 0, 0, 0, 0, 1, 1, 1])  # Optimal

    # Alternative decision rules for Test
    test_rule_a = CPD([test], [1.0, 0.0])
    test_rule_b = CPD([test], [0.0, 1.0])  # Optimal

    diagram = InfluenceDiagram([cpd_market, cpd_survey],
                               [utility_found, utility_test])
    eu = ExpectedUtility(diagram)

    # Every pairing of a Found rule with a Test rule.
    for found_rule in (found_rule_a, found_rule_b):
        for test_rule in (test_rule_a, test_rule_b):
            print(eu.expected_utility([found_rule, test_rule]))

    # New influence diagram with a single decision rule left open: the Test
    # rule is fixed by adding it to the chance factors.
    fixed_test_rule = test_rule_b

    diagram_fixed = InfluenceDiagram(
        [cpd_market, cpd_survey, fixed_test_rule],
        [utility_found, utility_test])
    eu_fixed = ExpectedUtility(diagram_fixed)

    optimal_found_rule = eu_fixed.optimal_decision_rule([found, survey])
    # Evaluated on the original diagram, which needs both rules supplied.
    print(eu.expected_utility([optimal_found_rule, fixed_test_rule]))
Esempio n. 18
0
    def fit(self, X, graph):
        """Find the parameters for a probabilistic graphical model, given a
        graph and a data set that possibly contains missing data.

        Each restart calls `self.init(graph)` and then alternates, for
        `self.n_iterations` rounds, between accumulating (expected) counts
        for every unknown CPD and rebuilding the network from those counts.
        The network with the highest log-likelihood over all restarts is
        kept.

        After fitting, the model is available as a BayesianNetwork `self.bn`.

        Parameters
        ----------
        X : two-dimensional np.array or python matrix of integers
            Matrix representing the observations. The value `X[i, j]` should
            correspond to the discrete random variable `self.scope[j]` in
            sample element `i`. The number -1 represents a missing value.
        graph: dict from RandomVariables to sets of RandomVariables
            the graph for the probabilistic graphical model

        Returns
        -------
        self
        """
        # Rows are fancy-indexed below, which plain nested lists do not
        # support; convert once so both documented input forms work.
        X = np.asarray(X)
        var_index = {v: i for (i, v) in enumerate(self.scope)}

        best_ll = float('-inf')
        best_bn = None
        for irestart in range(self.n_restarts):
            if self.verbose > 0:
                print('Restart {0}.'.format(irestart + 1))

            self.init(graph)

            known_cpds = [
                CPD(cpd.scope, cpd.values) for cpd in self.known_cpds
            ]

            # One count table per unknown variable, over it and its parents.
            M_scopes = [[v] + sorted(self.parents[v]) for v in self.unknown]

            for iiteration in range(self.n_iterations):
                ess = [Factor(M_scope) for M_scope in M_scopes]

                # self.bn only changes after all samples have been processed,
                # so one inference engine serves the whole pass.
                ve = VariableElimination(self.bn)

                for x in X:
                    evidence = [(self.scope[i], xi)
                                for (i, xi) in enumerate(x) if xi != -1]

                    for M in ess:
                        # Fancy indexing copies, so X itself is never
                        # modified when missing entries are filled in below.
                        M_assg = x[[var_index[v] for v in M.scope]]

                        M_h = [
                            v for (i, v) in enumerate(M.scope)
                            if M_assg[i] == -1
                        ]

                        if M_h:
                            # Spread the sample over every completion of the
                            # missing entries, weighted by its posterior.
                            f = ve.posterior(M_h, evidence=evidence)

                            Mh_index = [M.scope.index(v) for v in f.scope]

                            for i in range(len(f.values)):
                                f_assg = f.itoa(i)
                                M_assg[Mh_index] = f_assg
                                M.values[M.atoi(M_assg)] += f.values[i]
                        else:
                            M.values[M.atoi(M_assg)] += 1

                self.bn = BayesianNetwork([M.to_cpd()
                                           for M in ess] + known_cpds)

                if self.verbose > 1:
                    print('Iteration {0}. '.format(iiteration + 1))
                if self.verbose > 2:
                    ll = self.log_likelihood(X, self.bn)
                    print('Current log-likelihood {0}.'.format(ll))

            ll = self.log_likelihood(X, self.bn)
            # Respect the verbosity setting like every other message here.
            if self.verbose > 0:
                print('Final log-likelihood {0}.'.format(ll))
            if ll > best_ll:
                best_ll = ll
                best_bn = self.bn

        # If no restart beat -inf (or none was run), keep whatever self.bn
        # currently holds instead of replacing it with None.
        if best_bn is not None:
            self.bn = best_bn

        return self