def test_complete_2():
    v1 = Rvar.Rvar(1, 2)
    v2 = Rvar.Rvar(2, 2)
    v3 = Rvar.Rvar(3, 2)
    v4 = Rvar.Rvar(4, 3)
    v5 = Rvar.Rvar(5, 2)
        
    d = Factor.Factor([v1])
    d.fill_values([0.6, 0.4])
    i = Factor.Factor([v2])
    i.fill_values([0.7, 0.3]);
    s = Factor.Factor([v3, v2])
    s.fill_values([0.95, 0.05, 0.2, 0.8]);
    g = Factor.Factor([v4, v1, v2])
    g.fill_values([0.3, 0.4, 0.3, 0.05, 0.25, 0.7, 0.9, 0.08, 0.02, 0.5, 0.3, 0.2]);
    l = Factor.Factor([v5, v4])
    l.fill_values([0.1, 0.9, 0.4, 0.6, 0.99, 0.01]);
    
    s = FactorOperations.observe(s, {v3:1}) # we observe high SAT
    
    factors = [i, s, g, l]
    a = d
    for f in factors:
        a = FactorOperations.multiply(a, f)
        print a.variables, a.values.size
    
    rvars = [v1, v3, v4, v5]
    for v in rvars:
        a = FactorOperations.marginalize(a, v)
        print a.variables, a.values.size
    
    assert np.allclose(a.values, [0.12727273, 0.87272727])
Exemple #2
0
    def mssg(self, from_v, to_w, isMax=False):
        """Build the message that clique from_v sends to its neighbor to_w.

        A shallow copy of the initial potential of from_v is combined with
        the messages stored in self.delta for every neighbor of from_v other
        than to_w.  Every variable of that copy that is not in the
        intersection of the two cliques' boxes is then eliminated.

        With isMax == True the potential is log-transformed, messages are
        combined with FactorOperations.sum and variables are removed with
        max_marginalize; otherwise multiply and marginalize are used.

        Returns the resulting factor.
        """
        use_max = (isMax == True)

        # messages that reached from_v from everyone except the recipient
        incoming = [self.delta[nb][self.adj[nb].index(from_v)]
                    for nb in self.adj[from_v] if nb != to_w]

        # initial Psi, copied so that rebinding .values leaves the stored factor alone
        psi = copy.copy(self.factors[from_v])
        if use_max:
            psi.values = np.log(psi.values)

        # fold the incoming messages into the potential
        for msg in incoming:
            if use_max:
                psi = FactorOperations.sum(psi, msg, False)
            else:
                psi = FactorOperations.multiply(psi, msg, True)

        # keep only the variables shared by both cliques
        sepset = self.box[from_v] & self.box[to_w]
        for var in psi.variables:
            if var in sepset:
                continue
            if use_max:
                psi = FactorOperations.max_marginalize(psi, var)
            else:
                psi = FactorOperations.marginalize(psi, var)
        return psi
def test_multiply_dimensions():
    """Check the variables and cardinalities reported by two factor products."""
    cases = [
        ('a', 'b', [v[1], v[2]], [2, 2]),
        ('x', 'y', [v[2], v[3], v[4]], [2, 2, 3]),
    ]
    for left, right, want_vars, want_cards in cases:
        product = FactorOperations.multiply(f[left], f[right])
        assert product.variables == want_vars
        assert product.cards == want_cards
def test_marginalize():
    """Marginalize v[2] out of f['b'] and f['z'] and compare with known values."""
    summed = FactorOperations.marginalize(f['b'], v[2])
    assert np.allclose(summed.values, [1., 1.], atol=epsilon)

    # the reference table is filled over (v[4], v[3]) and compared transposed
    expected = Factor.Factor([v[4], v[3]])
    expected.fill_values([0.33, 0.05, 0.24, 0.51, 0.07, 0.39])
    summed = FactorOperations.marginalize(f['z'], v[2])
    assert np.allclose(summed.values, expected.values.T, atol=epsilon)
Exemple #5
0
def build_joint_cpd():
    """Multiply every entry's genotype and phenotype factors into one factor.

    Walks the module-level GN mapping.  For each entry its 'factor_geno'
    and 'factor_pheno' factors are multiplied together, and that product is
    folded into a running product over all entries.

    Returns:
        The product of all the factors, or None when GN is empty.
    """
    joint = None
    for entry in GN.values():
        pair = FactorOperations.multiply(entry['factor_geno'], entry['factor_pheno'])
        # Identity test on purpose: `joint == None` would be dispatched to the
        # factor's own __eq__ (if it defines one) instead of checking for the
        # "nothing accumulated yet" sentinel.
        if joint is None:
            joint = pair
        else:
            joint = FactorOperations.multiply(joint, pair)
    return joint
Exemple #6
0
    def eliminateVar(self, F, C, z):
        """Eliminate variable z from F and record the resulting clique in C.

        Args:
            F: graph object exposing `factors`, `edges` and `allVars`;
               modified in place.
            C: dict with keys 'nodes' (list of {'vars', 'tau'} dicts) and
               'edges' (2-D numpy array); modified in place.
            z: the variable to eliminate.

        Returns:
            The list [F, C].  When no factor in F.factors mentions z,
            nothing is modified.
        """
        # separate factors into two lists, those that use z (F_Cluster) and the rest
        F_Cluster, F_Rest, Cluster_vars = [], [], []
        for f in F.factors:
            if z in f.variables:
                F_Cluster += [f]
                Cluster_vars += f.variables
            else:
                F_Rest += [f]
        
        if F_Cluster!=[]:
            # de-duplicated, sorted scope of the new clique
            Cluster_vars = tuple(sorted(set(Cluster_vars)))

            # when computing tau of new node, check if it uses other nodes' taus
            # (grow the edge matrix by one row and one column for the new node)
            rows,cols = C['edges'].shape
            C['edges'] = np.vstack([C['edges'], np.zeros((1,cols))])
            C['edges'] = np.hstack([C['edges'], np.zeros((rows+1,1))])
            pos = np.zeros(cols+1)
            for n,node in enumerate(C['nodes']):
                # must run before the pop(0) below so that every cluster factor is still in the list
                if node['tau'] in F_Cluster:
                    pos[n]=1
            # create a new array of connecting node edges based on taus in common
            C['edges'][-1,:] = pos
            C['edges'][:,-1] = pos
            
            # multiply the factors in Cluster... (lambda) ...and marginalize by z (tau)
            tau = F_Cluster.pop(0)
            for f in F_Cluster:
                tau = FactorOperations.multiply(tau, f)
            # a product whose only variable is z is kept as it is
            if tau.variables != [z]:
                tau = FactorOperations.marginalize(tau, z)
            
            # add to unused factor list the resulting tau ==> new factor list with var eliminated
            F_Rest += [tau]
            
            # update the edges (connect all vars inside new cluster, & disconnect the eliminated variable)
            for vi in Cluster_vars:
                for vj in Cluster_vars:
                    F.edges[F.allVars.index(vi), F.allVars.index(vj)] = 1
            F.edges[F.allVars.index(z),:] = 0
            F.edges[:, F.allVars.index(z)] = 0
            
            # register the new clique together with the factor it produced
            C['nodes'] += [{'vars':Cluster_vars, 'tau':tau}]
            
            F.factors = F_Rest
        return [F, C]
Exemple #7
0
 def calibrate(self, isMax=False):
     """Run message passing and store one belief per clique in self.beta.

     Messages are computed with self.mssg along the edges yielded by
     self.computePath() and stored in self.delta.  Each clique's belief is
     then a shallow copy of its initial factor combined with the messages
     stored for all of its neighbors.  With isMax == True the factor values
     are log-transformed and messages are combined with
     FactorOperations.sum; otherwise FactorOperations.multiply is used.
     """
     use_max = (isMax == True)
     self.beta = [None]*self.V

     # message pass, in the order given by computePath
     for from_v, to_w in self.computePath():
         slot = self.adj[from_v].index(to_w)
         self.delta[from_v][slot] = self.mssg(from_v, to_w, isMax)

     # beliefs: initial potential combined with every incoming message
     for v in range(self.V):
         belief = copy.copy(self.factors[v])
         if use_max:
             belief.values = np.log(belief.values)
         for w in self.adj[v]:
             incoming = self.delta[w][self.adj[w].index(v)]
             if use_max:
                 belief = FactorOperations.sum(belief, incoming, False)
             else:
                 belief = FactorOperations.multiply(belief, incoming, False)
         self.beta[v] = belief
Exemple #8
0
 def eliminateVar(self, F, z):
     """Eliminate variable z from F and add the resulting clique to this tree.

     Args:
         F: graph object exposing `factors`, `adj`, `index_var` and
            `connectAll`; modified in place.
         z: the variable to eliminate.

     Returns:
         F.  When no factor in F.factors mentions z, neither F nor the
         tree is modified.
     """
     # separate factors into two lists, those that use z (F_Cluster) and the rest
     F_Cluster, F_Rest, Cluster_vars = [], [], []
     for f in F.factors:
         if z in f.variables:
             F_Cluster += [f]
             Cluster_vars += f.variables
         else:
             F_Rest += [f]
     
     if F_Cluster!=[]:
         # add a node to clique tree with the variables involved
         position = self.V
         self.V += 1
         self.box.insert(position, set(Cluster_vars))
         self.adj.insert(position, [])
         
         # when computing tau of new node, check if it uses other nodes' taus and connect
         # (must run before the pop(0) below so that every cluster factor is still in the list)
         for i in range(position):
             if self.tau[i] in F_Cluster:
                 self.addEdge(i, position)
         
         # multiply the factors in Cluster... (lambda) ...and marginalize by z (tau)
         tau = F_Cluster.pop(0)
         for f in F_Cluster:
             tau = FactorOperations.multiply(tau, f, False)
         # a product whose only variable is z is kept as it is
         if tau.variables != [z]:
             tau = FactorOperations.marginalize(tau, z)
         self.tau.insert(position, tau)
         
         # update the edges of F (connect all vars inside new factor, & disconnect the eliminated variable)
         F.connectAll([F.index_var(v) for v in self.box[position]])
         F.adj[F.index_var(z)] = []
         
         # add to unused factor list the resulting tau ==> new factor list with var eliminated
         F_Rest += [tau]            
         F.factors = F_Rest
     return F
def test_multiply_values2():
    """Multiply a factor over (v_2, v_1) by one over (v_3, v_2) and check the values."""
    v_1 = Rvar.Rvar(1, 3)
    v_2 = Rvar.Rvar(2, 2)
    v_3 = Rvar.Rvar(3, 2)

    left = Factor.Factor([v_2, v_1])
    left.fill_values([0.5, 0.8, 0.1, 0., 0.3, 0.9])
    right = Factor.Factor([v_3, v_2])
    right.fill_values([0.5, 0.7, 0.1, 0.2])

    # reference table, laid out over (v_1, v_2, v_3)
    expected = Factor.Factor([v_1, v_2, v_3])
    expected.fill_values([0.25, 0.05, 0.15, 0.08, 0, 0.09, 0.35, 0.07, 0.21, 0.16, 0, 0.18])

    product = FactorOperations.multiply(left, right, False)
    assert np.allclose(product.values, expected.values, atol=epsilon)
def test_complete_1():
    """End-to-end check: multiply eight factors and reduce the product to v1.

    The factors are multiplied in the order they are listed, then v2..v8
    are marginalized out one after another.  The values left over v1 are
    compared with the expected triple.
    """
    v1 = Rvar.Rvar(1, 3)
    v2 = Rvar.Rvar(2, 2)
    v3 = Rvar.Rvar(3, 2)
    v4 = Rvar.Rvar(4, 2)
    v5 = Rvar.Rvar(5, 3)
    v6 = Rvar.Rvar(6, 3)
    v7 = Rvar.Rvar(7, 2)
    v8 = Rvar.Rvar(8, 3)

    # (scope, table) for every factor, in multiplication order
    specs = [
        ([v1], [1.0/3.0, 1.0/3.0, 1.0/3.0]),
        ([v8, v2], [0.9, 0.1, 0.5, 0.5, 0.1, 0.9]),
        ([v3, v4, v7, v2],
         [0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.4, 0.6, 0.3, 0.7, 0.2, 0.8, 0.1, 0.9]),
        ([v4], [0.5, 0.5]),
        ([v5, v6], [0.75, 0.2, 0.05, 0.2, 0.6, 0.2, 0.05, 0.2, 0.75]),
        ([v6], [0.3333, 0.3333, 0.3333]),
        ([v7, v5, v6],
         [0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.5, 0.5, 0.4, 0.6, 0.3, 0.7, 0.2, 0.8, 0.1, 0.9]),
        ([v8, v4, v1],
         [0.1, 0.3, 0.6, 0.05, 0.2, 0.75, 0.2, 0.5, 0.3, 0.1, 0.35, 0.55, 0.8, 0.15, 0.05, 0.2, 0.6, 0.2]),
    ]
    built = []
    for scope, table in specs:
        fac = Factor.Factor(scope)
        fac.fill_values(table)
        built.append(fac)

    joint = built[0]
    for fac in built[1:]:
        joint = FactorOperations.multiply(joint, fac)

    for hidden in (v2, v3, v4, v5, v6, v7, v8):
        joint = FactorOperations.marginalize(joint, hidden)

    assert np.allclose(joint.values, [0.37414966, 0.30272109, 0.32312925])
def MAP_Word(word):
    chars = len(word['gT'])
    vall = [None]*chars
    for i in range(chars):
        vall[i] = Rvar.Rvar(i, 26)
    f = []
    for i in range(chars):
        f.append(singletonFactor(vall[i], word['img'][i]))
    for i in range(chars-1):
        f.append(pairwiseFactor(vall[i], vall[i+1]))
    for i in range(chars-2):
        f.append(tripletFactor(vall[i], vall[i+1], vall[i+2]))

    # choose the top two similar images
    ss = []
    for i in range(chars):
        for j in range(i+1, chars):
            ss.append([vall[i], vall[j], similarity(word['img'][i], word['img'][j])])
    ss = sorted(ss, key = lambda x: x[2])
    top1 = ss.pop()
    print top1
    f.append(image_simil_factor(top1[0], top1[1], top1[2]))
    top2 = ss.pop()
    print top2
    f.append(image_simil_factor(top2[0], top2[1], top2[2]))
    #alist.sort(key=lambda x: x.foo)
    #f1.append(image_simil_factor(vall[i], vall[j], word['img'][i], word['img'][j]))
    print '---', len(f)
    cc = CliqueTree.CliqueTree(f)
    print cc
    cc.calibrate(isMax=True)
    
    # BEWARE I AM assuming that I get exact unambiguous marginals
    # which in the generality of problems does not have to happen
    # that is why the checking at bottom is important
    sol = []
    for vari in vall:
        for beta in cc.beta:
            if vari in beta.variables:
                fu = copy.copy(beta)
                for g in (set(beta.variables) - set([vari])):
                    fu = FactorOperations.max_marginalize(fu, g)
                maxi = np.max(fu.values)
                sol.append(list(fu.values).index(maxi))
                break
    return sol
Exemple #12
0
 def initializePotentials(self, listOfFactors):
     """Create one all-ones potential per clique and absorb the given factors.

     For each of the self.V cliques a Factor over the sorted variables of
     its box is created with values set to ones.  Every factor in
     listOfFactors is then multiplied into the first clique whose box is a
     superset of the factor's variables.

     Raises:
         NameError: if some factor fits in no clique.
     """
     # start every clique from a table of ones
     self.factors = [None]*self.V
     for idx in range(self.V):
         potential = Factor.Factor(sorted(self.box[idx]))
         potential.values = np.ones(potential.cards)
         self.factors[idx] = potential

     # ... and now brutishly (FIFO) we assign the factors, each one exactly once
     for fu in listOfFactors:
         scope = set(fu.variables)
         for idx, clique in enumerate(self.box):
             if clique.issuperset(scope):
                 self.factors[idx] = FactorOperations.multiply(self.factors[idx], fu, False)
                 break
         else:
             raise NameError('factor not used in any clique!', fu.variables)
Exemple #13
0
    def __init__(self, listOfFactors):
        """Build the tree from a list of factors by variable elimination.

        A FactorGraph is created from listOfFactors and one variable is
        eliminated per iteration (chosen by F.firstMinNeighborVar()) until
        as many eliminations as F has variables were performed.  The
        collected nodes and edge matrix are stored, self.pruneNode() is
        called until it returns a false value, and every remaining node
        gets an all-ones potential into which the input factors are
        multiplied.

        Raises:
            NameError: if some factor fits in no node.
        """
        F = FactorGraph(listOfFactors)

        # create nodes iteratively through var elimination
        C = {'nodes': [], 'edges': np.zeros((0, 0))}
        eliminated = 0
        while eliminated < len(F.allVars):
            F, C = self.eliminateVar(F, C, F.firstMinNeighborVar())
            eliminated += 1

        self.nodes = [set(node['vars']) for node in C['nodes']]
        self.edges = C['edges']

        # prune tree: repeat for as long as pruneNode reports a change
        while self.pruneNode():
            pass

        # initialize potentials first to all ones
        self.factors = []
        for scope in self.nodes:
            potential = Factor.Factor(sorted(scope))
            potential.values = np.ones(potential.cards)
            self.factors.append(potential)

        # ... and now brutishly (FIFO) we assign the factors, each one exactly once
        for fu in listOfFactors:
            for idx, scope in enumerate(self.nodes):
                if set(fu.variables) <= scope:
                    self.factors[idx] = FactorOperations.multiply(self.factors[idx], fu, False)
                    break
            else:
                raise NameError('factor not used in any clique!', fu.variables)
Exemple #14
0
## BUILDING THE NETWORK ##

# GN is indexed by person name and then by 'factor_<node>' / 'var_<node>'
# keys (see the lookups below).
GN = geneticNetwork(family_tree, frequency_of_alleles_in_general_population, probability_of_trait_based_on_genotype)

# Evidence conditioning: fix the genotype of two family members

#modify_Factor_by_evidence('Ira',   'pheno', TRAIT_PRESENT)
modify_Factor_by_evidence('James', 'geno', Ff)
modify_Factor_by_evidence('Rene',  'geno', FF)

# Brute force first: build the product of all factors and compute the
# probability of developing CF from it

a =build_joint_cpd()

# marginalizing: sum out every variable except Benito's phenotype
target = GN['Benito']['var_pheno']

lista = [x for x in a.variables if x!=target]

for v in lista:
    a = FactorOperations.marginalize(a, v)
    #print a.variables, a.values.size

# the first entry of the remaining factor is reported as a percentage
print 'probability of Benito showing ailment', 100.*a.values[0], '%'






Exemple #15
0
#msg_10_1 = FactorOperations.marginalize(msg_10_1, msg_10_1.variables[1])
#msg_10_1 = FactorOperations.marginalize(msg_10_1, msg_10_1.variables[1])
#print '---->', msg_10_1.variables

#belief_1 = FactorOperations.multiply(msg_10_1, cc.factors[1], True)

#print belief_1.variables
#sol = FactorOperations.marginalize(belief_1, belief_1.variables[0])

#print 'benito prob of having illnes is now :', sol.values[0]

# COMPUTE ALL EXACT MARGINALS (of showing the sickness for all)
import copy

# for fun lets reduce some evidence
cc.factors[3] = FactorOperations.observe(cc.factors[3], {v[15]:0}) # Ira shows pheno
cc.factors[6] = FactorOperations.observe(cc.factors[6], {v[0]:0})  # rene has gen FF
cc.factors[4] = FactorOperations.observe(cc.factors[4], {v[12]:1}) # James has gen Ff
cc.calibrate()

phenos_nodes = [0,1,2,3,4,5,6,7,8]
probs = {}
for i in phenos_nodes:
	belief = cc.beta[i]
	genes = [v for v in belief.variables if not v.id.endswith("_p")]
	f = copy.copy(belief)
	f = FactorOperations.marginalize(f, genes[0])
	probs[f.variables[0].id] = f.values[0]
print probs

def test_conditioning():
    """Check FactorOperations.observe against known tables.

    Observing f['a'] with the evidence must leave its values equal to the
    original ones; the remaining cases pass False as third argument and
    compare with explicit expected arrays.
    """
    evidence = {v[2]:0, v[3]:1}

    untouched = FactorOperations.observe(f['a'], evidence)
    assert np.allclose(untouched.values, f['a'].values)

    cases = [
        ('b', evidence, [[0.59, 0.22], [0., 0.]]),
        ('c', evidence, [[0., 0.], [0.61, 0.]]),
        ('z', {v[3]:0}, [[[0.25, 0.05, 0.15], [0.08, 0., 0.09]], [[0.,0.,0.],[0.,0.,0.]]]),
    ]
    for key, observed, expected in cases:
        result = FactorOperations.observe(f[key], observed, False)
        assert np.allclose(result.values, expected)
Exemple #17
0
def modify_Factor_by_evidence(name, node, ass):
    """Replace one factor of GN[name] by its version conditioned on evidence.

    Args:
        name: key of the entry in the module-level GN mapping.
        node: suffix selecting the 'factor_<node>' / 'var_<node>' pair.
        ass: value assigned to the selected variable in the evidence.
    """
    factor_key = 'factor_' + node
    current = GN[name][factor_key]
    evidence = {GN[name]['var_' + node]: ass}
    GN[name][factor_key] = FactorOperations.observe(current, evidence)
def test_multiply_values():
    """Multiply f['a'] by f['b'] and compare the values with a known table."""
    product = FactorOperations.multiply(f['a'], f['b'])

    # reference factor shares the product's variables so the layouts match
    expected = Factor.Factor(product.variables)
    expected.fill_values([0.0649, 0.1958, 0.0451, 0.6942])

    assert np.allclose(product.values, expected.values, atol=epsilon)
        F.append(f1)
        F.append(f2)
        F.append(f3)
    return [F,v]



# geneticNetwork returns the list of factors F and the list of variables v
[F, v] = geneticNetwork(family_tree, frequency_of_alleles_in_general_population, probability_of_trait_based_on_genotype)
print F
cc = CliqueTree.CliqueTree(F)

# list every variable next to its index; the evidence below refers to these indices
for i,e in enumerate(v):
    print i, e
# for fun lets reduce some evidence
cc.factors[2] = FactorOperations.observe(cc.factors[2], {v[17]:0}) # Ira shows pheno
cc.factors[5] = FactorOperations.observe(cc.factors[5], {v[6]:0})  # rene has gen1 F
cc.factors[5] = FactorOperations.observe(cc.factors[5], {v[7]:1})  # rene has gen2 f
cc.factors[1] = FactorOperations.observe(cc.factors[1], {v[5]:0}) # Eva shows pheno

# run message passing after the evidence is in place
cc.calibrate()
print cc

phenos_nodes = [0,1,2,3,4,5,6]
probs = {}
for i in phenos_nodes:
    belief = cc.beta[i]
    genes = [v1 for v1 in belief.variables if not v1.id.endswith("_p")]
    f = copy.copy(belief)
    print genes
    for g in genes: