def test_complete_2():
    v1 = Rvar.Rvar(1, 2)
    v2 = Rvar.Rvar(2, 2)
    v3 = Rvar.Rvar(3, 2)
    v4 = Rvar.Rvar(4, 3)
    v5 = Rvar.Rvar(5, 2)

    d = Factor.Factor([v1])
    d.fill_values([0.6, 0.4])
    i = Factor.Factor([v2])
    i.fill_values([0.7, 0.3])
    s = Factor.Factor([v3, v2])
    s.fill_values([0.95, 0.05, 0.2, 0.8])
    g = Factor.Factor([v4, v1, v2])
    g.fill_values([0.3, 0.4, 0.3, 0.05, 0.25, 0.7, 0.9, 0.08, 0.02, 0.5, 0.3, 0.2])
    l = Factor.Factor([v5, v4])
    l.fill_values([0.1, 0.9, 0.4, 0.6, 0.99, 0.01])

    s = FactorOperations.observe(s, {v3: 1})  # we observe high SAT

    factors = [i, s, g, l]
    a = d
    for f in factors:
        a = FactorOperations.multiply(a, f)
    print a.variables, a.values.size

    rvars = [v1, v3, v4, v5]
    for v in rvars:
        a = FactorOperations.marginalize(a, v)
    print a.variables, a.values.size

    assert np.allclose(a.values, [0.12727273, 0.87272727])
def mssg(self, from_v, to_w, isMax=False):
    # collect all messages arriving at from_v, except the one coming from to_w
    mess = []
    neighbors = self.adj[from_v]
    for n in neighbors:
        if n != to_w:
            pos = self.adj[n].index(from_v)
            msg = self.delta[n][pos]
            mess.append(msg)
    # take the initial Psi (and log it if doing max-sum)
    d = copy.copy(self.factors[from_v])
    if isMax == True:
        d.values = np.log(d.values)
    # multiply by the incoming messages (sum in log-space for max-sum)
    for ms in mess:
        if isMax == True:
            d = FactorOperations.sum(d, ms, False)
        else:
            d = FactorOperations.multiply(d, ms, True)
    # marginalize down to the sepset variables
    for n in d.variables:
        if n not in (self.box[from_v] & self.box[to_w]):
            if isMax == True:
                d = FactorOperations.max_marginalize(d, n)
            else:
                d = FactorOperations.marginalize(d, n)
    return d
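# A minimal sketch of the quantity mssg() computes, assuming the Factor /
# FactorOperations interfaces used above (the variables and numbers below are
# hypothetical, for illustration only): a leaf clique has no incoming messages,
# so its message to a neighbour is just its initial potential marginalized down
# to the sepset; with isMax=True the potential is first logged and then
# max-marginalized instead.
#
#   a = Rvar.Rvar(1, 2)
#   b = Rvar.Rvar(2, 2)
#   psi = Factor.Factor([a, b])
#   psi.fill_values([0.5, 0.8, 0.1, 0.3])
#   delta_sum = FactorOperations.marginalize(psi, a)          # sum-product message over {b}
#   psi_log = copy.copy(psi)
#   psi_log.values = np.log(psi.values)
#   delta_max = FactorOperations.max_marginalize(psi_log, a)  # max-sum (log-space) message over {b}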
def test_multiply_dimensions():
    c = FactorOperations.multiply(f['a'], f['b'])
    assert c.variables == [v[1], v[2]]
    assert c.cards == [2, 2]
    c = FactorOperations.multiply(f['x'], f['y'])
    assert c.variables == [v[2], v[3], v[4]]
    assert c.cards == [2, 2, 3]
def test_marginalize():
    c = FactorOperations.marginalize(f['b'], v[2])
    assert np.allclose(c.values, [1., 1.], atol=epsilon)
    c = FactorOperations.marginalize(f['z'], v[2])
    sol = Factor.Factor([v[4], v[3]])
    sol.fill_values([0.33, 0.05, 0.24, 0.51, 0.07, 0.39])
    assert np.allclose(c.values, sol.values.T, atol=epsilon)
def build_joint_cpd():
    a = None
    for k in GN.keys():
        b = FactorOperations.multiply(GN[k]['factor_geno'], GN[k]['factor_pheno'])
        if a is None:
            a = b
        else:
            a = FactorOperations.multiply(a, b)
    return a
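# Hedged usage sketch: querying the joint CPD by marginalizing everything
# except one target variable (this mirrors the script further below; the
# 'Benito' entry and the GN field names are taken from that script and are
# otherwise assumptions):
#
#   joint = build_joint_cpd()
#   target = GN['Benito']['var_pheno']
#   for x in [x for x in joint.variables if x != target]:
#       joint = FactorOperations.marginalize(joint, x)
#   # joint.values now holds the marginal over the target phenotype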
def eliminateVar(self, F, C, z):
    # separate factors into two lists: those that use z (F_Cluster) and the rest
    F_Cluster, F_Rest, Cluster_vars = [], [], []
    for f in F.factors:
        if z in f.variables:
            F_Cluster += [f]
            Cluster_vars += f.variables
        else:
            F_Rest += [f]
    if F_Cluster != []:
        Cluster_vars = tuple(sorted(set(Cluster_vars)))
        # when computing tau of the new node, check if it uses other nodes' taus
        rows, cols = C['edges'].shape
        C['edges'] = np.vstack([C['edges'], np.zeros((1, cols))])
        C['edges'] = np.hstack([C['edges'], np.zeros((rows + 1, 1))])
        pos = np.zeros(cols + 1)
        for n, node in enumerate(C['nodes']):
            if node['tau'] in F_Cluster:
                pos[n] = 1
        # create a new array of connecting node edges based on taus in common
        C['edges'][-1, :] = pos
        C['edges'][:, -1] = pos
        # multiply the factors in the cluster... (lambda) ...and marginalize by z (tau)
        tau = F_Cluster.pop(0)
        for f in F_Cluster:
            tau = FactorOperations.multiply(tau, f)
        if tau.variables != [z]:
            tau = FactorOperations.marginalize(tau, z)
        # add the resulting tau to the unused factor list ==> new factor list with z eliminated
        F_Rest += [tau]
        # update the edges (connect all vars inside the new cluster, and disconnect the eliminated variable)
        for vi in Cluster_vars:
            for vj in Cluster_vars:
                F.edges[F.allVars.index(vi), F.allVars.index(vj)] = 1
        F.edges[F.allVars.index(z), :] = 0
        F.edges[:, F.allVars.index(z)] = 0
        C['nodes'] += [{'vars': Cluster_vars, 'tau': tau}]
    F.factors = F_Rest
    return [F, C]
def calibrate(self, isMax=False):
    self.beta = [None] * self.V
    # compute the messages along the message-passing order
    for e in self.computePath():
        from_v, to_w = e
        pos_to = self.adj[from_v].index(to_w)
        self.delta[from_v][pos_to] = self.mssg(from_v, to_w, isMax)
    # compute the beliefs
    for v in range(self.V):
        belief = copy.copy(self.factors[v])
        if isMax == True:
            belief.values = np.log(belief.values)
        for w in self.adj[v]:
            pos = self.adj[w].index(v)
            delta = self.delta[w][pos]
            if isMax == True:
                belief = FactorOperations.sum(belief, delta, False)
            else:
                belief = FactorOperations.multiply(belief, delta, False)
        self.beta[v] = belief
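# Hedged sketch of reading a single-variable marginal out of the calibrated
# beliefs (this mirrors how the scripts below use self.beta; 'tree' and 'x'
# are placeholder names, not part of this class):
#
#   tree.calibrate()
#   for beta in tree.beta:
#       if x in beta.variables:
#           marg = copy.copy(beta)
#           for other in set(beta.variables) - set([x]):
#               marg = FactorOperations.marginalize(marg, other)
#           break
#   # marg.values is the (possibly unnormalized, if evidence was reduced) marginal over x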
def eliminateVar(self, F, z):
    # separate factors into two lists: those that use z (F_Cluster) and the rest
    F_Cluster, F_Rest, Cluster_vars = [], [], []
    for f in F.factors:
        if z in f.variables:
            F_Cluster += [f]
            Cluster_vars += f.variables
        else:
            F_Rest += [f]
    if F_Cluster != []:
        # add a node to the clique tree with the variables involved
        position = self.V
        self.V += 1
        self.box.insert(position, set(Cluster_vars))
        self.adj.insert(position, [])
        # when computing tau of the new node, check if it uses other nodes' taus and connect
        for i in range(position):
            if self.tau[i] in F_Cluster:
                self.addEdge(i, position)
        # multiply the factors in the cluster... (lambda) ...and marginalize by z (tau)
        tau = F_Cluster.pop(0)
        for f in F_Cluster:
            tau = FactorOperations.multiply(tau, f, False)
        if tau.variables != [z]:
            tau = FactorOperations.marginalize(tau, z)
        self.tau.insert(position, tau)
        # update the edges of F (connect all vars inside the new factor, and disconnect the eliminated variable)
        F.connectAll([F.index_var(v) for v in self.box[position]])
        F.adj[F.index_var(z)] = []
        # add the resulting tau to the unused factor list ==> new factor list with z eliminated
        F_Rest += [tau]
    F.factors = F_Rest
    return F
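# Hedged construction sketch: a driver loop of this shape (hypothetical; the
# dict-based __init__ elsewhere in this file uses a slightly different
# eliminateVar signature) eliminates one variable per call, each call adding
# one clique node, its sepset connections and its tau to the tree:
#
#   F = FactorGraph(listOfFactors)
#   for _ in range(len(F.allVars)):
#       z = F.firstMinNeighborVar()   # min-neighbour elimination ordering heuristic
#       F = self.eliminateVar(F, z)   # adds one clique node and its tau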
def test_multiply_values2():
    v_1 = Rvar.Rvar(1, 3)
    v_2 = Rvar.Rvar(2, 2)
    v_3 = Rvar.Rvar(3, 2)
    X = Factor.Factor([v_2, v_1])
    X.fill_values([0.5, 0.8, 0.1, 0., 0.3, 0.9])
    Y = Factor.Factor([v_3, v_2])
    Y.fill_values([0.5, 0.7, 0.1, 0.2])
    Z = FactorOperations.multiply(X, Y, False)
    sol = Factor.Factor([v_1, v_2, v_3])
    sol.fill_values([0.25, 0.05, 0.15, 0.08, 0., 0.09,
                     0.35, 0.07, 0.21, 0.16, 0., 0.18])
    assert np.allclose(Z.values, sol.values, atol=epsilon)
def test_complete_1():
    v1 = Rvar.Rvar(1, 3)
    v2 = Rvar.Rvar(2, 2)
    v3 = Rvar.Rvar(3, 2)
    v4 = Rvar.Rvar(4, 2)
    v5 = Rvar.Rvar(5, 3)
    v6 = Rvar.Rvar(6, 3)
    v7 = Rvar.Rvar(7, 2)
    v8 = Rvar.Rvar(8, 3)

    f1 = Factor.Factor([v1])
    f1.fill_values([1.0/3.0, 1.0/3.0, 1.0/3.0])
    f2 = Factor.Factor([v8, v2])
    f2.fill_values([0.9, 0.1, 0.5, 0.5, 0.1, 0.9])
    f3 = Factor.Factor([v3, v4, v7, v2])
    f3.fill_values([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4,
                    0.4, 0.6, 0.3, 0.7, 0.2, 0.8, 0.1, 0.9])
    f4 = Factor.Factor([v4])
    f4.fill_values([0.5, 0.5])
    f5 = Factor.Factor([v5, v6])
    f5.fill_values([0.75, 0.2, 0.05, 0.2, 0.6, 0.2, 0.05, 0.2, 0.75])
    f6 = Factor.Factor([v6])
    f6.fill_values([0.3333, 0.3333, 0.3333])
    f7 = Factor.Factor([v7, v5, v6])
    f7.fill_values([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.5,
                    0.5, 0.4, 0.6, 0.3, 0.7, 0.2, 0.8, 0.1, 0.9])
    f8 = Factor.Factor([v8, v4, v1])
    f8.fill_values([0.1, 0.3, 0.6, 0.05, 0.2, 0.75, 0.2, 0.5, 0.3,
                    0.1, 0.35, 0.55, 0.8, 0.15, 0.05, 0.2, 0.6, 0.2])

    factors = [f2, f3, f4, f5, f6, f7, f8]
    a = f1
    for f in factors:
        a = FactorOperations.multiply(a, f)
    rvars = [v2, v3, v4, v5, v6, v7, v8]
    for v in rvars:
        a = FactorOperations.marginalize(a, v)
    assert np.allclose(a.values, [0.37414966, 0.30272109, 0.32312925])
def MAP_Word(word):
    chars = len(word['gT'])
    vall = [None] * chars
    for i in range(chars):
        vall[i] = Rvar.Rvar(i, 26)

    f = []
    for i in range(chars):
        f.append(singletonFactor(vall[i], word['img'][i]))
    for i in range(chars - 1):
        f.append(pairwiseFactor(vall[i], vall[i + 1]))
    for i in range(chars - 2):
        f.append(tripletFactor(vall[i], vall[i + 1], vall[i + 2]))

    # choose the top two most similar image pairs
    ss = []
    for i in range(chars):
        for j in range(i + 1, chars):
            ss.append([vall[i], vall[j], similarity(word['img'][i], word['img'][j])])
    ss = sorted(ss, key=lambda x: x[2])
    top1 = ss.pop()
    print top1
    f.append(image_simil_factor(top1[0], top1[1], top1[2]))
    top2 = ss.pop()
    print top2
    f.append(image_simil_factor(top2[0], top2[1], top2[2]))

    print '---', len(f)
    cc = CliqueTree.CliqueTree(f)
    print cc
    cc.calibrate(isMax=True)

    # BEWARE: this assumes the max-marginals are exact and unambiguous (a unique
    # maximizer per variable), which in general does not have to happen;
    # that is why the checking at the bottom is important.
    sol = []
    for vari in vall:
        for beta in cc.beta:
            if vari in beta.variables:
                fu = copy.copy(beta)
                for g in (set(beta.variables) - set([vari])):
                    fu = FactorOperations.max_marginalize(fu, g)
                maxi = np.max(fu.values)
                sol.append(list(fu.values).index(maxi))
                break
    return sol
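# Hedged usage note: MAP_Word returns one 0-25 state index per character
# position (the Rvars have cardinality 26). Assuming the states are ordered
# alphabetically (an assumption, not guaranteed by this module), the result
# can be decoded into letters like this:
#
#   sol = MAP_Word(word)
#   decoded = ''.join(chr(ord('a') + s) for s in sol)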
def initializePotentials(self, listOfFactors):
    # create clique potentials initialized to all ones
    self.factors = [None] * self.V
    for i in range(self.V):
        fu = Factor.Factor(sorted(list(self.box[i])))
        fu.values = np.ones(fu.cards)
        self.factors[i] = fu
    # ... and now brutishly (FIFO) we assign the factors
    for fu in listOfFactors:
        notUsed = True
        for i, n in enumerate(self.box):
            if n.issuperset(set(fu.variables)):
                self.factors[i] = FactorOperations.multiply(self.factors[i], fu, False)
                notUsed = False
                break  # use each factor only once
        if notUsed:
            raise NameError('factor not used in any clique!', fu.variables)
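# Hedged end-to-end sketch of how the clique potentials are used, mirroring the
# genetic-network scripts below (the factor list 'F', the variable 'some_var'
# and the indices are placeholders; whether the constructor itself calls
# initializePotentials depends on which __init__ variant in this file is used):
#
#   cc = CliqueTree.CliqueTree(F)        # construction also sets up the potentials
#   cc.factors[0] = FactorOperations.observe(cc.factors[0], {some_var: 0})  # optional evidence
#   cc.calibrate()                       # calibrated beliefs end up in cc.beta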
def __init__(self, listOfFactors):
    F = FactorGraph(listOfFactors)
    # create nodes iteratively through variable elimination
    C = {'nodes': [], 'edges': np.zeros((0, 0))}
    considered_cliques = 0
    while considered_cliques < len(F.allVars):
        z = F.firstMinNeighborVar()
        [F, C] = self.eliminateVar(F, C, z)
        considered_cliques += 1
    self.nodes = [set(n['vars']) for n in C['nodes']]
    self.edges = C['edges']
    # prune the tree
    keepPruning = True
    while keepPruning:
        keepPruning = self.pruneNode()
    # initialize potentials first to all ones
    self.factors = []
    for i in range(len(self.nodes)):
        fu = Factor.Factor(sorted(list(self.nodes[i])))
        #fu.fill_values(np.ones(np.product(fu.cards)))
        fu.values = np.ones(fu.cards)
        self.factors += [fu]
    # ... and now brutishly (FIFO) we assign the factors
    for fu in listOfFactors:
        notUsed = True
        for i, n in enumerate(self.nodes):
            if set(fu.variables) <= n:
                self.factors[i] = FactorOperations.multiply(self.factors[i], fu, False)
                notUsed = False
                break
        if notUsed:
            raise NameError('factor not used in any clique!', fu.variables)
## BUILDING THE NETWORK ##
GN = geneticNetwork(family_tree,
                    frequency_of_alleles_in_general_population,
                    probability_of_trait_based_on_genotype)

# Evidence conditioning
#modify_Factor_by_evidence('Ira', 'pheno', TRAIT_PRESENT)
modify_Factor_by_evidence('James', 'geno', Ff)
modify_Factor_by_evidence('Rene', 'geno', FF)

# let's try first the whole kahuna: build the joint CPD and compute the probability of developing CF
a = build_joint_cpd()

# marginalize out everything except the target phenotype variable
target = GN['Benito']['var_pheno']
lista = [x for x in a.variables if x != target]
for v in lista:
    a = FactorOperations.marginalize(a, v)
    #print a.variables, a.values.size
print 'probability of Benito showing ailment', 100. * a.values[0], '%'
#msg_10_1 = FactorOperations.marginalize(msg_10_1, msg_10_1.variables[1])
#msg_10_1 = FactorOperations.marginalize(msg_10_1, msg_10_1.variables[1])
#print '---->', msg_10_1.variables
#belief_1 = FactorOperations.multiply(msg_10_1, cc.factors[1], True)
#print belief_1.variables
#sol = FactorOperations.marginalize(belief_1, belief_1.variables[0])
#print 'benito prob of having illness is now :', sol.values[0]

# COMPUTE ALL EXACT MARGINALS (of showing the sickness, for everyone)
import copy

# for fun let's reduce some evidence
cc.factors[3] = FactorOperations.observe(cc.factors[3], {v[15]: 0})  # Ira shows the phenotype
cc.factors[6] = FactorOperations.observe(cc.factors[6], {v[0]: 0})   # Rene has genotype FF
cc.factors[4] = FactorOperations.observe(cc.factors[4], {v[12]: 1})  # James has genotype Ff

cc.calibrate()

phenos_nodes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
probs = {}
for i in phenos_nodes:
    belief = cc.beta[i]
    genes = [rv for rv in belief.variables if not rv.id.endswith("_p")]
    f = copy.copy(belief)
    f = FactorOperations.marginalize(f, genes[0])
    probs[f.variables[0].id] = f.values[0]
print probs
def test_conditioning():
    evidence = {v[2]: 0, v[3]: 1}
    assert np.allclose(FactorOperations.observe(f['a'], evidence).values, f['a'].values)
    assert np.allclose(FactorOperations.observe(f['b'], evidence, False).values,
                       [[0.59, 0.22], [0., 0.]])
    assert np.allclose(FactorOperations.observe(f['c'], evidence, False).values,
                       [[0., 0.], [0.61, 0.]])
    assert np.allclose(FactorOperations.observe(f['z'], {v[3]: 0}, False).values,
                       [[[0.25, 0.05, 0.15], [0.08, 0., 0.09]],
                        [[0., 0., 0.], [0., 0., 0.]]])
def modify_Factor_by_evidence(name, node, ass):
    factor = GN[name]['factor_' + node]
    randvar = GN[name]['var_' + node]
    GN[name]['factor_' + node] = FactorOperations.observe(factor, {randvar: ass})
def test_multiply_values():
    c = FactorOperations.multiply(f['a'], f['b'])
    sol = Factor.Factor(c.variables)
    sol.fill_values([0.0649, 0.1958, 0.0451, 0.6942])
    assert np.allclose(c.values, sol.values, atol=epsilon)
    F.append(f1)
    F.append(f2)
    F.append(f3)
    return [F, v]


[F, v] = geneticNetwork(family_tree,
                        frequency_of_alleles_in_general_population,
                        probability_of_trait_based_on_genotype)
print F

cc = CliqueTree.CliqueTree(F)
for i, e in enumerate(v):
    print i, e

# for fun let's reduce some evidence
cc.factors[2] = FactorOperations.observe(cc.factors[2], {v[17]: 0})  # Ira shows the phenotype
cc.factors[5] = FactorOperations.observe(cc.factors[5], {v[6]: 0})   # Rene has gene copy 1 = F
cc.factors[5] = FactorOperations.observe(cc.factors[5], {v[7]: 1})   # Rene has gene copy 2 = f
cc.factors[1] = FactorOperations.observe(cc.factors[1], {v[5]: 0})   # Eva shows the phenotype

cc.calibrate()
print cc

phenos_nodes = [0, 1, 2, 3, 4, 5, 6]
probs = {}
for i in phenos_nodes:
    belief = cc.beta[i]
    genes = [v1 for v1 in belief.variables if not v1.id.endswith("_p")]
    f = copy.copy(belief)
    print genes
    for g in genes: