def __init__(self, alphaList, numAlleles, geneCopyVarOne, geneCopyVarTwo,
             phenotypeVar):
    """ Build the factor for a phenotype given the two gene copies.

    The factor is stored in self.phenotypeFactor with scope
    [phenotypeVar, geneCopyVarOne, geneCopyVarTwo] and cardinalities
    [2, numAlleles, numAlleles].  For each joint assignment the two gene
    copies are looked up in the allele-to-genotype table returned by
    generateAlleleGenotypeMappers(numAlleles); the stored value is
    alphaList[genotype] when the zero-based phenotype assignment is 0 and
    1 - alphaList[genotype] otherwise.

    alphaList      -- values indexed by genotype number
    numAlleles     -- cardinality used for each gene-copy variable
    geneCopyVarOne -- variable of the first gene copy
    geneCopyVarTwo -- variable of the second gene copy
    phenotypeVar   -- variable of the phenotype
    """
    self.numalleles = numAlleles
    self.alphaList = alphaList
    self.phenotypeFactor = Factor(
        [phenotypeVar, geneCopyVarOne, geneCopyVarTwo], [], [],
        'phenotype| geneCopy1, geneCopy2')
    self.phenotypeFactor.setCard([2, numAlleles, numAlleles])

    affectedAlphas = alphaList
    unaffectedAlphas = [1 - alpha for alpha in alphaList]

    # only the allele -> genotype direction is needed here
    (allelesToGenotypes, _) = generateAlleleGenotypeMappers(numAlleles)

    # the cardinality does not change inside the loop, so fetch it once
    card = self.phenotypeFactor.getCard()
    numAssignments = np.prod(card)

    # subtract 1 so the assignments can be used as zero-based indices
    assignments = IndexToAssignment(np.arange(numAssignments), card) - 1

    values = []
    for z in range(numAssignments):
        curr_assign = assignments[z]
        # NOTE(review): the int() casts are there because the assignment
        # rows and the mapper table may hold floats, which cannot index a
        # numpy array or a list -- confirm the dtypes of the helpers.
        genotype_num = int(
            allelesToGenotypes[int(curr_assign[1]), int(curr_assign[2])])
        if curr_assign[0] == 0:
            values.append(affectedAlphas[genotype_num])
        else:
            values.append(unaffectedAlphas[genotype_num])

    self.phenotypeFactor.setVal(values)
def multiply(factor1, factor2):
    """ Return the product of two factors as a new Factor.

    Each factor's array is reshaped to a five-axis shape in which the axis
    given by getVariableIndex(var) has length 3 when that index is 1 and
    length 2 otherwise; axes of variables the factor does not mention keep
    length 1.  The reshaped arrays are multiplied element-wise (the
    length-1 axes broadcast) and the singleton axes are squeezed away.

    The variable list of the result is the variable list of the factor
    whose array has more dimensions (factor1 on a tie) followed by the
    variables that only the other factor mentions.
    NOTE(review): the trailing variables come out of a set difference, so
    their order is not fixed -- confirm Factor does not rely on it.
    """

    def alignedShape(variables):
        # axis lengths used for broadcasting: 1 unless the variable is present
        shape = [1] * 5
        for var in variables:
            axis = getVariableIndex(var)
            shape[axis] = 3 if axis == 1 else 2
        return tuple(shape)

    if factor1.array.ndim >= factor2.array.ndim:
        largeFactor, smallFactor = factor1, factor2
    else:
        largeFactor, smallFactor = factor2, factor1

    expanded1 = factor1.array.reshape(alignedShape(factor1.variables))
    expanded2 = factor2.array.reshape(alignedShape(factor2.variables))
    product = np.squeeze(expanded1 * expanded2)

    extraVariables = list(
        set(smallFactor.variables) - set(largeFactor.variables))
    return Factor(largeFactor.variables + extraVariables, product)
def __init__(self, allelefreqs, genotypeVar, name):
    """ Build the single-variable genotype factor from allele frequencies.

    The number of alleles is len(allelefreqs).  The allele/genotype lookup
    tables returned by generateAlleleGenotypeMappers are kept on the
    instance, and the factor stored in self.genotypeFactor has one value
    per genotype: the product of the frequencies of the genotype's two
    alleles, doubled when the two alleles differ.

    allelefreqs -- allele frequencies, indexed by allele
    genotypeVar -- variable of the genotype
    name        -- name handed to the Factor constructor
    """
    self.allelefreq = allelefreqs
    self.allelesToGenotypes = None
    self.genotypesToAlleles = None
    self.genotypeFactor = None

    # one allele per frequency that was passed in
    alleleCount = len(allelefreqs)

    # lookup tables in both directions: alleles -> genotype, genotype -> alleles
    mappers = generateAlleleGenotypeMappers(alleleCount)
    (self.allelesToGenotypes, self.genotypesToAlleles) = mappers
    (ngenos, ploidy) = np.shape(self.genotypesToAlleles)

    # the factor has one entry per genotype
    self.genotypeFactor = Factor([genotypeVar], [], [], name)
    self.genotypeFactor.setCard([ngenos])

    # start from zeros, then fill in every genotype
    values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()
    for genotype in range(ngenos):
        pair = self.genotypesToAlleles[genotype, :].tolist()
        frequency = np.prod([allelefreqs[allele] for allele in pair])
        if pair[0] != pair[1]:
            # two different alleles: the value is doubled
            frequency = frequency * 2
        values[genotype] = frequency

    self.genotypeFactor.setVal(values)
def FactorMarginalization(A, V):
    """ FactorMarginalization Sums given variables out of a factor.

    B = FactorMarginalization(A,V) computes the factor with the variables
    in V summed out. The factor data structure has the following fields:
        .var    Vector of variables in the factor, e.g. [1 2 3]
        .card   Vector of cardinalities corresponding to .var, e.g. [2 2 2]
        .val    Value table of size prod(.card)

    A itself is returned when A has no variables or V is empty.
    The resultant factor should have at least one variable remaining;
    otherwise an error message is written to stderr and None is returned.

    See also FactorProduct, IndexToAssignment, and AssignmentToIndex

    Based on matlab code found here:
    https://github.com/indapa/PGM/blob/master/Prog1/FactorMarginalization.m
    """
    # check for empty factor or variable list
    if len(A.getVar()) == 0 or len(V) == 0:
        return A

    # The scope of the new factor is the set difference between A.var and V.
    # np.setdiff1d returns it sorted, so the variable order is deterministic
    # and agrees with FactorMaxMarginalization.
    Bvar = np.setdiff1d(A.getVar(), V)

    # check to see if the new factor has empty scope
    if len(Bvar) == 0:
        sys.stderr.write(
            "FactorMarginalization:Error, resultant factor has empty scope...\n"
        )
        return None

    # indices of the variables of the new factor in the original factor A
    mapB = isMember(Bvar, A.getVar())

    # the resulting factor after marginalizing out the variables in V
    B = Factor()

    # set the marginalized factor's variable scope and cardinality
    B.setVar(Bvar.tolist())
    B.setCard(A.getCard()[mapB])
    B.setVal(np.zeros(np.prod(B.getCard())).tolist())

    # compute some helper indices
    assignments = IndexToAssignment(np.arange(np.prod(A.getCard())),
                                    A.getCard())

    # indxB tells which values in A to sum together when marginalizing
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    # accum is a numpy implementation of matlab accumarray: it sums the
    # data in each group, and the groups are defined by indxB
    # see http://blogs.mathworks.com/loren/2008/02/20/under-appreciated-accumarray/
    marginal_vals = accum(indxB, A.getVal())

    # store the values with the variable(s) in V summed (marginalized) out
    B.setVal(marginal_vals.tolist())
    return B
def ComputeMarginal(V, F, E):
    """ ComputeMarginal Computes the marginal over a set of given variables

    M = ComputeMarginal(V, F, E) computes the marginal over variables V in
    the distribution induced by the set of factors F, given evidence E.

    M is a factor containing the marginal over variables V, i.e. a result
      of FactorMarginalization.
    V is a vector containing the variables in the marginal, e.g. [1 2 3]
      for X_1, X_2 and X_3.
    F is a vector of factors defining the distribution.
    E is an N-by-2 matrix, each row being a variable/value pair: variables
      in the first column, values in the second.  If there is no evidence,
      pass in the empty matrix [] for E.

    When F is empty a message is written to stderr and an empty Factor is
    returned.
    """
    numFactors = len(F)

    # ObserveEvidence requires an N-by-2 array, so E is coerced to two columns
    # see http://stackoverflow.com/a/12576163
    evidenceRows = np.array(E).reshape((-1, 2))

    if numFactors == 0:
        sys.stderr.write("empty factor list given as input.\n")
        return Factor([], [], [])

    # one list of variables per factor, then the union across all of them
    # see http://stackoverflow.com/a/2151553
    scopes = [factor.getVar().tolist() for factor in F]
    allVariables = set().union(*scopes)

    # everything that is not part of the requested marginal gets summed out
    toSumOut = list(allVariables.difference(V))

    # ObserveEvidence takes and returns a list of factors, hence the
    # brackets around the joint and the [0] on the result
    joint = ComputeJointDistribution(F)
    jointE = ObserveEvidence([joint], evidenceRows)[0]

    # ObserveEvidence does not renormalize, so do it here
    jointValues = jointE.getVal()
    jointE.setVal((jointValues / np.sum(jointValues)).tolist())

    return FactorMarginalization(jointE, toSumOut)
def __init__(self, numAlleles, genotypeVarChild, genotypeVarParentOne,
             genotypeVarParentTwo, name):
    """ Build the factor for a child's genotype given both parents' genotypes.

    The factor is stored in self.genotypeFactor with scope
    [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo]; every
    variable has one state per genotype.  For each joint assignment all
    pairings of one allele from each parent are enumerated (a Punnett
    square) and the stored value is the fraction of pairings whose
    genotype equals the child's genotype.

    numAlleles -- number of alleles, passed to generateAlleleGenotypeMappers
    name       -- name handed to the Factor constructor
    """
    self.genotypeFactor = Factor(
        [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo],
        [], [], name)

    # map alleles to genotypes and genotypes to alleles
    (self.allelesToGenotypes,
     self.genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)

    # one row of genotypesToAlleles per genotype
    ngenos = np.shape(self.genotypesToAlleles)[0]
    self.genotypeFactor.setCard([ngenos, ngenos, ngenos])

    card = self.genotypeFactor.getCard()
    numAssignments = np.prod(card)

    # subtract 1 so the assignments can be used as zero-based indices
    assignments = IndexToAssignment(np.arange(numAssignments), card) - 1

    values = []
    for z in range(numAssignments):
        curr_assign = assignments[z]
        # cast to int: numpy does not accept float values as indices
        childAssignment = int(curr_assign[0])
        parent1gametes = self.genotypesToAlleles[int(curr_assign[1]), :]
        parent2gametes = self.genotypesToAlleles[int(curr_assign[2]), :]

        # every way of taking one allele from each parent
        zygote_list = list(
            itertools.product(parent1gametes, parent2gametes))

        # how many of those crosses produce the child's genotype
        matches = sum(
            1 for zygote in zygote_list
            if self.allelesToGenotypes[int(zygote[0]),
                                       int(zygote[1])] == childAssignment)

        # normalise by the number of crosses (4 for two alleles per parent)
        values.append(float(matches) / len(zygote_list))

    self.genotypeFactor.setVal(values)
def from_xml_file(self, filename):
    """ Populate this object from the first <root> element of an XML file.

    filename is handed to minidom.parse.  The children of <root> are
    visited in document order:
      * text nodes are skipped;
      * <instruction> stores its "text" attribute in self.instruction;
      * <world> creates a World, loads it from the node's XML and stores
        it in self.world;
      * any other node creates a Factor, loads it from the node's XML and
        stores it in self.root.
    A later child of the same kind overwrites the value of an earlier one.
    """
    document = minidom.parse(filename)
    top = document.getElementsByTagName("root")[0]

    for child in top.childNodes:
        # whitespace/text between the elements carries no information
        if child.nodeType == child.TEXT_NODE:
            continue

        tag = child.nodeName
        if tag == "instruction":
            self.instruction = child.getAttribute("text")
            continue

        if tag == "world":
            self.world = World()
            self.world.from_xml(child.toxml())
            continue

        # everything else is treated as the root factor
        self.root = Factor()
        self.root.from_xml(child.toxml())
def main():
    """ Pass messages over the ASIA network and write the marginals to disk.

    Reads 'data/ASIA/asia.bif', parses it with BIFParser and builds one
    Factor for every node that is not a root (the node followed by its
    parents).  Messages are then exchanged between nodes and factors until
    the sum over all nodes of the first entry of each marginal changes by
    less than 1e-5 between two rounds.

    Afterwards every marginal is printed and written to 'results.txt', one
    line per node: the node name followed by the marginal's values in
    reverse key order.
    """
    # the context manager closes the input file as soon as it has been read
    with open('data/ASIA/asia.bif') as bifFile:
        BIF = bifFile.readlines()
    BIF = BIFParser.fixWhiteSpace(BIF)
    BN = BIFParser.parseBIF(BIF)

    factors = []
    for node in BN:
        if not node.isRoot():
            scope = [node]
            scope.extend(node.getParents())
            factors.append(Factor.Factor(node.getDist(), scope))

    converged = False
    converNum = 0
    while not converged:
        prevConverNum = converNum
        converNum = 0

        # nodes -> factors
        for a in BN:
            for f in factors:
                if partOf(a, f):
                    message = a.sendMarginal(f)
                    f.receiveBelief(message, a)

        # factors -> nodes
        for f in factors:
            for a in BN:
                if partOf(a, f):
                    message = f.sendBelief(a)
                    a.receiveMarginal(message, f)

        for a in BN:
            a.updateMarginal()
            marginal = a.getMarginal()
            # list(...) keeps the key lookup working when keys() is a view
            converNum += marginal[list(marginal.keys())[0]]

        if np.abs(converNum - prevConverNum) < .00001:
            converged = True

    # the context manager closes the output file even if a write fails
    with open("results.txt", "w") as g:
        for a in BN:
            g.write(a.getName() + " ")
            marginal = a.getMarginal()
            print(marginal)
            # values are written from the last key to the first
            for key in reversed(list(marginal.keys())):
                g.write(str(marginal[key]) + " ")
            g.write("\n")
def ComputeJointDistribution(INPUTS):
    """ ComputeJointDistribution Computes the joint distribution defined by
    a set of given factors

    Joint = ComputeJointDistribution(INPUTS) computes the joint
    distribution defined by a set of given factors.

    Joint is a factor that encapsulates the joint distribution given by
      INPUTS.
    INPUTS is a vector of Factor objects containing the factors defining
      the distribution.

    When INPUTS is empty a message is written to stderr and an empty
    Factor is returned.  When INPUTS holds a single factor, that factor is
    returned as is.
    """
    # reduce is a builtin only on Python 2; functools.reduce exists on both
    from functools import reduce

    # check for empty list of INPUTS
    if len(INPUTS) == 0:
        sys.stderr.write("Empty factor list given as input\n")
        return Factor([], [], [])

    # fold the list from the left: ((f1 * f2) * f3) * ...
    return reduce(lambda x, y: FactorProduct(x, y), INPUTS)
def __init__(self, numAlleles, geneCopyVarChild, geneCopyHapOne,
             geneCopyHapTwo):
    """ Build the factor for a child's gene copy given two haplotypes.

    The factor is stored in self.geneCopyFactor with scope
    [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo]; every variable has
    numAlleles states.  The value for an assignment is 1 when both
    haplotypes carry the child's allele, .5 when exactly one of them does
    and 0 otherwise.

    numAlleles       -- number of alleles (cardinality of each variable)
    geneCopyVarChild -- variable of the child's gene copy
    geneCopyHapOne   -- variable of the first haplotype
    geneCopyHapTwo   -- variable of the second haplotype
    """
    self.numalleles = numAlleles
    # hapone holds the first haplotype variable (not the child variable)
    self.hapone = geneCopyHapOne
    self.haptwo = geneCopyHapTwo

    self.geneCopyFactor = Factor(
        [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo], [], [],
        'child|hap1,hap2')
    self.geneCopyFactor.setCard(
        [self.numalleles, self.numalleles, self.numalleles])

    # numAlleles^3 entries; the child allele (k) is the innermost loop
    values = []
    for i in range(numAlleles):  # alleles from grand(paternal) haplotype
        for j in range(numAlleles):  # alleles from grand(maternal) haplotype
            for k in range(numAlleles):  # child alleles
                # each haplotype that matches the child contributes one half
                matching = (i == k) + (j == k)
                values.append(matching / 2.0)

    self.geneCopyFactor.setVal(values)
def __init__(self, isDominant, genotypeVar, phenotypeVar, name):
    """ Build the deterministic phenotype-given-genotype factor.

    The factor has scope [phenotypeVar, genotypeVar] with cardinalities
    [2, 3] and is stored in self.phenotype.  Assignments are one-based;
    phenotype 1 is the affected state.

    isDominant == 1: the value is 1 for (affected, genotype 1 or 2) and
                     for (unaffected, genotype 3).
    isDominant == 0: the value is 1 for (affected, genotype 3) and for
                     (unaffected, genotype 1 or 2).
    Every other entry is 0; any other isDominant leaves all entries at 0.
    """
    # instantiate a Factor object
    phenotype = Factor([phenotypeVar, genotypeVar], [2, 3], [], name)
    numAssignments = np.prod(phenotype.getCard())
    phenotype.setVal(np.zeros(numAssignments).tolist())

    # this enumerates the assignments the factor can take;
    # with 2x3 cardinality there are 6 of them
    assignments = IndexToAssignment(
        np.arange(numAssignments), phenotype.getCard())

    val = np.zeros(numAssignments)
    for i in range(numAssignments):
        (pheno, geno) = assignments[i]
        affected = pheno == 1
        oneOrTwo = geno == 1 or geno == 2
        three = geno == 3

        if isDominant == 1:
            # dominant: at least one copy is enough to show the phenotype
            consistent = oneOrTwo if affected else three
        elif isDominant == 0:
            consistent = three if affected else oneOrTwo
        else:
            consistent = False

        if consistent:
            val[i] = 1

    phenotype.setVal(val.tolist())
    self.phenotype = phenotype
def __init__(self, alphaList, phenotypeVar, genotypeVar, name):
    """ Build the phenotype-given-genotype factor from per-genotype alphas.

    The factor is stored in self.phenotypeFactor with scope
    [phenotypeVar, genotypeVar] and cardinalities [2, len(alphaList)].
    Its value table interleaves, genotype by genotype, alphaList[i]
    followed by 1 - alphaList[i].

    alphaList is also kept as a numpy array in self.alpha.
    name is handed to the Factor constructor.
    """
    self.phenotypeFactor = Factor([phenotypeVar, genotypeVar], [], [], name)
    self.alpha = np.array(alphaList)
    self.phenotypeFactor.setCard([2, len(alphaList)])

    # per genotype: the alpha first, then its complement
    values = []
    for alpha in alphaList:
        values.append(alpha)
        values.append(1 - alpha)

    self.phenotypeFactor.setVal(values)
def FactorMaxMarginalization(A, V):
    """ computes the factor with the variables in V *maxed* out.

    The resulting factor has all the variables in A minus those in V.
    This is quite similar to FactorMarginalization, but rather than
    summing out the variables in V we take the max, which in the code
    means passing np.max as the function to accum.

    A itself is returned when A has no variables or V is empty.  When no
    variable would remain, an error message is written to stderr and the
    maximum over all values of A is returned instead of a factor.

    See section 13.2 in Koller and Friedman for more information
    """
    maxed = Factor()

    # nothing to do for an empty factor or an empty variable list
    if len(A.getVar()) == 0 or len(V) == 0:
        return A

    keptVars = np.setdiff1d(A.getVar(), V)
    keptColumns = isMember(keptVars, A.getVar())

    if len(keptVars) == 0:
        sys.stderr.write(
            "FactorMaxMarginalization: Error, resultant factor has empty scope...\n"
        )
        return np.max(A.getVal())

    # scope and cardinality of the max-marginalized factor
    maxed.setVar(keptVars.tolist())
    maxed.setCard(A.getCard()[keptColumns])
    maxed.setVal(np.zeros(np.prod(maxed.getCard())).tolist())

    # every assignment of A, one row each
    fullAssignments = IndexToAssignment(np.arange(np.prod(A.getCard())),
                                        A.getCard())

    # groupIds tells which entry of the new factor each value of A falls into
    groupIds = AssignmentToIndex(fullAssignments[:, keptColumns],
                                 maxed.getCard()) - 1

    # np.max is applied within each group instead of a sum
    groupMaxima = accum(groupIds, A.getVal(), np.max)
    maxed.setVal(groupMaxima.tolist())
    return maxed
def eliminateVar(self, Z, E, factorList):
    """ Eliminate variable Z from factorList and record the new clique.

    a variable elimination function based on
    https://github.com/indapa/PGM/blob/master/Prog4/EliminateVar.m
    as well as the Sum-product VE pseudo code in Koller and Friedman
    page 298.

    Z is the variable to be eliminated (variable names are one-based).
    E is a numpy 2d matrix representing the adjacency matrix of variables,
      i.e. the induced VE graph.  It is modified in place: fill edges are
      added between the variables that share a factor with Z, and once Z
      is eliminated its edges are removed from E.
    factorList is the list of factors still to be processed.

    Side effects on self: the scope of the new factor is appended to
    self.nodeList, an entry is appended to self.factorInds, and
    self.edges / self.factorInds are updated for the clique tree.

    Returns (E, factorList) where factorList holds the unused factors
    followed by the factor obtained by summing Z out.  The incoming list
    is returned unchanged when no factor is left over and the
    marginalization produced None.
    """
    useFactors = []  # indices of the factors that contain the variable Z
    scope = []  # union of the variables of those factors

    for i, factor in enumerate(factorList):
        factorVars = factor.getVar().tolist()
        if Z in factorVars:
            # the ith factor is involved in this elimination
            useFactors.append(i)
            scope = list(set(scope).union(factorVars))

    # update edge map
    # These represent the induced edges for the VE graph.  Once Z is
    # eliminated its edges are removed, but the new factor created by the
    # elimination introduces fill edges (see pg. 307 Koller and Friedman).
    # Variable names are one-based while E is zero-based, hence the -1.
    # Visiting every ordered pair covers both directions of each edge.
    for u in scope:
        for w in scope:
            if u != w:
                E[u - 1, w - 1] = 1

    E[Z - 1, :] = 0
    E[:, Z - 1] = 0

    # indices of factorList which are not involved in this elimination,
    # kept in their original order
    unusedFactors = [i for i in range(len(factorList))
                     if i not in useFactors]

    newF = None
    newmap = None
    # check first if there are any unused factors left!
    if len(unusedFactors) > 0:
        # newF is the new factor list; it starts with the unused factors.
        # newmap[old index] is the new index of an unused factor.
        newF = [factorList[old] for old in unusedFactors]
        newmap = np.zeros(max(unusedFactors) + 1, dtype=int).tolist()
        for new, old in enumerate(unusedFactors):
            newmap[old] = new

    # multiply in all the factors that contain the variable Z ...
    newFactor = Factor([], [], [], 'newFactor')
    for i in useFactors:
        newFactor = FactorProduct(newFactor, factorList[i])

    # ... then marginalize Z out and append the result to the new list
    newFactor = FactorMarginalization(newFactor, [Z])
    if newFactor is not None:
        if newF is None:
            # no unused factor was left, so the new factor stands alone
            newF = []
        newF.append(newFactor)

    if newF is not None:
        factorList = newF

    ########################################################################
    # the remaining code builds the edges of the clique tree
    #
    # Add a new node with the factors that contain the variable Z.  Adding
    # a new node represents a new clique: the scope of every factor
    # generated during variable elimination is a clique
    # (pg. 309 Koller & Friedman).
    self.nodeList.append(scope)

    # newC is the total number of nodes in the clique tree
    newC = len(self.nodeList)

    # factorInds holds, per clique, the one-based position of its factor in
    # the current factor list (0 once it has been consumed)
    # NOTE(review): interpretation taken from how the values are used below
    self.factorInds.append(len(unusedFactors) + 1)

    # update the edges of the clique tree
    for i in range(newC - 1):
        # the values in factorInds are one-based, hence the -1
        if self.factorInds[i] - 1 in useFactors:
            self.edges[i, newC - 1] = 1
            self.edges[newC - 1, i] = 1
            self.factorInds[i] = 0
        elif self.factorInds[i] != 0 and len(unusedFactors) > 0:
            # the factor survived: translate its index to the new list
            self.factorInds[i] = newmap[self.factorInds[i] - 1] + 1

    return E, factorList
from CliqueTree import *
from CliqueTreeOperations import *
from FactorOperations import *
import scipy.io as sio
import numpy as np
import pprint
import pdb

# Exercise FactorMaxMarginalization on the 'FactorMax' sample stored in the
# MATLAB file, then on a hand-written factor.
matfile = '/Users/amit/BC_Classes/PGM/Prog4/PA4Sample.mat'
mat_contents = sio.loadmat(matfile)
mat_struct = mat_contents['FactorMax']
val = mat_struct[0, 0]

# INPUT1 holds the var / card / val fields of a factor, in that order
input_factors = val['INPUT1'][0][0]
var = input_factors[0].flatten().tolist()
card = input_factors[1].flatten().tolist()
value = input_factors[2].flatten().tolist()

# print(x) with a single argument behaves the same on Python 2 and 3
print(var)
print(card)
print(value)

INPUT1 = Factor(var, card, value, 'test')
INPUT2 = val['INPUT2'].flatten()
print(INPUT1)
print(INPUT2)
print(FactorMaxMarginalization(INPUT1, INPUT2))

# example used in section 13.2 pg 555 of Friedman and Koller
print("=====")
psi = Factor([1, 2, 3], [3, 2, 2],
             [.25, .05, .15, .08, 0, .09, .35, .07, .21, .16, 0, .18])
maxfactor = FactorMaxMarginalization(psi, [2])
print(maxfactor)
print(IndexToAssignment(np.arange(6), [3, 2]))
# NOTE(review): the next three statements look like the tail of a definition
# that starts outside this view -- confirm their indentation.
print("\n ** FINAL NORMALIZED SOLUTION ** ")
answer = normalize(answer)
print(answer)


def printfactorList(factorList):
    """ Print every factor of factorList between two banner lines. """
    # print(x) with a single argument behaves the same on Python 2 and 3
    print(" *** FACTOR LIST *** ")
    for factor in factorList:
        print(factor)
    print(" *** *********** *** ")


# FACTORS

# Pr(G)
f0 = Factor(['G'], np.array([0.90, 0.1]))

# Pr(D)
f1 = Factor(['D'], np.array([0.50, 0.25, 0.25]))

# Pr(D|F)
f2 = Factor(['D', 'F'],
            np.array([[0.98, 0.02], [0.40, 0.60], [0.15, 0.85]]))

# Pr(D|DS)
f3 = Factor(['D', 'DS'],
            np.array([[0.98, 0.02], [0.15, 0.85], [0.40, 0.60]]))

# Pr(D|S, G)
f4 = Factor(['D', 'S', 'G'],
            np.array([[[0.98, 0.02], [0.15, 0.85], [0.15, 0.85]],
                      [[0.998, 0.002], [0.98, 0.02], [0.98, 0.02]]]))
def FactorDiv(A, B):
    """ FactorDiv Computes the quotient of two factors.

    Similar to FactorProduct, but the values of A are divided by the
    matching values of B; each (numerator, denominator) pair is handed to
    common.zerodiv_tuple so that 0/0 yields 0.
    See page 365 in Koller and Friedman for the definition of factor
    division.

    When A has no variables a message is written to stderr and B is
    returned; when B has no variables a message is written and A is
    returned.  When a variable shared by A and B has different
    cardinalities in the two factors, an error is written to stderr and
    the process exits with status 1.
    """
    # check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    # variables that are in both A and B must have the same cardinality
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        # look the cardinalities up by variable so the comparison does not
        # depend on the order of the variables inside either factor
        cardOfA = dict(zip(A.getVar().tolist(), A.getCard().tolist()))
        cardOfB = dict(zip(B.getVar().tolist(), B.getCard().tolist()))
        if any(cardOfA[v] != cardOfB[v] for v in intersect):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    # the variables of C are the union of the variables in factors A and B
    C = Factor()
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    # set the cardinality of the variables in C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    # initialize the values of the factor C to be zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    # helper indices telling which values of A and B belong together
    assignments = IndexToAssignment(np.arange(np.prod(C.getCard())),
                                    C.getCard())
    # re-arrange the assignment of C to what it would be in factor A / B
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    numerator = A.getVal()[indxA.flatten().tolist()]
    denominator = B.getVal()[indxB.flatten().tolist()]

    # build a real list: map() is lazy on Python 3
    val = [common.zerodiv_tuple(pair)
           for pair in zip(numerator, denominator)]
    C.setVal(val)
    return C
def LogFactor(F):
    """ return a new factor with the same variables, cardinalities and name
    as F whose values are the natural log of the values of F """
    variables = F.getVar().tolist()
    cardinalities = F.getCard().tolist()
    logValues = np.log(F.getVal()).tolist()
    return Factor(variables, cardinalities, logValues, F.getName())
def IdentityFactor(F):
    """ return a new factor with the same variables and cardinalities as F
    whose values are all ones; its name is the name of F followed by
    '_identity' """
    variables = F.getVar().tolist()
    cardinalities = F.getCard().tolist()
    # one entry per joint assignment, handed over as a numpy array
    ones = np.ones(np.prod(F.getCard()))
    identityName = F.getName() + '_identity'
    return Factor(variables, cardinalities, ones, identityName)
def FactorProduct(A, B):
    """ FactorProduct Computes the product of two factors.

    C = FactorProduct(A,B) computes the product between two factors, A and
    B, where each factor is defined over a set of variables with given
    dimension.  The factor data structure has the following fields:
        .var    Vector of variables in the factor, e.g. [1 2 3]
        .card   Vector of cardinalities corresponding to .var, e.g. [2 2 2]
        .val    Value table of size prod(.card)

    When A has no variables a message is written to stderr and B is
    returned; when B has no variables a message is written and A is
    returned.  When a variable shared by A and B has different
    cardinalities in the two factors, an error is written to stderr and
    the process exits with status 1.

    See also FactorMarginalization, IndexToAssignment, AssignmentToIndex,
    and https://github.com/indapa/PGM/blob/master/Prog1/FactorProduct.m
    """
    # check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    # variables that are in both A and B must have the same cardinality
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        # look the cardinalities up by variable so the comparison does not
        # depend on the order of the variables inside either factor
        cardOfA = dict(zip(A.getVar().tolist(), A.getCard().tolist()))
        cardOfB = dict(zip(B.getVar().tolist(), B.getCard().tolist()))
        if any(cardOfA[v] != cardOfB[v] for v in intersect):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    # the variables of C are the union of the variables in factors A and B
    C = Factor()
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    # set the cardinality of the variables in C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    # initialize the values of the factor C to be zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    # helper indices telling which values of A and B to multiply
    assignments = IndexToAssignment(np.arange(np.prod(C.getCard())),
                                    C.getCard())
    # re-arrange the assignment of C to what it would be in factor A / B
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    # with the indices into A.val and B.val, multiply entry by entry
    c_val = (A.getVal()[indxA.flatten().tolist()] *
             B.getVal()[indxB.flatten().tolist()])
    C.setVal(c_val.tolist())
    return C
def FactorSum(A, B):
    """ FactorSum Computes the sum of two factors.

    Similar to FactorProduct, but matching values are added instead of
    multiplied.  We would use this in log space, where multiplication
    becomes addition.
    Based on the code here
    https://github.com/indapa/PGM/blob/master/Prog4/FactorSum.m

    When A has no variables a message is written to stderr and B is
    returned; when B has no variables a message is written and A is
    returned.  When a variable shared by A and B has different
    cardinalities in the two factors, an error is written to stderr and
    the process exits with status 1.
    """
    # check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    # variables that are in both A and B must have the same cardinality
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        # look the cardinalities up by variable so the comparison does not
        # depend on the order of the variables inside either factor
        cardOfA = dict(zip(A.getVar().tolist(), A.getCard().tolist()))
        cardOfB = dict(zip(B.getVar().tolist(), B.getCard().tolist()))
        if any(cardOfA[v] != cardOfB[v] for v in intersect):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    # the variables of C are the union of the variables in factors A and B
    C = Factor()
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    # set the cardinality of the variables in C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    # initialize the values of the factor C to be zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    # helper indices telling which values of A and B to add
    assignments = IndexToAssignment(np.arange(np.prod(C.getCard())),
                                    C.getCard())
    # re-arrange the assignment of C to what it would be in factor A / B
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    # with the indices into A.val and B.val, add entry by entry
    c_val = (A.getVal()[indxA.flatten().tolist()] +
             B.getVal()[indxB.flatten().tolist()])
    C.setVal(c_val.tolist())
    return C
from Factor import *
from PGMcommon import *
from CliqueTree import *
from CliqueTreeOperations import *
from FactorOperations import *
import scipy.io as sio
import numpy as np
import pprint
import pdb

# Build a clique tree from the 'SumProdCalibrate' sample stored in the
# MATLAB file, calibrate it and print the resulting clique factors.
matfile = '/Users/amit/BC_Classes/PGM/Prog4/PA4Sample.mat'
mat_contents = sio.loadmat(matfile)
mat_struct = mat_contents['SumProdCalibrate']
val = mat_struct[0, 0]
input_edges = val['INPUT']['edges'][0][0]
input_cliqueList = val['INPUT']['cliqueList'][0][0][0]

# turn every (var, card, val) record of the sample into a Factor object
clique_list_factorObj = []
for tpl in input_cliqueList:
    (var, card, values) = tpl
    f = Factor(var[0].tolist(), card[0].tolist(), values[0].tolist(),
               'factor')
    clique_list_factorObj.append(f)

P = CliqueTree(clique_list_factorObj, input_edges, clique_list_factorObj, [])
P = CliqueTreeCalibrate(P)

# print(x) with a single argument behaves the same on Python 2 and 3
for f in P.getNodeList():
    print(f)
    print("==")
def __init__(self, alleleFreqs, geneCopyVar):
    """ Build the single-variable factor for a founder's gene copy.

    The factor is named 'founderHap', has geneCopyVar as its only
    variable with one state per entry of alleleFreqs, and uses
    alleleFreqs as its value table.  It is stored in self.geneCopyFactor.
    """
    alleleCount = len(alleleFreqs)
    founderFactor = Factor([geneCopyVar], [], [], 'founderHap')
    self.geneCopyFactor = founderFactor
    # one state per allele, valued by that allele's frequency
    founderFactor.setCard([alleleCount])
    founderFactor.setVal(alleleFreqs)