Esempio n. 1
0
    def __init__(self, alphaList, numAlleles, geneCopyVarOne, geneCopyVarTwo,
                 phenotypeVar):
        """Build the factor for a phenotype given two gene copies.

        alphaList holds one value per genotype; it is indexed by the genotype
        number looked up in the allele-to-genotype map.
        numAlleles is the cardinality used for both gene-copy variables.
        geneCopyVarOne, geneCopyVarTwo and phenotypeVar are the variable
        identifiers placed in the factor scope, phenotype first.

        The finished factor is stored in self.phenotypeFactor. For every
        assignment, phenotype state 0 receives alpha for the genotype formed
        by the two gene copies and the other phenotype state receives
        1 - alpha.
        """
        self.numalleles = numAlleles
        self.alphaList = alphaList
        self.phenotypeFactor = Factor(
            [phenotypeVar, geneCopyVarOne, geneCopyVarTwo], [], [],
            'phenotype| geneCopy1, geneCopy2')
        self.phenotypeFactor.setCard([2, numAlleles, numAlleles])

        # total number of entries in the value table, computed once
        numEntries = np.prod(self.phenotypeFactor.getCard())
        values = np.zeros(numEntries).tolist()

        affectedAlphas = alphaList
        unaffectedAlphas = [1 - alpha for alpha in alphaList]

        # only the allele-pair -> genotype direction is needed here
        (allelesToGenotypes, _) = generateAlleleGenotypeMappers(numAlleles)

        # the code subtracts 1 so the assignments can be used as
        # zero-based indices below
        assignments = IndexToAssignment(
            np.arange(numEntries), self.phenotypeFactor.getCard()) - 1

        for z in range(numEntries):
            curr_assign = assignments[z]
            genotype_num = allelesToGenotypes[curr_assign[1], curr_assign[2]]
            if curr_assign[0] == 0:
                values[z] = affectedAlphas[genotype_num]
            else:
                values[z] = unaffectedAlphas[genotype_num]
        self.phenotypeFactor.setVal(values)
Esempio n. 2
0
def multiply(factor1, factor2):
    """Return the product of two factors as a new Factor.

    Each factor's array is reshaped to a five-axis shape in which the axis
    given by getVariableIndex(var) has length 3 when that index is 1 and
    length 2 otherwise; axes of variables the factor does not mention keep
    length 1. The two reshaped arrays are multiplied element-wise (NumPy
    broadcasting) and the length-1 axes are squeezed away.

    The result lists the variables of the factor with more array dimensions
    first (factor1 on a tie), followed by the variables that only the other
    factor mentions.
    """

    def alignedShape(variables):
        # five-axis shape with one real axis per variable of the factor
        shape = [1] * 5
        for var in variables:
            axis = getVariableIndex(var)
            shape[axis] = 3 if axis == 1 else 2
        return tuple(shape)

    if factor1.array.ndim >= factor2.array.ndim:
        bigger, smaller = factor1, factor2
    else:
        bigger, smaller = factor2, factor1

    aligned1 = factor1.array.reshape(alignedShape(factor1.variables))
    aligned2 = factor2.array.reshape(alignedShape(factor2.variables))
    product = np.squeeze(aligned1 * aligned2)

    extraVariables = list(set(smaller.variables) - set(bigger.variables))
    return Factor(bigger.variables + extraVariables, product)
Esempio n. 3
0
    def __init__(self, allelefreqs, genotypeVar, name):
        """Build the genotype factor from a list of allele frequencies.

        allelefreqs holds one frequency per allele, so its length is the
        number of alleles. genotypeVar is the single variable in the factor
        scope and name is passed through to Factor.

        The result is stored in self.genotypeFactor. Each genotype gets the
        product of the frequencies of its two alleles, doubled when the two
        alleles differ.
        """
        self.allelefreq = allelefreqs

        # allele <-> genotype lookup tables for this number of alleles
        (self.allelesToGenotypes,
         self.genotypesToAlleles) = generateAlleleGenotypeMappers(
             len(allelefreqs))
        (genotypeCount, _) = np.shape(self.genotypesToAlleles)

        # one factor state per genotype
        self.genotypeFactor = Factor([genotypeVar], [], [], name)
        self.genotypeFactor.setCard([genotypeCount])

        probabilities = np.zeros(
            np.prod(self.genotypeFactor.getCard())).tolist()

        for genotype in range(genotypeCount):
            pair = self.genotypesToAlleles[genotype, :].tolist()
            jointFreq = np.prod([allelefreqs[allele] for allele in pair])
            if pair[0] != pair[1]:
                # two different alleles: the product is counted twice
                jointFreq = jointFreq * 2
            probabilities[genotype] = jointFreq

        self.genotypeFactor.setVal(probabilities)
Esempio n. 4
0
def FactorMarginalization(A, V):
    """Sum the variables in V out of factor A.

    B = FactorMarginalization(A, V) returns a factor whose scope is the
    variables of A that are not in V, and whose values are the sums of the
    entries of A that agree on those remaining variables.

    If A has no variables or V is empty, A itself is returned. If no
    variable would remain, an error is written to stderr and None is
    returned.

    See also FactorProduct, IndexToAssignment and AssignmentToIndex.
    Based on the matlab code at
    https://github.com/indapa/PGM/blob/master/Prog1/FactorMarginalization.m
    """
    B = Factor()

    # nothing to marginalize
    if len(A.getVar()) == 0 or len(V) == 0:
        return A

    # scope of the result: variables of A that are not summed out
    remaining = set(A.getVar()).difference(set(V))
    Bvar = np.array(list(remaining))
    # positions of the remaining variables inside A's variable list
    mapB = isMember(Bvar, A.getVar())

    if len(Bvar) == 0:
        sys.stderr.write(
            "FactorMarginalization:Error, resultant factor has empty scope...\n"
        )
        return None

    # scope, cardinality and a zero-filled value table for the result
    B.setVar(Bvar.tolist())
    B.setCard(A.getCard()[mapB])
    B.setVal(np.zeros(np.prod(B.getCard())).tolist())

    # every assignment of A, projected onto the remaining variables, gives
    # the entry of B (after the -1 shift) that this entry of A is added to
    allAssignments = IndexToAssignment(
        np.arange(np.prod(A.getCard())), A.getCard())
    targetIndex = AssignmentToIndex(allAssignments[:, mapB],
                                    B.getCard()) - 1

    # accum groups A's values by target index (matlab accumarray style)
    summed = accum(targetIndex, A.getVal())
    B.setVal(summed.tolist())
    return B
Esempio n. 5
0
def ComputeMarginal(V, F, E):
    """Compute the marginal over variables V given factors F and evidence E.

    M = ComputeMarginal(V, F, E) returns the result of marginalizing the
    normalized, evidence-reduced joint distribution down to the variables
    in V.

    V is a list of the variables to keep, e.g. [1, 2, 3].
    F is a list of factors defining the distribution. When it is empty a
    message is written to stderr and an empty Factor is returned.
    E is the evidence as variable/value pairs; it is reshaped to two
    columns, so a flat list of pairs or an empty list are both accepted.
    """
    factorCount = len(F)
    # ObserveEvidence is given a two-column array, so reshape E to that
    evidence = np.reshape(np.array(E), (-1, 2))

    if factorCount == 0:
        sys.stderr.write("empty factor list given as input.\n")
        return Factor([], [], [])

    # every variable mentioned by any factor, minus the ones to keep,
    # is what has to be summed out
    scopes = [factor.getVar().tolist() for factor in F]
    allVariables = set().union(*scopes)
    toSumOut = list(allVariables.difference(V))

    # ObserveEvidence is called with a list of factors and its first
    # result is used, hence the brackets and the [0]
    joint = ComputeJointDistribution(F)
    jointE = ObserveEvidence([joint], evidence)[0]

    # the reduced joint is renormalized here before marginalizing
    normalized = jointE.getVal() / np.sum(jointE.getVal())
    jointE.setVal(normalized.tolist())

    return FactorMarginalization(jointE, toSumOut)
Esempio n. 6
0
    def __init__(self, numAlleles, genotypeVarChild, genotypeVarParentOne,
                 genotypeVarParentTwo, name):
        """Build the factor for a child's genotype given both parents.

        The factor scope is [child, parent one, parent two], each with one
        state per genotype. For every combination of parental genotypes the
        four allele pairings (one allele from each parent) are mapped to
        genotypes and counted; the value stored for a child genotype is its
        count divided by 4.

        The finished factor is stored in self.genotypeFactor.
        """
        self.genotypeFactor = Factor(
            [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo], [],
            [], name)

        # allele <-> genotype lookup tables
        (self.allelesToGenotypes,
         self.genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)
        (genotypeCount, _) = np.shape(self.genotypesToAlleles)

        self.genotypeFactor.setCard(
            [genotypeCount, genotypeCount, genotypeCount])
        tableSize = np.prod(self.genotypeFactor.getCard())
        table = np.zeros(tableSize).tolist()

        # the code shifts the assignments by -1 so they can index the
        # lookup tables
        assignments = IndexToAssignment(
            np.arange(tableSize), self.genotypeFactor.getCard()) - 1

        for row in range(tableSize):
            current = assignments[row]
            childGenotype = int(current[0])

            gametesOne = self.genotypesToAlleles[current[1], :]
            gametesTwo = self.genotypesToAlleles[current[2], :]

            # genotype produced by each pairing of one allele per parent
            offspring = [
                self.allelesToGenotypes[pairing[0], pairing[1]]
                for pairing in itertools.product(gametesOne, gametesTwo)
            ]

            # count how often each genotype occurs among the pairings
            tally = dict.fromkeys(range(genotypeCount), 0.)
            for genotype in offspring:
                tally[genotype] += 1.

            frequencies = np.array(
                [tally[g] for g in range(genotypeCount)]) / 4
            table[row] = frequencies[childGenotype]

        self.genotypeFactor.setVal(table)
Esempio n. 7
0
 def from_xml_file(self, filename):
     """Populate this object from the XML file at filename.

     The children of the first <root> element are visited in order and
     text nodes are skipped. An "instruction" element supplies
     self.instruction from its "text" attribute, a "world" element is
     loaded into a new World stored in self.world, and any other element
     is loaded into a new Factor stored in self.root.
     """
     document = minidom.parse(filename)
     root_element = document.getElementsByTagName("root")[0]
     for child in root_element.childNodes:
         if child.nodeType == child.TEXT_NODE:
             continue

         tag = child.nodeName
         if tag == "instruction":
             self.instruction = child.getAttribute("text")
             continue
         if tag == "world":
             self.world = World()
             self.world.from_xml(child.toxml())
             continue

         # every remaining element is loaded as the root Factor
         self.root = Factor()
         self.root.from_xml(child.toxml())
Esempio n. 8
0
def main():
	"""Run message passing on the ASIA network and write the marginals.

	Reads 'data/ASIA/asia.bif', parses it into a list of nodes, and builds
	one factor for every node that is not a root (the node followed by its
	parents). Messages are then exchanged between nodes and factors until
	the sum over all nodes of the first entry of each marginal changes by
	less than 1e-5 between two sweeps.

	Each node's marginal is printed, and its values are written to
	'results.txt' (one line per node: the name followed by the values,
	iterating the marginal's keys in reverse order).
	"""
	# the context manager closes the input file once it is read
	with open('data/ASIA/asia.bif') as bifFile:
		BIF = bifFile.readlines()

	BIF = BIFParser.fixWhiteSpace(BIF)
	BN = BIFParser.parseBIF(BIF)

	factors = []
	for node in BN:
		if not node.isRoot():
			members = [node]
			members.extend(node.getParents())
			factors.append(Factor.Factor(node.getDist(), members))

	converged = False
	converNum = 0
	while not converged:
		# plain assignment is enough: the running total is a number
		prevConverNum = converNum
		converNum = 0

		# nodes -> factors
		for a in BN:
			for f in factors:
				if partOf(a, f):
					message = a.sendMarginal(f)
					f.receiveBelief(message, a)

		# factors -> nodes
		for f in factors:
			for a in BN:
				if partOf(a, f):
					message = f.sendBelief(a)
					a.receiveMarginal(message, f)

		for a in BN:
			a.updateMarginal()
			marginal = a.getMarginal()
			converNum += marginal[list(marginal.keys())[0]]

		if np.abs(converNum - prevConverNum) < .00001:
			converged = True

	# the context manager closes the output file even if a write fails
	with open("results.txt", "w") as g:
		for a in BN:
			g.write(a.getName() + " ")
			marginal = a.getMarginal()
			print(marginal)
			for key in reversed(list(marginal.keys())):
				g.write(str(marginal[key]) + " ")
			g.write("\n")
Esempio n. 9
0
def ComputeJointDistribution(INPUTS):
    """Compute the joint distribution defined by a list of factors.

    Joint = ComputeJointDistribution(INPUTS) multiplies the factors in
    INPUTS together, left to right, with FactorProduct and returns the
    resulting factor.

    INPUTS is a list of Factor objects. When it is empty a message is
    written to stderr and an empty Factor is returned.
    """
    if len(INPUTS) == 0:
        sys.stderr.write("Empty factor list given as input\n")
        return Factor([], [], [])

    # left fold: ((f0 * f1) * f2) * ...
    remaining = iter(INPUTS)
    joint = next(remaining)
    for factor in remaining:
        joint = FactorProduct(joint, factor)
    return joint
Esempio n. 10
0
    def __init__(self, numAlleles, geneCopyVarChild, geneCopyHapOne,
                 geneCopyHapTwo):
        """Build the factor for a child's gene copy given two haplotypes.

        The factor scope is [child, hap one, hap two], each with numAlleles
        states. For every combination of indices (i, j, k), with k changing
        fastest, the stored value is 1 when k equals both i and j, .5 when
        k equals exactly one of them, and 0 otherwise.

        The finished factor is stored in self.geneCopyFactor.
        """
        self.numalleles = numAlleles
        # hapone previously stored geneCopyVarChild, which looks like a
        # copy-paste slip next to haptwo; it now stores the first haplotype
        self.hapone = geneCopyHapOne
        self.haptwo = geneCopyHapTwo

        self.geneCopyFactor = Factor(
            [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo], [], [],
            'child|hap1,hap2')
        self.geneCopyFactor.setCard(
            [self.numalleles, self.numalleles, self.numalleles])
        values = np.zeros(
            np.prod([self.numalleles, self.numalleles,
                     self.numalleles])).tolist()

        # position in the flat value list; the loops visit numAlleles^3
        # combinations in total
        index = 0
        for i in range(numAlleles):
            for j in range(numAlleles):
                for k in range(numAlleles):
                    if j == k and i == k:
                        # k matches both of the other indices
                        values[index] = 1
                    elif j == k or i == k:
                        # k matches exactly one of the other indices
                        values[index] = .5
                    index += 1

        self.geneCopyFactor.setVal(values)
Esempio n. 11
0
    def __init__(self, isDominant, genotypeVar, phenotypeVar, name):
        """Build a deterministic phenotype-given-genotype factor.

        The factor scope is [phenotypeVar, genotypeVar] with cardinalities
        [2, 3]. Assignments are compared against the values 1..3 exactly as
        IndexToAssignment returns them.

        When isDominant == 1, phenotype 1 gets value 1 for genotypes 1 and
        2, and the other phenotype gets value 1 for genotype 3.
        When isDominant == 0 the roles are swapped: phenotype 1 gets value 1
        for genotype 3, and the other phenotype for genotypes 1 and 2.
        All other entries stay 0 (including every entry when isDominant is
        neither 0 nor 1).

        The finished factor is stored in self.phenotype.
        """
        phenotype = Factor([phenotypeVar, genotypeVar], [2, 3], [], name)

        numEntries = np.prod(phenotype.getCard())
        phenotype.setVal(np.zeros(numEntries).tolist())

        # one row per entry of the value table: (phenotype, genotype)
        assignments = IndexToAssignment(
            np.arange(numEntries), phenotype.getCard())
        val = np.zeros(numEntries)

        for i in range(len(val)):
            (pheno, geno) = assignments[i]
            isFirstPheno = pheno == 1
            genoOneOrTwo = geno == 1 or geno == 2
            genoThree = geno == 3

            if isDominant == 1:
                if (isFirstPheno and genoOneOrTwo) or \
                        (not isFirstPheno and genoThree):
                    val[i] = 1
            if isDominant == 0:
                if (isFirstPheno and genoThree) or \
                        (not isFirstPheno and genoOneOrTwo):
                    val[i] = 1

        phenotype.setVal(val.tolist())
        self.phenotype = phenotype
Esempio n. 12
0
    def __init__(self, alphaList, phenotypeVar, genotypeVar, name):
        """Build the phenotype-given-genotype factor from a list of alphas.

        alphaList holds one value per genotype. The factor scope is
        [phenotypeVar, genotypeVar] with cardinalities
        [2, len(alphaList)], and its value list interleaves each alpha with
        its complement: [a0, 1 - a0, a1, 1 - a1, ...].

        The factor is stored in self.phenotypeFactor and the alphas, as a
        NumPy array, in self.alpha.
        """
        self.phenotypeFactor = Factor([phenotypeVar, genotypeVar], [], [],
                                      name)
        self.alpha = np.array(alphaList)
        self.phenotypeFactor.setCard([2, len(alphaList)])

        # The earlier fill loop was dead code: its result was overwritten
        # by this interleaved list before being used.
        values = []
        for alpha in alphaList:
            values.append(alpha)
            values.append(1 - alpha)

        self.phenotypeFactor.setVal(values)
Esempio n. 13
0
def FactorMaxMarginalization(A, V):
    """Max the variables in V out of factor A.

    The result has the variables of A that are not in V. It works like
    FactorMarginalization, except that entries of A that agree on the
    remaining variables are combined with np.max instead of being summed.
    See section 13.2 in Koller and Friedman for more information.

    If A has no variables or V is empty, A itself is returned. If no
    variable would remain, an error is written to stderr and the maximum
    of A's values is returned instead of a factor.
    """
    B = Factor()

    # nothing to max out
    if len(A.getVar()) == 0 or len(V) == 0:
        return A

    keptVars = np.setdiff1d(A.getVar(), V)
    # positions of the kept variables inside A's variable list
    mapB = isMember(keptVars, A.getVar())

    if len(keptVars) == 0:
        sys.stderr.write(
            "FactorMaxMarginalization: Error, resultant factor has empty scope...\n"
        )
        return np.max(A.getVal())

    # scope, cardinality and a zero-filled value table for the result
    B.setVar(keptVars.tolist())
    B.setCard(A.getCard()[mapB])
    B.setVal(np.zeros(np.prod(B.getCard())).tolist())

    # every assignment of A, projected onto the kept variables, gives the
    # entry of B (after the -1 shift) that this entry of A belongs to
    allAssignments = IndexToAssignment(
        np.arange(np.prod(A.getCard())), A.getCard())
    targetIndex = AssignmentToIndex(allAssignments[:, mapB],
                                    B.getCard()) - 1

    # group A's values by target index and keep the maximum of each group
    B.setVal(accum(targetIndex, A.getVal(), np.max).tolist())
    return B
Esempio n. 14
0
    def eliminateVar(self, Z, E, factorList):
        """Eliminate variable Z and record the clique this creates.

        Based on https://github.com/indapa/PGM/blob/master/Prog4/EliminateVar.m
        and the Sum-product VE pseudo code in Koller and Friedman, page 298.

        Z is the variable to eliminate.
        E is a numpy 2d adjacency matrix of the induced VE graph. It is
        indexed with (variable - 1) and modified in place: fill edges are
        added between all variables that share a factor with Z, then Z's
        row and column are cleared.
        factorList is the current list of factors.

        Returns (E, factorList), where factorList holds the factors that did
        not contain Z followed by the new factor obtained by multiplying the
        factors that did contain Z and summing Z out. If that would leave no
        factor at all, the input list is returned unchanged.

        Side effects: the scope of the new clique is appended to
        self.nodeList, self.factorInds is extended and re-mapped, and
        entries of self.edges are set for the new clique.
        """
        # indices of the factors that mention Z, and the union of their
        # variables
        useFactors = []
        scope = []
        for i, factor in enumerate(factorList):
            factorVars = factor.getVar().tolist()
            if Z in factorVars:
                useFactors.append(i)
                scope = list(set.union(set(scope), factorVars))

        # Update the induced graph. Eliminating Z creates a factor over
        # `scope`, which introduces fill edges between all of its variables
        # (see pg. 307 Koller and Friedman); afterwards Z's own edges are
        # removed. Variables index E with an offset of -1.
        for first in scope:
            for second in scope:
                if first != second:
                    E[first - 1, second - 1] = 1
        E[Z - 1, :] = 0
        E[:, Z - 1] = 0

        # indices of the factors that are not involved in this elimination
        unusedFactors = list(
            set.difference(set(range(len(factorList))), set(useFactors)))

        # newF starts with the untouched factors; newmap[old] gives the
        # position of an untouched factor inside newF. newF is always a
        # list, so appending the new factor below works even when every
        # factor contained Z (previously newF was None in that case and the
        # append raised AttributeError).
        newF = [factorList[old] for old in unusedFactors]
        newmap = None
        if len(unusedFactors) > 0:
            newmap = [0] * (max(unusedFactors) + 1)
            for new, old in enumerate(unusedFactors):
                newmap[old] = new

        # multiply together all the factors that contain Z ...
        newFactor = Factor([], [], [], 'newFactor')
        for used in useFactors:
            newFactor = FactorProduct(newFactor, factorList[used])

        # ... then sum Z out and put the result at the end of the new list
        newFactor = FactorMarginalization(newFactor, [Z])
        if newFactor is not None:
            newF.append(newFactor)

        # keep the caller's list when nothing at all is left
        if len(newF) > 0:
            factorList = newF

        # The remaining code maintains the clique tree. The scope of every
        # factor generated during variable elimination is a clique (pg. 309
        # Koller & Friedman), so it becomes a new node.
        self.nodeList.append(scope)

        # newC is the total number of nodes in the clique tree
        newC = len(self.nodeList)

        # one-based position of the new factor inside the new factor list
        self.factorInds.append(len(unusedFactors) + 1)

        # connect the new clique to every earlier clique whose factor was
        # consumed by this elimination; re-map the others to their new
        # positions. Values in factorInds are one-based, 0 means "done".
        for i in range(newC - 1):
            if self.factorInds[i] - 1 in useFactors:
                self.edges[i, newC - 1] = 1
                self.edges[newC - 1, i] = 1
                self.factorInds[i] = 0
            elif self.factorInds[i] != 0 and len(unusedFactors) > 0:
                self.factorInds[i] = newmap[self.factorInds[i] - 1] + 1

        return E, factorList
Esempio n. 15
0
from CliqueTree import *
from CliqueTreeOperations import *
from FactorOperations import *
import scipy.io as sio
import numpy as np
import pprint
import pdb
# Load the sample inputs for FactorMaxMarginalization from the course
# .mat file and run the function on them.
matfile = '/Users/amit/BC_Classes/PGM/Prog4/PA4Sample.mat'
mat_contents = sio.loadmat(matfile)
mat_struct = mat_contents['FactorMax']
val = mat_struct[0, 0]

# INPUT1 is a factor stored as three fields read here in the order
# var, card, value
input_factors = val['INPUT1'][0][0]
var = input_factors[0].flatten().tolist()
card = input_factors[1].flatten().tolist()
value = input_factors[2].flatten().tolist()
print(var)
print(card)
print(value)

INPUT1 = Factor(var, card, value, 'test')
INPUT2 = val['INPUT2'].flatten()
print(INPUT1)
print(INPUT2)
print(FactorMaxMarginalization(INPUT1, INPUT2))

# example used in section 13.2 pg 555 of Friedman and Koller
print("=====")
psi = Factor(
    [1, 2, 3],
    [3, 2, 2],
    [.25, .05, .15, .08, 0, .09, .35, .07, .21, .16, 0, .18],
)
maxfactor = FactorMaxMarginalization(psi, [2])
print(maxfactor)
print(IndexToAssignment(np.arange(6), [3, 2]))
Esempio n. 16
0
    print "\n ** FINAL NORMALIZED SOLUTION ** "
    answer = normalize(answer)
    print answer


def printfactorList(factorList):
    """Print every factor in factorList between a header and a footer line."""
    header = " *** FACTOR LIST *** "
    footer = " *** *********** *** "
    print(header)
    for entry in factorList:
        print(entry)
    print(footer)


#  FACTORS
# Each factor pairs a list of variable names with a NumPy table of values.
# NOTE(review): how the table axes correspond to the listed variables is
# decided by the Factor class, which is not visible here -- confirm.

# Pr(G) -- table of shape (2,)
f0 = Factor(['G'], np.array([0.90, 0.1]))

# Pr(D) -- table of shape (3,)
f1 = Factor(['D'], np.array([0.50, 0.25, 0.25]))

# Pr(D|F) -- table of shape (3, 2); each row sums to 1
f2 = Factor(['D', 'F'], np.array([[0.98, 0.02], [0.40, 0.60], [0.15, 0.85]]))

# Pr(D|DS) -- table of shape (3, 2); each row sums to 1
f3 = Factor(['D', 'DS'], np.array([[0.98, 0.02], [0.15, 0.85], [0.40, 0.60]]))

# Pr(D|S, G) -- table of shape (2, 3, 2); each innermost pair sums to 1
# NOTE(review): the variable list is ['D', 'S', 'G'] but the leading axis
# has length 2 while the tables above use length 3 for 'D' -- confirm the
# intended axis order.
f4 = Factor(['D', 'S', 'G'],
            np.array([[[0.98, 0.02], [0.15, 0.85], [0.15, 0.85]],
                      [[0.998, 0.002], [0.98, 0.02], [0.98, 0.02]]]))
Esempio n. 17
0
def FactorDiv(A, B):
    """Compute the quotient of two factors, A / B.

    Similar to FactorProduct, but the entries of A are divided by the
    matching entries of B. Each (numerator, denominator) pair is passed to
    common.zerodiv_tuple; per the original note, dividing 0 by 0 yields 0.
    See page 365 in Koller and Friedman for the definition of factor
    division.

    The result is a factor over the union of the variables of A and B.

    If A has no variables, a message is written to stderr and B is
    returned; if B has no variables, A is returned.
    If a variable shared by A and B has different cardinalities in the two
    factors, an error is written to stderr and the process exits with
    status 1.
    """
    C = Factor()

    # check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    # Variables present in both factors must have the same cardinality.
    # The previous test chained `x.all() == y.all() == False`, which is
    # never true for non-zero cardinalities, so mismatches went undetected.
    # Compare per variable instead.
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        cardInA = dict(zip(np.asarray(A.getVar()).tolist(),
                           np.asarray(A.getCard()).tolist()))
        cardInB = dict(zip(np.asarray(B.getVar()).tolist(),
                           np.asarray(B.getCard()).tolist()))
        if any(cardInA[var] != cardInB[var] for var in intersect):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    # the scope of C is the union of the variables of A and B
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    # positions of A's and B's variables inside C's variable list
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    # fill in the cardinality of every variable of C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    # initialize the values of C to zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    # for every assignment of C, find the entry of A and the entry of B
    # that it corresponds to (the -1 shift turns them into array indices)
    assignments = IndexToAssignment(
        np.arange(np.prod(C.getCard())), C.getCard())
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    numerator = A.getVal()[indxA.flatten().tolist()]
    denominator = B.getVal()[indxB.flatten().tolist()]

    # element-wise division with the 0/0 case handled by zerodiv_tuple
    val = [common.zerodiv_tuple(pair) for pair in zip(numerator, denominator)]
    C.setVal(val)

    return C
Esempio n. 18
0
def LogFactor(F):
    """Return a new factor holding the natural log of each value of F.

    Variables, cardinalities and name are copied from F unchanged.
    """
    variables = F.getVar().tolist()
    cardinalities = F.getCard().tolist()
    logValues = np.log(F.getVal()).tolist()
    return Factor(variables, cardinalities, logValues, F.getName())
Esempio n. 19
0
def IdentityFactor(F):
    """Return a factor with the scope of F whose values are all ones.

    Variables and cardinalities are copied from F; the new factor is named
    after F with '_identity' appended.
    """
    variables = F.getVar().tolist()
    cardinalities = F.getCard().tolist()
    # one entry per joint assignment, passed on as a NumPy array
    ones = np.ones(np.prod(F.getCard()))
    identityName = F.getName() + '_identity'
    return Factor(variables, cardinalities, ones, identityName)
Esempio n. 20
0
def FactorProduct(A, B):
    """Compute the product of two factors.

    C = FactorProduct(A, B) multiplies factors A and B, where each factor is
    defined over a set of variables with given cardinalities. A factor
    exposes these fields through its accessors:

        var   vector of variables in the factor, e.g. [1 2 3]
        card  vector of cardinalities corresponding to var, e.g. [2 2 2]
        val   value table of size prod(card)

    The returned factor is defined over the union of the variables of A and
    B. If A has no variables a message is written to stderr and B is
    returned unchanged (and vice versa). If a variable shared by A and B has
    a different cardinality in each, an error is written to stderr and the
    process exits with status 1.

    See also FactorMarginalization, IndexToAssignment, AssignmentToIndex, and
    https://github.com/indapa/PGM/blob/master/Prog1/FactorProduct.m
    """
    C = Factor()

    # If one operand has no variables, hand back the other one unchanged.
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    # Variables that occur in both factors must have the same cardinality.
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        # Positions of the shared variables within each factor's variable
        # list. NOTE(review): assumes getIndex returns the positions in the
        # order of `intersect`, so both selections line up -- confirm.
        iA = getIndex(A.getVar(), intersect)
        iB = getIndex(B.getVar(), intersect)

        # Compare the cardinalities element-wise. (A chained
        # `x.all() == y.all() == False` comparison reduces each side to a
        # single boolean first and therefore cannot detect a mismatch.)
        sharedCardA = np.asarray(A.getCard())[iA]
        sharedCardB = np.asarray(B.getCard())[iB]
        if np.any(sharedCardA != sharedCardB):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    # C ranges over the union of the variables of A and B.
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    # Fill in C's cardinalities from A and B at the mapped positions.
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    # Initialise the value table of C to zeros.
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    # For every assignment of C, find the entry of A.val and of B.val that
    # corresponds to it (the helpers are 1-based, hence the "- 1").
    assignments = IndexToAssignment(
        np.arange(np.prod(C.getCard())), C.getCard())
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    # Multiply the matching entries to obtain the factor product.
    valuesA = A.getVal()[indxA.flatten().tolist()]
    valuesB = B.getVal()[indxB.flatten().tolist()]
    C.setVal((valuesA * valuesB).tolist())

    return C
Esempio n. 21
0
def FactorSum(A, B):
    """Compute the sum of two factors.

    Works like a factor product, except that matching entries of the two
    value tables are added instead of multiplied. This is what is needed in
    log space, where multiplication becomes addition.

    The returned factor is defined over the union of the variables of A and
    B. If A has no variables a message is written to stderr and B is
    returned unchanged (and vice versa). If a variable shared by A and B has
    a different cardinality in each, an error is written to stderr and the
    process exits with status 1.

    Based on https://github.com/indapa/PGM/blob/master/Prog4/FactorSum.m
    """
    C = Factor()

    # If one operand has no variables, hand back the other one unchanged.
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    # Variables that occur in both factors must have the same cardinality.
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        # Positions of the shared variables within each factor's variable
        # list. NOTE(review): assumes getIndex returns the positions in the
        # order of `intersect`, so both selections line up -- confirm.
        iA = getIndex(A.getVar(), intersect)
        iB = getIndex(B.getVar(), intersect)

        # Compare the cardinalities element-wise. (A chained
        # `x.all() == y.all() == False` comparison reduces each side to a
        # single boolean first and therefore cannot detect a mismatch.)
        sharedCardA = np.asarray(A.getCard())[iA]
        sharedCardB = np.asarray(B.getCard())[iB]
        if np.any(sharedCardA != sharedCardB):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    # C ranges over the union of the variables of A and B.
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    # Fill in C's cardinalities from A and B at the mapped positions.
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    # Initialise the value table of C to zeros.
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    # For every assignment of C, find the entry of A.val and of B.val that
    # corresponds to it (the helpers are 1-based, hence the "- 1").
    assignments = IndexToAssignment(
        np.arange(np.prod(C.getCard())), C.getCard())
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    # Add the matching entries to obtain the factor sum.
    valuesA = A.getVal()[indxA.flatten().tolist()]
    valuesB = B.getVal()[indxB.flatten().tolist()]
    C.setVal((valuesA + valuesB).tolist())

    return C
Esempio n. 22
0
from Factor import *
from PGMcommon import *
from CliqueTree import *
from CliqueTreeOperations import *
from FactorOperations import *
import scipy.io as sio
import numpy as np
import pprint
import pdb
# Load the 'SumProdCalibrate' struct from the MATLAB sample file and pull the
# edge matrix and the clique list out of its 'INPUT' field.
matfile = '/Users/amit/BC_Classes/PGM/Prog4/PA4Sample.mat'
mat_contents = sio.loadmat(matfile)
mat_struct = mat_contents['SumProdCalibrate']
val = mat_struct[0, 0]
input_edges = val['INPUT']['edges'][0][0]
input_cliqueList = val['INPUT']['cliqueList'][0][0][0]

# Turn every (var, card, values) record of the clique list into a Factor.
clique_list_factorObj = []
for (var, card, values) in input_cliqueList:
    f = Factor(var[0].tolist(), card[0].tolist(), values[0].tolist(),
               'factor')
    clique_list_factorObj.append(f)

# Build the clique tree from the factors and edges, then calibrate it.
P = CliqueTree(clique_list_factorObj, input_edges, clique_list_factorObj, [])
P = CliqueTreeCalibrate(P)

# Print every node of the calibrated tree, each followed by a separator.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for f in P.getNodeList():
    print(f)
    print("==")
Esempio n. 23
0
 def __init__(self, alleleFreqs, geneCopyVar):
     """Create the single-variable factor stored in self.geneCopyFactor.

     alleleFreqs -- sequence with one entry per allele; its length becomes
                    the variable's cardinality and the sequence itself is
                    used as the factor's value table.
     geneCopyVar -- the only variable the factor is defined over.
     """
     # One entry per allele, so the length is the variable's cardinality.
     numAlleles = len(alleleFreqs)
     # Start with empty cardinality/value lists; they are filled in below.
     self.geneCopyFactor = Factor([geneCopyVar], [], [], 'founderHap')
     self.geneCopyFactor.setCard([numAlleles])
     self.geneCopyFactor.setVal(alleleFreqs)