Exemple #1
0
    def __init__(self, pileup_data, pileup_var, samplename):
        """Compute per-genotype log-likelihoods for one sample's pileup.

        pileup_data -- sequence of reads; each item must unpack to
                       (sample, readgroup, aligned_read, basecall, bq).
        pileup_var  -- variable identifier handed to the Factor.
        samplename  -- stored as ``self.name`` and used as the Factor name.

        Every unordered diploid genotype over A/C/G/T is enumerated, the
        likelihood of each base call under each genotype is collected in a
        (depth x n_genotypes) matrix, and the column-wise sum of logs is
        stored in the factor.

        Adapted from
        https://github.com/indapa/Pgmsnp/blob/master/PythonNotebook/genotypeLikelihoodMatrix.py
        """
        self.pileup = pileup_data
        # The original assigned an undefined ``name``; the sample name is
        # the only candidate in scope.
        self.name = samplename
        self.genotypeList = []
        for ploidy in [2]:
            # The original passed an undefined ``p`` as the combination size.
            for combo in combinations_with_replacement(['A', 'C', 'G', 'T'],
                                                       ploidy):
                self.genotypeList.append(Genotype("".join(combo), ploidy))

        # Derive the cardinality from the enumeration instead of a literal 10.
        num_genotypes = len(self.genotypeList)
        # NOTE(review): GL is never stored on self or returned, so the factor
        # is discarded when __init__ finishes -- confirm whether it should be
        # kept as an attribute.
        GL = Factor([pileup_var], [num_genotypes], [], samplename)

        depth = len(pileup_data)
        # np.zeros takes the shape as a single tuple.
        likelihood_matrix = np.zeros((depth, num_genotypes))

        for i in range(depth):
            (sample, readgroup, aligned_read, basecall, bq) = self.pileup[i]
            # The error probability depends only on the read, not the genotype.
            error_prob = ErrorProb(bq)
            for j, genotype in enumerate(self.genotypeList):
                likelihood_matrix[i, j] = genotype.calculateBaseLikelihood(
                    basecall, error_prob)
        genotypelikelihoods = np.sum(np.log(likelihood_matrix), axis=0)
        GL.setVal(genotypelikelihoods.tolist())
Exemple #2
0
    def __init__(self, allelefreqs, genotypeVar, name):
        """Build a genotype factor from allele frequencies.

        allelefreqs -- one frequency per allele; its length sets the number
                       of alleles.
        genotypeVar -- variable identifier for the genotype.
        name        -- name given to the Factor.

        Each genotype's value is the product of its alleles' frequencies,
        doubled when the two alleles differ.
        """
        self.allelefreq = allelefreqs
        self.allelesToGenotypes = None
        self.genotypesToAlleles = None
        self.genotypeFactor = None

        # Lookup tables between allele pairs and genotype indices.
        mappers = generateAlleleGenotypeMappers(len(allelefreqs))
        (self.allelesToGenotypes, self.genotypesToAlleles) = mappers
        (ngenos, ploidy) = np.shape(self.genotypesToAlleles)

        # One variable whose cardinality is the number of genotypes.
        self.genotypeFactor = Factor([genotypeVar], [], [], name)
        self.genotypeFactor.setCard([ngenos])

        values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()
        for geno_idx in range(ngenos):
            pair = self.genotypesToAlleles[geno_idx, :].tolist()
            freq_product = np.prod([allelefreqs[a] for a in pair])
            if pair[0] != pair[1]:
                # A heterozygote can be formed in two orders.
                freq_product = freq_product * 2
            values[geno_idx] = freq_product

        self.genotypeFactor.setVal(values)
Exemple #3
0
    def __init__(self, alphaList, numAlleles, geneCopyVarOne, geneCopyVarTwo,
                 phenotypeVar):
        """Build the phenotype factor conditioned on two gene copies.

        alphaList      -- per-genotype value used when the phenotype
                          assignment is 0; ``1 - alpha`` is used otherwise.
        numAlleles     -- cardinality of each gene-copy variable.
        geneCopyVarOne -- variable identifier of the first gene copy.
        geneCopyVarTwo -- variable identifier of the second gene copy.
        phenotypeVar   -- variable identifier of the phenotype (cardinality 2).
        """
        self.numalleles = numAlleles
        self.alphaList = alphaList
        self.phenotypeFactor = Factor(
            [phenotypeVar, geneCopyVarOne, geneCopyVarTwo], [], [],
            'phenotype| geneCopy1, geneCopy2')
        self.phenotypeFactor.setCard([2, numAlleles, numAlleles])

        table_size = np.prod(self.phenotypeFactor.getCard())
        values = np.zeros((1, table_size)).flatten().tolist()

        complement_alphas = [1 - alpha for alpha in alphaList]

        (allelesToGenotypes,
         genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)
        # The "- 1" presumably shifts 1-based assignments to 0-based indices.
        assignments = IndexToAssignment(
            np.arange(table_size), self.phenotypeFactor.getCard()) - 1
        for z in range(table_size):
            row = assignments[z]
            genotype_num = allelesToGenotypes[row[1], row[2]]
            source = alphaList if row[0] == 0 else complement_alphas
            values[z] = source[genotype_num]
        self.phenotypeFactor.setVal(values)
Exemple #4
0
    def __init__(self, numAlleles, genotypeVarChild, genotypeVarParentOne,
                 genotypeVarParentTwo, name):
        """Build the child-genotype factor conditioned on both parents.

        numAlleles           -- number of alleles at the locus.
        genotypeVarChild     -- variable identifier of the child genotype.
        genotypeVarParentOne -- variable identifier of the first parent.
        genotypeVarParentTwo -- variable identifier of the second parent.
        name                 -- name given to the Factor.

        For every (child, parent1, parent2) assignment the value is the
        fraction of the four parental allele pairings that give the child's
        genotype (a Punnett square).
        """
        self.genotypeFactor = Factor(
            [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo], [],
            [], name)

        # Lookup tables between allele pairs and genotype indices.
        (self.allelesToGenotypes,
         self.genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)
        (ngenos, ploidy) = np.shape(self.genotypesToAlleles)

        self.genotypeFactor.setCard([ngenos, ngenos, ngenos])
        table_size = np.prod(self.genotypeFactor.getCard())
        values = np.zeros((table_size)).tolist()

        assignments = IndexToAssignment(
            np.arange(table_size), self.genotypeFactor.getCard()) - 1
        for z in range(table_size):
            row = assignments[z]
            child_genotype = int(row[0])

            gametes_one = self.genotypesToAlleles[row[1], :]
            gametes_two = self.genotypesToAlleles[row[2], :]

            # Tally how often each genotype arises among the pairings.
            tally = dict((g, 0.) for g in range(ngenos))
            for zygote in list(itertools.product(gametes_one, gametes_two)):
                tally[self.allelesToGenotypes[zygote[0], zygote[1]]] += 1.

            fractions = np.array([tally[g] for g in range(ngenos)]) / 4
            values[z] = fractions[child_genotype]

        self.genotypeFactor.setVal(values)
Exemple #5
0
class ChildCopyGivenFreqFactor(object):
    """Factor for a founder's gene copy, valued by the allele frequencies.

    For a founder, a particular haplotype is proportional to the given
    allele frequency of the locus. This factor is part of the decoupled
    Bayesian genetic network, along with ChildCopyGivenParentalsFactor.
    """

    def __init__(self, alleleFreqs, geneCopyVar):
        """Create the single-variable factor.

        alleleFreqs -- factor values; the length sets the cardinality.
        geneCopyVar -- variable identifier of the gene copy.
        """
        numAlleles = len(alleleFreqs)
        self.geneCopyFactor = Factor([geneCopyVar], [], [], 'founderHap')
        self.geneCopyFactor.setCard([numAlleles])
        self.geneCopyFactor.setVal(alleleFreqs)

    def getVar(self):
        """Return the wrapped factor's variables."""
        return self.geneCopyFactor.getVar()

    def getCard(self):
        """Return the wrapped factor's cardinalities."""
        return self.geneCopyFactor.getCard()

    def getVal(self):
        """Return the wrapped factor's values."""
        return self.geneCopyFactor.getVal()

    def getFactor(self):
        """Return the wrapped Factor object."""
        # The original read the misspelled ``self.genCopyFactor`` and always
        # raised AttributeError.
        return self.geneCopyFactor

    def __str__(self):
        return self.geneCopyFactor.__str__()
Exemple #6
0
 def from_xml_file(self, filename):
     """Populate instruction, world and root from an XML file.

     Reads the children of the first <root> element: an <instruction>
     element supplies its "text" attribute, a <world> element is parsed
     into a World, and any other non-text node is parsed into a Factor
     stored as ``self.root``.
     """
     document = minidom.parse(filename)
     top = document.getElementsByTagName("root")[0]
     for child in top.childNodes:
         # Skip whitespace and text between elements.
         if child.nodeType == child.TEXT_NODE:
             continue
         tag = child.nodeName
         if tag == "instruction":
             self.instruction = child.getAttribute("text")
             continue
         if tag == "world":
             self.world = World()
             self.world.from_xml(child.toxml())
             continue
         self.root = Factor()
         self.root.from_xml(child.toxml())
Exemple #7
0
class ChildCopyGivenFreqFactor(object):
    """Factor for a founder's gene copy, valued by the allele frequencies.

    For a founder, a particular haplotype is proportional to the given
    allele frequency of the locus. This factor is part of the decoupled
    Bayesian genetic network, along with ChildCopyGivenParentalsFactor.
    """

    def __init__(self, alleleFreqs, geneCopyVar):
        """Create the single-variable factor.

        alleleFreqs -- factor values; the length sets the cardinality.
        geneCopyVar -- variable identifier of the gene copy.
        """
        numAlleles = len(alleleFreqs)
        self.geneCopyFactor = Factor([geneCopyVar], [], [], 'founderHap')
        self.geneCopyFactor.setCard([numAlleles])
        self.geneCopyFactor.setVal(alleleFreqs)

    def getVar(self):
        """Return the wrapped factor's variables."""
        return self.geneCopyFactor.getVar()

    def getCard(self):
        """Return the wrapped factor's cardinalities."""
        return self.geneCopyFactor.getCard()

    def getVal(self):
        """Return the wrapped factor's values."""
        return self.geneCopyFactor.getVal()

    def getFactor(self):
        """Return the wrapped Factor object."""
        # The original read the misspelled ``self.genCopyFactor`` and always
        # raised AttributeError.
        return self.geneCopyFactor

    def __str__(self):
        return self.geneCopyFactor.__str__()
Exemple #8
0
    def __init__(self, alphaList, numAlleles, geneCopyVarOne, geneCopyVarTwo, phenotypeVar):
        """Build the phenotype factor conditioned on two gene copies.

        alphaList      -- per-genotype value used when the phenotype
                          assignment is 0; ``1 - alpha`` is used otherwise.
        numAlleles     -- cardinality of each gene-copy variable.
        geneCopyVarOne -- variable identifier of the first gene copy.
        geneCopyVarTwo -- variable identifier of the second gene copy.
        phenotypeVar   -- variable identifier of the phenotype (cardinality 2).
        """
        self.numalleles = numAlleles
        self.alphaList = alphaList
        self.phenotypeFactor = Factor(
            [phenotypeVar, geneCopyVarOne, geneCopyVarTwo], [], [],
            'phenotype| geneCopy1, geneCopy2')
        self.phenotypeFactor.setCard([2, numAlleles, numAlleles])

        entry_count = np.prod(self.phenotypeFactor.getCard())
        values = np.zeros((1, entry_count)).flatten().tolist()

        one_minus_alphas = [1 - alpha for alpha in alphaList]

        (allelesToGenotypes, genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)
        # The "- 1" presumably shifts 1-based assignments to 0-based indices.
        assignments = IndexToAssignment(
            np.arange(entry_count), self.phenotypeFactor.getCard()) - 1
        for z in range(entry_count):
            current = assignments[z]
            genotype_num = allelesToGenotypes[current[1], current[2]]
            if current[0] == 0:
                chosen = alphaList
            else:
                chosen = one_minus_alphas
            values[z] = chosen[genotype_num]
        self.phenotypeFactor.setVal(values)
Exemple #9
0
def multiply(factor1, factor2):
    """Multiply two factors by broadcasting them over a shared 5-axis layout.

    Each factor's array is reshaped so that every variable sits on the axis
    given by getVariableIndex(var); unused axes have length 1. Axis 1 is
    given length 3 and every other used axis length 2 (hard-coded,
    presumably for one specific network -- confirm before reuse).

    Returns a Factor over the higher-dimensional input's variables followed
    by any variables that appear only in the other input.
    """
    def _broadcast_view(factor):
        # Place each of the factor's variables on its own global axis.
        shape = [1] * 5
        for variable in factor.variables:
            axis = getVariableIndex(variable)
            shape[axis] = 3 if axis == 1 else 2
        return factor.array.reshape(tuple(shape))

    if factor1.array.ndim >= factor2.array.ndim:
        largeFactor, smallFactor = factor1, factor2
    else:
        largeFactor, smallFactor = factor2, factor1

    view1 = _broadcast_view(factor1)
    view2 = _broadcast_view(factor2)
    product = np.squeeze(view1 * view2)

    extra_variables = list(
        set(smallFactor.variables) - set(largeFactor.variables))
    return Factor(largeFactor.variables + extra_variables, product)
Exemple #10
0
def returnNonFoundersFactor(genotypeVarChild, genotypeVarParentOne,
                            genotypeVarParentTwo, values,
                            factorName="child|parent 1, parent2",
                            numAlleles=4):
    """Return a Factor for pr(offspring_genotype | mother, father).

    ``values`` holds the precomputed transition probabilities; they do not
    change, so they are calculated once and passed in. This function only
    sets the variable names and the cardinality, which is derived from
    ``numAlleles``.

    When the values are produced by returnPunnetValues, the same numAlleles
    must be used there, otherwise the dimensions will not match.
    """
    factor = Factor(
        [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo],
        [], values, factorName)
    mappers = generateAlleleGenotypeMappers(numAlleles)
    (genotype_count, _ploidy) = np.shape(mappers[1])
    # All three variables range over the same set of genotypes.
    factor.setCard([genotype_count] * 3)
    return factor
Exemple #11
0
    def __init__(self, allelefreqs, genotypeVar, name):
        """Build a genotype factor from allele frequencies.

        allelefreqs -- one frequency per allele; its length sets the number
                       of alleles.
        genotypeVar -- variable identifier for the genotype.
        name        -- name given to the Factor.

        Each genotype's value is the product of its alleles' frequencies,
        doubled when the two alleles differ.
        """
        self.allelefreq = allelefreqs
        self.allelesToGenotypes = None
        self.genotypesToAlleles = None
        self.genotypeFactor = None

        # Lookup tables between allele pairs and genotype indices.
        allele_count = len(allelefreqs)
        (self.allelesToGenotypes,
         self.genotypesToAlleles) = generateAlleleGenotypeMappers(allele_count)
        (ngenos, ploidy) = np.shape(self.genotypesToAlleles)

        # One variable whose cardinality is the number of genotypes.
        self.genotypeFactor = Factor([genotypeVar], [], [], name)
        self.genotypeFactor.setCard([ngenos])

        values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()
        for row in range(ngenos):
            allele_pair = self.genotypesToAlleles[row, :].tolist()
            product = np.prod([allelefreqs[a] for a in allele_pair])
            # A heterozygote can be formed in two orders.
            is_homozygous = allele_pair[0] == allele_pair[1]
            values[row] = product if is_homozygous else product * 2

        self.genotypeFactor.setVal(values)
Exemple #12
0
    def __init__(self, numAlleles, geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo):
        """Build the factor for a child's gene copy given a parent's two copies.

        numAlleles       -- cardinality of each of the three variables.
        geneCopyVarChild -- variable identifier of the child's gene copy.
        geneCopyHapOne   -- variable identifier of the parent's first copy.
        geneCopyHapTwo   -- variable identifier of the parent's second copy.

        The table holds numAlleles**3 entries: 1.0 where the child allele
        equals both parental alleles, 0.5 where it equals exactly one, and
        0.0 otherwise.
        """
        self.numalleles = numAlleles
        # NOTE(review): hapone is assigned the *child* variable, not
        # geneCopyHapOne. Kept as-is because callers may rely on it --
        # confirm the intent.
        self.hapone = geneCopyVarChild
        self.haptwo = geneCopyHapTwo

        self.geneCopyFactor = Factor(
            [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo], [], [],
            'child|hap1,hap2')
        self.geneCopyFactor.setCard(
            [self.numalleles, self.numalleles, self.numalleles])

        # Same traversal order as the original nested loops: k (the child
        # allele) varies fastest, then j, then i. The debug
        # ``print i, j, k`` that ran once per entry has been removed.
        values = [((i == k) + (j == k)) / 2.0
                  for i in range(numAlleles)
                  for j in range(numAlleles)
                  for k in range(numAlleles)]
        self.geneCopyFactor.setVal(values)
Exemple #13
0
class ChildCopyGivenParentalsFactor(object):
    """De-coupled inheritance factor for a single gene copy.

    Given a parent's two haplotypes, the factor's values are the
    probability of inheriting the (grand)paternal or (grand)maternal
    haplotype. This allows more flexibility in modelling inheritance than
    clumping a single parent's haplotypes into a genotype, as
    GenotypeGivenParentsFactor does.
    """

    def __init__(self, numAlleles, geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo):
        """Create the three-variable factor.

        numAlleles       -- cardinality of each of the three variables.
        geneCopyVarChild -- variable identifier of the child's gene copy.
        geneCopyHapOne   -- variable identifier of the parent's first copy.
        geneCopyHapTwo   -- variable identifier of the parent's second copy.

        The table holds numAlleles**3 entries: 1.0 where the child allele
        equals both parental alleles, 0.5 where it equals exactly one, and
        0.0 otherwise.
        """
        self.numalleles = numAlleles
        # NOTE(review): hapone is assigned the *child* variable, not
        # geneCopyHapOne. Kept as-is because callers may rely on it --
        # confirm the intent.
        self.hapone = geneCopyVarChild
        self.haptwo = geneCopyHapTwo

        self.geneCopyFactor = Factor(
            [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo], [], [],
            'child|hap1,hap2')
        self.geneCopyFactor.setCard(
            [self.numalleles, self.numalleles, self.numalleles])

        # Same traversal order as the original nested loops: k (the child
        # allele) varies fastest, then j, then i. The debug
        # ``print i, j, k`` that ran once per entry has been removed.
        values = [((i == k) + (j == k)) / 2.0
                  for i in range(numAlleles)
                  for j in range(numAlleles)
                  for k in range(numAlleles)]
        self.geneCopyFactor.setVal(values)

    def getVar(self):
        """Return the wrapped factor's variables."""
        return self.geneCopyFactor.getVar()

    def getCard(self):
        """Return the wrapped factor's cardinalities."""
        return self.geneCopyFactor.getCard()

    def getVal(self):
        """Return the wrapped factor's values."""
        return self.geneCopyFactor.getVal()

    def getFactor(self):
        """Return the wrapped Factor object."""
        return self.geneCopyFactor

    def __str__(self):
        return self.geneCopyFactor.__str__()
Exemple #14
0
def ComputeMarginal(V, F, E):
    """Compute the marginal over variables V given factors F and evidence E.

    V -- the variables to keep in the marginal, e.g. [1, 2, 3] for
         X_1, X_2 and X_3.
    F -- list of factors defining the distribution.
    E -- variable/value pairs (variable first, value second); it is
         reshaped to an N-by-2 array. Pass [] when there is no evidence.

    Returns the Factor produced by FactorMarginalization over the
    normalised, evidence-reduced joint. When F is empty, a message is
    written to stderr and an empty Factor is returned.
    """
    factor_count = len(F)
    # ObserveEvidence needs an N x 2 array, so a flat sequence is folded
    # into two columns (see http://stackoverflow.com/a/12576163).
    evidence = np.reshape(np.array(E), (-1, 2))

    if factor_count == 0:
        sys.stderr.write("empty factor list given as input.\n")
        return Factor([], [], [])

    # Union of the variables mentioned by any factor
    # (see http://stackoverflow.com/a/2151553).
    per_factor_vars = [factor.getVar().tolist() for factor in F]
    all_vars = set().union(*per_factor_vars)
    # Everything not requested in V gets summed out.
    to_sum_out = list(all_vars.difference(V))

    # ObserveEvidence takes and returns a list of factors, so the joint is
    # wrapped in a list and the single result unwrapped.
    joint = ComputeJointDistribution(F)
    reduced = ObserveEvidence([joint], evidence)[0]

    # ObserveEvidence does not renormalise, so that is done here.
    normalised = reduced.getVal() / np.sum(reduced.getVal())
    reduced.setVal(normalised.tolist())

    return FactorMarginalization(reduced, to_sum_out)
def get_rmw_factor_list(roe_list, stock_return_frame, market_capital_frame):
    """Return a one-column "RMW" DataFrame with one row per row of roe_list.

    Row i is filled with fct.RMW applied to row i of each of the three
    inputs (selected positionally with ``.iloc``).
    """
    row_count = len(roe_list.index)
    rmw_factor_list = pd.DataFrame(np.zeros((row_count, 1)), columns=["RMW"])

    for position in range(len(roe_list)):
        rmw_value = fct.RMW(roe_list.iloc[position],
                            stock_return_frame.iloc[position],
                            market_capital_frame.iloc[position])
        rmw_factor_list.iloc[position] = rmw_value
    return rmw_factor_list
Exemple #16
0
    def test_computeMarginal1(self):
        """computeMarginal over variables [2, 3] with evidence [1, 2]."""
        factors = [self.factorA, self.factorB, self.factorC]

        expected = pgmf.Factor(
            np.array([2, 3]),
            np.array([2, 2]),
            np.array([0.0858, 0.0468, 0.1342, 0.7332]))
        actual = pgmf.computeMarginal(
            np.array([2, 3]), factors, np.array([1, 2]))

        np.testing.assert_array_equal(actual.varbs, expected.varbs)
        np.testing.assert_array_equal(actual.card, expected.card)
        np.testing.assert_array_almost_equal(
            actual.vals, expected.vals, decimal=4)
Exemple #17
0
    def __init__(self, numAlleles, geneCopyVarChild, geneCopyHapOne,
                 geneCopyHapTwo):
        """Build the factor for a child's gene copy given a parent's two copies.

        numAlleles       -- cardinality of each of the three variables.
        geneCopyVarChild -- variable identifier of the child's gene copy.
        geneCopyHapOne   -- variable identifier of the parent's first copy.
        geneCopyHapTwo   -- variable identifier of the parent's second copy.

        The table holds numAlleles**3 entries: 1.0 where the child allele
        equals both parental alleles, 0.5 where it equals exactly one, and
        0.0 otherwise.
        """
        self.numalleles = numAlleles
        # NOTE(review): hapone is assigned the *child* variable, not
        # geneCopyHapOne. Kept as-is because callers may rely on it --
        # confirm the intent.
        self.hapone = geneCopyVarChild
        self.haptwo = geneCopyHapTwo

        self.geneCopyFactor = Factor(
            [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo], [], [],
            'child|hap1,hap2')
        self.geneCopyFactor.setCard(
            [self.numalleles, self.numalleles, self.numalleles])

        # Same traversal order as the original nested loops: k (the child
        # allele) varies fastest, then j, then i. The debug
        # ``print i, j, k`` that ran once per entry has been removed.
        values = [((i == k) + (j == k)) / 2.0
                  for i in range(numAlleles)
                  for j in range(numAlleles)
                  for k in range(numAlleles)]
        self.geneCopyFactor.setVal(values)
 def from_xml_file(self, filename):
     """Populate instruction, world and root from an XML file.

     Reads the children of the first <root> element: an <instruction>
     element supplies its "text" attribute, a <world> element is parsed
     into a World, and any other non-text node is parsed into a Factor
     stored as ``self.root``.
     """
     parsed = minidom.parse(filename)
     root_element = parsed.getElementsByTagName("root")[0]
     # Text nodes (whitespace between elements) carry no data.
     elements = [n for n in root_element.childNodes
                 if n.nodeType != n.TEXT_NODE]
     for element in elements:
         kind = element.nodeName
         if kind == "instruction":
             self.instruction = element.getAttribute("text")
         elif kind == "world":
             self.world = World()
             self.world.from_xml(element.toxml())
         else:
             self.root = Factor()
             self.root.from_xml(element.toxml())
Exemple #19
0
    def __init__(self, isDominant, genotypeVar, phenotypeVar, name):
        """Build a deterministic phenotype-given-genotype factor.

        isDominant   -- 1 for a dominant trait, 0 for a recessive one; any
                        other value leaves every entry at zero.
        genotypeVar  -- variable identifier of the genotype (cardinality 3).
        phenotypeVar -- variable identifier of the phenotype (cardinality 2).
        name         -- name given to the Factor.

        With phenotype 1 meaning affected: for a dominant trait genotypes
        1 and 2 are affected and genotype 3 is unaffected; for a recessive
        trait the two genotype groups are swapped. The finished Factor is
        stored as ``self.phenotype``.
        """
        phenotype = Factor([phenotypeVar, genotypeVar], [2, 3], [], name)

        card = phenotype.getCard()
        num_assignments = int(np.prod(card))
        # One row per table entry, giving the (phenotype, genotype) values.
        assignments = IndexToAssignment(np.arange(num_assignments), card)
        val = np.zeros(num_assignments)

        # Genotypes that map to "affected" / "unaffected" for this mode.
        if isDominant == 1:
            affected_genos, unaffected_genos = (1, 2), (3,)
        elif isDominant == 0:
            affected_genos, unaffected_genos = (3,), (1, 2)
        else:
            affected_genos, unaffected_genos = (), ()

        for i in range(num_assignments):
            (pheno, geno) = assignments[i]
            matching_genos = affected_genos if pheno == 1 else unaffected_genos
            if geno in matching_genos:
                val[i] = 1

        phenotype.setVal(val.tolist())

        self.phenotype = phenotype
Exemple #20
0
class ExampleInstance(object):
    """One example: an instruction string, a World and a tree of Factors."""

    def __init__(self):
        self.instruction = ""
        self.world = None
        self.root = None

    def from_xml_file(self, filename):
        """Populate instruction, world and root from an XML file.

        Reads the children of the first <root> element: an <instruction>
        element supplies its "text" attribute, a <world> element is parsed
        into a World, and any other non-text node is parsed into a Factor
        stored as ``self.root``.
        """
        document = minidom.parse(filename)
        top = document.getElementsByTagName("root")[0]
        for child in top.childNodes:
            # Skip whitespace and text between elements.
            if child.nodeType == child.TEXT_NODE:
                continue
            tag = child.nodeName
            if tag == "instruction":
                self.instruction = child.getAttribute("text")
                continue
            if tag == "world":
                self.world = World()
                self.world.from_xml(child.toxml())
                continue
            self.root = Factor()
            self.root.from_xml(child.toxml())

    def __str__(self):
        header = "[instruction]" + self.instruction + "\n"
        return header + self.world.__str__() + self.root.__str__()

    def get_factors(self):
        """Return every factor in the tree under ``self.root``, parent first."""
        collected = []
        self.scan_factor(self.root, collected)
        return collected

    def scan_factor(self, factor, factor_list):
        """Append ``factor`` and all its descendants to ``factor_list``.

        The order is depth-first pre-order: a factor, then each child's
        whole subtree in turn.
        """
        pending = [factor]
        while pending:
            current = pending.pop()
            factor_list.append(current)
            # Push children reversed so the first child is visited next.
            pending.extend(reversed(list(current.children)))
class ExampleInstance(object):
    """One example: an instruction string, a World and a tree of Factors."""

    def __init__(self):
        self.instruction = ""
        self.world = None
        self.root = None

    def from_xml_file(self, filename):
        """Populate instruction, world and root from an XML file.

        Reads the children of the first <root> element: an <instruction>
        element supplies its "text" attribute, a <world> element is parsed
        into a World, and any other non-text node is parsed into a Factor
        stored as ``self.root``.
        """
        parsed = minidom.parse(filename)
        root_element = parsed.getElementsByTagName("root")[0]
        # Text nodes (whitespace between elements) carry no data.
        elements = [n for n in root_element.childNodes
                    if n.nodeType != n.TEXT_NODE]
        for element in elements:
            kind = element.nodeName
            if kind == "instruction":
                self.instruction = element.getAttribute("text")
            elif kind == "world":
                self.world = World()
                self.world.from_xml(element.toxml())
            else:
                self.root = Factor()
                self.root.from_xml(element.toxml())

    def __str__(self):
        pieces = "[instruction]" + self.instruction + "\n"
        pieces = pieces + self.world.__str__()
        pieces = pieces + self.root.__str__()
        return pieces

    def get_factors(self):
        """Return every factor in the tree under ``self.root``, parent first."""
        found = []
        self.scan_factor(self.root, found)
        return found

    def scan_factor(self, factor, factor_list):
        """Append ``factor`` and then, recursively, each child's subtree."""
        factor_list.append(factor)
        children = factor.children
        for child in children:
            self.scan_factor(child, factor_list)
Exemple #22
0
    def __init__(self, alphaList, phenotypeVar, genotypeVar, name):
        """Build the phenotype factor conditioned on genotype.

        alphaList    -- one value per genotype; its length sets the
                        genotype cardinality.
        phenotypeVar -- variable identifier of the phenotype (cardinality 2).
        genotypeVar  -- variable identifier of the genotype.
        name         -- name given to the Factor.

        The factor values interleave each alpha with its complement:
        [a0, 1 - a0, a1, 1 - a1, ...].
        """
        self.phenotypeFactor = Factor([phenotypeVar, genotypeVar], [], [],
                                      name)
        self.alpha = np.array(alphaList)

        self.phenotypeFactor.setCard([2, len(alphaList)])

        # The original first filled a ``values`` list with a loop whose
        # result was discarded in favour of this interleaved list; that
        # dead code is dropped.
        values = []
        for alpha in alphaList:
            values.append(alpha)
            values.append(1 - alpha)

        self.phenotypeFactor.setVal(values)
Exemple #23
0
    def test_joint1(self):
        """joint() of three chained factors matches the expected table."""
        # Start from fresh factors so earlier tests cannot interfere.
        self.factorA = pgmf.Factor(
            np.array([1]), np.array([2]), np.array([0.11, 0.89]))
        self.factorB = pgmf.Factor(
            np.array([2, 1]), np.array([2, 2]),
            np.array([0.59, 0.41, 0.22, 0.78]))
        self.factorC = pgmf.Factor(
            np.array([3, 2]), np.array([2, 2]),
            np.array([0.39, 0.61, 0.06, 0.94]))

        factors = [self.factorA, self.factorB, self.factorC]

        expected = pgmf.Factor(
            np.array([1, 2, 3]),
            np.array([2, 2, 2]),
            np.array([0.025311, 0.076362, 0.002706, 0.041652, 0.039589, 0.119438, 0.042394, 0.652548]))
        actual = pgmf.joint(factors)

        np.testing.assert_array_equal(actual.varbs, expected.varbs)
        np.testing.assert_array_equal(actual.card, expected.card)
        np.testing.assert_array_almost_equal(
            actual.vals, expected.vals, decimal=6)
Exemple #24
0
    def __init__(self, isDominant, genotypeVar, phenotypeVar, name):
        """Build a deterministic phenotype-given-genotype factor.

        isDominant   -- 1 for a dominant trait, 0 for a recessive one; any
                        other value leaves every entry at zero.
        genotypeVar  -- variable identifier of the genotype (cardinality 3).
        phenotypeVar -- variable identifier of the phenotype (cardinality 2).
        name         -- name given to the Factor.

        With phenotype 1 meaning affected: for a dominant trait genotypes
        1 and 2 are affected and genotype 3 is unaffected; for a recessive
        trait the two genotype groups are swapped. The finished Factor is
        stored as ``self.phenotype``.
        """
        phenotype = Factor([phenotypeVar, genotypeVar], [2, 3], [], name)

        card = phenotype.getCard()
        num_assignments = int(np.prod(card))
        # One row per table entry, giving the (phenotype, genotype) values.
        assignments = IndexToAssignment(np.arange(num_assignments), card)
        val = np.zeros(num_assignments)

        # Genotypes that map to "affected" / "unaffected" for this mode.
        if isDominant == 1:
            affected_genos, unaffected_genos = (1, 2), (3,)
        elif isDominant == 0:
            affected_genos, unaffected_genos = (3,), (1, 2)
        else:
            affected_genos, unaffected_genos = (), ()

        for i in range(num_assignments):
            (pheno, geno) = assignments[i]
            matching_genos = affected_genos if pheno == 1 else unaffected_genos
            if geno in matching_genos:
                val[i] = 1

        phenotype.setVal(val.tolist())

        self.phenotype = phenotype
Exemple #25
0
def returnGenotypeGivenParentsFactor(genotypeVarChild, genotypeVarParentOne,
                                     genotypeVarParentTwo,
                                     factorName="child|parent 1, parent2",
                                     numAlleles=4):
    """Return a Factor for pr(offspring_genotype | mother, father).

    This is a Punnett square: for every (child, parent1, parent2)
    assignment the value is the fraction of parental allele pairings that
    give the child's genotype.

    genotypeVarChild     -- variable identifier of the child genotype.
    genotypeVarParentOne -- variable identifier of the first parent.
    genotypeVarParentTwo -- variable identifier of the second parent.
    factorName           -- name given to the Factor.
    numAlleles           -- number of alleles; sets the genotype count.
    """
    f1 = Factor(
        [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo],
        [], [], factorName)
    (allelesToGenotypes,
     genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)
    (ngenos, ploidy) = np.shape(genotypesToAlleles)
    f1.setCard([ngenos, ngenos, ngenos])

    table_size = int(np.prod(f1.getCard()))
    values = np.zeros(table_size).tolist()
    # The "- 1" presumably shifts 1-based assignments to 0-based indices.
    assignments = IndexToAssignment(np.arange(table_size), f1.getCard()) - 1
    for z in range(table_size):
        curr_assign = assignments[z]
        childAssignment = int(curr_assign[0])

        # Cast like the child index: a non-integer dtype cannot index an
        # array on current NumPy.
        parent1gametes = genotypesToAlleles[int(curr_assign[1]), :]
        parent2gametes = genotypesToAlleles[int(curr_assign[2]), :]

        # Every pairing of one allele from each parent.
        zygotes = list(itertools.product(parent1gametes, parent2gametes))
        matching = sum(1 for (first, second) in zygotes
                       if allelesToGenotypes[first, second] == childAssignment)
        values[z] = matching / float(len(zygotes))

    # Set once after the table is complete; the original called setVal on
    # every loop iteration.
    f1.setVal(values)

    return f1
def main():
    """Run message passing on the ASIA network and write the marginals to results.txt.

    Reads 'data/ASIA/asia.bif', parses it with BIFParser, builds one factor per
    non-root node (the node followed by its parents), then alternates
    node->factor and factor->node messages until the sum over all nodes of the
    first marginal entry changes by less than 1e-5 between sweeps.  Each node's
    name and marginal values (in reverse key order) are written as one line of
    'results.txt'; the marginal is also printed.
    """
    # context manager so the handle is closed even if reading fails
    with open('data/ASIA/asia.bif') as bifFile:
        BIF = bifFile.readlines()

    BIF = BIFParser.fixWhiteSpace(BIF)
    BN = BIFParser.parseBIF(BIF)

    factors = []
    for node in BN:
        if not node.isRoot():
            scope = [node]
            scope.extend(node.getParents())
            factors.append(Factor.Factor(node.getDist(), scope))

    converged = False
    converNum = 0
    while not converged:
        # numbers are immutable, so plain assignment is enough (no deepcopy)
        prevConverNum = converNum
        converNum = 0

        # nodes send their marginals to every factor they are part of
        for a in BN:
            for fac in factors:
                if partOf(a, fac):
                    message = a.sendMarginal(fac)
                    fac.receiveBelief(message, a)

        # factors send their beliefs back to the nodes
        for fac in factors:
            for a in BN:
                if partOf(a, fac):
                    message = fac.sendBelief(a)
                    a.receiveMarginal(message, fac)

        for a in BN:
            a.updateMarginal()
            marginal = a.getMarginal()
            # list(...) keeps the "first key" lookup working on Python 2 and 3
            converNum += marginal[list(marginal.keys())[0]]

        if np.abs(converNum - prevConverNum) < .00001:
            converged = True

    with open("results.txt", "w") as out:
        for a in BN:
            out.write(a.getName() + " ")
            marginal = a.getMarginal()
            print(marginal)
            # values are written from the last key to the first
            for key in reversed(list(marginal.keys())):
                out.write(str(marginal[key]) + " ")
            out.write("\n")
Exemple #27
0
    def __init__(self,numAlleles, genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo, name):
        """ Build the factor Pr(child genotype | parent one genotype, parent two genotype).

            numAlleles is handed to generateAlleleGenotypeMappers; the three
            genotype variables form the scope of the factor (child first) and
            name is the factor's name.  Every table entry is the share of
            allele crosses between the two parents that give the child's
            genotype. """
        scope = [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo]
        self.genotypeFactor = Factor(scope, [ ], [ ], name)

        # allele pair -> genotype index, and genotype index -> its alleles
        mappers = generateAlleleGenotypeMappers(numAlleles)
        (self.allelesToGenotypes, self.genotypesToAlleles) = mappers
        (ngenos, _ploidy) = np.shape(self.genotypesToAlleles)

        self.genotypeFactor.setCard([ngenos, ngenos, ngenos])
        # start from an all-zero value table
        values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()

        # one row per joint assignment, shifted by -1 for use as indices
        assignments = IndexToAssignment(np.arange(np.prod(self.genotypeFactor.getCard())), self.genotypeFactor.getCard()) - 1
        for z in range(np.prod(self.genotypeFactor.getCard())):
            row = assignments[z]
            childIdx = int(row[0])

            gametesOne = self.genotypesToAlleles[row[1], :]
            gametesTwo = self.genotypesToAlleles[row[2], :]

            # genotype produced by every pairing of one allele from each parent
            crosses = list(itertools.product(gametesOne, gametesTwo))
            offspring = [self.allelesToGenotypes[pair[0], pair[1]] for pair in crosses]

            # tally how often each genotype shows up among the crosses
            tally = dict.fromkeys(range(ngenos), 0.)
            for genotype in offspring:
                tally[genotype] += 1.

            # divide by 4 -- presumably the number of cells of a diploid Punnett square
            fractions = np.array([tally[g] for g in range(ngenos)]) / 4
            values[z] = fractions[childIdx]

        self.genotypeFactor.setVal(values)
Exemple #28
0
def ComputeJointDistribution(INPUTS):
    """ ComputeJointDistribution Computes the joint distribution defined by a set of given factors

    Joint = ComputeJointDistribution(INPUTS) folds FactorProduct over INPUTS
    from left to right and returns the resulting factor.

    INPUTS is a sequence of Factor objects defining the distribution.
    If INPUTS is empty, a message is written to stderr and an empty
    Factor([], [], []) is returned.  A single-element INPUTS is returned
    as-is (no product is taken).

    """
    # reduce is a builtin only on Python 2; functools.reduce exists on both
    # Python 2.6+ and Python 3, so import it locally to work everywhere.
    from functools import reduce

    #check for empty list of INPUTS
    if len(INPUTS) == 0:
        sys.stderr.write("Empty factor list given as input\n")
        return Factor([], [], [])

    return reduce(lambda x, y: FactorProduct(x, y), INPUTS)
Exemple #29
0
    def __init__(self,alphaList, phenotypeVar, genotypeVar , name):
        """ Build the factor over (phenotypeVar, genotypeVar).

            alphaList[i] is stored in the table for the i-th genotype, followed
            immediately by 1 - alphaList[i]; the factor therefore has
            cardinality [2, len(alphaList)] and its values are
            [a0, 1-a0, a1, 1-a1, ...].  name is the factor's name. """
        self.phenotypeFactor=Factor( [ phenotypeVar, genotypeVar], [], [], name)
        self.alpha=np.array ( alphaList)

        ngenotypes=len(alphaList)
        self.phenotypeFactor.setCard( [2, ngenotypes])

        #interleave alpha and its complement, one pair per genotype
        values=[]
        for alpha in alphaList:
            values.append( alpha )
            values.append( 1-alpha )

        self.phenotypeFactor.setVal( values)
Exemple #30
0
def returnGenotypePriorFounderFactor( refbase, factorVar, theta=0.001,ploidy=2 ):
    """ Return a factor of genotype priors over the genotypes of A, C, G, T.

        refbase   -- the reference base
        factorVar -- the single variable of the returned factor
        theta     -- heterozygosity rate (default .001)
        ploidy    -- accepted for backward compatibility; it is not used, the
                     genotype table comes from generateAlleleGenotypeMappers

        Prior assigned to each genotype:
            homozygous reference       1 - 3*(theta/2)
            homozygous non-reference   theta/2
            heterozygous with refbase  theta
            anything else              (theta/2)**3
        The original author flagged these priors as unverified
        ("Not sure this is right, but its simple enough"). """

    alleles='ACGT'
    f1= Factor( [factorVar ], [ ], [ ], 'genotypePrior')
    (allelesToGenotypes, genotypesToAlleles)=generateAlleleGenotypeMappers( len(alleles) )
    #unpack into a throw-away name so the ploidy argument is not overwritten
    (ngenos,_genotypePloidy)=np.shape(genotypesToAlleles)
    f1.setCard([ ngenos] )
    values=np.zeros( (np.prod(f1.getCard()))).tolist()

    #always a float division, even when theta is passed as an int
    halfTheta=theta / 2.

    for i in range(ngenos):
        genotype=indexToGenotype(i, alleles )
        (a1,a2)=list(genotype)
        if a1 == a2 and refbase not in genotype:
            #non-reference homozygote
            values[i]=halfTheta
        elif a1==a2==refbase:
            #homozygous reference
            values[i]= 1 - (3*halfTheta)
        elif a1!=a2 and refbase in genotype:
            #heterozygote carrying the reference base
            values[i]=theta
        else:
            #heterozygote without the reference base ("tri-allelic het")
            values[i]=halfTheta ** 3

    f1.setVal(values)
    return f1
Exemple #31
0
def FactorSum ( A, B):
    """ FactorSum Computes the sum of two factors.

        Similar to FactorProduct, but the aligned values are added; this is
        what a factor product becomes in log space.
        Based on the code here https://github.com/indapa/PGM/blob/master/Prog4/FactorSum.m

        A, B -- Factor objects.  If either has no variables a message is
                written to stderr and the other factor is returned unchanged.

        Returns a new Factor over the union of the variables of A and B.
        Writes to stderr and calls sys.exit(1) if a variable shared by A and B
        has different cardinalities in the two factors. """

    C=Factor()

    #check for empty factors
    if len( A.getVar() ) == 0 :
        sys.stderr.write("A factor is empty!\n")
        return B
    if len( B.getVar() ) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    #variables present in both factors must have the same cardinality
    intersect=np.intersect1d( A.getVar(), B.getVar() ).tolist()
    if len(intersect) > 0:
        #positions of the shared variables inside A and inside B
        iA=getIndex( A.getVar(), intersect )
        iB=getIndex( B.getVar(), intersect )

        #element-wise comparison of the shared cardinalities. The previous
        #chained comparison of two .all() results could never detect a mismatch.
        if not np.array_equal( A.getCard()[iA], B.getCard()[iB] ):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    #the variables of C are the union of the variables of A and B
    C.setVar( np.union1d ( A.getVar(), B.getVar() ).tolist()  )
    mapA=isMember(A.getVar(), C.getVar() )
    mapB=isMember(B.getVar(), C.getVar() )

    #Set the cardinality of variables in C
    C.setCard( np.zeros( len(C.getVar())).tolist() )
    C.getCard()[mapA]=A.getCard()
    C.getCard()[mapB]=B.getCard()

    #initialize the values of the factor C to be zero
    C.setVal( np.zeros(np.prod(C.getCard())).tolist() )

    #helper indices: for every assignment of C, which entry of A.val and B.val it corresponds to
    assignments=IndexToAssignment( np.arange(np.prod(C.getCard())), C.getCard() ) #get the assignment of values of C
    indxA=AssignmentToIndex(  assignments[:,mapA], A.getCard())-1 # the assignment of C re-expressed as an index into factor A
    indxB=AssignmentToIndex(  assignments[:,mapB], B.getCard())-1 # the assignment of C re-expressed as an index into factor B

    #add the aligned values of A and B to obtain the factor sum
    c_val=A.getVal()[indxA.flatten().tolist()] + B.getVal()[indxB.flatten().tolist()]
    C.setVal ( c_val.tolist() )

    return C
Exemple #32
0
def FactorDiv ( A, B):
    """ FactorDiv Computes the quotient A / B of two factors.

        Similar to FactorProduct, but the aligned values are divided using
        common.zerodiv_tuple (intended to make 0/0 yield 0).
        See page 365 in Koller and Friedman for the definition of factor division.

        A, B -- Factor objects (numerator, denominator).  If either has no
                variables a message is written to stderr and the other factor
                is returned unchanged.

        Returns a new Factor over the union of the variables of A and B.
        Writes to stderr and calls sys.exit(1) if a variable shared by A and B
        has different cardinalities in the two factors. """

    C=Factor()

    #check for empty factors
    if len( A.getVar() ) == 0 :
        sys.stderr.write("A factor is empty!\n")
        return B
    if len( B.getVar() ) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    #variables present in both factors must have the same cardinality
    intersect=np.intersect1d( A.getVar(), B.getVar() ).tolist()
    if len(intersect) > 0:
        #positions of the shared variables inside A and inside B
        iA=getIndex( A.getVar(), intersect )
        iB=getIndex( B.getVar(), intersect )

        #element-wise comparison of the shared cardinalities. The previous
        #chained comparison of two .all() results could never detect a mismatch.
        if not np.array_equal( A.getCard()[iA], B.getCard()[iB] ):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    #the variables of C are the union of the variables of A and B
    C.setVar( np.union1d ( A.getVar(), B.getVar() ).tolist()  )
    mapA=isMember(A.getVar(), C.getVar() )
    mapB=isMember(B.getVar(), C.getVar() )

    #Set the cardinality of variables in C
    C.setCard( np.zeros( len(C.getVar())).tolist() )
    C.getCard()[mapA]=A.getCard()
    C.getCard()[mapB]=B.getCard()

    #initialize the values of the factor C to be zero
    C.setVal( np.zeros(np.prod(C.getCard())).tolist() )

    #helper indices: for every assignment of C, which entry of A.val and B.val it corresponds to
    assignments=IndexToAssignment( np.arange(np.prod(C.getCard())), C.getCard() ) #get the assignment of values of C
    indxA=AssignmentToIndex(  assignments[:,mapA], A.getCard())-1 # the assignment of C re-expressed as an index into factor A
    indxB=AssignmentToIndex(  assignments[:,mapB], B.getCard())-1 # the assignment of C re-expressed as an index into factor B

    numerator=A.getVal()[indxA.flatten().tolist()]
    denominator=B.getVal()[indxB.flatten().tolist()]

    #a real list (not a lazy map object) so the result is the same on Python 2 and 3
    val=[ common.zerodiv_tuple(pair) for pair in zip(numerator,denominator) ]
    C.setVal ( val )

    return C
Exemple #33
0
class GenotypeGivenParentsFactor (object):
    """ Wraps a Factor holding the probability of a child's genotype given
        the genotypes of both parents: Pr(g_child | g_parent_one, g_parent_two) """

    def __init__(self,numAlleles, genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo, name):
        """ Create the factor over (child, parent one, parent two) and fill its
            table from Punnett-square crosses of the parents' alleles. """
        self.genotypeFactor = Factor(
            [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo], [ ], [ ], name)

        # lookup tables between allele pairs and genotype indices
        (self.allelesToGenotypes, self.genotypesToAlleles) = generateAlleleGenotypeMappers(numAlleles)
        (ngenos, _unusedPloidy) = np.shape(self.genotypesToAlleles)
        genotypeIds = range(ngenos)

        self.genotypeFactor.setCard([ngenos] * 3)
        # value table, zero to begin with
        values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()

        # walk over every joint assignment of the three variables
        assignments = IndexToAssignment(
            np.arange(np.prod(self.genotypeFactor.getCard())),
            self.genotypeFactor.getCard()) - 1
        for z in range(np.prod(self.genotypeFactor.getCard())):
            assignment = assignments[z]
            child = int(assignment[0])

            allelesOne = self.genotypesToAlleles[assignment[1], :]
            allelesTwo = self.genotypesToAlleles[assignment[2], :]

            # genotype of each allele pairing between the two parents
            pairings = list(itertools.product(allelesOne, allelesTwo))
            outcomes = [self.allelesToGenotypes[p[0], p[1]] for p in pairings]

            # occurrences of each genotype among the pairings
            occurrences = {}
            for gid in genotypeIds:
                occurrences[gid] = 0.
            for outcome in outcomes:
                occurrences[outcome] += 1.

            # counts -> fractions; 4 is presumably the diploid Punnett-square size
            shares = np.array([occurrences[gid] for gid in genotypeIds]) / 4
            values[z] = shares[child]

        self.genotypeFactor.setVal(values)

    def getVar(self):
        """ Variables of the wrapped factor. """
        return self.genotypeFactor.getVar()

    def getCard(self):
        """ Cardinalities of the wrapped factor. """
        return self.genotypeFactor.getCard()

    def getVal(self):
        """ Value table of the wrapped factor. """
        return self.genotypeFactor.getVal()

    def setVal(self, val):
        """ Replace the value table of the wrapped factor with val. """
        self.genotypeFactor.setVal(val)

    def getFactor(self):
        """ The wrapped Factor object itself. """
        return self.genotypeFactor

    def genotypeSlice(self):
        """ Placeholder, not implemented (returns None).
            See http://stackoverflow.com/q/4257394/1735942 """
        pass

    def __str__(self):
        return self.genotypeFactor.__str__()
Exemple #34
0
def FactorMaxMarginalization( A, V ):
    """ Max out the variables in V from factor A.

        Works like FactorMarginalization, except that the entries of A that
        collapse onto the same entry of the result are combined with np.max
        instead of being summed (np.max is the function handed to accum).
        See section 13.2 in Koller and Friedman.

        Returns A itself when A has no variables or V is empty, the scalar
        np.max of A's values (after a message on stderr) when no variable
        would remain, and otherwise a new Factor over the variables of A that
        are not in V. """

    B=Factor()

    # nothing to do for an empty factor or an empty variable list
    if len( A.getVar() ) == 0:
        return A
    if len(V) == 0:
        return A

    remaining=np.setdiff1d( A.getVar(), V)
    keep=isMember(remaining, A.getVar())

    if len(remaining) == 0:
        sys.stderr.write("FactorMaxMarginalization: Error, resultant factor has empty scope...\n")
        return np.max (A.getVal() )

    # scope, cardinality and an all-zero table for the reduced factor
    B.setVar( remaining.tolist() )
    B.setCard( A.getCard()[keep] )
    B.setVal( np.zeros(np.prod(B.getCard())).tolist() )

    # for every assignment of A, the entry of B it collapses onto
    allAssignments=IndexToAssignment ( np.arange(np.prod(A.getCard()) ), A.getCard() )
    target=AssignmentToIndex( allAssignments[:,keep], B.getCard())-1

    # group the values of A by target entry and keep the maximum of each group
    B.setVal( accum(target, A.getVal(), np.max ).tolist() )

    return B
Exemple #35
0
def LogFactor(F):
    """ Build a new Factor with the variables, cardinalities and name of F
        whose values are the natural log (np.log) of F's values. """
    variables = F.getVar().tolist()
    cardinalities = F.getCard().tolist()
    logValues = np.log(F.getVal()).tolist()
    return Factor(variables, cardinalities, logValues, F.getName())
Exemple #36
0
class GenotypeGivenParentsFactor(object):
    """ Factor for the genotype of a child conditioned on both parents,
        Pr(g_child | g_parent_one, g_parent_two), plus thin accessors. """
    def __init__(self, numAlleles, genotypeVarChild, genotypeVarParentOne,
                 genotypeVarParentTwo, name):
        """ Set up the three-variable factor (child, parent one, parent two)
            and populate it with Punnett-square probabilities. """
        factor = Factor(
            [genotypeVarChild, genotypeVarParentOne, genotypeVarParentTwo], [],
            [], name)
        self.genotypeFactor = factor

        # alleles <-> genotype lookup tables
        toGenotypes, toAlleles = generateAlleleGenotypeMappers(numAlleles)
        self.allelesToGenotypes = toGenotypes
        self.genotypesToAlleles = toAlleles

        (ngenos, _ploidy) = np.shape(self.genotypesToAlleles)

        def offspringShares(parentOne, parentTwo):
            # share of each genotype among the allele crosses of the two parents
            crosses = list(
                itertools.product(self.genotypesToAlleles[parentOne, :],
                                  self.genotypesToAlleles[parentTwo, :]))
            produced = [
                self.allelesToGenotypes[cross[0], cross[1]]
                for cross in crosses
            ]
            counts = dict((g, 0.) for g in range(ngenos))
            for genotype in produced:
                counts[genotype] += 1.
            ordered = [counts[g] for g in range(ngenos)]
            # 4: presumably the number of cells of a diploid Punnett square
            return (np.array(ordered)) / 4

        self.genotypeFactor.setCard([ngenos, ngenos, ngenos])
        # zero-filled table to be overwritten entry by entry
        values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()

        # rows of joint assignments, shifted by -1 so they can serve as indices
        assignments = IndexToAssignment(
            np.arange(np.prod(self.genotypeFactor.getCard())),
            self.genotypeFactor.getCard()) - 1
        for z in range(np.prod(self.genotypeFactor.getCard())):
            current = assignments[z]
            childGenotype = int(current[0])
            values[z] = offspringShares(current[1], current[2])[childGenotype]

        self.genotypeFactor.setVal(values)

    def getVar(self):
        """ Delegate to the wrapped factor's getVar(). """
        return self.genotypeFactor.getVar()

    def getCard(self):
        """ Delegate to the wrapped factor's getCard(). """
        return self.genotypeFactor.getCard()

    def getVal(self):
        """ Delegate to the wrapped factor's getVal(). """
        return self.genotypeFactor.getVal()

    def setVal(self, val):
        """ Delegate to the wrapped factor's setVal(val). """
        self.genotypeFactor.setVal(val)

    def getFactor(self):
        """ Hand back the wrapped Factor. """
        return self.genotypeFactor

    def genotypeSlice(self):
        """ Not implemented yet; does nothing.
            See http://stackoverflow.com/q/4257394/1735942 """
        pass

    def __str__(self):
        return self.genotypeFactor.__str__()
Exemple #37
0
def FactorMarginalization(A, V):
    """   FactorMarginalization sums the given variables out of a factor.

          B = FactorMarginalization(A, V) returns a factor over the variables
          of A that are not listed in V; entries of A that agree on those
          remaining variables are added together (via accum).

          Returns A itself when A has no variables or V is empty.  When no
          variable would remain, a message is written to stderr and None is
          returned.  See also FactorProduct, IndexToAssignment and
          AssignmentToIndex.
          Based on matlab code found here: https://github.com/indapa/PGM/blob/master/Prog1/FactorMarginalization.m """

    # container for the result
    B = Factor()

    # an empty factor or an empty variable list leaves A untouched
    if len(A.getVar()) == 0:
        return A
    if len(V) == 0:
        return A

    # scope of the result: variables of A that are not being summed out
    remaining = np.array(list(set(A.getVar()) - set(V)))
    # where those variables sit inside A
    keep = isMember(remaining, A.getVar())

    if len(remaining) == 0:
        sys.stderr.write(
            "FactorMarginalization:Error, resultant factor has empty scope...\n"
        )
        return None

    # scope, cardinality and a zero table for the result
    B.setVar(remaining.tolist())
    B.setCard(A.getCard()[keep])
    B.setVal(np.zeros(np.prod(B.getCard())).tolist())

    # every assignment of A, and the entry of B each one is folded into
    allAssignments = IndexToAssignment(np.arange(np.prod(A.getCard())),
                                       A.getCard())
    target = AssignmentToIndex(allAssignments[:, keep], B.getCard()) - 1

    # accum mirrors matlab's accumarray: values of A sharing a target index
    # are summed together
    # see http://blogs.mathworks.com/loren/2008/02/20/under-appreciated-accumarray/
    summed = accum(target, A.getVal())

    B.setVal(summed.tolist())
    return B
    def __init__(self, name, desc, factors, weights=None, univPP=None):
        '''
        initialize the composite factor by specifying the calculator
        :param name: passed through to Factor.__init__
        :param desc: passed through to Factor.__init__
        :param factors: a list of Factor or CompositeFactor objects
        :param weights: None, a list of float, a pd.Series, or a function.
        If it is None, equal weights are applied.
        If it is a list of float, it must have the same length as factors and must not contain negative
        values; if its sum differs from 1 by more than 0.01 a warning is printed and the weights are rescaled.
        If it is a pd.Series, it is indexed by date to obtain the weights (dynamic weights, still a todo).
        If it is a function, it is called as weights(stockID, date) to obtain the weights (still a todo).
        Any other type prints an error and exits with status -1.
        :param univPP: passed through to Factor.__init__
        :return: a composite factor object
        '''
        self.NumFactors = len(factors)
        self.Factors = factors
        if weights is None:
            weights = [1.0/self.NumFactors] * self.NumFactors       # equal weights
        elif isinstance(weights, list):
            if len(weights) != self.NumFactors:
                print("ERROR: the length of weights is NOT equal to the number of factors")
                exit(-1)

            # element-wise test; a list cannot be compared to a number or boolean-indexed
            if any(w < 0 for w in weights):
                print("ERROR: Negative weights")
                exit(-1)

            tot = math.fsum(weights)
            if math.fabs(tot - 1) > 0.01:
                # round() takes the number of digits, math.floor() does not
                print("WARN: the sum of weights " + str(round(tot, 2)) + " is not equal to 1. ReWeight to 1.")
                # build a rescaled copy; the caller's list is left untouched
                weights = [w * 1.0 / tot for w in weights]
        elif isinstance(weights, pd.Series):
            pass  #todo dynamic weights, for backtest
        elif isfunction(weights):
            pass  #todo dynamic weights, for backtest & live trading, function arg is StockID,date,factorID
        else:
            print("Unknown weights type: " + str(type(weights)))
            exit(-1)

        # stored for every accepted kind of weights, because myCalc reads self.Weights
        self.Weights = weights

        def myCalc(stockID, date):
            '''
            aggregate factor scores with linear weights, where weights might be a list, or a Series, or a function that takes stockID and date,
             and return a composite score for the stock on that date
            :param stockID: wind stock id
            :param date: DateTime or date string in a format of 'yyyymmdd'
            :return: a float; NaN once the weights of factors with a NaN score add up to more than 0.8
            '''
            wts = None
            if isinstance(self.Weights, list):
                wts = self.Weights
            elif isinstance(self.Weights, pd.Series):
                wts = self.Weights[date]
            elif isfunction(self.Weights):
                wts = self.Weights(stockID, date)

            compScore = 0
            nanWts = 0
            for i in range(self.NumFactors):
                score = wts[i] * self.Factors[i].GetScore(stockID, date)
                if np.isnan(score):
                    # a missing score contributes nothing, but its weight is tracked
                    score = 0
                    nanWts += wts[i]
                if nanWts > 0.8:
                    compScore = np.nan
                elif nanWts > 0.5:
                    log.debug('the total weights with NaN score is ' + str(nanWts))
                    compScore += score
                else:
                    compScore += score

            return compScore

        Factor.__init__(self, name, desc, myCalc, univPP)
Exemple #39
0
class PhenotypeGivenGenotypeFactor(object):
    """ construct factor of phenotype|genotype
        alphaList[i] is the prob of being affected given the ith genotype """
    def __init__(self,alphaList, phenotypeVar, genotypeVar , name):
        """ Build the factor over (phenotypeVar, genotypeVar).

            The factor has cardinality [2, len(alphaList)] and its value table
            is [a0, 1-a0, a1, 1-a1, ...], i.e. for every genotype alphaList[i]
            followed by its complement.  name is the factor's name. """
        self.phenotypeFactor=Factor( [ phenotypeVar, genotypeVar], [], [], name)
        self.alpha=np.array ( alphaList)

        ngenotypes=len(alphaList)
        self.phenotypeFactor.setCard( [2, ngenotypes])

        #interleave alpha and its complement, one pair per genotype
        values=[]
        for alpha in alphaList:
            values.append( alpha )
            values.append( 1-alpha )

        self.phenotypeFactor.setVal( values)

    def getVar(self):
        """ Return the variables of the wrapped factor. """
        return self.phenotypeFactor.getVar()
    def getCard(self):
        """ Return the cardinalities of the wrapped factor. """
        return self.phenotypeFactor.getCard()
    def getVal(self):
        """ Return the value table of the wrapped factor. """
        return self.phenotypeFactor.getVal()
    def setVal(self,val):
        """ Replace the value table of the wrapped factor with val. """
        self.phenotypeFactor.setVal(val)
    def getFactor(self):
        """ Return the wrapped Factor object. """
        return self.phenotypeFactor


    def __str__(self):
        return self.phenotypeFactor.__str__()
Exemple #40
0
    def eliminateVar(self, Z, E, factorList):
        """ a variable elimination function
            based on https://github.com/indapa/PGM/blob/master/Prog4/EliminateVar.m

            Z is the variable to be eliminated. We base this code on the matlab file
            linked to above as well as the Sum-product VE pseudo code in Koller and Friedman
            page 298

            E is a numpy 2d matrix representing adjacency matrix of variables
            It represents the induced VE graph; it is modified in place.
            Once a variable is eliminated, its edges are removed from E

            factorList is the current list of factors.

            Returns the tuple (E, factorList) where factorList holds the factors
            that do not mention Z followed by the new factor obtained by
            multiplying the factors that mention Z and summing Z out.

            Side effects: appends the scope of the eliminated factors to
            self.nodeList, appends an entry to self.factorInds and updates
            self.edges and self.factorInds for the clique tree.
            """

        useFactors = []  #the indices of the factors that contain the variable Z
        scope = []

        #collect the indices of the factors that mention Z together with the
        #union of their variables
        for i in range(len(factorList)):

            if Z in factorList[i].getVar().tolist():
                useFactors.append(
                    i
                )  #the ith factor is being currently involved in elimination
                scope = list(
                    set.union(set(scope), factorList[i].getVar().tolist()))

        # update edge map
        # These represent the induced edges for the VE graph.
        # Once the variable Z is eliminated, its edges are removed from the graph
        # but in the process of elimination, we create a new factor. This
        # introduces fill edges (see pg. 307 Koller and Friedman)
        # Z is one based, but the indices in E are zero based, hence Z-1
        # also the variable names in scope are 1 based, so we subtract 1 when
        # updating the induced VE graph
        for i in range(len(scope)):
            for j in range(len(scope)):
                if i != j:
                    E[scope[i] - 1, scope[j] - 1] = 1
                    E[scope[j] - 1, scope[i] - 1] = 1
        E[Z - 1, :] = 0
        E[:, Z - 1] = 0

        #these are the indices of factorList which are not involved in VE
        unusedFactors = list(
            set.difference(set(range(len(factorList))), set(useFactors)))

        newF = None
        #check first if there are any unused factors left!
        if len(unusedFactors) > 0:
            newF = len(unusedFactors) * [None]
            newmap = np.zeros(max(unusedFactors) + 1, dtype=int).tolist()

            #newF is a new factor list, we populate it first
            #with the unused factors
            #newmap maps the old index of the ith unused factor to its new location
            for i in range(len(unusedFactors)):
                newF[i] = factorList[unusedFactors[i]]
                newmap[unusedFactors[i]] = i

        newFactor = Factor([], [], [], 'newFactor')

        #we multiply in all the factors that contain the variable Z
        for i in range(len(useFactors)):
            newFactor = FactorProduct(newFactor, factorList[useFactors[i]])

        #then we marginalize Z out and obtain a new factor
        #then append it to the end of newF, the new factor list
        newFactor = FactorMarginalization(newFactor, [Z])
        if newFactor is not None:
            #when every factor mentioned Z there are no unused factors and newF
            #is still None; start an empty list instead of calling None.append
            if newF is None:
                newF = []
            newF.append(newFactor)

        if newF is not None:
            factorList = newF

        ########################################################################
        # the remaining code builds the edges of the clique tree
        # add new node with the factors that contain the variable Z
        # adding a new node represents new clique.
        # The scope of every factor generated during the variable elimination
        # process is a clique pg. 309 Koller & Friedman

        self.nodeList.append(scope)

        #newC is the total number of nodes in the clique tree
        newC = len(self.nodeList)

        #factorInds are individual factors with one variable ... I think
        self.factorInds.append(len(unusedFactors) + 1)

        # we update the edges of the clique tree
        for i in range(newC - 1):

            #the values in factorInds are one-based, so subtract 1 before
            #comparing against the zero-based indices in useFactors
            if self.factorInds[i] - 1 in useFactors:

                self.edges[i, newC - 1] = 1
                self.edges[newC - 1, i] = 1
                self.factorInds[i] = 0
            else:
                if self.factorInds[i] != 0:
                    #newmap only exists when there are unused factors
                    if len(unusedFactors) > 0:
                        self.factorInds[i] = newmap[self.factorInds[i] - 1] + 1

        return E, factorList
Exemple #41
0
def IdentityFactor(F):
    """ Build a factor over the same variables and cardinalities as F whose
        values are all 1. The new factor is named after F with the suffix
        '_identity'. """
    scope = F.getVar().tolist()
    cardinality = F.getCard().tolist()
    #one entry per joint assignment of F, every entry equal to 1
    ones = np.ones(np.prod(F.getCard()))
    label = F.getName() + '_identity'
    return Factor(scope, cardinality, ones, label)
Exemple #42
0
class phenotypeGivenHaplotypesFactor(object):
    """ factor represents Pr(phenotype| paternal haplotype, maternal haplotype)
    very similar to PhenotypeGivenGenotypeFactor, but we are de-coupling into
    paternal and maternal alleles rather than genotype

    The wrapped Factor has scope [phenotypeVar, geneCopyVarOne, geneCopyVarTwo]
    and cardinality [2, numAlleles, numAlleles]. """

    def __init__(self, alphaList, numAlleles, geneCopyVarOne, geneCopyVarTwo, phenotypeVar):
        """ alphaList[g] is the value stored for the first phenotype state when
            the two gene copies map to genotype g; the second phenotype state
            receives 1 - alphaList[g].
            numAlleles is the cardinality given to each gene copy variable. """
        self.numalleles = numAlleles
        self.alphaList = alphaList
        self.phenotypeFactor = Factor([phenotypeVar, geneCopyVarOne, geneCopyVarTwo], [], [], 'phenotype| geneCopy1, geneCopy2')
        self.phenotypeFactor.setCard([2, numAlleles, numAlleles])

        #compute the cardinality and table size once instead of per use
        card = self.phenotypeFactor.getCard()
        numValues = np.prod(card)

        affectedAlphas = alphaList
        unaffectedAlphas = [1 - alpha for alpha in alphaList]

        #only the (allele, allele) -> genotype lookup is used here
        (allelesToGenotypes, _) = generateAlleleGenotypeMappers(numAlleles)
        #shift the assignments by one so they can index Python containers
        assignments = IndexToAssignment(np.arange(numValues), card) - 1

        values = []
        for z in range(numValues):
            curr_assign = assignments[z]
            genotype_num = allelesToGenotypes[curr_assign[1], curr_assign[2]]
            if curr_assign[0] == 0:
                values.append(affectedAlphas[genotype_num])
            else:
                values.append(unaffectedAlphas[genotype_num])
        self.phenotypeFactor.setVal(values)

    def getVar(self):
        """ variables of the wrapped factor """
        return self.phenotypeFactor.getVar()

    def getCard(self):
        """ cardinalities of the wrapped factor """
        return self.phenotypeFactor.getCard()

    def getVal(self):
        """ value table of the wrapped factor """
        return self.phenotypeFactor.getVal()

    def getFactor(self):
        """ the wrapped Factor object itself """
        return self.phenotypeFactor

    def __str__(self):
        return self.phenotypeFactor.__str__()
Exemple #43
0
 def __init__(self, alleleFreqs, geneCopyVar):
     """ Create the single-variable factor 'founderHap' over geneCopyVar.
         Its cardinality is len(alleleFreqs) and its values are alleleFreqs. """
     alleleCount = len(alleleFreqs)
     founderFactor = Factor([geneCopyVar], [], [], 'founderHap')
     self.geneCopyFactor = founderFactor
     #one state per allele frequency supplied
     founderFactor.setCard([alleleCount])
     founderFactor.setVal(alleleFreqs)
Exemple #44
0
def FactorMaxMarginalization(A, V):
    """ Return the factor obtained from A by *maxing* out the variables in V.

        The result is defined over the variables of A that are not in V.
        This mirrors FactorMarginalization, except that the entries that
        collapse onto the same remaining assignment are combined with np.max
        (passed to accum) rather than summed.
        See section 13.2 in Koller and Friedman for more information.

        A is returned unchanged when it has no variables or V is empty.
        When no variable would remain, an error is written to stderr and the
        maximum of A's values is returned instead of a Factor. """

    maxed = Factor()
    #nothing to eliminate: hand back the input factor
    if len(A.getVar()) == 0 or len(V) == 0:
        return A

    #variables that survive, and where they sit inside A's variable list
    keptVars = np.setdiff1d(A.getVar(), V)
    keptCols = isMember(keptVars, A.getVar())

    if len(keptVars) == 0:
        sys.stderr.write(
            "FactorMaxMarginalization: Error, resultant factor has empty scope...\n"
        )
        return np.max(A.getVal())

    #scope, cardinality and a zeroed value table for the result
    maxed.setVar(keptVars.tolist())
    maxed.setCard(A.getCard()[keptCols])
    maxed.setVal(np.zeros(np.prod(maxed.getCard())).tolist())

    #every joint assignment of A ...
    allAssignments = IndexToAssignment(np.arange(np.prod(A.getCard())),
                                       A.getCard())
    #... mapped to the index of the result entry it collapses onto
    groupIdx = AssignmentToIndex(allAssignments[:, keptCols],
                                 maxed.getCard()) - 1

    #take the maximum inside each group
    maxed.setVal(accum(groupIdx, A.getVal(), np.max).tolist())

    return maxed
Exemple #45
0
def FactorSum(A, B):
    """ Compute the sum of two factors.

        Similar to FactorProduct, but the matching entries of A and B are
        added. We would use this in log space where multiplication becomes
        addition.
        Based on the code here https://github.com/indapa/PGM/blob/master/Prog4/FactorSum.m

        A, B: Factor objects exposing getVar(), getCard() and getVal().

        Returns a new Factor over the union of the variables of A and B.
        If A has no variables B is returned unchanged, and vice versa (a note
        is written to stderr in both cases).
        Calls sys.exit(1) when a variable shared by A and B has a different
        cardinality in the two factors. """

    C = Factor()

    #check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    #variables that appear in both factors must agree on cardinality
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        #positions of the shared variables inside each factor
        #NOTE(review): assumes getIndex returns the positions in the order of
        #`intersect` for both factors -- confirm against getIndex
        iA = getIndex(A.getVar(), intersect)
        iB = getIndex(B.getVar(), intersect)

        #the earlier test chained `x.all() == y.all() == False` on two scalars
        #and so could never report a mismatch; compare element by element
        if not np.array_equal(A.getCard()[iA], B.getCard()[iB]):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    #now set the variables of C to the union of variables in factors A and B
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    #mapA / mapB are used below to index C's cardinalities and assignments
    #with the variables of A and B respectively
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    #Set the cardinality of variables in C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    #initialize the values of the factor C to be zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    #helper indices telling which entries of A and B belong to each entry of C
    assignments = IndexToAssignment(np.arange(np.prod(C.getCard())),
                                    C.getCard())
    #restrict each assignment of C to A's (resp. B's) variables; the result is
    #shifted by one before it is used to index the value arrays
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    #add the matching entries of A and B to obtain the factor sum
    c_val = (A.getVal()[indxA.flatten().tolist()] +
             B.getVal()[indxB.flatten().tolist()])
    C.setVal(c_val.tolist())

    return C
Exemple #46
0
    print "\n ** FINAL NORMALIZED SOLUTION ** "
    answer = normalize(answer)
    print answer


def printfactorList(factorList):
    """ Print every factor in factorList, one per line, between a header and
        a footer banner.

        The single-argument parenthesised print form is used so the function
        produces the same output under Python 2 and is valid under Python 3. """
    print(" *** FACTOR LIST *** ")
    for factor in factorList:
        print(factor)
    print(" *** *********** *** ")


#  FACTORS
# Each factor below is built from a list of variable names and a NumPy array
# of probabilities.
# NOTE(review): how the array axes line up with the variable list is decided
# by the Factor constructor, which is not visible here -- confirm.

# Pr(G): two entries
f0 = Factor(['G'], np.array([0.90, 0.1]))

# Pr(D): three entries
f1 = Factor(['D'], np.array([0.50, 0.25, 0.25]))

# Pr(D|F)
# NOTE(review): the table is 3 x 2 and every row sums to 1, which reads as a
# distribution over the second variable given the first -- confirm the label.
f2 = Factor(['D', 'F'], np.array([[0.98, 0.02], [0.40, 0.60], [0.15, 0.85]]))

# Pr(D|DS)
# NOTE(review): same 3 x 2 layout as f2, rows sum to 1 -- confirm the label.
f3 = Factor(['D', 'DS'], np.array([[0.98, 0.02], [0.15, 0.85], [0.40, 0.60]]))

# Pr(D|S, G)
# NOTE(review): the array has shape (2, 3, 2) while 'D' is listed first and
# has three entries in f1 -- confirm the intended axis order.
f4 = Factor(['D', 'S', 'G'],
            np.array([[[0.98, 0.02], [0.15, 0.85], [0.15, 0.85]],
                      [[0.998, 0.002], [0.98, 0.02], [0.98, 0.02]]]))
Exemple #47
0
class phenotypeGivenHaplotypesFactor(object):
    """ factor represents Pr(phenotype| paternal haplotype, maternal haplotype)
    very similar to PhenotypeGivenGenotypeFactor, but we are de-coupling into
    paternal and maternal alleles rather than genotype

    The wrapped Factor has scope [phenotypeVar, geneCopyVarOne, geneCopyVarTwo]
    and cardinality [2, numAlleles, numAlleles]. """
    def __init__(self, alphaList, numAlleles, geneCopyVarOne, geneCopyVarTwo,
                 phenotypeVar):
        """ alphaList[g] is the value stored for the first phenotype state when
            the two gene copies map to genotype g; the second phenotype state
            receives 1 - alphaList[g].
            numAlleles is the cardinality given to each gene copy variable. """
        self.numalleles = numAlleles
        self.alphaList = alphaList
        self.phenotypeFactor = Factor(
            [phenotypeVar, geneCopyVarOne, geneCopyVarTwo], [], [],
            'phenotype| geneCopy1, geneCopy2')
        self.phenotypeFactor.setCard([2, numAlleles, numAlleles])

        #compute the cardinality and table size once instead of per use
        card = self.phenotypeFactor.getCard()
        numValues = np.prod(card)

        affectedAlphas = alphaList
        unaffectedAlphas = [1 - alpha for alpha in alphaList]

        #only the (allele, allele) -> genotype lookup is used here
        (allelesToGenotypes, _) = generateAlleleGenotypeMappers(numAlleles)
        #shift the assignments by one so they can index Python containers
        assignments = IndexToAssignment(np.arange(numValues), card) - 1

        values = []
        for z in range(numValues):
            curr_assign = assignments[z]
            genotype_num = allelesToGenotypes[curr_assign[1], curr_assign[2]]
            if curr_assign[0] == 0:
                values.append(affectedAlphas[genotype_num])
            else:
                values.append(unaffectedAlphas[genotype_num])
        self.phenotypeFactor.setVal(values)

    def getVar(self):
        """ variables of the wrapped factor """
        return self.phenotypeFactor.getVar()

    def getCard(self):
        """ cardinalities of the wrapped factor """
        return self.phenotypeFactor.getCard()

    def getVal(self):
        """ value table of the wrapped factor """
        return self.phenotypeFactor.getVal()

    def getFactor(self):
        """ the wrapped Factor object itself """
        return self.phenotypeFactor

    def __str__(self):
        return self.phenotypeFactor.__str__()
Exemple #48
0
def FactorProduct(A, B):
    """ Compute the product of two factors.

        C = FactorProduct(A, B) computes the product between two factors, A
        and B, where each factor is defined over a set of variables with given
        dimension. The factor data structure has the following fields:
          .var    Vector of variables in the factor, e.g. [1 2 3]
          .card   Vector of cardinalities corresponding to .var, e.g. [2 2 2]
          .val    Value table of size prod(.card)

        Returns a new Factor over the union of the variables of A and B.
        If A has no variables B is returned unchanged, and vice versa (a note
        is written to stderr in both cases).
        Calls sys.exit(1) when a variable shared by A and B has a different
        cardinality in the two factors.

        See also FactorMarginalization, IndexToAssignment, AssignmentToIndex,
        and https://github.com/indapa/PGM/blob/master/Prog1/FactorProduct.m """

    C = Factor()

    #check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    #variables that appear in both factors must agree on cardinality
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        #positions of the shared variables inside each factor
        #NOTE(review): assumes getIndex returns the positions in the order of
        #`intersect` for both factors -- confirm against getIndex
        iA = getIndex(A.getVar(), intersect)
        iB = getIndex(B.getVar(), intersect)

        #the earlier test chained `x.all() == y.all() == False` on two scalars
        #and so could never report a mismatch; compare element by element
        if not np.array_equal(A.getCard()[iA], B.getCard()[iB]):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    #now set the variables of C to the union of variables in factors A and B
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    #mapA / mapB are used below to index C's cardinalities and assignments
    #with the variables of A and B respectively
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    #Set the cardinality of variables in C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    #initialize the values of the factor C to be zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    #helper indices telling which entries of A and B to multiply
    assignments = IndexToAssignment(np.arange(np.prod(C.getCard())),
                                    C.getCard())
    #restrict each assignment of C to A's (resp. B's) variables; the result is
    #shifted by one before it is used to index the value arrays
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    #multiply the matching entries of A and B to obtain the factor product
    c_val = (A.getVal()[indxA.flatten().tolist()] *
             B.getVal()[indxB.flatten().tolist()])
    C.setVal(c_val.tolist())

    return C
Exemple #49
0
 def __init__(self, alleleFreqs, geneCopyVar):
     """ Create the single-variable factor 'founderHap' over geneCopyVar.
         Its cardinality is len(alleleFreqs) and its values are alleleFreqs. """
     alleleCount = len(alleleFreqs)
     founderFactor = Factor([geneCopyVar], [], [], 'founderHap')
     self.geneCopyFactor = founderFactor
     #one state per allele frequency supplied
     founderFactor.setCard([alleleCount])
     founderFactor.setVal(alleleFreqs)
Exemple #50
0
class PhenotypeGivenGenotypeFactor(object):
    """ construct factor of phenotype|genotype
        alphaList[i] is the prob of being affected given the ith genotype

        The wrapped Factor has scope [phenotypeVar, genotypeVar] and
        cardinality [2, len(alphaList)]. Its value list interleaves, for each
        genotype in order, alphaList[i] followed by 1 - alphaList[i]. """
    def __init__(self, alphaList, phenotypeVar, genotypeVar, name):
        """ alphaList: one probability per genotype
            phenotypeVar, genotypeVar: variables of the factor
            name: name passed to the underlying Factor """
        self.phenotypeFactor = Factor([phenotypeVar, genotypeVar], [], [],
                                      name)
        self.alpha = np.array(alphaList)

        ngenotypes = len(alphaList)
        self.phenotypeFactor.setCard([2, ngenotypes])

        #a first fill loop used to write into a list that was then thrown
        #away; only the interleaved list below ever reached setVal
        values = []
        for alpha in alphaList:
            values.append(alpha)
            values.append(1 - alpha)

        self.phenotypeFactor.setVal(values)

    def getVar(self):
        """ variables of the wrapped factor """
        return self.phenotypeFactor.getVar()

    def getCard(self):
        """ cardinalities of the wrapped factor """
        return self.phenotypeFactor.getCard()

    def getVal(self):
        """ value table of the wrapped factor """
        return self.phenotypeFactor.getVal()

    def setVal(self, val):
        """ replace the value table of the wrapped factor """
        self.phenotypeFactor.setVal(val)

    def getFactor(self):
        """ the wrapped Factor object itself """
        return self.phenotypeFactor

    def __str__(self):
        return self.phenotypeFactor.__str__()
Exemple #51
0
def FactorDiv(A, B):
    """ Compute the quotient A / B of two factors.

        Similar to FactorProduct, but each entry of A is divided by the
        matching entry of B. The division itself is delegated to
        common.zerodiv_tuple (per the original note, 0/0 yields 0).
        See page 365 in Koller and Friedman for the definition of factor
        division.

        Returns a new Factor over the union of the variables of A and B.
        If A has no variables B is returned unchanged, and vice versa (a note
        is written to stderr in both cases).
        Calls sys.exit(1) when a variable shared by A and B has a different
        cardinality in the two factors. """

    C = Factor()

    #check for empty factors
    if len(A.getVar()) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len(B.getVar()) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    #variables that appear in both factors must agree on cardinality
    intersect = np.intersect1d(A.getVar(), B.getVar()).tolist()
    if len(intersect) > 0:
        #positions of the shared variables inside each factor
        #NOTE(review): assumes getIndex returns the positions in the order of
        #`intersect` for both factors -- confirm against getIndex
        iA = getIndex(A.getVar(), intersect)
        iB = getIndex(B.getVar(), intersect)

        #the earlier test chained `x.all() == y.all() == False` on two scalars
        #and so could never report a mismatch; compare element by element
        if not np.array_equal(A.getCard()[iA], B.getCard()[iB]):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    #now set the variables of C to the union of variables in factors A and B
    C.setVar(np.union1d(A.getVar(), B.getVar()).tolist())
    #mapA / mapB are used below to index C's cardinalities and assignments
    #with the variables of A and B respectively
    mapA = isMember(A.getVar(), C.getVar())
    mapB = isMember(B.getVar(), C.getVar())

    #Set the cardinality of variables in C
    C.setCard(np.zeros(len(C.getVar())).tolist())
    C.getCard()[mapA] = A.getCard()
    C.getCard()[mapB] = B.getCard()

    #initialize the values of the factor C to be zero
    C.setVal(np.zeros(np.prod(C.getCard())).tolist())

    #helper indices telling which entries of A and B to divide
    assignments = IndexToAssignment(np.arange(np.prod(C.getCard())),
                                    C.getCard())
    #restrict each assignment of C to A's (resp. B's) variables; the result is
    #shifted by one before it is used to index the value arrays
    indxA = AssignmentToIndex(assignments[:, mapA], A.getCard()) - 1
    indxB = AssignmentToIndex(assignments[:, mapB], B.getCard()) - 1

    numerator = A.getVal()[indxA.flatten().tolist()]
    denominator = B.getVal()[indxB.flatten().tolist()]

    #a list comprehension keeps the result a list on Python 3 as well, where
    #map() would hand setVal a lazy iterator
    val = [common.zerodiv_tuple(pair) for pair in zip(numerator, denominator)]
    C.setVal(val)

    return C
Exemple #52
0
 def test_product1(self):
     """ pgmf.product of factorA and factorB must match the expected factor:
         variables and cardinalities exactly, values to 5 decimals. """
     want = pgmf.Factor(np.array([1, 2]),
                        np.array([2, 2]),
                        np.array([0.0649, 0.1958, 0.0451, 0.6942]))
     got = pgmf.product(self.factorA, self.factorB)
     check_exact = np.testing.assert_array_equal
     check_exact(got.varbs, want.varbs)
     check_exact(got.card, want.card)
     np.testing.assert_array_almost_equal(got.vals, want.vals, decimal=5)
Exemple #53
0
from CliqueTree import *
from CliqueTreeOperations import *
from FactorOperations import *
import scipy.io as sio
import numpy as np
import pprint
import pdb
# Demo script: run FactorMaxMarginalization on the 'FactorMax' sample stored
# in a MATLAB file, then on a small hand-written factor.
matfile = '/Users/amit/BC_Classes/PGM/Prog4/PA4Sample.mat'
mat_contents = sio.loadmat(matfile)
mat_struct = mat_contents['FactorMax']
val = mat_struct[0, 0]

# first input: the three fields are used as variables, cardinalities, values
input_factors = val['INPUT1'][0][0]
var = input_factors[0].flatten().tolist()
card = input_factors[1].flatten().tolist()
value = input_factors[2].flatten().tolist()
print(var)
print(card)
print(value)

INPUT1 = Factor(var, card, value, 'test')
# second input: the variables to max out
INPUT2 = val['INPUT2'].flatten()
print(INPUT1)
print(INPUT2)
print(FactorMaxMarginalization(INPUT1, INPUT2))

#example used in section 13.2 pg 555 of Friedman and Koller
print("=====")
psi = Factor(
    [1, 2, 3],
    [3, 2, 2],
    [.25, .05, .15, .08, 0, .09, .35, .07, .21, .16, 0, .18],
)
maxfactor = FactorMaxMarginalization(psi, [2])
print(maxfactor)
print(IndexToAssignment(np.arange(6), [3, 2]))
Exemple #54
0
class ChildCopyGivenParentalsFactor(object):
    """ this represents a de-coupled factor
        given a parent's two haplotypes, returns
        factor whose values are the probability
        of inheriting (grand)paternal or (grand)maternal
        haplotype. This allows for some more flexibility
        in modeling inheritance, rather than clumping
        a single parent's haplotype into a genotype
        i.e. GenotypeGivenParentsFactor

        The wrapped Factor has scope
        [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo] and every variable
        has cardinality numAlleles. """
    def __init__(self, numAlleles, geneCopyVarChild, geneCopyHapOne,
                 geneCopyHapTwo):
        """ numAlleles: number of alleles (cardinality of each variable)
            geneCopyVarChild: variable for the child's gene copy
            geneCopyHapOne, geneCopyHapTwo: variables for the parent's two
            haplotypes """
        self.numalleles = numAlleles
        #hapone used to be assigned the child variable, which left
        #geneCopyHapOne unrecorded; store the first parental haplotype
        self.hapone = geneCopyHapOne
        self.haptwo = geneCopyHapTwo

        self.geneCopyFactor = Factor(
            [geneCopyVarChild, geneCopyHapOne, geneCopyHapTwo], [], [],
            'child|hap1,hap2')
        self.geneCopyFactor.setCard(
            [self.numalleles, self.numalleles, self.numalleles])
        values = np.zeros(
            np.prod([self.numalleles, self.numalleles,
                     self.numalleles])).tolist()
        #this keeps track of what position you are in the values list;
        #the loops below visit numAlleles^3 entries with k varying fastest
        index = 0

        for i in range(numAlleles):
            #iterate through alleles from
            #grand(paternal) haplotype
            for j in range(numAlleles):
                #iterate through alleles from
                #grand(maternal) haplotype
                for k in range(numAlleles):
                    #k is the child allele
                    if i == k and j == k:
                        #both parental haplotypes carry the child allele
                        values[index] = 1
                    elif i == k or j == k:
                        #exactly one parental haplotype carries it
                        values[index] = .5
                    #otherwise the entry keeps its initial zero
                    index += 1
        self.geneCopyFactor.setVal(values)

    def getVar(self):
        """ variables of the wrapped factor """
        return self.geneCopyFactor.getVar()

    def getCard(self):
        """ cardinalities of the wrapped factor """
        return self.geneCopyFactor.getCard()

    def getVal(self):
        """ value table of the wrapped factor """
        return self.geneCopyFactor.getVal()

    def getFactor(self):
        """ the wrapped Factor object itself """
        return self.geneCopyFactor

    def __str__(self):
        return self.geneCopyFactor.__str__()
Exemple #55
0
def FactorProduct(A, B):
    """ Compute the product of two factors.

        C = FactorProduct(A, B) computes the product between two factors, A
        and B, where each factor is defined over a set of variables with given
        dimension. The factor data structure has the following fields:
          .var    Vector of variables in the factor, e.g. [1 2 3]
          .card   Vector of cardinalities corresponding to .var, e.g. [2 2 2]
          .val    Value table of size prod(.card)

        Returns a new Factor over the union of the variables of A and B.
        If A has no variables B is returned unchanged, and vice versa (a note
        is written to stderr in both cases).
        Calls sys.exit(1) when a variable shared by A and B has a different
        cardinality in the two factors.

        See also FactorMarginalization, IndexToAssignment, AssignmentToIndex,
        and https://github.com/indapa/PGM/blob/master/Prog1/FactorProduct.m """

    C=Factor()

    #check for empty factors
    if len( A.getVar() ) == 0:
        sys.stderr.write("A factor is empty!\n")
        return B
    if len( B.getVar() ) == 0:
        sys.stderr.write("B factor is empty!\n")
        return A

    #variables that appear in both factors must agree on cardinality
    intersect=np.intersect1d( A.getVar(), B.getVar() ).tolist()
    if len(intersect) > 0:
        #positions of the shared variables inside each factor
        #NOTE(review): assumes getIndex returns the positions in the order of
        #`intersect` for both factors -- confirm against getIndex
        iA=getIndex( A.getVar(), intersect )
        iB=getIndex( B.getVar(), intersect )

        #the earlier test chained `x.all() == y.all() == False` on two scalars
        #and so could never report a mismatch; compare element by element
        if not np.array_equal( A.getCard()[iA], B.getCard()[iB] ):
            sys.stderr.write("dimensionality mismatch in factors!\n")
            sys.exit(1)

    #now set the variables of C to the union of variables in factors A and B
    C.setVar( np.union1d( A.getVar(), B.getVar() ).tolist() )
    #mapA / mapB are used below to index C's cardinalities and assignments
    #with the variables of A and B respectively
    mapA=isMember( A.getVar(), C.getVar() )
    mapB=isMember( B.getVar(), C.getVar() )

    #Set the cardinality of variables in C
    C.setCard( np.zeros( len(C.getVar()) ).tolist() )
    C.getCard()[mapA]=A.getCard()
    C.getCard()[mapB]=B.getCard()

    #initialize the values of the factor C to be zero
    C.setVal( np.zeros( np.prod(C.getCard()) ).tolist() )

    #helper indices telling which entries of A and B to multiply
    assignments=IndexToAssignment( np.arange( np.prod(C.getCard()) ), C.getCard() )
    #restrict each assignment of C to A's (resp. B's) variables; the result is
    #shifted by one before it is used to index the value arrays
    indxA=AssignmentToIndex( assignments[:,mapA], A.getCard() )-1
    indxB=AssignmentToIndex( assignments[:,mapB], B.getCard() )-1

    #multiply the matching entries of A and B to obtain the factor product
    c_val=A.getVal()[indxA.flatten().tolist()] * B.getVal()[indxB.flatten().tolist()]
    C.setVal( c_val.tolist() )

    return C
Exemple #56
0
def FactorMarginalization(A,V):
    """   FactorMarginalization Sums given variables out of a factor.
          B = FactorMarginalization(A,V) computes the factor with the variables
          in V summed out. The factor data structure has the following fields:
          .var    Vector of variables in the factor, e.g. [1 2 3]
          .card   Vector of cardinalities corresponding to .var, e.g. [2 2 2]
          .val    Value table of size prod(.card)

          A is returned unchanged when it has no variables or V is empty.
          The resultant factor should have at least one variable remaining;
          otherwise an error is written to stderr and None is returned.
          See also FactorProduct, IndexToAssignment , and AssignmentToIndex
          Based on matlab code found here: https://github.com/indapa/PGM/blob/master/Prog1/FactorMarginalization.m """

    #the resulting factor after marginalizing out variables in python list V that are in
    #the factor A
    B=Factor()

    #check for empty factor or variable list
    if len( A.getVar() ) == 0 or len(V) == 0:
        return A

    #the scope of the new factor is the set difference between A.var and V.
    #np.setdiff1d returns the remaining variables in sorted order, so the
    #result no longer depends on the iteration order of a Python set and
    #matches what FactorMaxMarginalization does
    Bvar=np.setdiff1d( A.getVar(), V )
    mapB=isMember(Bvar, A.getVar()) #indices of the variables of the new factor in the original factor A

    #check to see if the new factor has empty scope
    if len(Bvar) == 0:
        sys.stderr.write("FactorMarginalization:Error, resultant factor has empty scope...\n")
        return None
    #set the marginalized factor's variable scope and cardinality
    B.setVar( Bvar.tolist() )
    B.setCard( A.getCard()[mapB] )
    B.setVal( np.zeros(np.prod(B.getCard())).tolist() )

    #compute some helper indices
    assignments=IndexToAssignment ( np.arange(np.prod(A.getCard()) ), A.getCard() )
    #indxB tells which values in A to sum together when marginalizing out the variable(s) in V
    indxB=AssignmentToIndex( assignments[:,mapB], B.getCard())-1

    #accum is a numpy implementation of matlab accumarray
    #accumarray sums data in each group
    #here the group(s) are defined in indxB
    #indxB is a map to tell which value in A.val to map the sum to
    #see http://blogs.mathworks.com/loren/2008/02/20/under-appreciated-accumarray/
    marginal_vals=accum(indxB, A.getVal() )

    #set the marginal values to the new factor with the variable(s) in V summed(marginalized) out
    B.setVal( marginal_vals.tolist() )
    return B
Exemple #57
0
class GenotypeAlleleFreqFactor (object):
    """ factor holding the probability of each genotype given the allele
        frequencies, Pr(genotype|allele_freq) """

    def __init__(self, allelefreqs, genotypeVar, name):
        """ allelefreqs: one frequency per allele
            genotypeVar: the single variable of the factor
            name: name passed to the underlying Factor """
        self.allelefreq = allelefreqs
        self.allelesToGenotypes = self.genotypesToAlleles = self.genotypeFactor = None

        #the number of alleles equals the number of frequencies passed in;
        #build the allele <-> genotype lookup tables for that many alleles
        mappers = generateAlleleGenotypeMappers(len(allelefreqs))
        (self.allelesToGenotypes, self.genotypesToAlleles) = mappers
        (ngenos, _ploidy) = np.shape(self.genotypesToAlleles)

        #one factor state per genotype
        self.genotypeFactor = Factor([genotypeVar], [], [], name)
        self.genotypeFactor.setCard([ngenos])

        #start from an all-zero value list and fill in one entry per genotype
        values = np.zeros((np.prod(self.genotypeFactor.getCard()))).tolist()

        for g in range(ngenos):
            pair = self.genotypesToAlleles[g, :].tolist()
            sameAlleles = pair[0] == pair[1]
            freqProduct = np.prod([allelefreqs[a] for a in pair])
            #the product is doubled when the two alleles differ
            values[g] = freqProduct if sameAlleles else freqProduct * 2

        self.genotypeFactor.setVal(values)

    def getVar(self):
        """ variables of the wrapped factor """
        return self.genotypeFactor.getVar()

    def getCard(self):
        """ cardinalities of the wrapped factor """
        return self.genotypeFactor.getCard()

    def getVal(self):
        """ value table of the wrapped factor """
        return self.genotypeFactor.getVal()

    def setVal(self, val):
        """ replace the value table of the wrapped factor """
        self.genotypeFactor.setVal(val)

    def getFactor(self):
        """ the wrapped Factor object itself """
        return self.genotypeFactor

    def __str__(self):
        return self.genotypeFactor.__str__()
from Factor import *
from PGMcommon import *
from CliqueTree import *
from CliqueTreeOperations import *
from FactorOperations import *
import scipy.io as sio
import numpy as np
import pprint
import pdb
# Demo script: build a clique tree from the 'SumProdCalibrate' sample stored
# in a MATLAB file, calibrate it and print every resulting node.
matfile = '/Users/amit/BC_Classes/PGM/Prog4/PA4Sample.mat'
mat_contents = sio.loadmat(matfile)
mat_struct = mat_contents['SumProdCalibrate']
val = mat_struct[0, 0]
input_edges = val['INPUT']['edges'][0][0]
input_cliqueList = val['INPUT']['cliqueList'][0][0][0]

# turn every (var, card, values) record into a Factor object
clique_list_factorObj = []
for tpl in input_cliqueList:
    (var, card, values) = tpl
    f = Factor(var[0].tolist(), card[0].tolist(), values[0].tolist(), 'factor')
    clique_list_factorObj.append(f)

P = CliqueTree(clique_list_factorObj, input_edges, clique_list_factorObj, [])

P = CliqueTreeCalibrate(P)

for f in P.getNodeList():
    print(f)
    print("==")
 def testprimeFactors(self):
     self.assertEqual(Factor.primeFactorsOf(1),[])
     self.assertEqual(Factor.primeFactorsOf(2), [2])
     self.assertEqual(Factor.primeFactorsOf(3), [3])
     self.assertEqual(Factor.primeFactorsOf(4), [2,2])
     self.assertEqual(Factor.primeFactorsOf(5), [5])
     self.assertEqual(Factor.primeFactorsOf(6),[2,3])
     self.assertEqual(Factor.primeFactorsOf(7), [7])
     self.assertEqual(Factor.primeFactorsOf(8), [2,2,2])
     self.assertEqual(Factor.primeFactorsOf(9), [3,3])
     self.assertEqual(Factor.primeFactorsOf(10), [2,5])
     self.assertEqual(Factor.primeFactorsOf(11), [11])