def getSiteLikes(self):
    """Compute site likelihoods (likelihoods, not log likelihoods).

    The tree log likelihood is stored in self.logLike.  The site
    likelihoods of every data partition are gathered, in partition
    order, into the single flat list self.siteLikes.
    """
    self._commonCStuff()
    # The second arg to p4_treeLogLike() is the getSiteLikes flag; on here.
    self.logLike = pf.p4_treeLogLike(self.cTree, 1)
    self.siteLikes = []
    for part in self.data.parts:
        self.siteLikes.extend(pf.getSiteLikes(part.cPart))
def calcLogLike(self, verbose=1, resetEmpiricalComps=True):
    """Compute the log likelihood of the tree; nothing is optimized.

    The value is stored in self.logLike.  When verbose is true it is
    also printed.  resetEmpiricalComps is handed on unchanged to
    self._commonCStuff().
    """
    self._commonCStuff(resetEmpiricalComps=resetEmpiricalComps)
    # The second arg to p4_treeLogLike() is the getSiteLikes flag; off here.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)
    if not verbose:
        return
    print("Tree.calcLogLike(). %f" % self.logLike)
def optLogLike(self, verbose=1, newtAndBrentPowell=1, allBrentPowell=0, simplex=0):
    """Calculate the likelihood of the tree, with optimization.

    There are 3 optimization methods-- choose one.  I've made
    'newtAndBrentPowell' the default, as it is fast and seems to be
    working.  The 'allBrentPowell' optimizer used to be the default,
    as it seems to be the most robust, although it is slow.  It would
    be good for checking important calculations.  The simplex
    optimizer is the slowest, and will sometimes find better optima
    for difficult data, but often fails to optimize (with no
    warning).

    Exactly one of the three method flags must be true.  Because
    newtAndBrentPowell defaults to 1, it must be turned off explicitly
    in order to choose one of the other two; otherwise a Glitch is
    raised.

    After the optimization self.logLike is set, the branch lengths are
    copied from the C tree back onto the nodes, and the other free
    parameters are restored into self.model.  If verbose is true the
    log likelihood and the elapsed cpu time are printed.
    """
    if verbose:
        # time.clock() was removed in Python 3.8; use process_time()
        # where it exists, and fall back to clock() on older Pythons.
        cpuClock = getattr(time, 'process_time', None) or time.clock
        theStartTime = cpuClock()
    self._commonCStuff()

    # We want only one opt method.  bool() makes any truthy value count
    # as one, and any falsy value (including None) count as zero.
    nChosen = bool(newtAndBrentPowell) + bool(allBrentPowell) + bool(simplex)
    if nChosen != 1:
        gm = ['Tree.optLogLike()']
        gm.append("Choose 1 opt method.")
        raise Glitch(gm)

    # Do the opt.
    if allBrentPowell:
        pf.p4_allBrentPowellOptimize(self.cTree)
    elif simplex:
        # NOTE(review): this is a Python 2 style implicit relative import;
        # under Python 3 it only resolves if Tree is importable as a
        # top-level module -- confirm against the package layout.
        from Tree import Tree
        pf.p4_simplexOptimize(self.cTree, self, Tree.simplexDump)
    else:
        pf.p4_newtSetup(self.cTree)
        pf.p4_newtAndBrentPowellOpt(self.cTree)

    # The second arg to p4_treeLogLike() is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # get the brLens
    brLens = pf.p4_getBrLens(self.cTree)
    for n in self.iterNodesNoRoot():
        n.br.len = brLens[n.nodeNum]

    # get the other free prams
    prams = pf.p4_getFreePrams(self.cTree)
    self.model.restoreFreePrams(prams)

    if verbose:
        print("optLogLike = %f" % self.logLike)
        theEndTime = cpuClock()
        print("cpu time %s seconds." % (theEndTime - theStartTime))
def calcLogLike(self, verbose=1, resetEmpiricalComps=True):
    """Calculate the likelihood of the tree, without optimization.

    The log likelihood is stored in self.logLike, and is printed as
    well if verbose is true.  resetEmpiricalComps is passed through to
    self._commonCStuff().
    """
    self._commonCStuff(resetEmpiricalComps=resetEmpiricalComps)
    # The second arg to p4_treeLogLike() is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)
    if verbose:
        # Parenthesized single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("Tree.calcLogLike(). %f" % self.logLike)
def optLogLike(self, verbose=1, newtAndBrentPowell=1, allBrentPowell=0, simplex=0):
    """Calculate the likelihood of the tree, with optimization.

    There are 3 optimization methods-- choose one.  I've made
    'newtAndBrentPowell' the default, as it is fast and seems to be
    working.  The 'allBrentPowell' optimizer used to be the default,
    as it seems to be the most robust, although it is slow.  It would
    be good for checking important calculations.  The simplex
    optimizer is the slowest, and will sometimes find better optima
    for difficult data, but often fails to optimize (with no
    warning).

    Exactly one of the three method flags must be true.  Because
    newtAndBrentPowell defaults to 1, it must be turned off explicitly
    in order to choose one of the other two; otherwise a Glitch is
    raised.

    After the optimization self.logLike is set, the branch lengths are
    copied from the C tree back onto the nodes, and the other free
    parameters are restored into self.model.  If verbose is true the
    log likelihood and the elapsed cpu time are printed.
    """
    if verbose:
        # time.clock() was removed in Python 3.8; use process_time()
        # where it exists, and fall back to clock() on older Pythons.
        cpuClock = getattr(time, 'process_time', None) or time.clock
        theStartTime = cpuClock()
    self._commonCStuff()

    # We want only one opt method.  bool() makes any truthy value count
    # as one, and any falsy value (including None) count as zero.
    nChosen = bool(newtAndBrentPowell) + bool(allBrentPowell) + bool(simplex)
    if nChosen != 1:
        gm = ['Tree.optLogLike()']
        gm.append("Choose 1 opt method.")
        raise Glitch(gm)

    # Do the opt.
    if allBrentPowell:
        pf.p4_allBrentPowellOptimize(self.cTree)
    elif simplex:
        # Imported here, at the point of use, as in the original.
        from .Tree import Tree
        pf.p4_simplexOptimize(self.cTree, self, Tree.simplexDump)
    else:
        pf.p4_newtSetup(self.cTree)
        pf.p4_newtAndBrentPowellOpt(self.cTree)

    # The second arg to p4_treeLogLike() is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # get the brLens
    brLens = pf.p4_getBrLens(self.cTree)
    for n in self.iterNodesNoRoot():
        n.br.len = brLens[n.nodeNum]

    # get the other free prams
    prams = pf.p4_getFreePrams(self.cTree)
    self.model.restoreFreePrams(prams)

    if verbose:
        print("optLogLike = %f" % self.logLike)
        theEndTime = cpuClock()
        print("cpu time %s seconds." % (theEndTime - theStartTime))
def optLogLike(self, verbose=1, newtAndBrentPowell=1, allBrentPowell=0):
    """Calculate the likelihood of the tree, with optimization.

    There are two optimization methods-- choose one.  I've made
    'newtAndBrentPowell' the default, as it is fast and seems to be
    working.  The 'allBrentPowell' optimizer used to be the default,
    as it seems to be the most robust, although it is slow.  It would
    be good for checking important calculations.

    Exactly one of the two method flags must be true.  Because
    newtAndBrentPowell defaults to 1, it must be turned off explicitly
    in order to choose allBrentPowell; otherwise a P4Error is raised.

    After the optimization self.logLike is set, the branch lengths are
    copied from the C tree back onto the nodes, and the other free
    parameters are restored into self.model.  If verbose is true the
    log likelihood and the elapsed cpu time are printed.
    """
    if verbose:
        # time.clock() was removed in Python 3.8; use process_time()
        # where it exists, and fall back to clock() on older Pythons.
        cpuClock = getattr(time, 'process_time', None) or time.clock
        theStartTime = cpuClock()
    self._commonCStuff()

    # We want only one opt method.  bool() makes any truthy value count
    # as one, and any falsy value (including None) count as zero.
    nChosen = bool(newtAndBrentPowell) + bool(allBrentPowell)
    if nChosen != 1:
        gm = ['Tree.optLogLike()']
        gm.append("Choose 1 opt method.")
        raise P4Error(gm)

    # Do the opt.
    if allBrentPowell:
        pf.p4_allBrentPowellOptimize(self.cTree)
    else:
        pf.p4_newtSetup(self.cTree)
        pf.p4_newtAndBrentPowellOpt(self.cTree)

    # The second arg to p4_treeLogLike() is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # get the brLens
    brLens = pf.p4_getBrLens(self.cTree)
    for n in self.iterNodesNoRoot():
        n.br.len = brLens[n.nodeNum]

    # get the other free prams
    prams = pf.p4_getFreePrams(self.cTree)
    self.model.restoreFreePrams(prams)

    if verbose:
        print("optLogLike = %f" % self.logLike)
        theEndTime = cpuClock()
        print("cpu time %s seconds." % (theEndTime - theStartTime))
def optTest(self):
    """Time a single likelihood calculation, and print the elapsed time.

    After self._commonCStuff(), this calls pf.p4_setPrams() on the C
    tree with -1 as its second argument, and then stores the tree log
    likelihood in self.logLike.

    The local switch doXfer is hard-wired off.  When it is turned on,
    the model and tree are pushed to the C side before the calculation,
    and branch lengths and free parameters are pulled back afterwards.
    """
    self._commonCStuff()
    # time.clock() was removed in Python 3.8; use process_time() where
    # it exists, and fall back to clock() on older Pythons.
    cpuClock = getattr(time, 'process_time', None) or time.clock
    theStartTime = cpuClock()
    doXfer = 0
    # NOTE(review): the nesting below was reconstructed from flattened
    # source.  With doXfer = 0 and a single repetition, only the
    # p4_setPrams() and p4_treeLogLike() calls run -- confirm the
    # nesting before turning doXfer on.
    for repNum in range(1):
        if doXfer:
            self.model.setCStuff()
            self.setCStuff()
        pf.p4_setPrams(self.cTree, -1)
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        if doXfer:
            # get the brLens
            brLens = pf.p4_getBrLens(self.cTree)
            # A separate index name, so that the repetition counter of
            # the enclosing loop is not overwritten.
            for nodeNum in range(len(self.nodes)):
                n = self.nodes[nodeNum]
                if n != self.root:
                    n.br.len = brLens[nodeNum]

            # get the other free prams
            prams = pf.p4_getFreePrams(self.cTree)
            self.model.restoreFreePrams(prams)

    print("time %s seconds." % (cpuClock() - theStartTime))
def optTest(self):
    """Time a single likelihood calculation, and print the elapsed time.

    After self._commonCStuff(), this calls pf.p4_setPrams() on the C
    tree with -1 as its second argument, and then stores the tree log
    likelihood in self.logLike.

    The local switch doXfer is hard-wired off.  When it is turned on,
    the model and tree are pushed to the C side before the calculation,
    and branch lengths and free parameters are pulled back afterwards.
    """
    self._commonCStuff()
    # time.clock() was removed in Python 3.8; use process_time() where
    # it exists, and fall back to clock() on older Pythons.
    cpuClock = getattr(time, 'process_time', None) or time.clock
    theStartTime = cpuClock()
    doXfer = 0
    # NOTE(review): the nesting below was reconstructed from flattened
    # source.  With doXfer = 0 and a single repetition, only the
    # p4_setPrams() and p4_treeLogLike() calls run -- confirm the
    # nesting before turning doXfer on.
    for repNum in range(1):
        if doXfer:
            self.model.setCStuff()
            self.setCStuff()
        pf.p4_setPrams(self.cTree, -1)
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        if doXfer:
            # get the brLens
            brLens = pf.p4_getBrLens(self.cTree)
            # A separate index name, so that the repetition counter of
            # the enclosing loop is not overwritten.
            for nodeNum in range(len(self.nodes)):
                n = self.nodes[nodeNum]
                if n != self.root:
                    n.br.len = brLens[nodeNum]

            # get the other free prams
            prams = pf.p4_getFreePrams(self.cTree)
            self.model.restoreFreePrams(prams)

    print("time %s seconds." % (cpuClock() - theStartTime))
def getSiteRates(self):
    """Get posterior mean site rate, and gamma category.

    This says two things --
    1.  The posterior mean site rate, calculated like PAML
    2.  Which GDASRV category contributes most to the likelihood.

    The posterior mean site rate calculation requires that there be
    only one gdasrv over the tree, which will usually be the case.

    For placement in categories, if its a tie score, then it is placed
    in the first one.

    The list of site rates, and the list of categories, both with one
    value for each site, are put into separate numpy arrays, returned
    as a list, ie [siteRatesArray, categoriesArray]

    There is one of these lists for each data partition, and the
    results as a whole are returned as a list.  So if you only have
    one data partition, then you get a 1-item list, and that single
    item is a list with 2 numpy arrays.  Ie [[siteRatesArray,
    categoriesArray]]

    If nGammaCat for a partition is 1, it will give that partition an
    array of ones for the site rates and zeros for the categories.

    Raises Glitch if any partition has more than one gdasrv, or if
    nGammaCat for a partition is less than 1.  As a side effect,
    self.logLike is recalculated.
    """
    self._commonCStuff()
    # The second arg to p4_treeLogLike() is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # Check every partition before calculating anything.
    for partNum in range(len(self.data.parts)):
        nGdasrvs = len(self.model.parts[partNum].gdasrvs)
        if nGdasrvs > 1:
            gm = ['Tree.getSiteRates()']
            gm.append("Part %i has %i gdasrvs.  Maximum 1 allowed."
                      % (partNum, nGdasrvs))
            raise Glitch(gm)

    results = []
    for partNum, p in enumerate(self.data.parts):
        nGammaCat = self.model.parts[partNum].nGammaCat
        # numpy.float was only an alias for the builtin float (a 64-bit
        # float dtype) and has been removed from NumPy, so the dtype is
        # spelled numpy.float64 here.
        if nGammaCat == 1:
            siteRates = numpy.ones(p.nChar, numpy.float64)
            gammaCats = numpy.zeros(p.nChar, numpy.int32)
        elif nGammaCat > 1:
            siteRates = numpy.zeros(p.nChar, numpy.float64)
            gammaCats = numpy.zeros(p.nChar, numpy.int32)
            # Every category starts at -1 before the C call fills
            # siteRates and gammaCats in.
            gammaCats[:] = -1
            work = numpy.zeros(nGammaCat, numpy.float64)
            pf.getSiteRates(self.cTree, p.cPart, partNum,
                            siteRates, gammaCats, work)
        else:
            raise Glitch("This should not happen.")
        results.append([siteRates, gammaCats])
    return results
def getSiteRates(self):
    """Get posterior mean site rate, and gamma category.

    This says two things --
    1.  The posterior mean site rate, calculated like PAML
    2.  Which GDASRV category contributes most to the likelihood.

    The posterior mean site rate calculation requires that there be
    only one gdasrv over the tree, which will usually be the case.

    For placement in categories, if its a tie score, then it is placed
    in the first one.

    The list of site rates, and the list of categories, both with one
    value for each site, are put into separate numpy arrays, returned
    as a list, ie [siteRatesArray, categoriesArray]

    There is one of these lists for each data partition, and the
    results as a whole are returned as a list.  So if you only have
    one data partition, then you get a 1-item list, and that single
    item is a list with 2 numpy arrays.  Ie [[siteRatesArray,
    categoriesArray]]

    If nGammaCat for a partition is 1, it will give that partition an
    array of ones for the site rates and zeros for the categories.

    Raises Glitch if any partition has more than one gdasrv, or if
    nGammaCat for a partition is less than 1.  As a side effect,
    self.logLike is recalculated.
    """
    self._commonCStuff()
    # The second arg to p4_treeLogLike() is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # Check every partition before calculating anything.
    for partNum in range(len(self.data.parts)):
        nGdasrvs = len(self.model.parts[partNum].gdasrvs)
        if nGdasrvs > 1:
            gm = ['Tree.getSiteRates()']
            gm.append("Part %i has %i gdasrvs.  Maximum 1 allowed."
                      % (partNum, nGdasrvs))
            raise Glitch(gm)

    results = []
    for partNum, p in enumerate(self.data.parts):
        nGammaCat = self.model.parts[partNum].nGammaCat
        # numpy.float was only an alias for the builtin float (a 64-bit
        # float dtype) and has been removed from NumPy, so the dtype is
        # spelled numpy.float64 here.
        if nGammaCat == 1:
            siteRates = numpy.ones(p.nChar, numpy.float64)
            gammaCats = numpy.zeros(p.nChar, numpy.int32)
        elif nGammaCat > 1:
            siteRates = numpy.zeros(p.nChar, numpy.float64)
            gammaCats = numpy.zeros(p.nChar, numpy.int32)
            # Every category starts at -1 before the C call fills
            # siteRates and gammaCats in.
            gammaCats[:] = -1
            work = numpy.zeros(nGammaCat, numpy.float64)
            pf.getSiteRates(self.cTree, p.cPart, partNum,
                            siteRates, gammaCats, work)
        else:
            raise Glitch("This should not happen.")
        results.append([siteRates, gammaCats])
    return results