def setUp(self):
    """Build a ProbabilityCalculator and the p^gamma / p^eta reference
    distributions on the binary/binary (2x2) alphabet."""
    eta, gamma = 0.01, 1e-3
    rows, cols = 2, 2
    self.probabilityCalculatorObject = pc.ProbabilityCalculator(eta, rows, cols)
    self.p_gamma_distribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(gamma)
    self.p_eta_distribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
def setUp(self):
    """Build a ProbabilityCalculator and the p^gamma / p^eta reference
    distributions on the binary/binary (2x2) alphabet."""
    eta, gamma = 0.01, 1e-3
    rows, cols = 2, 2
    self.probabilityCalculatorObject = pc.ProbabilityCalculator(eta, rows, cols)
    self.p_gamma_distribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(gamma)
    self.p_eta_distribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
def generateDictFromNTo_KL_divergenceList(self):
    """
    :Effect: Convert normalizedKL_divergenceList to NToKL_divergenceList by
    multiplying by KLDistributionFromP_etaToUniform, keeping per N only the
    divergences below NToMaxKL_DivergenceDict[N].
    """
    p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)
    uniform = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(0.0)
    # scale factor: KL(p^eta || uniform)
    scale = p_eta.KL_divergence_as_base(uniform.distribution)
    rawDivergences = scale * np.array(self.normalizedKL_divergenceList)
    self.NToKL_divergenceList = {}
    for N in self.NList:
        cutoff = self.NToMaxKL_DivergenceDict[N]
        self.NToKL_divergenceList[N] = [d for d in rawDivergences if d < cutoff]
def RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma(self, firstMarginal, secondMarginal, t):
    """
    Evaluate the function estimating, from above, the probability of emission
    of a type of size N "close to" the probability distribution parameterized
    by the triple (firstMarginal, secondMarginal, t).

    Before calling the actual evaluation of the probabilityCalculatorObject,
    checks that the three parameters (firstMarginal, secondMarginal, t)
    actually parameterize a valid probability distribution; returns 0 if not,
    or if the KL divergence at t exceeds self.gamma.

    Only implemented for binary/binary (k=l=2 case) yet.
    """
    N = self.N
    #checking the marginals are within bounds of probability simplex: in case not or if the parameter t
    #is out of bounds we return 0
    if firstMarginal > 1 - max_t_comparison_tolerance or firstMarginal < 0 + max_t_comparison_tolerance:
        return 0
    if secondMarginal > 1 - max_t_comparison_tolerance or secondMarginal < 0 + max_t_comparison_tolerance:
        return 0
    pathBasedAtMarginals = pdpf.probabilityDistributionPathFactory([firstMarginal, secondMarginal], self.k, self.l).construct()
    max_t = pathBasedAtMarginals.t_max
    min_t = pathBasedAtMarginals.t_min
    # t must lie strictly inside the path's valid parameter range
    if t > max_t - max_t_comparison_tolerance or t < min_t + max_t_comparison_tolerance:
        return 0
    KLDivergenceAt_t = pathBasedAtMarginals.KL_divergence_at_t(t)
    if KLDivergenceAt_t > self.gamma:
        return 0
    else:
        p_gammaDistribution = pdf.probabilityDistributionFactory(self.k, self.l).distributionWrappingParameters(pathBasedAtMarginals.distribution_at_t(t))
        # removed stray trailing `"""` that followed this return in the original
        return self.probabilityCalculatorObject.emissionProbabilityFromP_eta_ofProductLikeTypeSizeN(
            p_gammaDistribution, N)
def convertMinimumGammaToMaxKL_Divergence(self, eta):
    """
    :Parameters:
        eta : float, used to construct p^\eta in the below
    :Explanation:
        Max KL divergence is KL(p^\gamma_minimum \| p^\eta) unless
        gamma_minimum is > eta, in which case it's zero.
        self must have NToMinimumGammaDict
    """
    if not self.NToMinimumGammaDict:
        raise ValueError("No minimum gamma dictionary")
    probabilityDistFactory = pdf.probabilityDistributionFactory(self.k, self.l)
    p_eta = probabilityDistFactory.get_p_eta(eta)
    self.NToMaxKL_DivergenceDict = {}
    for N in self.NToMinimumGammaDict.keys():
        gamma_minimum = self.NToMinimumGammaDict[N]
        if gamma_minimum > eta:
            self.NToMaxKL_DivergenceDict[N] = 0
        else:
            self.NToMaxKL_DivergenceDict[N] = p_eta.KL_divergence_as_base(
                probabilityDistFactory.get_p_eta(gamma_minimum).distribution)
def setUp(self):
    """Construct a generic path (marginals [0.1, 0.9]) and a uniform-based
    path with p^0.01 marked, plus a shared 2x2 distribution factory."""
    genericFactory = pdpf.probabilityDistributionPathFactory([0.1, 0.9], 2, 2)
    uniformFactory = pdpf.probabilityDistributionPathFactory([0.5, 0.5], 2, 2)
    self.factory = genericFactory
    self.path = genericFactory.construct()
    self.factoryUniform = uniformFactory
    self.pathUniform = uniformFactory.construct()
    self.pathUniform.markP_eta(0.01)
    self.distributionFactory = pdf.probabilityDistributionFactory(2, 2)
def returnCDFAccountingForTypesOfModuloClassk(N, eta, k):
    """Build a CDF of size-N types, referenced to p^eta, restricted to the
    modulo class k of the first entry.

    NOTE(review): the helper name says Mod_10 while this function takes k —
    confirm the intended modulus against the CDF class.
    """
    cdf = CDF()
    cdf.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
    cdf.setN(N)
    cdf.accountForTypesForWhichFirstEntryIs_k_Mod_10(k)
    return cdf
def generateDictFromNTo_KL_divergenceListAndGammaListIncludingGammaGreaterThanEta(self):
    #tested "Also gives numGammasGreaterThanEta Gammas"
    # Extends NToKL_divergenceList / NToGammaList (built by the two generate*
    # calls below) with NumberOfGammasGreaterThanEta+1 extra divergences for
    # gammas at or above eta, sampled along the path based at uniform marginals.
    if self.normalizedKL_divergenceList is None or len(self.normalizedKL_divergenceList) == 0:
        raise Exception("Beta table has no normalized KL_divergnece list")
    if not self.NumberOfGammasGreaterThanEta:
        raise Exception("Beta table has no variable for number of gammas greater than eta")
    self.generateDictFromNTo_KL_divergenceList()
    self.generateDictFromNToGammaList()
    p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)
    uniformMarginals = [1.0/self.k,1.0/self.l]
    probabilityDistPathBasedAtUniformMarginals = pdpf.probabilityDistributionPathFactory(uniformMarginals, self.k, self.l).construct()
    # largest achievable divergence on the uniform-based path: KL(p_eta || dist at t_max)
    t_max = probabilityDistPathBasedAtUniformMarginals.t_max
    distributionAt_t_max_OneUniformBasedPath = probabilityDistPathBasedAtUniformMarginals.distribution_at_t(t_max)
    KLDivergenceFromP_etaToDistributionAtTMaxOnPath = p_eta.KL_divergence_as_base(distributionAt_t_max_OneUniformBasedPath)
    probabilityDistPathBasedAtUniform = pdpf.probabilityDistributionPathFactory([1.0/self.k, 1.0/self.l], self.k, self.l).construct()
    probabilityDistPathBasedAtUniform.markP_eta(self.eta)
    numLgGam = int(self.NumberOfGammasGreaterThanEta)
    # evenly spaced divergences in [0, (1-tolerance)*max], numLgGam+1 points
    rawKLDivergenceListForGammaGreaterThanEta = KLDivergenceFromP_etaToDistributionAtTMaxOnPath* ( (1.0-tolerance)/numLgGam )*np.array(range(numLgGam+1) )
    for N in self.NList:
        self.NToKL_divergenceList[N].extend(rawKLDivergenceListForGammaGreaterThanEta)
        # map each divergence back to a gamma via the marked-distance inverse on the path
        GammaListForGammaGreaterThanEta = [ probabilityDistPathBasedAtUniform.KL_divergence_at_t( probabilityDistPathBasedAtUniform.t_at_specifiedDivergenceFromMarkedDistAwayFromBase(KLDivergence)) for KLDivergence in rawKLDivergenceListForGammaGreaterThanEta]
        self.NToGammaList[N] = np.append(self.NToGammaList[N],np.array(GammaListForGammaGreaterThanEta))
def convertMinimumGammaToMaxKL_Divergence(self, eta):
    """
    :Parameters:
        eta : float, used to construct p^\eta in the below
    :Explanation:
        Max KL divergence is KL(p^\gamma_minimum \| p^\eta) unless
        gamma_minimum is > eta, in which case it's zero.
        self must have NToMinimumGammaDict
    """
    if not self.NToMinimumGammaDict:
        raise ValueError("No minimum gamma dictionary")
    probabilityDistFactory = pdf.probabilityDistributionFactory(self.k, self.l)
    p_eta = probabilityDistFactory.get_p_eta(eta)
    self.NToMaxKL_DivergenceDict = {}
    for N in self.NToMinimumGammaDict.keys():
        gamma_minimum = self.NToMinimumGammaDict[N]
        if gamma_minimum > eta:
            self.NToMaxKL_DivergenceDict[N] = 0
        else:
            self.NToMaxKL_DivergenceDict[N] = p_eta.KL_divergence_as_base(
                probabilityDistFactory.get_p_eta(gamma_minimum).distribution)
def returnCDFAccountingForTypesOfModuloClassk(N, eta, k):
    """Build a CDF of size-N types, referenced to p^eta, restricted to the
    modulo class k of the first entry.

    NOTE(review): the helper name says Mod_10 while this function takes k —
    confirm the intended modulus against the CDF class.
    """
    cdf = CDF()
    cdf.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
    cdf.setN(N)
    cdf.accountForTypesForWhichFirstEntryIs_k_Mod_10(k)
    return cdf
def RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma(self, firstMarginal, secondMarginal, t):
    """
    Evaluate the function estimating, from above, the probability of emission
    of a type of size N "close to" the probability distribution parameterized
    by the triple (firstMarginal, secondMarginal, t).

    Before calling the actual evaluation of the probabilityCalculatorObject,
    checks that the three parameters (firstMarginal, secondMarginal, t)
    actually parameterize a valid probability distribution; returns 0 if not,
    or if the KL divergence at t exceeds self.gamma.

    Only implemented for binary/binary (k=l=2 case) yet.
    """
    N = self.N
    #checking the marginals are within bounds of probability simplex: in case not or if the parameter t
    #is out of bounds we return 0
    if firstMarginal > 1 - max_t_comparison_tolerance or firstMarginal < 0 + max_t_comparison_tolerance:
        return 0
    if secondMarginal > 1 - max_t_comparison_tolerance or secondMarginal < 0 + max_t_comparison_tolerance:
        return 0
    pathBasedAtMarginals = pdpf.probabilityDistributionPathFactory([firstMarginal, secondMarginal], self.k, self.l).construct()
    max_t = pathBasedAtMarginals.t_max
    min_t = pathBasedAtMarginals.t_min
    # t must lie strictly inside the path's valid parameter range
    if t > max_t - max_t_comparison_tolerance or t < min_t + max_t_comparison_tolerance:
        return 0
    KLDivergenceAt_t = pathBasedAtMarginals.KL_divergence_at_t(t)
    if KLDivergenceAt_t > self.gamma:
        return 0
    else:
        p_gammaDistribution = pdf.probabilityDistributionFactory(self.k, self.l).distributionWrappingParameters(pathBasedAtMarginals.distribution_at_t(t))
        # removed stray trailing `"""` that followed this return in the original
        return self.probabilityCalculatorObject.emissionProbabilityFromP_eta_ofProductLikeTypeSizeN(
            p_gammaDistribution, N)
def testAccountForAllTypesRobbins(self):
    """Robbins-estimated CDF over all types (N=30, n=4) has the known
    cumulative probability at 0.1."""
    self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    self.CDF.setN(30)
    self.CDF.setn(4)
    self.CDF.accountForAllTypesRobbins()
    self.failUnlessAlmostEqual(self.CDF.assignCumulativeProbability(0.1),
                               0.49556072704210913)
def testAccountForAllTypesRobbins(self):
    """Robbins-estimated CDF over all types (N=30, n=4) has the known
    cumulative probability at 0.1."""
    self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    self.CDF.setN(30)
    self.CDF.setn(4)
    self.CDF.accountForAllTypesRobbins()
    self.failUnlessAlmostEqual(self.CDF.assignCumulativeProbability(0.1),
                               0.49556072704210913)
def setUp(self):
    """Construct a generic path (marginals [0.1, 0.9]) and a uniform-based
    path with p^0.01 marked, plus a shared 2x2 distribution factory."""
    genericFactory = pdpf.probabilityDistributionPathFactory([0.1, 0.9], 2, 2)
    uniformFactory = pdpf.probabilityDistributionPathFactory([0.5, 0.5], 2, 2)
    self.factory = genericFactory
    self.path = genericFactory.construct()
    self.factoryUniform = uniformFactory
    self.pathUniform = uniformFactory.construct()
    self.pathUniform.markP_eta(0.01)
    self.distributionFactory = pdf.probabilityDistributionFactory(2, 2)
def __init__(self, eta, k, l):
    '''
    Constructor: wrap the p^eta distribution on a k-by-l alphabet and record
    the alphabet dimensions (m = k*l cells).
    '''
    self.underlyingDistribution = pdf.probabilityDistributionFactory(k, l).get_p_eta(eta)
    self.k = k
    self.l = l
    self.m = k * l
def testAccountForAllTypesWithTwoElementPrefix(self):
    """Accounting for size-5 types sharing the prefix [2, 1] yields exactly
    three discontinuities with the expected jump probabilities."""
    self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    self.CDF.setN(5)
    self.CDF.accountForTypesWithPrefix(tp.typePrefix(5, data=[2, 1], n=4))
    expected = {
        0.013844293808390619: 0.054900966738391482,
        0.11849392256130019: 0.065587032834470232,
        0.2911031660323688: 0.13610213450308134,
    }
    self.failUnlessAlmostEqual(self.CDF.Dictionary, expected)
def __init__(self):
    '''
    Parameterless Constructor: Start with empty Discontinuity list and
    Probability list
    '''
    self.AscendingDiscontinuityList = []
    #keys are elements of AscendingDiscontinuityList, values are the probabilities
    self.Dictionary = {}
    self.referenceDistribution = None
    self.N = None
    self.n = None
    self.probDistributionFactory = pdf.probabilityDistributionFactory(2, 2)
def testAccountForAllTypesWithTwoElementPrefix(self):
    """Accounting for size-5 types sharing the prefix [2, 1] yields exactly
    three discontinuities with the expected jump probabilities."""
    self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    self.CDF.setN(5)
    self.CDF.accountForTypesWithPrefix(tp.typePrefix(5, data=[2, 1], n=4))
    expected = {
        0.013844293808390619: 0.054900966738391482,
        0.11849392256130019: 0.065587032834470232,
        0.2911031660323688: 0.13610213450308134,
    }
    self.failUnlessAlmostEqual(self.CDF.Dictionary, expected)
def generateDictFromNTo_KL_divergenceList(self):
    """
    :Effect: Convert normalizedKL_divergenceList to NToKL_divergenceList by
    multiplying by KLDistributionFromP_etaToUniform, keeping per N only the
    divergences below NToMaxKL_DivergenceDict[N].
    """
    p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)
    uniform = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(0.0)
    # scale factor: KL(p^eta || uniform)
    scale = p_eta.KL_divergence_as_base(uniform.distribution)
    rawDivergences = scale * np.array(self.normalizedKL_divergenceList)
    self.NToKL_divergenceList = {}
    for N in self.NList:
        cutoff = self.NToMaxKL_DivergenceDict[N]
        self.NToKL_divergenceList[N] = [d for d in rawDivergences if d < cutoff]
def __init__(self):
    '''
    Parameterless Constructor: Start with empty Discontinuity list and
    Probability list
    '''
    self.AscendingDiscontinuityList = []
    #keys are elements of AscendingDiscontinuityList, values are the probabilities
    self.Dictionary = {}
    self.referenceDistribution = None
    self.N = None
    self.n = None
    self.probDistributionFactory = pdf.probabilityDistributionFactory(2, 2)
def KL_DivFromP_etaOfP_gamma(self, gamma, k, l, largestKL_DivergenceForThisN=10000):
    """Return KL(p^eta || p^gamma) on a k-by-l alphabet, clipped from above
    at largestKL_DivergenceForThisN."""
    factory = pdf.probabilityDistributionFactory(k, l)
    divergence = factory.get_p_eta(self.eta).KLDivergenceOfP_gammaFromDist(gamma)
    #largestKL_DivergenceForThisN = 10000 #TODO: eliminate
    return min(divergence, largestKL_DivergenceForThisN)
def unSerialize(self, pickleFile): #TODO: refactor to name "deSerialize" #Load in results stored in serialized pickle format #auxiliary function def N_KLDiv_Pts(arrayOfResultsForThisEta,p_eta): N_KLDiv_Pts = [tuple([alist['N'], alist['gamma'], alist['beta']]) for alist in arrayOfResultsForThisEta] N_KLDiv_Pts = np.array( N_KLDiv_Pts, dtype=[('N', '<i8'), ('KL_Div','<f8'), ('beta', '<f8')]) #import ipdb; ipdb.set_trace() for row in N_KLDiv_Pts: row['KL_Div'] = p_eta.KLDivergenceOfP_gammaFromDist(row['KL_Div']) N_KLDiv_Pts.sort(order=['N','KL_Div']) return N_KLDiv_Pts #main body: if not self.eta: raise Exception("Eta must be defined prior to unserializing results of beta computation") probabilityDistFactoryUniformMarginals = pdf.probabilityDistributionFactory(self.k, self.l) p_eta = probabilityDistFactoryUniformMarginals.get_p_eta(self.eta) listOfResults= pickle.load(open(pickleFile,'rb')) print pickleFile listOfResultsAsTuples = [tuple(alist) for alist in listOfResults] arrayOfResults = np.array(listOfResultsAsTuples, dtype=[('time', '<f8'),('eta', '<f8'), ('N', '<i8'), ('gamma', '<f8'), ('beta', '<f8')]) arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResults[ arrayOfResults['eta'] == self.eta] #and arrayOfResults['gamma'] < self.eta] arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResultsForThisEtaGammaLessThanEta[arrayOfResultsForThisEtaGammaLessThanEta['gamma'] < self.eta] arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResults[ arrayOfResults['eta'] == self.eta] arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResultsForThisEtaGammaAtLeastEta[ arrayOfResultsForThisEtaGammaAtLeastEta ['gamma'] >= self.eta ] self.betasByAscendingNAscendingGamma = np.array(listOfResultsAsTuples, dtype=[('time', '<f8'),('eta', '<f8'), ('N', '<i8'), ('gamma', '<f8'), ('beta', '<f8')]) self.betasByAscendingNAscendingGamma.sort(order = ['N', 'gamma']) self.N_KLDivPtsForInterpolationGammaLessThanEta = N_KLDiv_Pts(arrayOfResultsForThisEtaGammaLessThanEta,p_eta) 
self.N_KLDivPtsForInterpolationGammaAtLeastEta = N_KLDiv_Pts(arrayOfResultsForThisEtaGammaAtLeastEta,p_eta) #import ipdb; ipdb.set_trace() N_KLDivPtsForInterpolation_nd_GammaLessThanEta = self.N_KLDivPtsForInterpolationGammaLessThanEta[['N', 'KL_Div']].view(np.ndarray).reshape(len(self.N_KLDivPtsForInterpolationGammaLessThanEta), -1) self.points_GammaLessThanEta = np.array([list(arow[0].view(np.ndarray)) for arow in N_KLDivPtsForInterpolation_nd_GammaLessThanEta]) N_KLDivPtsForInterpolation_nd_GammaAtLeastEta = self.N_KLDivPtsForInterpolationGammaAtLeastEta[['N', 'KL_Div']].view(np.ndarray).reshape(len(self.N_KLDivPtsForInterpolationGammaAtLeastEta), -1) self.points_GammaAtLeastEta = np.array([list(arow[0].view(np.ndarray)) for arow in N_KLDivPtsForInterpolation_nd_GammaAtLeastEta]) self.valuesForInterpolationGammaLessthanEta = [np.log(row['beta']) for row in self.N_KLDivPtsForInterpolationGammaLessThanEta] self.valuesForInterpolationGammaAtLeastEta = [np.log(row['beta']) for row in self.N_KLDivPtsForInterpolationGammaAtLeastEta] self.largestN = self.betasByAscendingNAscendingGamma['N'][-1] # if N is greater than the last (largest) N in the N_KLDivPtsForInterpolation['N'], compute value of function # on this largest N and the given gamma, then second largest N and given gamma, and extrapolate from those two. self.nextToLargestN, self.nextToLargestNLastIndex = self.nextLargestNAndLastIndex(self.largestN) self.largestNFirstIndex = self.nextToLargestNLastIndex thirdLargestN,thirdLargestNLastIndex = self.nextLargestNAndLastIndex(self.nextToLargestN, self.nextToLargestNLastIndex) self.nextToLargestNFirstIndex = thirdLargestNLastIndex self.NList = sorted(set(self.betasByAscendingNAscendingGamma['N'])) self.NFirstIndexHash = iP.firstIndexHash(set(self.NList),list(self.betasByAscendingNAscendingGamma['N']))
def testAccountForType(self):
    """
    When the (partial) CDF accounts for only one type, T=[1,1,1,4], there is
    only one discontinuity point, which is located at tau(T), and is a jump
    in the CDF from 0 to the emission probability of T from the reference
    distribution
    """
    self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    self.CDF.setN(7)
    self.CDF.accountForType([1, 1, 1, 4])
    self.failUnlessAlmostEqual(CDF.tauOfType([1, 1, 1, 4], 7), 0.0427972344694)
    self.failUnlessAlmostEqual(self.CDF.Dictionary,
                               {0.042797234469424295: 0.024888873765445504})
def testAccountForType(self):
    """
    When the (partial) CDF accounts for only one type, T=[1,1,1,4], there is
    only one discontinuity point, which is located at tau(T), and is a jump
    in the CDF from 0 to the emission probability of T from the reference
    distribution
    """
    self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    self.CDF.setN(7)
    self.CDF.accountForType([1, 1, 1, 4])
    self.failUnlessAlmostEqual(CDF.tauOfType([1, 1, 1, 4], 7), 0.0427972344694)
    self.failUnlessAlmostEqual(self.CDF.Dictionary,
                               {0.042797234469424295: 0.024888873765445504})
def generateDictFromNTo_KL_divergenceListAndGammaListIncludingGammaGreaterThanEta(self):
    #tested "Also gives numGammasGreaterThanEta Gammas"
    # Extends NToKL_divergenceList / NToGammaList (built by the two generate*
    # calls below) with NumberOfGammasGreaterThanEta+1 extra divergences for
    # gammas at or above eta, sampled along the path based at uniform marginals.
    if self.normalizedKL_divergenceList is None or len(self.normalizedKL_divergenceList) == 0:
        raise Exception("Beta table has no normalized KL_divergnece list")
    if not self.NumberOfGammasGreaterThanEta:
        raise Exception("Beta table has no variable for number of gammas greater than eta")
    self.generateDictFromNTo_KL_divergenceList()
    self.generateDictFromNToGammaList()
    p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)
    uniformMarginals = [1.0/self.k,1.0/self.l]
    probabilityDistPathBasedAtUniformMarginals = pdpf.probabilityDistributionPathFactory(uniformMarginals, self.k, self.l).construct()
    # largest achievable divergence on the uniform-based path: KL(p_eta || dist at t_max)
    t_max = probabilityDistPathBasedAtUniformMarginals.t_max
    distributionAt_t_max_OneUniformBasedPath = probabilityDistPathBasedAtUniformMarginals.distribution_at_t(t_max)
    KLDivergenceFromP_etaToDistributionAtTMaxOnPath = p_eta.KL_divergence_as_base(distributionAt_t_max_OneUniformBasedPath)
    probabilityDistPathBasedAtUniform = pdpf.probabilityDistributionPathFactory([1.0/self.k, 1.0/self.l], self.k, self.l).construct()
    probabilityDistPathBasedAtUniform.markP_eta(self.eta)
    numLgGam = int(self.NumberOfGammasGreaterThanEta)
    # evenly spaced divergences in [0, (1-tolerance)*max], numLgGam+1 points
    rawKLDivergenceListForGammaGreaterThanEta = KLDivergenceFromP_etaToDistributionAtTMaxOnPath* ( (1.0-tolerance)/numLgGam )*np.array(range(numLgGam+1) )
    for N in self.NList:
        self.NToKL_divergenceList[N].extend(rawKLDivergenceListForGammaGreaterThanEta)
        # map each divergence back to a gamma via the marked-distance inverse on the path
        GammaListForGammaGreaterThanEta = [ probabilityDistPathBasedAtUniform.KL_divergence_at_t( probabilityDistPathBasedAtUniform.t_at_specifiedDivergenceFromMarkedDistAwayFromBase(KLDivergence)) for KLDivergence in rawKLDivergenceListForGammaGreaterThanEta]
        self.NToGammaList[N] = np.append(self.NToGammaList[N],np.array(GammaListForGammaGreaterThanEta))
def testAccountForAllTypes(self):
    """Both CDF instances, given the same p^0.1 reference (N=30, n=4),
    produce 2009 discontinuities, reach cumulative probability 1.0 at the
    last discontinuity, and agree on the CDF value at 0.1."""
    referenceDist = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    for cdf in (self.CDF, self.CDF2):
        cdf.referenceDistribution = referenceDist
        cdf.setN(30)
        cdf.setn(4)
        cdf.accountForAllTypes()
        self.failUnlessEqual(len(cdf.Dictionary), 2009)
        self.failUnlessEqual(len(cdf.AscendingDiscontinuityList), 2009)
        # original indexes both dictionaries with self.CDF's last discontinuity
        self.failUnlessAlmostEqual(
            cdf.Dictionary[self.CDF.AscendingDiscontinuityList[-1]], 1.0)
        self.failUnlessAlmostEqual(cdf.assignCumulativeProbability(0.1),
                                   0.470627961298)
def testAccountForAllTypes(self):
    """Both CDF instances, given the same p^0.1 reference (N=30, n=4),
    produce 2009 discontinuities, reach cumulative probability 1.0 at the
    last discontinuity, and agree on the CDF value at 0.1."""
    referenceDist = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    for cdf in (self.CDF, self.CDF2):
        cdf.referenceDistribution = referenceDist
        cdf.setN(30)
        cdf.setn(4)
        cdf.accountForAllTypes()
        self.failUnlessEqual(len(cdf.Dictionary), 2009)
        self.failUnlessEqual(len(cdf.AscendingDiscontinuityList), 2009)
        # original indexes both dictionaries with self.CDF's last discontinuity
        self.failUnlessAlmostEqual(
            cdf.Dictionary[self.CDF.AscendingDiscontinuityList[-1]], 1.0)
        self.failUnlessAlmostEqual(cdf.assignCumulativeProbability(0.1),
                                   0.470627961298)
def testRobbinsEstimatedEmissionProbability(self):
    """The Robbins estimate of the emission probability of a size-500 type is
    close to (and slightly above) the exact emission probability."""
    referenceDist = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    aType = [200, 100, 100, 100]
    N = 500
    np.testing.assert_almost_equal(
        referenceDist.exactEmissionProbability(aType, N), 2.64474060719e-19)
    np.testing.assert_almost_equal(
        referenceDist.RobbinsEstimatedEmissionProbability(aType, N),
        2.65202363049e-19)
def unSerialize(self, pickleFile): #TODO: refactor to name "deSerialize" #Load in results stored in serialized pickle format #auxiliary function def N_KLDiv_Pts(arrayOfResultsForThisEta, p_eta): N_KLDiv_Pts = [ tuple([alist['N'], alist['gamma'], alist['beta']]) for alist in arrayOfResultsForThisEta ] N_KLDiv_Pts = np.array(N_KLDiv_Pts, dtype=[('N', '<i8'), ('KL_Div', '<f8'), ('beta', '<f8')]) #import ipdb; ipdb.set_trace() for row in N_KLDiv_Pts: row['KL_Div'] = p_eta.KLDivergenceOfP_gammaFromDist( row['KL_Div']) N_KLDiv_Pts.sort(order=['N', 'KL_Div']) return N_KLDiv_Pts #main body: if not self.eta: raise Exception( "Eta must be defined prior to unserializing results of beta computation" ) probabilityDistFactoryUniformMarginals = pdf.probabilityDistributionFactory( self.k, self.l) p_eta = probabilityDistFactoryUniformMarginals.get_p_eta(self.eta) listOfResults = pickle.load(open(pickleFile, 'rb')) print pickleFile listOfResultsAsTuples = [tuple(alist) for alist in listOfResults] arrayOfResults = np.array(listOfResultsAsTuples, dtype=[('time', '<f8'), ('eta', '<f8'), ('N', '<i8'), ('gamma', '<f8'), ('beta', '<f8')]) arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResults[ arrayOfResults['eta'] == self.eta] #and arrayOfResults['gamma'] < self.eta] arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResultsForThisEtaGammaLessThanEta[ arrayOfResultsForThisEtaGammaLessThanEta['gamma'] < self.eta] arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResults[ arrayOfResults['eta'] == self.eta] arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResultsForThisEtaGammaAtLeastEta[ arrayOfResultsForThisEtaGammaAtLeastEta['gamma'] >= self.eta] self.betasByAscendingNAscendingGamma = np.array(listOfResultsAsTuples, dtype=[ ('time', '<f8'), ('eta', '<f8'), ('N', '<i8'), ('gamma', '<f8'), ('beta', '<f8') ]) self.betasByAscendingNAscendingGamma.sort(order=['N', 'gamma']) self.N_KLDivPtsForInterpolationGammaLessThanEta = N_KLDiv_Pts( arrayOfResultsForThisEtaGammaLessThanEta, p_eta) 
self.N_KLDivPtsForInterpolationGammaAtLeastEta = N_KLDiv_Pts( arrayOfResultsForThisEtaGammaAtLeastEta, p_eta) #import ipdb; ipdb.set_trace() N_KLDivPtsForInterpolation_nd_GammaLessThanEta = self.N_KLDivPtsForInterpolationGammaLessThanEta[ ['N', 'KL_Div']].view(np.ndarray).reshape( len(self.N_KLDivPtsForInterpolationGammaLessThanEta), -1) self.points_GammaLessThanEta = np.array([ list(arow[0].view(np.ndarray)) for arow in N_KLDivPtsForInterpolation_nd_GammaLessThanEta ]) N_KLDivPtsForInterpolation_nd_GammaAtLeastEta = self.N_KLDivPtsForInterpolationGammaAtLeastEta[ ['N', 'KL_Div']].view(np.ndarray).reshape( len(self.N_KLDivPtsForInterpolationGammaAtLeastEta), -1) self.points_GammaAtLeastEta = np.array([ list(arow[0].view(np.ndarray)) for arow in N_KLDivPtsForInterpolation_nd_GammaAtLeastEta ]) self.valuesForInterpolationGammaLessthanEta = [ np.log(row['beta']) for row in self.N_KLDivPtsForInterpolationGammaLessThanEta ] self.valuesForInterpolationGammaAtLeastEta = [ np.log(row['beta']) for row in self.N_KLDivPtsForInterpolationGammaAtLeastEta ] self.largestN = self.betasByAscendingNAscendingGamma['N'][-1] # if N is greater than the last (largest) N in the N_KLDivPtsForInterpolation['N'], compute value of function # on this largest N and the given gamma, then second largest N and given gamma, and extrapolate from those two. self.nextToLargestN, self.nextToLargestNLastIndex = self.nextLargestNAndLastIndex( self.largestN) self.largestNFirstIndex = self.nextToLargestNLastIndex thirdLargestN, thirdLargestNLastIndex = self.nextLargestNAndLastIndex( self.nextToLargestN, self.nextToLargestNLastIndex) self.nextToLargestNFirstIndex = thirdLargestNLastIndex self.NList = sorted(set(self.betasByAscendingNAscendingGamma['N'])) self.NFirstIndexHash = iP.firstIndexHash( set(self.NList), list(self.betasByAscendingNAscendingGamma['N']))
def KL_DivFromP_etaOfP_gamma(self, gamma, k, l, largestKL_DivergenceForThisN=10000):
    """Return KL(p^eta || p^gamma) on a k-by-l alphabet, clipped from above
    at largestKL_DivergenceForThisN."""
    factory = pdf.probabilityDistributionFactory(k, l)
    divergence = factory.get_p_eta(self.eta).KLDivergenceOfP_gammaFromDist(gamma)
    #largestKL_DivergenceForThisN = 10000 #TODO: eliminate
    return min(divergence, largestKL_DivergenceForThisN)
def testExactEmissionProbability(self):
    """Exact emission probabilities from p^0.1 of two size-5 types that
    differ only by a permutation of entries."""
    referenceDist = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    np.testing.assert_almost_equal(
        referenceDist.exactEmissionProbability([2, 1, 1, 1], 5),
        0.054900966738391482)
    np.testing.assert_almost_equal(
        referenceDist.exactEmissionProbability([1, 2, 1, 1], 5),
        0.021372132192157535)
def KLDivergenceOfP_gammaFromDist(self, gamma, k=2, l=2):
    """Return KL(self || p^gamma) where p^gamma lives on a k-by-l alphabet
    (binary/binary by default)."""
    gammaDistribution = pdf.probabilityDistributionFactory(k, l).get_p_eta(gamma)
    return self.KL_divergence_as_base(gammaDistribution.distribution)
def setReferenceDistribution_p_eta(self, eta):
    """Set the reference distribution to p^eta on the 2x2 alphabet
    (hardcoding binary variables for now)."""
    self.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
def testProbabilityDistributionFactory(self):
    """p^0.1 on the binary/binary alphabet has the expected 2x2 joint
    distribution matrix."""
    expected = np.array([[0.35989731, 0.14010269],
                         [0.14010269, 0.35989731]])
    p_eta = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
    np.testing.assert_almost_equal(p_eta.distribution, expected)
def setReferenceDistribution_p_eta(self, eta):
    """Set the reference distribution to p^eta on the 2x2 alphabet
    (hardcoding binary variables for now)."""
    self.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)