def setUp(self):
    """Fixture for the binary/binary (k = l = 2) case: a probability
    calculator at noise level eta, plus the p_gamma and p_eta distributions."""
    k, l = 2, 2
    eta, gamma = 0.01, 1e-3
    self.probabilityCalculatorObject = pc.ProbabilityCalculator(eta, k, l)
    self.p_gamma_distribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(gamma)
    self.p_eta_distribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
Example #2
0
 def setUp(self):
     """Build a 2x2 probability calculator together with the p_gamma and
     p_eta reference distributions used by the tests."""
     k, l = 2, 2
     eta, gamma = 0.01, 1e-3
     self.probabilityCalculatorObject = pc.ProbabilityCalculator(eta, k, l)
     distributionFactory = pdf.probabilityDistributionFactory
     self.p_gamma_distribution = distributionFactory(2, 2).get_p_eta(gamma)
     self.p_eta_distribution = distributionFactory(2, 2).get_p_eta(eta)
Example #3
0
    def generateDictFromNTo_KL_divergenceList(self):
        """
        :Effect:
        Rebuild self.NToKL_divergenceList from self.normalizedKL_divergenceList:
        every normalized value is scaled by the KL divergence between p_eta and
        the uniform distribution, and for each N only scaled values strictly
        below self.NToMaxKL_DivergenceDict[N] are kept.
        """
        p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)
        uniform = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(0.0)
        scaleFactor = p_eta.KL_divergence_as_base(uniform.distribution)
        scaledDivergences = scaleFactor * np.array(self.normalizedKL_divergenceList)
        self.NToKL_divergenceList = {}
        for N in self.NList:
            bound = self.NToMaxKL_DivergenceDict[N]
            self.NToKL_divergenceList[N] = [divergence for divergence in scaledDivergences
                                            if divergence < bound]
 def RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma(self, firstMarginal, secondMarginal, t):
     """
     Evaluate the function estimating, from above, the probability of emission
     of a type of size N "close to" the probability distribution parameterized
     by the triple (firstMarginal, secondMarginal, t).

     Before delegating to probabilityCalculatorObject, checks that the three
     parameters actually parameterize a valid probability distribution;
     returns 0 for any out-of-range parameter or when the KL divergence at t
     exceeds self.gamma.

     Only implemented for binary/binary (k = l = 2 case) yet.
     """
     N = self.N
     # Marginals must lie strictly inside the unit interval (up to tolerance);
     # otherwise the triple cannot parameterize a distribution and we return 0.
     if firstMarginal > 1 - max_t_comparison_tolerance or firstMarginal < 0 + max_t_comparison_tolerance:
         return 0
     if secondMarginal > 1 - max_t_comparison_tolerance or secondMarginal < 0 + max_t_comparison_tolerance:
         return 0
     pathBasedAtMarginals = pdpf.probabilityDistributionPathFactory([firstMarginal, secondMarginal], self.k, self.l).construct()
     max_t = pathBasedAtMarginals.t_max
     min_t = pathBasedAtMarginals.t_min
     # The path parameter t must also lie strictly within its admissible range.
     if t > max_t - max_t_comparison_tolerance or t < min_t + max_t_comparison_tolerance:
         return 0
     KLDivergenceAt_t = pathBasedAtMarginals.KL_divergence_at_t(t)
     if KLDivergenceAt_t > self.gamma:
         return 0
     else:
         p_gammaDistribution = pdf.probabilityDistributionFactory(self.k, self.l).distributionWrappingParameters(pathBasedAtMarginals.distribution_at_t(t))
         return self.probabilityCalculatorObject.emissionProbabilityFromP_eta_ofProductLikeTypeSizeN( p_gammaDistribution, N)
Example #5
0
    def convertMinimumGammaToMaxKL_Divergence(self, eta):
        """
        :Parameters:
        eta : float, used to construct p^\eta in the below

        :Explanation:
        Max KL divergence is KL(p^\gamma_minimum \| p^\eta)
        unless gamma_minimum is > eta, in which case it's zero

        self must have NToMinimumGammaDict
        """
        if not self.NToMinimumGammaDict:
            raise ValueError("No minimum gamma dictionary")
        distFactory = pdf.probabilityDistributionFactory(self.k, self.l)
        p_eta = distFactory.get_p_eta(eta)

        def maxDivergenceFor(gammaMinimum):
            # Zero when the minimum gamma already exceeds eta.
            if gammaMinimum > eta:
                return 0
            return p_eta.KL_divergence_as_base(
                distFactory.get_p_eta(gammaMinimum).distribution)

        self.NToMaxKL_DivergenceDict = {}
        for N, gammaMinimum in self.NToMinimumGammaDict.items():
            self.NToMaxKL_DivergenceDict[N] = maxDivergenceFor(gammaMinimum)
 def setUp(self):
     """Construct a path based at marginals [0.1, 0.9], a uniform-based path
     with p_eta marked at 0.01, and a 2x2 distribution factory."""
     self.factory = pdpf.probabilityDistributionPathFactory([0.1, 0.9], 2, 2)
     self.path = self.factory.construct()
     # Uniform-marginals path with the eta = 0.01 distribution marked on it.
     self.factoryUniform = pdpf.probabilityDistributionPathFactory([0.5, 0.5], 2, 2)
     self.pathUniform = self.factoryUniform.construct()
     self.pathUniform.markP_eta(0.01)
     self.distributionFactory = pdf.probabilityDistributionFactory(2, 2)
Example #7
0
def returnCDFAccountingForTypesOfModuloClassk(N, eta, k):
    """Build a CDF over size-N types, with reference distribution p_eta,
    accounting only for types whose first entry is k modulo 10."""
    cdf = CDF()
    cdf.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
    cdf.setN(N)
    cdf.accountForTypesForWhichFirstEntryIs_k_Mod_10(k)
    return cdf
Example #8
0
    def generateDictFromNTo_KL_divergenceListAndGammaListIncludingGammaGreaterThanEta(self): #tested
        """Extend the per-N KL-divergence and gamma lists with
        NumberOfGammasGreaterThanEta additional entries whose gamma exceeds eta.

        Requires normalizedKL_divergenceList and NumberOfGammasGreaterThanEta
        to be set; raises Exception otherwise.
        """

        if self.normalizedKL_divergenceList is None or len(self.normalizedKL_divergenceList) == 0:
            raise Exception("Beta table has no normalized KL_divergnece list")
        if not self.NumberOfGammasGreaterThanEta:
            raise Exception("Beta table has no variable for number of gammas greater than eta")
        # Populate the base dictionaries first; the extra entries are appended below.
        self.generateDictFromNTo_KL_divergenceList()
        self.generateDictFromNToGammaList()
        p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)

        # Path through the uniform marginals; its KL divergence at t_max is the
        # scale for the raw divergence grid generated below.
        uniformMarginals = [1.0/self.k,1.0/self.l]
        probabilityDistPathBasedAtUniformMarginals = pdpf.probabilityDistributionPathFactory(uniformMarginals, self.k, self.l).construct()
        t_max = probabilityDistPathBasedAtUniformMarginals.t_max
        distributionAt_t_max_OneUniformBasedPath = probabilityDistPathBasedAtUniformMarginals.distribution_at_t(t_max)
        KLDivergenceFromP_etaToDistributionAtTMaxOnPath = p_eta.KL_divergence_as_base(distributionAt_t_max_OneUniformBasedPath)

        # Second uniform-based path with p_eta marked, used to map divergences back to gammas.
        probabilityDistPathBasedAtUniform = pdpf.probabilityDistributionPathFactory([1.0/self.k, 1.0/self.l], self.k, self.l).construct()
        probabilityDistPathBasedAtUniform.markP_eta(self.eta)

        # Evenly spaced raw divergences: (numLgGam + 1) points in [0, (1 - tolerance) * scale].
        numLgGam = int(self.NumberOfGammasGreaterThanEta)
        rawKLDivergenceListForGammaGreaterThanEta = KLDivergenceFromP_etaToDistributionAtTMaxOnPath* ( (1.0-tolerance)/numLgGam )*np.array(range(numLgGam+1) )


        for N in self.NList:
            self.NToKL_divergenceList[N].extend(rawKLDivergenceListForGammaGreaterThanEta)
            # Map each raw divergence to the gamma at the corresponding point on the marked path.
            GammaListForGammaGreaterThanEta = [ probabilityDistPathBasedAtUniform.KL_divergence_at_t(
                probabilityDistPathBasedAtUniform.t_at_specifiedDivergenceFromMarkedDistAwayFromBase(KLDivergence)) for KLDivergence in rawKLDivergenceListForGammaGreaterThanEta]
            self.NToGammaList[N] = np.append(self.NToGammaList[N],np.array(GammaListForGammaGreaterThanEta))
Example #9
0
    def convertMinimumGammaToMaxKL_Divergence(self, eta):
        """
        :Parameters:
        eta : float, used to construct p^\eta in the below

        :Explanation:
        Max KL divergence is KL(p^\gamma_minimum \| p^\eta)
        unless gamma_minimum is > eta, in which case it's zero

        self must have NToMinimumGammaDict
        """
        if not self.NToMinimumGammaDict:
            raise ValueError("No minimum gamma dictionary")
        self.NToMaxKL_DivergenceDict = {}
        distributionFactory = pdf.probabilityDistributionFactory(self.k, self.l)
        p_eta = distributionFactory.get_p_eta(eta)
        for N, gammaMinimum in self.NToMinimumGammaDict.items():
            if gammaMinimum > eta:
                # Minimum gamma already exceeds eta: the max divergence is zero.
                self.NToMaxKL_DivergenceDict[N] = 0
            else:
                self.NToMaxKL_DivergenceDict[N] = p_eta.KL_divergence_as_base(
                    distributionFactory.get_p_eta(gammaMinimum).distribution)
Example #10
0
def returnCDFAccountingForTypesOfModuloClassk(N, eta, k):
    """Return a CDF for size-N types (reference distribution p_eta) restricted
    to types whose first entry equals k modulo 10."""
    result = CDF()
    result.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(eta)
    result.setN(N)
    result.accountForTypesForWhichFirstEntryIs_k_Mod_10(k)
    return result
 def RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma(
         self, firstMarginal, secondMarginal, t):
     """
     Evaluate the function estimating, from above, the probability of emission
     of a type of size N "close to" the probability distribution parameterized
     by the triple (firstMarginal, secondMarginal, t).

     Before delegating to probabilityCalculatorObject, checks that the three
     parameters actually parameterize a valid probability distribution;
     returns 0 for any out-of-range parameter or when the KL divergence at t
     exceeds self.gamma.

     Only implemented for binary/binary (k = l = 2 case) yet.
     """
     N = self.N
     # Reject marginals outside the (tolerance-shrunken) open unit interval:
     # such a triple cannot parameterize a probability distribution.
     if firstMarginal > 1 - max_t_comparison_tolerance or firstMarginal < 0 + max_t_comparison_tolerance:
         return 0
     if secondMarginal > 1 - max_t_comparison_tolerance or secondMarginal < 0 + max_t_comparison_tolerance:
         return 0
     pathBasedAtMarginals = pdpf.probabilityDistributionPathFactory(
         [firstMarginal, secondMarginal], self.k, self.l).construct()
     max_t = pathBasedAtMarginals.t_max
     min_t = pathBasedAtMarginals.t_min
     # Reject t outside the admissible range of the path.
     if t > max_t - max_t_comparison_tolerance or t < min_t + max_t_comparison_tolerance:
         return 0
     KLDivergenceAt_t = pathBasedAtMarginals.KL_divergence_at_t(t)
     if KLDivergenceAt_t > self.gamma:
         return 0
     else:
         p_gammaDistribution = pdf.probabilityDistributionFactory(
             self.k, self.l).distributionWrappingParameters(
                 pathBasedAtMarginals.distribution_at_t(t))
         return self.probabilityCalculatorObject.emissionProbabilityFromP_eta_ofProductLikeTypeSizeN(
             p_gammaDistribution, N)
Example #12
0
 def testAccountForAllTypesRobbins(self):
     """Robbins accounting over all types (N=30, n=4) against p_eta(0.1)
     yields the expected cumulative probability at 0.1."""
     reference = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.referenceDistribution = reference
     self.CDF.setN(30)
     self.CDF.setn(4)
     self.CDF.accountForAllTypesRobbins()
     expected = 0.49556072704210913
     self.failUnlessAlmostEqual(self.CDF.assignCumulativeProbability(0.1), expected)
Example #13
0
 def testAccountForAllTypesRobbins(self):
     """Check the cumulative probability at 0.1 after Robbins accounting of
     all types with N = 30, n = 4 and reference distribution p_eta(0.1)."""
     self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.setN(30)
     self.CDF.setn(4)
     self.CDF.accountForAllTypesRobbins()
     self.failUnlessAlmostEqual(
         self.CDF.assignCumulativeProbability(0.1), 0.49556072704210913)
 def setUp(self):
     """Create two distribution paths (marginals [0.1, 0.9] and uniform
     [0.5, 0.5], the latter with p_eta marked at 0.01) plus a 2x2 factory."""
     self.factory = pdpf.probabilityDistributionPathFactory([0.1, 0.9], 2, 2)
     self.path = self.factory.construct()
     self.factoryUniform = pdpf.probabilityDistributionPathFactory([0.5, 0.5], 2, 2)
     self.pathUniform = self.factoryUniform.construct()
     # Mark the eta = 0.01 distribution on the uniform-based path.
     self.pathUniform.markP_eta(0.01)
     self.distributionFactory = pdf.probabilityDistributionFactory(2, 2)
Example #15
0
 def __init__(self, eta, k, l):
     '''
     Constructor: wrap the p_eta distribution for a k-by-l table and record
     the table dimensions.
     '''
     self.underlyingDistribution = pdf.probabilityDistributionFactory(k, l).get_p_eta(eta)
     self.k = k
     self.l = l
     self.m = k * l  # total number of cells in the k-by-l table
Example #16
0
 def testAccountForAllTypesWithTwoElementPrefix(self):
     """Accounting for types with prefix [2, 1] (N=5, n=4) yields the three
     expected discontinuity/probability pairs."""
     self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.setN(5)
     prefix = tp.typePrefix(5, data=[2, 1], n=4)
     self.CDF.accountForTypesWithPrefix(prefix)
     expectedDictionary = {0.013844293808390619: 0.054900966738391482,
                           0.11849392256130019: 0.065587032834470232,
                           0.2911031660323688: 0.13610213450308134}
     self.failUnlessAlmostEqual(self.CDF.Dictionary, expectedDictionary)
Example #17
0
 def __init__(self):
     '''
     Parameterless constructor: start with an empty discontinuity list and
     an empty probability dictionary.
     '''
     self.AscendingDiscontinuityList = []
     # Keys are elements of AscendingDiscontinuityList; values are the probabilities.
     self.Dictionary = {}
     self.referenceDistribution = None
     self.N = None
     self.n = None
     self.probDistributionFactory = pdf.probabilityDistributionFactory(2, 2)
Example #18
0
 def testAccountForAllTypesWithTwoElementPrefix(self):
     """With reference p_eta(0.1), N=5 and prefix [2, 1] (n=4), the CDF's
     dictionary must contain exactly the three expected jumps."""
     reference = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.referenceDistribution = reference
     self.CDF.setN(5)
     self.CDF.accountForTypesWithPrefix(tp.typePrefix(5, data=[2, 1], n=4))
     self.failUnlessAlmostEqual(
         self.CDF.Dictionary,
         {0.013844293808390619: 0.054900966738391482,
          0.11849392256130019: 0.065587032834470232,
          0.2911031660323688: 0.13610213450308134})
Example #19
0
    def generateDictFromNTo_KL_divergenceList(self):
        """
        :Effect:
        Rebuild NToKL_divergenceList from normalizedKL_divergenceList by
        scaling every normalized value by the KL divergence between p_eta and
        the uniform distribution, keeping per N only values strictly below
        NToMaxKL_DivergenceDict[N].
        """
        p_eta = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(self.eta)
        uniformDistribution = pdf.probabilityDistributionFactory(self.k, self.l).get_p_eta(0.0)
        scaleFactor = p_eta.KL_divergence_as_base(uniformDistribution.distribution)
        rawDivergences = scaleFactor * np.array(self.normalizedKL_divergenceList)
        self.NToKL_divergenceList = {}
        for N in self.NList:
            maxDivergence = self.NToMaxKL_DivergenceDict[N]
            kept = [divergence for divergence in rawDivergences
                    if divergence < maxDivergence]
            self.NToKL_divergenceList[N] = kept
Example #20
0
 def __init__(self):
     '''
     Parameterless constructor: begin with no discontinuities and no
     probabilities recorded.
     '''
     self.AscendingDiscontinuityList = []
     self.Dictionary = {}  # discontinuity -> probability at that discontinuity
     self.referenceDistribution = None
     self.N = None
     self.n = None
     # Binary/binary distribution factory used throughout.
     self.probDistributionFactory = pdf.probabilityDistributionFactory(2, 2)
Example #21
0
 def KL_DivFromP_etaOfP_gamma(self, gamma, k, l, largestKL_DivergenceForThisN=10000):
     """KL divergence between p_eta (at self.eta) and p_gamma on a k-by-l
     table, capped at largestKL_DivergenceForThisN."""
     p_eta = pdf.probabilityDistributionFactory(k, l).get_p_eta(self.eta)
     divergence = p_eta.KLDivergenceOfP_gammaFromDist(gamma)
     return min(divergence, largestKL_DivergenceForThisN)
Example #22
0
    def unSerialize(self, pickleFile):     #TODO: refactor to name "deSerialize" 
        """Load beta-computation results from a pickle file and build the
        interpolation tables (partitioned at gamma = eta) plus the largest-N
        bookkeeping used for extrapolation.

        Requires self.eta to be set; raises Exception otherwise.
        NOTE(review): Python 2 `print`; pickle.load on the given file is
        unsafe for untrusted input -- only unserialize trusted files.
        """
        #Load in results stored in serialized pickle format
        #auxiliary function
        def N_KLDiv_Pts(arrayOfResultsForThisEta,p_eta):
            # Build a structured array of (N, gamma, beta), convert the gamma
            # column in place to its KL divergence from p_eta, then sort.
            N_KLDiv_Pts = [tuple([alist['N'], alist['gamma'], alist['beta']]) for alist in arrayOfResultsForThisEta]
            N_KLDiv_Pts = np.array( N_KLDiv_Pts, dtype=[('N', '<i8'), ('KL_Div','<f8'), ('beta', '<f8')])
            #import ipdb; ipdb.set_trace() 
            for row in N_KLDiv_Pts:
                row['KL_Div'] = p_eta.KLDivergenceOfP_gammaFromDist(row['KL_Div'])
            N_KLDiv_Pts.sort(order=['N','KL_Div'])
            return N_KLDiv_Pts

        #main body:
        if not self.eta:
            raise Exception("Eta must be defined prior to unserializing results of beta computation")
        probabilityDistFactoryUniformMarginals = pdf.probabilityDistributionFactory(self.k, self.l)
        p_eta = probabilityDistFactoryUniformMarginals.get_p_eta(self.eta)
        listOfResults= pickle.load(open(pickleFile,'rb'))
        print pickleFile
        listOfResultsAsTuples = [tuple(alist) for alist in listOfResults]
        arrayOfResults = np.array(listOfResultsAsTuples, dtype=[('time', '<f8'),('eta', '<f8'), ('N', '<i8'), ('gamma', '<f8'), ('beta', '<f8')])
        # Partition the rows for this eta into gamma < eta and gamma >= eta.
        arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResults[ arrayOfResults['eta'] == self.eta] #and arrayOfResults['gamma'] < self.eta]
        arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResultsForThisEtaGammaLessThanEta[arrayOfResultsForThisEtaGammaLessThanEta['gamma'] < self.eta]
        arrayOfResultsForThisEtaGammaAtLeastEta =  arrayOfResults[ arrayOfResults['eta'] == self.eta]
        arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResultsForThisEtaGammaAtLeastEta[ arrayOfResultsForThisEtaGammaAtLeastEta ['gamma'] >= self.eta ]

        self.betasByAscendingNAscendingGamma = np.array(listOfResultsAsTuples, dtype=[('time', '<f8'),('eta', '<f8'), ('N', '<i8'), ('gamma', '<f8'), ('beta', '<f8')])
        self.betasByAscendingNAscendingGamma.sort(order = ['N', 'gamma'])

        self.N_KLDivPtsForInterpolationGammaLessThanEta =  N_KLDiv_Pts(arrayOfResultsForThisEtaGammaLessThanEta,p_eta)
        self.N_KLDivPtsForInterpolationGammaAtLeastEta = N_KLDiv_Pts(arrayOfResultsForThisEtaGammaAtLeastEta,p_eta)
        #import ipdb; ipdb.set_trace()     
        # Extract the (N, KL_Div) coordinate pairs as plain 2-D arrays for interpolation.
        N_KLDivPtsForInterpolation_nd_GammaLessThanEta = self.N_KLDivPtsForInterpolationGammaLessThanEta[['N', 'KL_Div']].view(np.ndarray).reshape(len(self.N_KLDivPtsForInterpolationGammaLessThanEta), -1)
        self.points_GammaLessThanEta = np.array([list(arow[0].view(np.ndarray)) for arow in N_KLDivPtsForInterpolation_nd_GammaLessThanEta])
        N_KLDivPtsForInterpolation_nd_GammaAtLeastEta = self.N_KLDivPtsForInterpolationGammaAtLeastEta[['N', 'KL_Div']].view(np.ndarray).reshape(len(self.N_KLDivPtsForInterpolationGammaAtLeastEta), -1)
        self.points_GammaAtLeastEta = np.array([list(arow[0].view(np.ndarray)) for arow in N_KLDivPtsForInterpolation_nd_GammaAtLeastEta])
        # Interpolation values are the logs of the beta column.
        self.valuesForInterpolationGammaLessthanEta = [np.log(row['beta']) for row in self.N_KLDivPtsForInterpolationGammaLessThanEta]

        self.valuesForInterpolationGammaAtLeastEta = [np.log(row['beta']) for row in self.N_KLDivPtsForInterpolationGammaAtLeastEta]

        self.largestN = self.betasByAscendingNAscendingGamma['N'][-1]
        #  if N is greater than the last (largest) N in the N_KLDivPtsForInterpolation['N'], compute value of function
        #  on this largest N and the given gamma, then second largest N and given gamma, and extrapolate from those two.
        self.nextToLargestN, self.nextToLargestNLastIndex = self.nextLargestNAndLastIndex(self.largestN)

        self.largestNFirstIndex = self.nextToLargestNLastIndex
        thirdLargestN,thirdLargestNLastIndex = self.nextLargestNAndLastIndex(self.nextToLargestN, self.nextToLargestNLastIndex)
        self.nextToLargestNFirstIndex = thirdLargestNLastIndex
        self.NList = sorted(set(self.betasByAscendingNAscendingGamma['N']))

        self.NFirstIndexHash = iP.firstIndexHash(set(self.NList),list(self.betasByAscendingNAscendingGamma['N']))
Example #23
0
 def testAccountForType(self):
     """
     When the (partial) CDF accounts for only one type, T = [1, 1, 1, 4],
     there is exactly one discontinuity point, located at tau(T): a jump in
     the CDF from 0 to the emission probability of T under the reference
     distribution.
     """
     self.CDF.referenceDistribution = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.setN(7)
     self.CDF.accountForType([1, 1, 1, 4])
     self.failUnlessAlmostEqual(CDF.tauOfType([1, 1, 1, 4], 7), 0.0427972344694)
     self.failUnlessAlmostEqual(self.CDF.Dictionary,
                                {0.042797234469424295: 0.024888873765445504})
Example #24
0
 def testAccountForType(self):
     """
     Accounting for the single type T = [1, 1, 1, 4] (N = 7) must produce
     exactly one discontinuity at tau(T), jumping from 0 to T's emission
     probability under the reference distribution.
     """
     reference = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.referenceDistribution = reference
     self.CDF.setN(7)
     self.CDF.accountForType([1, 1, 1, 4])
     self.failUnlessAlmostEqual(CDF.tauOfType([1, 1, 1, 4], 7), 0.0427972344694)
     expectedDictionary = {0.042797234469424295: 0.024888873765445504}
     self.failUnlessAlmostEqual(self.CDF.Dictionary, expectedDictionary)
Example #25
0
    def generateDictFromNTo_KL_divergenceListAndGammaListIncludingGammaGreaterThanEta(
            self):  #tested
        """Extend the per-N KL-divergence and gamma lists with
        NumberOfGammasGreaterThanEta additional entries whose gamma exceeds
        eta.

        Requires normalizedKL_divergenceList and NumberOfGammasGreaterThanEta
        to be set; raises Exception otherwise.
        """

        if self.normalizedKL_divergenceList is None or len(
                self.normalizedKL_divergenceList) == 0:
            raise Exception("Beta table has no normalized KL_divergnece list")
        if not self.NumberOfGammasGreaterThanEta:
            raise Exception(
                "Beta table has no variable for number of gammas greater than eta"
            )
        # Populate the base dictionaries first; extra entries are appended below.
        self.generateDictFromNTo_KL_divergenceList()
        self.generateDictFromNToGammaList()
        p_eta = pdf.probabilityDistributionFactory(self.k,
                                                   self.l).get_p_eta(self.eta)

        # Path through the uniform marginals; its KL divergence at t_max is
        # the scale for the raw divergence grid generated below.
        uniformMarginals = [1.0 / self.k, 1.0 / self.l]
        probabilityDistPathBasedAtUniformMarginals = pdpf.probabilityDistributionPathFactory(
            uniformMarginals, self.k, self.l).construct()
        t_max = probabilityDistPathBasedAtUniformMarginals.t_max
        distributionAt_t_max_OneUniformBasedPath = probabilityDistPathBasedAtUniformMarginals.distribution_at_t(
            t_max)
        KLDivergenceFromP_etaToDistributionAtTMaxOnPath = p_eta.KL_divergence_as_base(
            distributionAt_t_max_OneUniformBasedPath)

        # Second uniform-based path with p_eta marked, used to map divergences
        # back to gammas.
        probabilityDistPathBasedAtUniform = pdpf.probabilityDistributionPathFactory(
            [1.0 / self.k, 1.0 / self.l], self.k, self.l).construct()
        probabilityDistPathBasedAtUniform.markP_eta(self.eta)

        # Evenly spaced raw divergences: (numLgGam + 1) points in
        # [0, (1 - tolerance) * scale].
        numLgGam = int(self.NumberOfGammasGreaterThanEta)
        rawKLDivergenceListForGammaGreaterThanEta = KLDivergenceFromP_etaToDistributionAtTMaxOnPath * (
            (1.0 - tolerance) / numLgGam) * np.array(range(numLgGam + 1))

        for N in self.NList:
            self.NToKL_divergenceList[N].extend(
                rawKLDivergenceListForGammaGreaterThanEta)
            # Map each raw divergence to the gamma at the corresponding point
            # on the marked path.
            GammaListForGammaGreaterThanEta = [
                probabilityDistPathBasedAtUniform.KL_divergence_at_t(
                    probabilityDistPathBasedAtUniform.
                    t_at_specifiedDivergenceFromMarkedDistAwayFromBase(
                        KLDivergence))
                for KLDivergence in rawKLDivergenceListForGammaGreaterThanEta
            ]
            self.NToGammaList[N] = np.append(
                self.NToGammaList[N],
                np.array(GammaListForGammaGreaterThanEta))
Example #26
0
 def testAccountForAllTypes(self):
     """Two identically configured CDF instances must each produce 2009
     discontinuities, terminal probability 1.0, and the same cumulative
     probability at 0.1."""
     p_eta = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     self.CDF.referenceDistribution = p_eta
     self.CDF.setN(30)
     self.CDF.setn(4)
     self.CDF.accountForAllTypes()
     self.failUnlessEqual(len(self.CDF.Dictionary), 2009)
     self.failUnlessEqual(len(self.CDF.AscendingDiscontinuityList), 2009)
     self.failUnlessAlmostEqual(self.CDF.Dictionary[self.CDF.AscendingDiscontinuityList[-1]], 1.0)
     self.failUnlessAlmostEqual(self.CDF.assignCumulativeProbability(0.1), 0.470627961298)

     self.CDF2.referenceDistribution = p_eta
     self.CDF2.setN(30)
     self.CDF2.setn(4)
     self.CDF2.accountForAllTypes()
     self.failUnlessEqual(len(self.CDF2.Dictionary), 2009)
     self.failUnlessEqual(len(self.CDF2.AscendingDiscontinuityList), 2009)
     # Bug fix: index CDF2's own discontinuity list (was self.CDF's, a copy-paste slip).
     self.failUnlessAlmostEqual(self.CDF2.Dictionary[self.CDF2.AscendingDiscontinuityList[-1]], 1.0)
     self.failUnlessAlmostEqual(self.CDF2.assignCumulativeProbability(0.1), 0.470627961298)
Example #27
0
    def testAccountForAllTypes(self):
        """Two identically configured CDF instances must each produce 2009
        discontinuities, terminal probability 1.0, and the same cumulative
        probability at 0.1."""
        p_eta = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
        self.CDF.referenceDistribution = p_eta
        self.CDF.setN(30)
        self.CDF.setn(4)
        self.CDF.accountForAllTypes()
        self.failUnlessEqual(len(self.CDF.Dictionary), 2009)
        self.failUnlessEqual(len(self.CDF.AscendingDiscontinuityList), 2009)
        self.failUnlessAlmostEqual(
            self.CDF.Dictionary[self.CDF.AscendingDiscontinuityList[-1]], 1.0)
        self.failUnlessAlmostEqual(self.CDF.assignCumulativeProbability(0.1),
                                   0.470627961298)

        self.CDF2.referenceDistribution = p_eta
        self.CDF2.setN(30)
        self.CDF2.setn(4)
        self.CDF2.accountForAllTypes()
        self.failUnlessEqual(len(self.CDF2.Dictionary), 2009)
        self.failUnlessEqual(len(self.CDF2.AscendingDiscontinuityList), 2009)
        # Bug fix: index CDF2's own discontinuity list (was self.CDF's, a
        # copy-paste slip).
        self.failUnlessAlmostEqual(
            self.CDF2.Dictionary[self.CDF2.AscendingDiscontinuityList[-1]], 1.0)
        self.failUnlessAlmostEqual(self.CDF2.assignCumulativeProbability(0.1),
                                   0.470627961298)
 def testRobbinsEstimatedEmissionProbability(self):
     """Robbins estimate of an emission probability closely tracks the exact value."""
     p_eta = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     aType, N = [200, 100, 100, 100], 500
     np.testing.assert_almost_equal(p_eta.exactEmissionProbability(aType, N),
                                    2.64474060719e-19)
     np.testing.assert_almost_equal(p_eta.RobbinsEstimatedEmissionProbability(aType, N),
                                    2.65202363049e-19)
Example #29
0
    def unSerialize(self, pickleFile):  #TODO: refactor to name "deSerialize"
        """Load beta-computation results from a pickle file and build the
        interpolation tables (partitioned at gamma = eta) plus the largest-N
        bookkeeping used for extrapolation.

        Requires self.eta to be set; raises Exception otherwise.
        NOTE(review): Python 2 `print`; pickle.load on the given file is
        unsafe for untrusted input -- only unserialize trusted files.
        """
        #Load in results stored in serialized pickle format
        #auxiliary function
        def N_KLDiv_Pts(arrayOfResultsForThisEta, p_eta):
            # Build a structured array of (N, gamma, beta), convert the gamma
            # column in place to its KL divergence from p_eta, then sort.
            N_KLDiv_Pts = [
                tuple([alist['N'], alist['gamma'], alist['beta']])
                for alist in arrayOfResultsForThisEta
            ]
            N_KLDiv_Pts = np.array(N_KLDiv_Pts,
                                   dtype=[('N', '<i8'), ('KL_Div', '<f8'),
                                          ('beta', '<f8')])
            #import ipdb; ipdb.set_trace()
            for row in N_KLDiv_Pts:
                row['KL_Div'] = p_eta.KLDivergenceOfP_gammaFromDist(
                    row['KL_Div'])
            N_KLDiv_Pts.sort(order=['N', 'KL_Div'])
            return N_KLDiv_Pts

        #main body:
        if not self.eta:
            raise Exception(
                "Eta must be defined prior to unserializing results of beta computation"
            )
        probabilityDistFactoryUniformMarginals = pdf.probabilityDistributionFactory(
            self.k, self.l)
        p_eta = probabilityDistFactoryUniformMarginals.get_p_eta(self.eta)
        listOfResults = pickle.load(open(pickleFile, 'rb'))
        print pickleFile
        listOfResultsAsTuples = [tuple(alist) for alist in listOfResults]
        arrayOfResults = np.array(listOfResultsAsTuples,
                                  dtype=[('time', '<f8'), ('eta', '<f8'),
                                         ('N', '<i8'), ('gamma', '<f8'),
                                         ('beta', '<f8')])
        # Partition the rows for this eta into gamma < eta and gamma >= eta.
        arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResults[
            arrayOfResults['eta'] ==
            self.eta]  #and arrayOfResults['gamma'] < self.eta]
        arrayOfResultsForThisEtaGammaLessThanEta = arrayOfResultsForThisEtaGammaLessThanEta[
            arrayOfResultsForThisEtaGammaLessThanEta['gamma'] < self.eta]
        arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResults[
            arrayOfResults['eta'] == self.eta]
        arrayOfResultsForThisEtaGammaAtLeastEta = arrayOfResultsForThisEtaGammaAtLeastEta[
            arrayOfResultsForThisEtaGammaAtLeastEta['gamma'] >= self.eta]

        self.betasByAscendingNAscendingGamma = np.array(listOfResultsAsTuples,
                                                        dtype=[
                                                            ('time', '<f8'),
                                                            ('eta', '<f8'),
                                                            ('N', '<i8'),
                                                            ('gamma', '<f8'),
                                                            ('beta', '<f8')
                                                        ])
        self.betasByAscendingNAscendingGamma.sort(order=['N', 'gamma'])

        self.N_KLDivPtsForInterpolationGammaLessThanEta = N_KLDiv_Pts(
            arrayOfResultsForThisEtaGammaLessThanEta, p_eta)
        self.N_KLDivPtsForInterpolationGammaAtLeastEta = N_KLDiv_Pts(
            arrayOfResultsForThisEtaGammaAtLeastEta, p_eta)
        #import ipdb; ipdb.set_trace()
        # Extract the (N, KL_Div) coordinate pairs as plain 2-D arrays for
        # interpolation.
        N_KLDivPtsForInterpolation_nd_GammaLessThanEta = self.N_KLDivPtsForInterpolationGammaLessThanEta[
            ['N', 'KL_Div']].view(np.ndarray).reshape(
                len(self.N_KLDivPtsForInterpolationGammaLessThanEta), -1)
        self.points_GammaLessThanEta = np.array([
            list(arow[0].view(np.ndarray))
            for arow in N_KLDivPtsForInterpolation_nd_GammaLessThanEta
        ])
        N_KLDivPtsForInterpolation_nd_GammaAtLeastEta = self.N_KLDivPtsForInterpolationGammaAtLeastEta[
            ['N', 'KL_Div']].view(np.ndarray).reshape(
                len(self.N_KLDivPtsForInterpolationGammaAtLeastEta), -1)
        self.points_GammaAtLeastEta = np.array([
            list(arow[0].view(np.ndarray))
            for arow in N_KLDivPtsForInterpolation_nd_GammaAtLeastEta
        ])
        # Interpolation values are the logs of the beta column.
        self.valuesForInterpolationGammaLessthanEta = [
            np.log(row['beta'])
            for row in self.N_KLDivPtsForInterpolationGammaLessThanEta
        ]

        self.valuesForInterpolationGammaAtLeastEta = [
            np.log(row['beta'])
            for row in self.N_KLDivPtsForInterpolationGammaAtLeastEta
        ]

        self.largestN = self.betasByAscendingNAscendingGamma['N'][-1]
        #  if N is greater than the last (largest) N in the N_KLDivPtsForInterpolation['N'], compute value of function
        #  on this largest N and the given gamma, then second largest N and given gamma, and extrapolate from those two.
        self.nextToLargestN, self.nextToLargestNLastIndex = self.nextLargestNAndLastIndex(
            self.largestN)

        self.largestNFirstIndex = self.nextToLargestNLastIndex
        thirdLargestN, thirdLargestNLastIndex = self.nextLargestNAndLastIndex(
            self.nextToLargestN, self.nextToLargestNLastIndex)
        self.nextToLargestNFirstIndex = thirdLargestNLastIndex
        self.NList = sorted(set(self.betasByAscendingNAscendingGamma['N']))

        self.NFirstIndexHash = iP.firstIndexHash(
            set(self.NList), list(self.betasByAscendingNAscendingGamma['N']))
Example #30
0
 def KL_DivFromP_etaOfP_gamma(self, gamma, k, l, largestKL_DivergenceForThisN=10000):
     """KL divergence between p_eta (at self.eta) and p_gamma for a k-by-l
     table, capped from above at largestKL_DivergenceForThisN."""
     factory = pdf.probabilityDistributionFactory(k, l)
     p_eta = factory.get_p_eta(self.eta)
     divergence = p_eta.KLDivergenceOfP_gammaFromDist(gamma)
     return min(divergence, largestKL_DivergenceForThisN)
 def testExactEmissionProbability(self):
     """Exact emission probabilities of two size-5 types under p_eta(0.1)."""
     p_eta = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     np.testing.assert_almost_equal(
         p_eta.exactEmissionProbability([2, 1, 1, 1], 5), 0.054900966738391482)
     np.testing.assert_almost_equal(
         p_eta.exactEmissionProbability([1, 2, 1, 1], 5), 0.021372132192157535)
 def KLDivergenceOfP_gammaFromDist(self, gamma, k=2, l=2):
     """Construct p_gamma on a k-by-l table and return its KL divergence
     taken with this distribution as base."""
     p_gammaDistribution = pdf.probabilityDistributionFactory(k, l).get_p_eta(gamma)
     return self.KL_divergence_as_base(p_gammaDistribution.distribution)
Example #33
0
 def setReferenceDistribution_p_eta(self, eta):
     """Set the reference distribution to p_eta for binary/binary variables."""
     k, l = 2, 2  # hardcoding binary variables for now
     self.referenceDistribution = pdf.probabilityDistributionFactory(k, l).get_p_eta(eta)
 def testProbabilityDistributionFactory(self):
     """p_eta(0.1) on a 2x2 table has the expected symmetric distribution."""
     p_eta = pdf.probabilityDistributionFactory(2, 2).get_p_eta(0.1)
     expected = np.array([[0.35989731, 0.14010269],
                          [0.14010269, 0.35989731]])
     np.testing.assert_almost_equal(p_eta.distribution, expected)
Example #35
0
 def setReferenceDistribution_p_eta(self, eta):
     """Use p_eta as the reference distribution (binary/binary case)."""
     # Hardcoding binary variables (k = l = 2) for now.
     k, l = 2, 2
     self.referenceDistribution = pdf.probabilityDistributionFactory(k, l).get_p_eta(eta)