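# Module-level imports used by main() and the MCEvidence class below.
import os
import sys
import math
import logging
import statistics

import numpy as np
import scipy.special as sp
from sklearn.neighbors import NearestNeighbors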
def main():
    import getdist
    from getdist import plots, MCSamples, loadMCSamples
    import numpy as np
    import pandas as pd

    # parse_args() and get_nsample() are assumed to be defined elsewhere in
    # this script.
    args = parse_args()
    out = os.path.expanduser(args.out)
    out = os.path.join(out, 'plots')
    if not os.path.isdir(out):
        os.makedirs(out)

    allnames = np.array(['Om','h0','Ob','ns','a_s','Onuh2','b1','b2','b3','b4','b5','m1','m2','m3','m4','ia_a','ia_alpha', 'wpz_b1','wpz_b2','wpz_b3','wpz_b4','lpz_b1','lpz_bin2','lpz_bin3','lpz_bin4','lpz_bin5','s8','like','post','weight'])
    alllabels = np.array(['\Omega_m', 'h', '\Omega_b', 'n_s','a_s', r'\Omega_{\nu}','b1','b2','b3','b4','b5','m1','m2','m3','m4','ia_a','ia_alpha', 'wpz_b1','wpz_b2','wpz_b3','wpz_b4','lpz_b1','lpz_bin2','lpz_bin3','lpz_bin4','lpz_bin5',r'\sigma_{8}','like','post','weight'])
    # Use the six cosmological parameters plus sigma_8 (index -4 in allnames).
    useindex = [0, 1, 2, 3, 4, 5, -4]
    usednames = allnames[useindex]
    usedlabels = alllabels[useindex]

    
    nsample = get_nsample(args.samplesfile_forecast)
    allsamplestable = np.loadtxt(args.samplesfile_forecast)
    allsamplestable = allsamplestable[-nsample:, :]
    usedsamples = allsamplestable[:, useindex]
    usedweights = allsamplestable[:, -1]
    usedpost = allsamplestable[:, -2]
    samples = MCSamples(samples=usedsamples, names=usednames,
                        labels=usedlabels, weights=usedweights, loglikes=usedpost,
                        label='Forecast')
    samples.removeBurn(remove=0.1)

    nsample_cont = get_nsample(args.samplesfile_contaminated)
    allsamplestable_cont = np.loadtxt(args.samplesfile_contaminated)
    allsamplestable_cont = allsamplestable_cont[-nsample_cont:, :]
    usedsamples_cont = allsamplestable_cont[:, useindex]
    usedweights_cont = allsamplestable_cont[:, -1]
    usedpost_cont = allsamplestable_cont[:, -2]
    samples_cont = MCSamples(samples=usedsamples_cont,
                             names=usednames, labels=usedlabels,
                             weights=usedweights_cont, loglikes=usedpost_cont,
                             label='PSF contamination')
    samples_cont.removeBurn(remove=0.1)

    g = plots.getSubplotPlotter()
    g.triangle_plot([samples, samples_cont], filled_compare=True,
                    contour_colors=['green', 'darkblue'])
    #g.add_legend(legend_labels=[legend_name], fontsize=36, legend_loc=(-3.5,7))
    # Save the triangle plot into the plots directory created above.
    g.export(os.path.join(out, "getdistplot.png"))
class MCEvidence(object):
    def __init__(self, method, ischain=True, isfunc=None,
                 thinlen=0.0, burnlen=0.0,
                 ndim=None, kmax=5,
                 priorvolume=1, debug=False,
                 nsample=None,
                 nbatch=1,
                 brange=None,
                 bscale='',
                 verbose=1, args={},
                 **gdkwargs):
        """Evidence estimation from MCMC chains
        :param method: chain name (str) or array (np.ndarray) or python class
                If string or numpy array, it is interpreted as MCMC chain. 
                Otherwise, it is interpreted as a python class with at least 
                a single method sampler and will be used to generate chain.

        :param ischain (bool): True indicates the passed method is to be interpreted as a chain.
                This matters because a string name can refer either to
                a class or to a chain file.

        :param nbatch (int): the number of batches into which to divide the chain (default=1).
               The evidence can be estimated by dividing the whole chain
               into n batches. When nbatch>1, the batch range (brange)
               and batch scaling (bscale) should also be set.

        :param brange (int or list): the minimum and maximum size of batches in linear or log10 scale,
               e.g. [3,4] with bscale='logpower' means minimum and maximum batch sizes
               of 10^3 and 10^4. The range is divided nbatch times.

        :param bscale (str): the scaling of the batch size. Allowed values are 'logpower', 'linear', 'constant'.

        :param kmax (int): kth nearest neighbour, with k between 1 and kmax-1

        :param args (dict): arguments to be passed to method. Only valid if method is a class.

        :param gdkwargs (dict): arguments to be passed to getdist.

        :param verbose: chattiness of the run
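
        Example
        -------
        A minimal sketch (the chain path is hypothetical):

        >>> mce = MCEvidence('/path/to/chain', kmax=5, burnlen=1000, thinlen=2)
        >>> mle = mce.evidence()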
        
        """
        #
        self.verbose=verbose
        if debug or verbose>1: logging.basicConfig(level=logging.DEBUG)
        if verbose==0: logging.basicConfig(level=logging.WARNING)            
        self.logger = logging.getLogger(__name__)
        
        self.info={}
        #
        self.nbatch=nbatch
        self.brange=brange #todo: check for [N] 
        self.bscale=bscale if not isinstance(self.brange,int) else 'constant'
        
        # The arrays of powers and nchain record the number of samples 
        # that will be analysed at each iteration. 
        #idtrial is just an index
        self.idbatch=np.arange(self.nbatch,dtype=int)
        self.powers  = np.zeros(self.nbatch)
        self.bsize  = np.zeros(self.nbatch,dtype=int)
        self.nchain  = np.zeros(self.nbatch,dtype=int)               
        #
        self.kmax=max(2,kmax)
        self.priorvolume=priorvolume
        #
        self.ischain=ischain
        #
        self.fname=None
        #
        if ischain:
            
            if isinstance(method,str):
                self.fname=method      
                self.logger.debug('Using chains: %s', method)
            else:
                self.logger.debug('dictionary of samples and loglike array passed')
                
        else: #python class which includes a method called sampler
            
            if nsample is None:
                self.nsample=100000
            else:
                self.nsample=nsample
            
            #given a class name, get an instance
            if isinstance(method,str):
                XClass = getattr(sys.modules[__name__], method)
            else:
                XClass=method
            
            if not isinstance(XClass, type):
                self.logger.debug(__name__+': method is an instance of a class')
                self.method=XClass
            else:
                self.logger.debug(__name__+': method is a class .. instantiating')
                self.method=XClass(**args)  # args is a dict of keyword arguments
                #if the passed class has some info, display it
                try:
                    print()
                    msg = self.method.info()
                    print(msg)
                    print()
                except Exception:
                    pass
                # Now generate samples.
                # Output should be a dict - {'chains':,'logprob':,'weight':}
                method = self.method.Sampler(nsamples=self.nsample)
                
        #======== By this line we expect only chains, either in a file or a dict ====
        # MCSamples here is the chain-wrapper class defined elsewhere in this
        # module (not getdist.MCSamples, which main() imports locally).
        self.gd = MCSamples(method, debug=verbose>1, **gdkwargs)

        if burnlen>0:
            _=self.gd.removeBurn(remove=burnlen)
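        # thinlen < 1 is interpreted as a Poisson-thinning probability;
        # thinlen >= 1 is interpreted as a fixed thinning stride.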
        if thinlen>0:
            if thinlen<1:
                self.logger.info('calling poisson_thin ..')
                _=self.gd.thin_poisson(thinlen)
            else:
                _=self.gd.thin(nthin=thinlen)                

        if isfunc:
            #try:
            self.gd.importance_sample(isfunc)
            #except:
            #    self.logger.warn('Importance sampling failed. Make sure getdist is installed.')
               
        self.info['NparamsMC']=self.gd.nparamMC
        self.info['Nsamples_read']=self.gd.get_shape()[0]
        self.info['Nparams_read']=self.gd.get_shape()[1]
        #

        #after burn-in and thinning
        self.nsample = self.gd.get_shape()[0]            
        if ndim is None: ndim=self.gd.nparamMC        
        self.ndim=ndim        
        #
        self.info['NparamsCosmo']=self.ndim
        self.info['Nsamples']=self.nsample
        #
        #self.info['MaxAutoCorrLen']=np.array([self.gd.samples.getCorrelationLength(j) for j in range(self.ndim)]).max()

        #print('***** ndim,nparamMC,MaxAutoCorrLen :',self.ndim,self.nparamMC,self.info['MaxAutoCorrLen'])
        
        #print('init minmax logl',method['lnprob'].min(),method['lnprob'].max())            
        self.logger.info('chain array dimensions: %s x %s'%(self.nsample,self.ndim))
            
        #
        self.set_batch()


    def summary(self):
        print()
        print('ndim={}'.format(self.ndim))
        print('nsample={}'.format(self.nsample))
        print('kmax={}'.format(self.kmax))
        print('brange={}'.format(self.brange))
        print('bsize={}'.format(self.bsize))
        print('powers={}'.format(self.powers))
        print('nchain={}'.format(self.nchain))
        print()
        
    def get_batch_range(self):
        if self.brange is None:
            powmin,powmax=None,None
        else:
            powmin=np.array(self.brange).min()
            powmax=np.array(self.brange).max()
            if powmin==powmax and self.nbatch>1:
                self.logger.error('nbatch>1 but batch range is set to zero.')
                raise ValueError('nbatch>1 but batch range is set to zero.')
        return powmin,powmax
    
    def set_batch(self,bscale=None):
        if bscale is None:
            bscale=self.bscale
        else:
            self.bscale=bscale
            
        #    
        if self.brange is None: 
            self.bsize=self.brange #check
            powmin,powmax=None,None
            self.nchain[0]=self.nsample
            self.powers[0]=np.log10(self.nsample)
        else:
            if bscale=='logpower':
                powmin,powmax=self.get_batch_range()
                self.powers=np.linspace(powmin,powmax,self.nbatch)
                self.bsize = np.array([int(pow(10.0,x)) for x in self.powers])
                self.nchain=self.bsize

            elif bscale=='linear':
                powmin,powmax=self.get_batch_range()
                self.bsize=np.linspace(powmin,powmax,self.nbatch,dtype=int)
                self.powers=np.array([int(math.log10(x)) for x in self.bsize])
                self.nchain=self.bsize

            else: #constant
                self.bsize=self.brange #check
                self.powers=self.idbatch
                self.nchain=np.array([x for x in self.bsize.cumsum()])
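
    # Worked example of the batch bookkeeping above: bscale='logpower',
    # brange=[3, 4], nbatch=2 gives powers=[3.0, 4.0], i.e. batch sizes
    # bsize = nchain = [1000, 10000].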
            
    def get_samples(self,nsamples,istart=0,rand=False):    
        # If we are reading chain, it will be handled here 
        # istart -  will set row index to start getting the samples 

        ntot=self.gd.get_shape()[0]
        
        if rand and self.brange is not None:
            if nsamples>ntot:
                self.logger.error('nsamples=%s, ntotal_chain=%s'%(nsamples,ntot))
                raise ValueError('nsamples exceeds the total chain length')
            
            idx=np.random.randint(0,high=ntot,size=nsamples)
        else:
            idx=np.arange(istart,nsamples+istart)

        self.logger.info('requested nsamples=%s, ntotal_chain=%s'%(nsamples,ntot))
        s,lnp,w=self.gd.arrays()            
                
        return s[idx,0:self.ndim],lnp[idx],w[idx]
        

    def evidence(self,verbose=None,rand=False,info=False,
                      profile=False,pvolume=None,pos_lnp=False,
                      nproc=-1,prewhiten=True):
        '''

        MARGINAL LIKELIHOODS FROM MONTE CARLO MARKOV CHAINS, the algorithm described in Heavens et al. (2017)
       
        Parameters
        ---------

        :param verbose - controls the amount of information output during run time
        :param rand - randomised sub-sampling of the MCMC chains
        :param info - if True, information about the analysis will be returned to the caller
        :param pvolume - prior volume
        :param pos_lnp - if True, the input is the negative log likelihood and is negated internally
        :param nproc - number of processors scikit-learn should use (-1 means all available)
        :param prewhiten - if True, chains will be normalised to have unit variance
        
        Returns
        ---------

        MLE - maximum likelihood estimate of evidence:
        self.info (optional) - returned if info=True. Contains useful information about the chain analysed
               

        Notes
        ---------

        The MCEvidence algorithm is implemented using scikit nearest neighbour code.


        Examples
        ---------

        To run the evidence estimation from an ipython terminal or notebook

        >>> from MCEvidence import MCEvidence
        >>> MLE = MCEvidence('/path/to/chain').evidence()
        

        To run MCEvidence from shell

        $ python MCEvidence.py </path/to/chain> 

        References
        -----------

        .. [1] Heavens et al. (2017)
        
        '''     
            
        if verbose is None:
            verbose=self.verbose

        #get prior volume
        if pvolume is None:
            logPriorVolume=math.log(self.priorvolume)
        else:
            logPriorVolume=math.log(pvolume)            

        self.logger.debug('log prior volume: %s', logPriorVolume)
            
        kmax=self.kmax
        ndim=self.ndim
        
        MLE = np.zeros((self.nbatch,kmax))

        #get covariance matrix of chain
        #ChainCov=self.gd.samples.getCovMat()
        #eigenVal,eigenVec = np.linalg.eig(ChainCov)
        #Jacobian = math.sqrt(np.linalg.det(ChainCov))
        #ndim=len(eigenVal)
        
        # Loop over different numbers of MCMC samples (=S):
        itot=0
        for ipow,nsample in zip(self.idbatch,self.nchain):                
            S=int(nsample)            
            DkNN    = np.zeros((S,kmax))
            indices = np.zeros((S,kmax))
            volume  = np.zeros((S,kmax))

            samples_raw = np.zeros((S,ndim))
            samples_raw_cmc,logL,weight=self.get_samples(S,istart=itot,rand=rand)
            samples_raw[:,0:ndim] =  samples_raw_cmc[:,0:ndim]
            
            #We need the logarithm of the likelihood - not the negative log
            if pos_lnp: logL=-logL
                
            # Renormalise loglikelihood (temporarily) to avoid underflows:
            logLmax = np.amax(logL)
            fs    = logL-logLmax
                        
            #print('(mean,min,max) of LogLikelihood: ',fs.mean(),fs.min(),fs.max())
            
            if prewhiten:
                self.logger.info('Prewhitening chains using sample covariance matrix ..')
                # Covariance matrix of the samples, and eigenvalues (in w) and eigenvectors (in v):
                ChainCov = np.cov(samples_raw.T)
                eigenVal,eigenVec = np.linalg.eig(ChainCov)                
                Jacobian = math.sqrt(np.linalg.det(ChainCov))

                # Prewhiten:  First diagonalise:
                samples = np.dot(samples_raw,eigenVec)

                #print('EigenValues.shape,ndim',eigenVal.shape,ndim)
                #print('EigenValues=',eigenVal)
                # And renormalise new parameters to have unit covariance matrix:
                for i in range(ndim):
                    samples[:,i]= samples[:,i]/math.sqrt(eigenVal[i])
            else:
                #no diagonalisation
                Jacobian=1
                samples=samples_raw

            #print('samples, after prewhiten', samples[1000:1010,0:ndim])
            #print('Loglikes ',logLmax,logL[1000:1010],fs[1000:1010])
            #print('weights',weight[1000:1010])
            #print('EigenValues=',eigenVal)
            
            # Use the sklearn nearest-neighbour routine, which chooses the 'best' algorithm.
            # This is where the hard work is done:
            nbrs = NearestNeighbors(n_neighbors=kmax+1, 
                                    algorithm='auto',n_jobs=nproc).fit(samples)
            DkNN, indices = nbrs.kneighbors(samples)                
    
            # Create the posterior for 'a' from the distances (volumes) to nearest neighbour:
            for k in range(1,self.kmax):
                for j in range(0,S):        
                    # Use analytic formula for the volume of ndim-sphere:
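                    # V = pi^(ndim/2) * r^ndim / Gamma(1 + ndim/2), r = DkNN[j,k]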
                    volume[j,k] = math.pow(math.pi,ndim/2)*math.pow(DkNN[j,k],ndim)/sp.gamma(1+ndim/2)
                
                
                #print('volume minmax: ',volume[:,k].min(),volume[:,k].max())
                #print('weight minmax: ',weight.min(),weight.max())
                
                # dotp is the summation term in the notes:
                dotp = np.dot(volume[:,k]/weight[:],np.exp(fs))
        
                # The MAP value of 'a' is obtained analytically from the expression for the posterior:
                amax = dotp/(S*k+1.0)
    
                # Maximum likelihood estimator for the evidence
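                # log(E) ~ log(SumW * amax * Jacobian) + logLmax - logPriorVolume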
                SumW = np.sum(self.gd.adjusted_weights)
                MLE[ipow,k] = math.log(SumW*amax*Jacobian) + logLmax - logPriorVolume
                self.logger.debug('SumW=%s sum(weight)=%s S=%s amax=%s Jacobian=%s logLmax=%s logPriorVolume=%s MLE=%s',
                                  SumW,np.sum(weight),S,amax,Jacobian,logLmax,logPriorVolume,MLE[ipow,k])
                # Output is: for each sample size (S), compute the evidence for kmax-1 different values of k.
                # Final columm gives the evidence in units of the analytic value.
                # The values for different k are clearly not independent. If ndim is large, k=1 does best.
                if self.brange is None:
                    #print('(mean,min,max) of LogLikelihood: ',fs.mean(),fs.min(),fs.max())
                    if verbose>1:
                        self.logger.info('k={},nsample={}, dotp={}, median_volume={}, a_max={}, MLE={}'.format( 
                            k,S,dotp,statistics.median(volume[:,k]),amax,MLE[ipow,k]))
                
                else:
                    if verbose>1:
                        if ipow==0:
                            self.logger.info('(iter,mean,min,max) of LogLikelihood: %s,%s,%s,%s',
                                             ipow,fs.mean(),fs.min(),fs.max())
                            self.logger.info('-------------------- useful intermediate parameter values ------- ')
                            self.logger.info('nsample, k, dotp, median volume, amax, MLE')
                        self.logger.info('%s, %s, %s, %s, %s, %s',
                                         S,k,dotp,statistics.median(volume[:,k]),amax,MLE[ipow,k])

        #MLE[:,0] is zero - return only from k=1
        if self.brange is None:
            MLE=MLE[0,1:]
        else:
            MLE=MLE[:,1:]

        if verbose>0:
            print('')
            print('MLE[k=1..{}] = {}'.format(self.kmax-1, MLE))
            print('')
        
        if info:
            return MLE, self.info
        else:  
            return MLE
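

# Hypothetical entry point; parse_args() and get_nsample() used by main()
# are assumed to be defined elsewhere in this module.
if __name__ == '__main__':
    main()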