Example #1
    def fit(Deltam,
            nsne,
            xi,
            redshiftterm,
            mpi=False,
            p0=None,
            nchain=2000,
            **kwargs):
        ndim, nwalkers = 4, 8
        sig = numpy.array([0.1, 0.01, 0.01, 50 / 3e5])

        if p0 is None:
            p0 = [
                numpy.array([1, 0, 0.08, 200 / 3e5]) +
                numpy.random.uniform(low=-sig, high=sig)
                for i in range(nwalkers)
            ]

        if mpi:
            pool = MPIPool()
            if not pool.is_master():
                pool.wait()
                sys.exit(0)
            else:
                import time
                starttime = time.time()
                print("Start {}".format(starttime))
            sampler = emcee.EnsembleSampler(
                nwalkers,
                ndim,
                Fit.lnprob,
                args=[Deltam, nsne, xi, redshiftterm],
                pool=pool)
        else:
            import time
            starttime = time.time()
            print("Start {}".format(starttime))
            sampler = emcee.EnsembleSampler(
                nwalkers,
                ndim,
                Fit.lnprob,
                args=[Deltam, nsne, xi, redshiftterm])

        sampler.run_mcmc(p0, nchain)

        if mpi:
            if pool.is_master():
                endtime = time.time()
                print("End {}".format(endtime))
                print("Difference {}".format(endtime - starttime))
                pool.close()
        else:
            endtime = time.time()
            print("End {}".format(endtime))
            print("Difference {}".format(endtime - starttime))

        return sampler
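Note on running these examples: emcee.utils.MPIPool is the emcee 2.x pool used throughout this page; emcee 3 removed it, and the same is_master()/wait() pattern now lives in the schwimmbad package. MPI scripts like the one above are launched under an MPI runtime (for example, mpiexec -n 8 python fit_script.py): every rank executes the whole script, the non-master ranks block in pool.wait() to serve tasks, and only the master drives the sampler. A minimal modern sketch, assuming schwimmbad is installed:

import sys
from schwimmbad import MPIPool  # replaces emcee.utils.MPIPool in emcee >= 3

with MPIPool() as pool:
    if not pool.is_master():
        # worker ranks wait here for tasks and exit when the pool closes
        pool.wait()
        sys.exit(0)
    # master-only code: build emcee.EnsembleSampler(..., pool=pool) and run it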
Example #2
File: sampler.py  Project: dstndstn/demo
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--mpi', action='store_true', default=False)
    parser.add_argument('--walkers', type=int, default=100)
    parser.add_argument('--steps', type=int, default=1000)
    opt = parser.parse_args()

    pool = None
    if opt.mpi:
        import socket
        import os
        from emcee.utils import MPIPool
        pool = MPIPool()
        print('Running in MPI.  Host', socket.gethostname(), 'pid',
              os.getpid(), 'is master?', pool.is_master())
        if not pool.is_master():
            pool.wait()
            return

    ndim, nwalkers = 2, opt.walkers

    ivar = 1. / np.random.rand(ndim)
    p0 = [np.random.rand(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    args=[ivar],
                                    pool=pool)

    import time
    print('Running for', opt.steps, 'steps with', opt.walkers, 'walkers')
    t0 = time.time()
    sampler.run_mcmc(p0, opt.steps)
    print('Finished in', time.time() - t0, 'seconds')

    if pool:
        pool.close()

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.figure()
    for i in range(ndim):
        plt.subplot(1, ndim, 1 + i)
        plt.hist(sampler.flatchain[:, i], 100, color="k", histtype="step")
        plt.title("Dimension {0:d}".format(i))
    plt.savefig('plot.png')
    print('Saved plot')
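Example #2 assumes module-level imports (numpy as np, emcee) and a log-probability function lnprob that are not shown. A minimal lnprob consistent with the args=[ivar] call above, along the lines of the emcee quickstart:

import numpy as np

def lnprob(p, ivar):
    # isotropic Gaussian log-density; emcee calls this with one walker
    # position p plus the extra args handed to EnsembleSampler
    return -0.5 * np.sum(ivar * p ** 2)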
Example #3
def get_pool(mpi=False, threads=None):
    """ Get a pool object to pass to emcee for parallel processing.
        If mpi is False and threads is None, pool is None.

        Parameters
        ----------
        mpi : bool
            Use MPI or not. If specified, ignores the threads kwarg.
        threads : int (optional)
            If mpi is False and threads is specified, use a Python
            multiprocessing pool with the specified number of threads.
        """

    if mpi:
        from emcee.utils import MPIPool

        # Initialize the MPI pool
        pool = MPIPool()

        # Make sure the thread we're running on is the master
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
        print("Running with MPI...")

    elif threads is not None and threads > 1:
        import multiprocessing
        print("Running with multiprocessing on " + str(threads) + " cores...")
        pool = multiprocessing.Pool(threads)

    else:
        print("Running serial...")
        pool = None

    return pool
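A hypothetical call site for get_pool() (nwalkers, ndim, lnprob, and p0 are assumed to be defined as in the other examples); the caller is responsible for closing the pool:

pool = get_pool(mpi=False, threads=4)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)
sampler.run_mcmc(p0, 1000)
if pool is not None:
    pool.close()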
Example #4
    def mcmc_emcee(self,
                   n_walkers,
                   n_run,
                   n_burn,
                   mean_start,
                   sigma_start,
                   mpi=False):
        """
        Returns the MCMC analysis of the parameter space.
        """
        if mpi:
            pool = MPIPool()
            if not pool.is_master():
                pool.wait()
                sys.exit(0)
            sampler = emcee.EnsembleSampler(n_walkers,
                                            self.chain.param.num_param(),
                                            self.chain.X2_chain,
                                            pool=pool)
        else:
            sampler = emcee.EnsembleSampler(n_walkers,
                                            self.chain.param.num_param(),
                                            self.chain.X2_chain)
        p0 = emcee.utils.sample_ball(mean_start, sigma_start, n_walkers)
        new_pos, _, _, _ = sampler.run_mcmc(p0, n_burn)
        sampler.reset()

        store = InMemoryStorageUtil()
        for pos, prob, _, _ in sampler.sample(new_pos, iterations=n_run):
            store.persistSamplingValues(pos, prob, None)
        return store.samples
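InMemoryStorageUtil is not defined in this snippet; it appears to come from the CosmoHammer package. A minimal stand-in with the same interface, for illustration only:

import numpy as np

class InMemoryStorageUtil(object):
    # accumulates the walker positions from every iteration into one array
    def __init__(self):
        self.samples = None

    def persistSamplingValues(self, pos, prob, data):
        pos = np.atleast_2d(pos)
        if self.samples is None:
            self.samples = pos.copy()
        else:
            self.samples = np.vstack([self.samples, pos])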
Example #5
def get_pool(mpi=False, threads=None):
    """ Get a pool object to pass to emcee for parallel processing.
        If mpi is False and threads is None, pool is None.

        Parameters
        ----------
        mpi : bool
            Use MPI or not. If specified, ignores the threads kwarg.
        threads : int (optional)
            If mpi is False and threads is specified, use a Python
            multiprocessing pool with the specified number of threads.
    """

    if mpi:
        from emcee.utils import MPIPool

        # Initialize the MPI pool
        pool = MPIPool()

        # Make sure the thread we're running on is the master
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
        logger.debug("Running with MPI...")

    elif threads is not None and threads > 1:
        logger.debug(
            "Running with multiprocessing on {} cores...".format(threads))
        pool = multiprocessing.Pool(threads)

    else:
        logger.debug("Running serial...")
        pool = SerialPool()

    return pool
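This variant of get_pool() relies on names imported elsewhere in its module. Roughly the imports it assumes (SerialPool is taken here from schwimmbad; the original project may define its own):

import sys
import logging
import multiprocessing

from schwimmbad import SerialPool  # assumption; substitute the project's own SerialPool

logger = logging.getLogger(__name__)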
Example #6
def mcmc(mass_bins, dot_val, initial_c, nwalkers, ndim, burn_in, steps_wlk):
    import numpy as np
    import scipy.optimize as op
    import emcee
    from emcee.utils import MPIPool
    import sys

    def lnlike(c, dot_val):
        loglike = np.zeros((1))
        loglike[0] = sum(
            np.log(
                (1 - c) * np.sqrt(1 + (c / 2)) *
                (1 - c * (1 - 3 *
                          (dot_val * dot_val / 2)))**(-1.5)))  #log-likelihood

        return loglike

    def lnprior(c):

        if (-1.5 < c < 0.99):  # flat, uninformative prior
            return 0.0
        return -np.inf

    def lnprob(c, dot_val):
        lp = lnprior(c)
        if not np.isfinite(lp):
            return -np.inf
        return lp + lnlike(c, dot_val)

    #Parallel MCMC - initializes the pool object; if this process isn't the master, wait for instructions and exit
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    pos = [initial_c + 1e-2 * np.random.randn(ndim) for i in range(nwalkers)
           ]  #initial positions for walkers "Gaussian ball"

    #MCMC Running
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    args=[dot_val],
                                    pool=pool)

    pos, _, _ = sampler.run_mcmc(pos,
                                 burn_in)  #running of emcee burn-in period
    sampler.reset()

    sampler.run_mcmc(
        pos, steps_wlk
    )  #running of emcee for steps specified, using pos as initial walker positions
    pool.close()
    chain = sampler.flatchain[:, 0]

    return chain
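A hypothetical invocation of mcmc() with placeholder values (dot_products stands in for the caller's data; the function samples the single parameter c, so ndim is 1, and the script must be launched under MPI):

chain = mcmc(mass_bins=None, dot_val=dot_products, initial_c=0.1,
             nwalkers=50, ndim=1, burn_in=200, steps_wlk=2000)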
Example #7
def run_pool(pC, pW, walk, step):  #pCenter and pWidths
    steps = step
    nwalkers = walk
    ndim = len(pC)
    ## r in, del r, i, PA
    p0 = [pC[0], pC[1], pC[2], pC[3]]
    widths = [pW[0], pW[1], pW[2], pW[3]]
    p = emcee.utils.sample_ball(p0, widths, size=nwalkers)

    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnlike_vis_and_sed,
                                    live_dangerously=True,
                                    pool=pool)

    print 'Beginning the MCMC run.'
    start = time.clock()
    sampler.run_mcmc(p, steps)
    stop = time.clock()
    pool.close()
    print 'MCMC finished successfully.\n'
    print 'This was a simultaneous visibility and SED run with {} walkers and {} steps'.format(
        nwalkers, steps)
    print "Mean acor time: " + str(np.mean(sampler.acor))
    print "Mean acceptance fraction: " + str(
        np.mean(sampler.acceptance_fraction))
    print '\nRun took %r minutes' % ((stop - start) / 60.)

    chain = sampler.chain
    chi = (sampler.lnprobability) / (-0.5)
    whatbywhat = str(nwalkers) + 'x' + str(steps)
    os.system('mkdir MCMCRUNS/vis_and_sed/' + whatbywhat)
    chainFile = 'MCMCRUNS/vis_and_sed/' + whatbywhat + '/' + whatbywhat + '.chain.fits'
    chiFile = 'MCMCRUNS/vis_and_sed/' + whatbywhat + '/' + whatbywhat + '.chi.fits'
    infoFile = 'MCMCRUNS/vis_and_sed/' + whatbywhat + '/' + whatbywhat + '.runInfo.txt'
    fits.writeto(chainFile, chain)
    fits.writeto(chiFile, chi)
    #f = open('runInfo.txt','w')
    f = open(infoFile, 'w')
    f.write('run took %r minutes\n' % ((stop - start) / 60.))
    f.write('walkers: %r\n' % nwalkers)
    f.write('steps: %r\n' % steps)
    f.write('initial model: %r\n' % p0)
    f.write('widths: %r\n' % widths)
    f.write("mean acor time: " + str(np.mean(sampler.acor)))
    f.write("\nmean acceptance fraction: " +
            str(np.mean(sampler.acceptance_fraction)))
    f.close()

    print 'Data written to: \n' + chainFile + '\n' + chiFile + '\n' + infoFile
Example #8
def pt_mpi_sample(gf, ntemps, nwalkers, burn_steps, sample_steps, thin=1,
                  pool=None, betas=None, pos=None, random_state=None,
                  pos_filename=None, convergence_interval=50):
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    return pt_sample(gf, ntemps, nwalkers, burn_steps, sample_steps,
                     thin=thin, pool=pool, betas=betas, pos=pos,
                     random_state=random_state, pos_filename=pos_filename,
                     convergence_interval=convergence_interval)
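pt_sample() is not shown here. In emcee 2.x, parallel tempering went through emcee.PTSampler, so the following is only a sketch of what it presumably wraps, assuming numpy as np and emcee are imported and that gf exposes log-likelihood and log-prior callables:

def pt_sample_sketch(gf, ntemps, nwalkers, burn_steps, sample_steps,
                     thin=1, pool=None, betas=None, pos=None):
    ndim = gf.ndim  # assumption: the fit object knows its dimensionality
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim, gf.loglikelihood,
                              gf.logprior, pool=pool, betas=betas)
    # PTSampler wants starting positions with shape (ntemps, nwalkers, ndim)
    p = pos if pos is not None else np.random.randn(ntemps, nwalkers, ndim)
    for p, lnprob, lnlike in sampler.sample(p, iterations=burn_steps):
        pass
    sampler.reset()
    for p, lnprob, lnlike in sampler.sample(p, iterations=sample_steps,
                                            thin=thin):
        pass
    return sampler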
Example #9
File: Model.py  Project: jhoormann/SPAMM
    def run_mcmc(self, n_walkers=100, n_iterations=100):
        """
        Run emcee MCMC.
    
        Args:
            n_walkers (int): Number of walkers to pass to the MCMC.
            n_iterations (int): Number of iterations to pass to the MCMC.
        """

        # Initialize walker matrix with initial parameters
        walkers_matrix = []  # must be a list, not an np.array
        for walker in range(n_walkers):
            walker_params = []
            for component in self.components:
                walker_params = walker_params + component.initial_values(
                    self.data_spectrum)
            walkers_matrix.append(walker_params)

        global iteration_count
        iteration_count = 0

        # Create MCMC sampler. To enable multiprocessing, set threads > 1.
        # If using multiprocessing, the "lnpostfn" and "args" parameters
        # must be pickleable.
        if self.mpi:
            # Initialize the multiprocessing pool object.
            from emcee.utils import MPIPool
            pool = MPIPool(loadbalance=True)
            if not pool.is_master():
                pool.wait()
                sys.exit(0)
            self.sampler = emcee.EnsembleSampler(
                nwalkers=n_walkers,
                dim=len(walkers_matrix[0]),
                lnpostfn=ln_posterior,
                args=[self],
                pool=pool,
                runtime_sortingfn=sort_on_runtime)
            self.sampler.run_mcmc(walkers_matrix, n_iterations)
            pool.close()

        else:
            self.sampler = emcee.EnsembleSampler(nwalkers=n_walkers,
                                                 dim=len(walkers_matrix[0]),
                                                 lnpostfn=ln_posterior,
                                                 args=[self],
                                                 threads=1)

        #self.sampler_output = self.sampler.run_mcmc(walkers_matrix, n_iterations)
        self.sampler.run_mcmc(walkers_matrix, n_iterations)
Example #10
def fit_bim_bh3_curves(p0=None):
    # Choose initial position
    if p0 is None:
        p0 = np.zeros((nwalkers, ndim))
        for walk_ix in range(nwalkers):
            for d_ix in range(len(data)):
                p0[walk_ix, d_ix*3] = np.random.uniform(1, 6)
                p0[walk_ix, d_ix*3 + 1] = np.random.uniform(6e-5, 1e-3)
                p0[walk_ix, d_ix*3 + 2] = np.random.uniform(2, 3)
            hp_ix = len(data)*3
            p0[walk_ix, hp_ix] = np.random.uniform(1,6) # fmax mean
            p0[walk_ix, hp_ix + 1] = np.random.uniform(0,1) # fmax sd
            p0[walk_ix, hp_ix + 2] = np.random.uniform(6e-5, 1e-3) # k mean
            p0[walk_ix, hp_ix + 3] = np.random.uniform(0,1e-1) # k sd
            p0[walk_ix, hp_ix + 4] = np.random.uniform(2,3) # f0 mean
            p0[walk_ix, hp_ix + 5] = np.random.uniform(0,1) # f0 sd

    #plt.figure()
    #for d_ix, data_i in enumerate(data):
    #    plt.plot(time, data_i, color=colors[d_ix])
    #    plt.plot(time, fit_func(p0[0, d_ix*3:(d_ix+1)*3]), color='k')

    # Initialize the MPI pool
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Get the sampler
    sampler = emcee.EnsembleSampler(nwalkers, ndim, posterior, pool=pool)
    # Burn-in
    print("Burn-in sampling...")
    pos, prob, state = sampler.run_mcmc(p0, burn_steps, storechain=False)
    sampler.reset() 
    # Main sampling
    print("Main sampling...")
    sampler.run_mcmc(pos, sample_steps)

    # Close the pool!
    pool.close()

    # Pickle the sampler
    sampler.pool = None
    with open('bimbh3_141125_2.pck','w') as f:
        pickle.dump(sampler, f)

    return sampler
Example #11
def run_pool(pC, pW, walk, step): #pCenter and pWidths
    steps = step
    nwalkers = walk 
    ndim = len(pC)
    ## r in, del r, i, PA
    p0 = [pC[0], pC[1], pC[2],  pC[3], pC[4]]
    widths = [pW[0], pW[1], pW[2],  pW[3], pW[4]]
    p = emcee.utils.sample_ball(p0,widths,size=nwalkers)
    
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnlike_visonly, live_dangerously=True,  pool=pool)
    
    print 'Beginning the MCMC run.'
    start = time.clock()
    sampler.run_mcmc(p, steps)
    stop = time.clock()
    pool.close()
    print 'MCMC finished successfully.\n'
    print 'This was a visibility-only run with {} walkers and {} steps'.format(nwalkers,steps)
    print "Mean acor time: "+str(np.mean(sampler.acor))
    print "\nMean acceptance fraction: "+str(np.mean(sampler.acceptance_fraction))
    print 'Run took %r minutes' % ((stop - start)/60.)

    chain = sampler.chain
    chi = (sampler.lnprobability)/(-0.5)
    whatbywhat = str(nwalkers)+'x'+str(steps)
    os.system('mkdir MCMCRUNS/vis_only/'+whatbywhat)
    chainFile = 'MCMCRUNS/vis_only/'+whatbywhat+'/'+whatbywhat+'.chain.fits'
    chiFile = 'MCMCRUNS/vis_only/'+whatbywhat+'/'+whatbywhat+'.chi.fits'
    infoFile = 'MCMCRUNS/vis_only/'+whatbywhat+'/'+whatbywhat+'.runInfo.txt'
    fits.writeto(chainFile,chain)
    fits.writeto(chiFile,chi)
    f = open(infoFile,'w')
    f.write('run took %r minutes\n' % ((stop - start)/60.))
    f.write('walkers: %r\n' % nwalkers)
    f.write('steps: %r\n' % steps)
    f.write('initial model: %r\n' % p0)
    f.write('widths: %r\n' % widths)
    f.write("mean acor time: "+str(np.mean(sampler.acor)))
    f.write("\nmean acceptance fraction: "+str(np.mean(sampler.acceptance_fraction)))
    f.close()

    print 'Data written to: \n'+chainFile+'\n'+chiFile+'\n'+infoFile
Example #12
	def get_pool(self,mpi=False,nthreads=1):
		import emcee
		from emcee.utils import MPIPool
		from pathos.multiprocessing import ProcessingPool as PPool
		#from multiprocessing import Pool as PPool
		if mpi:
			pool = MPIPool(loadbalance=True)
			if not pool.is_master():
				pool.wait()
				sys.exit(0)
			self.logger.info('Creating MPI pool with {:d} workers.'.format(pool.size+1))
		elif nthreads > 1:
			pool = PPool(nthreads)
			self.logger.info('Creating multiprocessing pool with {:d} threads.'.format(nthreads))
		else:
			pool = None
		return pool
Example #13
def tdelay_dt_mcmc(run, theta, Niter=20, Nwalkers=10, Ndim=2, sigma_smhm=0.2, nsnap0=15, downsampled='14', flag=None, continue_chain=False): 
    '''
    '''
    if Ndim == 2: 
        tdelay_range = [0., 3.]#np.arange(0., 3., 0.5)
        dt_range = [0.1, 4.]

    # new chain 
    chain_file = ''.join([UT.fig_dir(), run, '.tdelay_dt_mcmc.chain.dat']) 
    if os.path.isfile(chain_file) and continue_chain:   
        print 'Continuing previous MCMC chain!'
        sample = np.loadtxt(chain_file) 
        Niter = Niter - (np.float(len(sample))/np.float(Nwalkers)) # Number of chains left to finish 
        if Niter <= 0:
            raise ValueError
        print Niter, ' iterations left to finish'
    else: 
        f = open(chain_file, 'w')
        f.close()
        # Initializing Walkers
        pos0 = [np.array([np.random.uniform(tdelay_range[0], tdelay_range[1]), np.random.uniform(dt_range[0], dt_range[1])]) for i in range(Nwalkers)]

    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    kwargs = {
            'theta': theta, 
            'sigma_smhm': 0.2, 
            'nsnap0': 15, 
            'downsampled': '14', 
            }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, sigM, pool=pool, kwargs=kwargs)
    for result in sampler.sample(pos0, iterations=Niter, storechain=False):
        position = result[0]
        #print position
        f = open(chain_file, 'a')
        for k in range(position.shape[0]): 
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()
    pool.close()

    return None 
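Because each iteration appends Nwalkers rows to a flat text file, reading the chain back requires a reshape; a hypothetical post-processing step:

sample = np.loadtxt(chain_file)             # shape (Niter * Nwalkers, Ndim)
chain = sample.reshape(-1, Nwalkers, Ndim)  # shape (Niter, Nwalkers, Ndim)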
Example #14
def lnPost(theta):
    '''log-posterior
    '''
    # prior calculations 
    if prior_min[0] < theta[0] < prior_max[0] and \
       prior_min[1] < theta[1] < prior_max[1] and \
       prior_min[2] < theta[2] < prior_max[2] and \
       prior_min[3] < theta[3] < prior_max[3] and \
       prior_min[4] < theta[4] < prior_max[4]:
        lnPrior = 0.0
    else:
        lnPrior = -np.inf

    if not np.isfinite(lnPrior):
        return -np.inf
    return lnPrior + lnLike(theta)


# Module-level driver script, separate from lnPost above (Ndim, Nwalkers,
# lnprob, Nchains_burn and Nchains_pro are defined elsewhere in the source):

"""Initializing Walkers"""

pos = [np.array([11., np.log(.4), 11.5, 1.0, 13.5]) + 1e-3 * np.random.randn(Ndim) for i in range(Nwalkers)]

"""Initializing MPIPool"""

pool = MPIPool(loadbalance=True)
if not pool.is_master():
    pool.wait()
    sys.exit(0)

"""Initializing the emcee sampler"""
sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnprob, pool=pool)

# Burn in + Production
sampler.run_mcmc(pos, Nchains_burn + Nchains_pro)

# Production.
samples = sampler.chain[:, Nchains_burn:, :].reshape((-1, Ndim))
# closing the pool
pool.close()

np.savetxt("mcmc_sample.dat", samples)
Example #15
def main():
    '''
    A parallel run.
    '''
    pool = MPIPool(loadbalance=True)

    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    clf = hbsgc.HBSGC(pool=pool)

    # save start time
    clf.last_clock = time.clock()

    clf.filter_calcs()

    clf.data_calcs()

    clf.star_model_calcs()

    # if clf.calc_model_mags:
    #     clf.star_model_mags()

    clf.gal_model_calcs()

    # if clf.calc_model_mags:
    #     clf.gal_model_mags()

    clf.fit_calcs()

    clf.count_tot = 0

    clf.sample()

    clf.save_proba()

    if clf.min_chi2_write:
        clf.save_min_chi2()

    pool.close()
Example #18
def ens_mpi_sample(gf, nwalkers, burn_steps, sample_steps, pos=None,
                   random_state=None):
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initialize the parameter array with initial values (in log10 units)
    # Number of parameters to estimate
    ndim = (len(gf.builder.global_params) +
            (len(gf.data) * len(gf.builder.local_params)))
    # Initialize the walkers with starting positions drawn from the priors
    # Note that the priors are in log10 scale already, so they don't
    # need to be transformed here
    if pos is None:
        p0 = np.zeros((nwalkers, ndim))
        for walk_ix in range(nwalkers):
            for p_ix in range(ndim):
                p0[walk_ix, p_ix] = gf.priors[p_ix].random()
    else:
        p0 = pos

    # Create the sampler object
    sampler = emcee.EnsembleSampler(nwalkers, ndim, posterior,
                                    args=[gf], pool=pool)
    if random_state is not None:
        sampler.random_state = random_state

    print "Burn in sampling..."
    pos, prob, state = sampler.run_mcmc(p0, burn_steps, storechain=False)
    sampler.reset()

    print "Main sampling..."
    sampler.run_mcmc(pos, sample_steps)

    # Close the pool!
    pool.close()

    print "Done sampling."
    return sampler
Example #19
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")
	parser.add_argument("-ms","--mean_subtract",dest="mean_subtract",action="store_true",default=False,help="lod in the observations with the subtracted means")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool
	try:
		pool = MPIPool()
	except:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Use this model for the covariance matrix (from the new set of 50 N body simulations)
	covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
	#Load in the covariance matrix
	fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

	#If options is enabled, use only the first N realizations to estimate the covariance matrix
	if cmd_args.realizations is not None:

		logging.info("Using only the first {0} realizations to estimate the covariance matrix".format(cmd_args.realizations))
		fiducial_feature_ensemble = fiducial_feature_ensemble.subset(range(cmd_args.realizations))
		assert fiducial_feature_ensemble.num_realizations==cmd_args.realizations

	fiducial_features = fiducial_feature_ensemble.mean()
	features_covariance = fiducial_feature_ensemble.covariance()

	#timestamp
	now = time.time()
	logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Get also the observation instance
	observation = CFHTLens(root_path=feature_loader.options.get("observations","root_path"))
	logging.info("Measuring the observations from {0}".format(observation))
	#And load the observations
	observed_feature = feature_loader.load_features(observation).mean()

	#timestamp
	now = time.time()
	logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#timestamp
	now = time.time()
	logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	####################################################################################################################
	######################################Compute the chi2 cube#########################################################
	####################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Read parameters to use from options
	use_parameters = feature_loader.options.get("parameters","use_parameters").replace(" ","").split(",")
	assert len(use_parameters)==3
	
	#Reparametrization hash key
	use_parameters_hash = "-".join(use_parameters)

	########################################################################################
	#Might need to reparametrize the emulator here, use a dictionary for reparametrizations#
	########################################################################################

	assert use_parameters_hash in reparametrization.keys(),"No reparametrization scheme specified for {0} parametrization".format(use_parameters_hash)
	
	if reparametrization[use_parameters_hash] is not None:
		
		#Reparametrize
		logging.info("Reparametrizing emulator according to {0} parametrization".format(use_parameters_hash))
		analysis.reparametrize(reparametrization[use_parameters_hash])

		#Retrain for safety
		analysis.train()

	#Log current parametrization to user
	logging.info("Using parametrization {0}".format(use_parameters_hash))

	#Set the points in parameter space on which to compute the chi2 (read extremes from options)
	par = list()
	for p in range(3):
		assert feature_loader.options.has_section(use_parameters[p]),"No extremes specified for parameter {0}".format(use_parameters[p])
		par.append(np.ogrid[feature_loader.options.getfloat(use_parameters[p],"min"):feature_loader.options.getfloat(use_parameters[p],"max"):feature_loader.options.getint(use_parameters[p],"num_points")*1j])

	num_points = len(par[0]) * len(par[1]) * len(par[2]) 

	points = np.array(np.meshgrid(par[0],par[1],par[2],indexing="ij")).reshape(3,num_points).transpose()
	
	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))
	
	chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

	now = time.time()
	logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	#Close pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")

	#save output
	likelihoods_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"likelihoods_{0}".format(use_parameters_hash))
	prefix = cmd_args.prefix
	if cmd_args.mean_subtract:
		prefix += "_meansub"

	if not os.path.isdir(likelihoods_dir):
		os.mkdir(likelihoods_dir)
	
	if cmd_args.realizations is None:
		chi2_file = os.path.join(likelihoods_dir,"chi2{0}_{1}.npy".format(prefix,output_string(feature_loader.feature_string)))
		likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}_{1}.npy".format(prefix,output_string(feature_loader.feature_string)))
	else:
		chi2_file = os.path.join(likelihoods_dir,"chi2{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,output_string(feature_loader.feature_string)))
		likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,output_string(feature_loader.feature_string)))

	logging.info("Saving chi2 to {0}".format(chi2_file))
	np.save(chi2_file,chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))

	logging.info("Saving full likelihood to {0}".format(likelihood_file))
	likelihood_cube = analysis.likelihood(chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))
	np.save(likelihood_file,likelihood_cube)

	#Find the maximum of the likelihood using ContourPlot functionality
	contour = ContourPlot()
	contour.getLikelihood(likelihood_cube,parameter_axes={use_parameters[0]:0,use_parameters[1]:1,use_parameters[2]:2},parameter_labels={use_parameters[0]:"0",use_parameters[1]:"1",use_parameters[2]:"2"})
	contour.getUnitsFromOptions(feature_loader.options)
	parameters_maximum = contour.getMaximum()
	parameter_keys = parameters_maximum.keys()
	parameter_keys.sort(key=contour.parameter_axes.get)

	#Display the new best fit before exiting
	best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
	logging.info("Best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}({2} dof)".format(best_fit_parameters,analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

	#Additionally save some debugging info to plot, etc...
	if cmd_args.save_debug:

		troubleshoot_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot_{0}".format(use_parameters_hash))
		if not os.path.isdir(troubleshoot_dir):
			os.mkdir(troubleshoot_dir)

		logging.info("Saving troubleshoot info to {0}...".format(troubleshoot_dir))

		np.save(os.path.join(troubleshoot_dir,"observation_{0}.npy".format(output_string(feature_loader.feature_string))),observed_feature)
		np.save(os.path.join(troubleshoot_dir,"covariance_{0}.npy".format(output_string(feature_loader.feature_string))),features_covariance)
		np.save(os.path.join(troubleshoot_dir,"fiducial_{0}.npy".format(output_string(feature_loader.feature_string))),fiducial_features)
		np.save(os.path.join(troubleshoot_dir,"best_fit_features_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.predict(best_fit_parameters))
		np.save(os.path.join(troubleshoot_dir,"fiducial_from_interpolator_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.predict(np.array([0.26,-1.0,0.800])))
		np.save(os.path.join(troubleshoot_dir,"chi2_contributions_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.chi2Contributions(best_fit_parameters,observed_feature=observed_feature,features_covariance=features_covariance))

	end = time.time()

	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))
Example #20
class MPIManager(object):
    """
    Class to serve as a context manager handling MPI-related issues,
    specifically the managing of ``MPIPool`` and the splitting of communicators
    """
    logger = logging.getLogger("MPIManager")

    def __init__(self, comm, nruns, debug=False):
        """
        Parameters
        ----------
        comm : MPI.Communicator
            the global communicator to split
        nruns : int
            the number of independent algorithms to run concurrently
        debug : bool, optional
            set the logging level to debug in the `MPIPool`; default
            is `False`
        """
        self.comm = comm
        self.nruns = nruns
        self.debug = debug
        if debug: self.logger.setLevel(logging.DEBUG)

        # initialize comm for parallel runs
        self.par_runs_group = None
        self.par_runs_comm = None

        # initialize comm for pool of workers for each
        # parallel run
        self.pool_comm = None
        self.pool = None

    def __enter__(self):
        """
        Set up the MPIPool, such that only the ``pool`` master returns,
        while the other processes wait for tasks
        """
        # split ranks if we need to
        if self.comm.size > 1:

            ranges = []
            for i, ranks in split_ranks(self.comm.size, self.nruns):
                ranges.append(ranks[0])
                if self.comm.rank in ranks: color = i

            # split the global comm into pools of workers
            self.pool_comm = self.comm.Split(color, 0)

            # make the comm to communicate b/w parallel runs
            if self.nruns > 1:
                self.par_runs_group = self.comm.group.Incl(ranges)
                self.par_runs_comm = self.comm.Create(self.par_runs_group)

        # initialize the MPI pool, if the comm has more than 1 process
        if self.pool_comm is not None and self.pool_comm.size > 1:
            from emcee.utils import MPIPool
            kws = {
                'loadbalance': True,
                'comm': self.pool_comm,
                'debug': self.debug
            }
            self.pool = MPIPool(**kws)

        # explicitly force non-master ranks in pool to wait
        if self.pool is not None and not self.pool.is_master():
            self.pool.wait()
            self.logger.debug("exiting after pool closed")
            sys.exit(0)

        # log
        if self.pool is not None:
            self.logger.debug("using an MPIPool instance with %d worker(s)" %
                              self.pool.size)

        self.rank = 0
        if self.par_runs_comm is not None:
            self.rank = self.par_runs_comm.rank

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """
        Exit gracefully by closing and freeing the MPI-related variables
        """
        if exc_value is not None:
            trace = ''.join(
                traceback.format_exception(exc_type,
                                           exc_value,
                                           exc_traceback,
                                           limit=5))
            self.logger.error("traceback:\n%s" % trace)

        # wait for all the processes, if we have more than one
        if self.par_runs_comm is not None and self.par_runs_comm.size > 1:
            self.par_runs_comm.Barrier()

        # close and free the MPI stuff
        self.logger.debug("beginning to close MPI variables...")

        if self.par_runs_group is not None:
            self.par_runs_group.Free()
        if self.par_runs_comm is not None:
            self.par_runs_comm.Free()
        if self.pool is not None:
            self.pool.close()
        self.logger.debug('...MPI variables closed')

        return True
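A hypothetical use of MPIManager as a context manager (mpi4py supplies the communicator; emcee is assumed imported, and nwalkers, ndim, lnprob, p0, and nsteps are assumed to be defined):

from mpi4py import MPI

with MPIManager(MPI.COMM_WORLD, nruns=2) as manager:
    # only pool masters reach this point; worker ranks exit in __enter__
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=manager.pool)
    sampler.run_mcmc(p0, nsteps)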
Example #21
def main():
##################
#These change a lot
  numWaveforms = 12
  numThreads = 8
  
  ndim = 6*numWaveforms + 7
  nwalkers = 25*ndim
  
  iter=10000
  burnIn = 9000
  
######################

  doPlots = 1

#  plt.ion()

  fitSamples = 350
  timeStepSize = 1. #ns
  
  #Prepare detector
  tempGuess = 79.310080
  gradGuess = 0.05
  pcRadGuess = 2.5
  pcLenGuess = 1.6

  #Create a detector model
  detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05,2.5, 1.65)
  det =  Detector(detName, temperature=tempGuess, timeStep=timeStepSize, numSteps=fitSamples*10 )
  det.LoadFields("P42574A_fields_v3.npz")
  det.SetFields(pcRadGuess, pcLenGuess, gradGuess)
  
  b_over_a = 0.107213
  c = -0.821158
  d = 0.828957
  rc1 = 74.4
  rc2 = 1.79
  rcfrac = 0.992
  det.SetTransferFunction(b_over_a, c, d, rc1, rc2, rcfrac)
  
  tempIdx = -7
  #and the remaining 4 are for the transfer function
  fig_size = (20,10)
  
  #Create a decent start guess by fitting waveform-by-waveform
  wfFileName = "P42574A_12_fastandslow_oldwfs.npz"
#  wfFileName =  "P42574A_5_fast.npz"
  
  if os.path.isfile(wfFileName):
    data = np.load(wfFileName)
    results = data['results']
    wfs = data['wfs']
    
#    wfs = wfs[::3]
#    results = results[::3]

    numWaveforms = wfs.size
  else:
    print "No saved waveforms available.  Exiting."
    exit(0)

  #prep holders for each wf-specific param
  r_arr = np.empty(numWaveforms)
  phi_arr = np.empty(numWaveforms)
  z_arr = np.empty(numWaveforms)
  scale_arr = np.empty(numWaveforms)
  t0_arr = np.empty(numWaveforms)
  smooth_arr = np.ones(numWaveforms)*7.
  simWfArr = np.empty((1,numWaveforms, fitSamples))

  #Prepare the initial value arrays
  for (idx, wf) in enumerate(wfs):
    wf.WindowWaveformTimepoint(fallPercentage=.97, rmsMult=2,)
    r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx]  = results[idx]['x']
#    t0_arr[idx] -= 15

  #Initialize the multithreading
#  p = Pool(numThreads, initializer=initializeDetectorAndWaveforms, initargs=[det, wfs])

  initializeDetectorAndWaveforms(det, wfs)
  p = MPIPool()
  if not p.is_master():
    p.wait()
    sys.exit(0)

  #Do the MCMC
  mcmc_startguess = np.hstack((r_arr[:], phi_arr[:], z_arr[:], scale_arr[:], t0_arr[:], smooth_arr[:],       # waveform-specific params
                              tempGuess, b_over_a, c, d, rc1, rc2, rcfrac)) # detector-specific

  #number of walkers _must_ be even
  if nwalkers % 2:
    nwalkers +=1

  pos0 = [mcmc_startguess + 1e-2*np.random.randn(ndim)*mcmc_startguess for i in range(nwalkers)]
  rc1idx = -3
  rc2idx = -2
  rcfracidx = -1

  #Make sure everything in the initial guess is within bounds
  for pos in pos0:
    pos[:numWaveforms] = np.clip( pos[:numWaveforms], 0, np.floor(det.detector_radius*10.)/10.)
    pos[numWaveforms:2*numWaveforms] = np.clip(pos[numWaveforms:2*numWaveforms], 0, np.pi/4)
    pos[2*numWaveforms:3*numWaveforms] = np.clip(pos[2*numWaveforms:3*numWaveforms], 0, np.floor(det.detector_length*10.)/10.)
    pos[4*numWaveforms:5*numWaveforms] = np.clip(pos[4*numWaveforms:5*numWaveforms], 0, fitSamples)
    pos[5*numWaveforms:6*numWaveforms] = np.clip(pos[5*numWaveforms:6*numWaveforms], 0, 20.)

    pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
    pos[rcfracidx] = np.clip(pos[rcfracidx], 0, 1)
    pos[rc2idx] = np.clip(pos[rc2idx], 0, np.inf)
    pos[rc1idx] = np.clip(pos[rc1idx], 0, np.inf)


    prior = lnprior(pos,)
    if not np.isfinite(prior) :
      print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
      print pos
      exit(0)

  #Initialize, run the MCMC
  sampler = emcee.EnsembleSampler( nwalkers, ndim,  lnprob,  pool=p)

  #w/ progress bar, & time the thing
  bar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=iter).start()
  for (idx,result) in enumerate(sampler.sample(pos0, iterations=iter, storechain=True)):
    bar.update(idx+1)
  bar.finish()

  p.close()

  print "Dumping chain to file..."
  np.save("mpisampler_%dwfs.npy" % numWaveforms, sampler.chain)
Example #22
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPIPool
	try:
		pool = MPIPool()
	except:
		pool = None

	if (pool is not None) and not(pool.is_master()):
		
		pool.wait()
		pool.comm.Barrier()
		MPI.Finalize()
		sys.exit(0)

	#Set progressbar attributes
	widgets = ["Progress: ",progressbar.Percentage(),' ',progressbar.Bar(marker="+")]

	#Parse INI options file
	options = ConfigParser.ConfigParser()
	with open(cmd_args.options_file,"r") as configfile:
		options.readfp(configfile)

	#Read the save path from options
Example #23
    #Smooth 1 arcmin
    conv_map.smooth(1.0 * arcmin, inplace=True)

    #Measure the moments
    return conv_map.moments(connected=True, dimensionless=True)


logging.basicConfig(level=logging.DEBUG)

try:
    pool = MPIPool()
except ValueError:
    pool = None

if (pool is not None) and not (pool.is_master()):

    pool.wait()
    sys.exit(0)

map_mock_ids = range(int(sys.argv[1]))

igs1_set = IGS1(
    root_path=
    "/Users/andreapetri/Documents/Columbia/spurious_shear/convergence_maps")
map_igs1_ids = igs1_set.getNames(z=1.0,
                                 realizations=range(1,
                                                    int(sys.argv[1]) + 1))

gen = GaussianNoiseGenerator(shape=(2048, 2048),
                             side_angle=3.41 * deg,
Example #24
def main(argv):
    ##################
    #These change a lot
    numWaveforms = 16
    numThreads = 12

    ndim = 6 * numWaveforms + 8
    nwalkers = 2 * ndim

    iter = 50
    burnIn = 40
    wfPlotNumber = 10

    ######################

    #  plt.ion()

    fitSamples = 200

    #Prepare detector
    zero_1 = -5.56351644e+07
    pole_1 = -1.38796386e+04
    pole_real = -2.02559385e+07
    pole_imag = 9885315.37450211

    zeros = [zero_1, 0]
    poles = [pole_real + pole_imag * 1j, pole_real - pole_imag * 1j, pole_1]
    system = signal.lti(zeros, poles, 1E7)

    tempGuess = 77.89
    gradGuess = 0.0483
    pcRadGuess = 2.591182
    pcLenGuess = 1.613357

    #Create a detector model
    detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05, 2.5,
                                                                     1.65)
    det = Detector(detName,
                   temperature=tempGuess,
                   timeStep=1.,
                   numSteps=fitSamples * 10,
                   tfSystem=system)
    det.LoadFields("P42574A_fields_v3.npz")
    det.SetFields(pcRadGuess, pcLenGuess, gradGuess)

    tempIdx = -8
    gradIdx = -7
    pcRadIdx = -6
    pcLenIdx = -5
    #and the remaining 4 are for the transfer function

    fig_size = (20, 10)

    #Create a decent start guess by fitting waveform-by-waveform

    wfFileName = "P42574A_512waveforms_%drisetimeculled.npz" % numWaveforms
    if os.path.isfile(wfFileName):
        data = np.load(wfFileName)
        results = data['results']
        wfs = data['wfs']
        numWaveforms = wfs.size
    else:
        print "No saved waveforms available.  Loading from Data"
        exit(0)

    #prep holders for each wf-specific param
    r_arr = np.empty(numWaveforms)
    phi_arr = np.empty(numWaveforms)
    z_arr = np.empty(numWaveforms)
    scale_arr = np.empty(numWaveforms)
    t0_arr = np.empty(numWaveforms)
    smooth_arr = np.ones(numWaveforms) * 7.
    simWfArr = np.empty((1, numWaveforms, fitSamples))

    #Prepare the initial value arrays
    for (idx, wf) in enumerate(wfs):
        wf.WindowWaveformTimepoint(fallPercentage=.99)
        r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[
            idx], smooth_arr[idx] = results[idx]['x']
        t0_arr[
            idx] += 10  #because i had a different windowing offset back in the day

    #Plot the waveforms to take a look at the initial guesses
    if False:
        fig = plt.figure()
        for (idx, wf) in enumerate(wfs):

            print "WF number %d:" % idx
            print "  >>r: %f\n  >>phi %f\n  >>z %f\n  >>e %f\n  >>t0 %f\n >>smooth %f" % (
                r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx],
                t0_arr[idx], smooth_arr[idx])
            ml_wf = det.GetSimWaveform(r_arr[idx],
                                       phi_arr[idx],
                                       z_arr[idx],
                                       scale_arr[idx] * 100,
                                       t0_arr[idx],
                                       fitSamples,
                                       smoothing=smooth_arr[idx])
            plt.plot(ml_wf, color="b")
            plt.plot(wf.windowedWf, color="r")
        value = raw_input('  --> Press q to quit, any other key to continue\n')
        if value == 'q': exit(0)

    #Initialize this thread's globals
    initializeDetectorAndWaveforms(det, wfs)

    #Initialize the multithreading
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    #Do the MCMC
    mcmc_startguess = np.hstack((
        r_arr[:],
        phi_arr[:],
        z_arr[:],
        scale_arr[:] * 100.,
        t0_arr[:],
        smooth_arr[:],  # waveform-specific params
        tempGuess,
        gradGuess,
        pcRadGuess,
        pcLenGuess,
        zero_1,
        pole_1,
        pole_real,
        pole_imag))  # detector-specific

    #number of walkers _must_ be even
    if nwalkers % 2:
        nwalkers += 1

    #Initialize walkers with a random, narrow ball around the start guess
    pos0 = [
        mcmc_startguess + 1e-2 * np.random.randn(ndim) * mcmc_startguess
        for i in range(nwalkers)
    ]

    #Make sure everything in the initial guess is within bounds
    for pos in pos0:
        pos[:numWaveforms] = np.clip(pos[:numWaveforms], 0,
                                     np.floor(det.detector_radius * 10.) / 10.)
        pos[numWaveforms:2 * numWaveforms] = np.clip(
            pos[numWaveforms:2 * numWaveforms], 0, np.pi / 4)
        pos[2 * numWaveforms:3 * numWaveforms] = np.clip(
            pos[2 * numWaveforms:3 * numWaveforms], 0,
            np.floor(det.detector_length * 10.) / 10.)
        pos[4 * numWaveforms:5 * numWaveforms] = np.clip(
            pos[4 * numWaveforms:5 * numWaveforms], 0, fitSamples)
        pos[5 * numWaveforms:6 * numWaveforms] = np.clip(
            pos[5 * numWaveforms:6 * numWaveforms], 0, 20.)

        pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
        pos[gradIdx] = np.clip(pos[gradIdx], det.gradList[0], det.gradList[-1])
        pos[pcRadIdx] = np.clip(pos[pcRadIdx], det.pcRadList[0],
                                det.pcRadList[-1])
        pos[pcLenIdx] = np.clip(pos[pcLenIdx], det.pcLenList[0],
                                det.pcLenList[-1])

        prior = lnprior(pos, )
        if not np.isfinite(prior):
            print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
            print pos
            exit(0)

    #Initialize, run the MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)

    #w/ progress bar, & time the thing
    start = timer()
    for (idx, result) in enumerate(
            sampler.sample(pos0, iterations=iter, storechain=True)):
        continue
    end = timer()

    pool.close()

    print "Elapsed time: " + str(end - start)

    print "Dumping chain to file..."
    np.save("sampler_mpi_%dwfs.npy" % numWaveforms, sampler.chain)

    print "Making MCMC steps figure..."

    #########  Plots for Waveform params
    stepsFig = plt.figure(2, figsize=fig_size)
    plt.clf()
    ax0 = stepsFig.add_subplot(611)
    ax1 = stepsFig.add_subplot(612, sharex=ax0)
    ax2 = stepsFig.add_subplot(613, sharex=ax0)
    ax3 = stepsFig.add_subplot(614, sharex=ax0)
    ax4 = stepsFig.add_subplot(615, sharex=ax0)
    ax5 = stepsFig.add_subplot(616, sharex=ax0)

    ax0.set_ylabel('r')
    ax1.set_ylabel('phi')
    ax2.set_ylabel('z')
    ax3.set_ylabel('scale')
    ax4.set_ylabel('t0')
    ax5.set_ylabel('smoothing')

    for i in range(nwalkers):
        for j in range(wfs.size):
            ax0.plot(sampler.chain[i, :, 0 + j], alpha=0.3)  # r
            ax1.plot(sampler.chain[i, :, numWaveforms + j], alpha=0.3)  # phi
            ax2.plot(sampler.chain[i, :, 2 * numWaveforms + j], alpha=0.3)  #z
            ax3.plot(sampler.chain[i, :, 3 * numWaveforms + j],
                     alpha=0.3)  #energy
            ax4.plot(sampler.chain[i, :, 4 * numWaveforms + j], alpha=0.3)  #t0
            ax5.plot(sampler.chain[i, :, 5 * numWaveforms + j],
                     alpha=0.3)  #smoothing

    plt.savefig("emcee_mpi_wfchain_%dwfs.png" % numWaveforms)

    #########  Plots for Detector params
    stepsFigDet = plt.figure(3, figsize=fig_size)
    plt.clf()
    ax0 = stepsFigDet.add_subplot(411)
    ax1 = stepsFigDet.add_subplot(412, sharex=ax0)
    ax2 = stepsFigDet.add_subplot(413, sharex=ax0)
    ax3 = stepsFigDet.add_subplot(414, sharex=ax0)

    ax0.set_ylabel('temp')
    ax1.set_ylabel('grad')
    ax2.set_ylabel('pcRad')
    ax3.set_ylabel('pcLen')

    for i in range(nwalkers):
        ax0.plot(sampler.chain[i, :, tempIdx], "b", alpha=0.3)  #temp
        ax1.plot(sampler.chain[i, :, gradIdx], "b", alpha=0.3)  #grad
        ax2.plot(sampler.chain[i, :, pcRadIdx], "b", alpha=0.3)  #pcrad
        ax3.plot(sampler.chain[i, :, pcLenIdx], "b", alpha=0.3)  #pclen

    plt.savefig("emcee_mpi_detchain_%dwfs.png" % numWaveforms)

    #and for the transfer function
    stepsFigTF = plt.figure(4, figsize=fig_size)
    plt.clf()
    tf0 = stepsFigTF.add_subplot(411)
    tf1 = stepsFigTF.add_subplot(412, sharex=ax0)
    tf2 = stepsFigTF.add_subplot(413, sharex=ax0)
    tf3 = stepsFigTF.add_subplot(414, sharex=ax0)
    tf0.set_ylabel('zero_1')
    tf1.set_ylabel('pole_1')
    tf2.set_ylabel('pole_real')
    tf3.set_ylabel('pole_imag')

    for i in range(nwalkers):
        tf0.plot(sampler.chain[i, :, -4], "b", alpha=0.3)  #2
        tf1.plot(sampler.chain[i, :, -3], "b", alpha=0.3)  #den1
        tf2.plot(sampler.chain[i, :, -2], "b", alpha=0.3)  #2
        tf3.plot(sampler.chain[i, :, -1], "b", alpha=0.3)  #3

    plt.savefig("emcee_mpi_tfchain_%dwfs.png" % numWaveforms)

    samples = sampler.chain[:, burnIn:, :].reshape((-1, ndim))

    print "temp is %f" % np.median(samples[:, tempIdx])
    print "grad is %f" % np.median(samples[:, gradIdx])
    print "pcrad is %f" % np.median(samples[:, pcRadIdx])
    print "pclen is %f" % np.median(samples[:, pcLenIdx])
    print "zero_1 is %f" % np.median(samples[:, -4])
    print "pole_1 is %f" % np.median(samples[:, -3])
    print "pole_real is %f" % np.median(samples[:, -2])
    print "pole_imag is %f" % np.median(samples[:, -1])

    #TODO: Aaaaaaand plot some waveforms..
    simWfs = np.empty((wfPlotNumber, numWaveforms, fitSamples))

    for idx, (theta) in enumerate(samples[np.random.randint(
            len(samples), size=wfPlotNumber)]):
        temp, impGrad, pcRad, pcLen = theta[tempIdx], theta[gradIdx], theta[
            pcRadIdx], theta[pcLenIdx]
        zero_1, pole_1, pole_real, pole_imag = theta[-4:]
        r_arr, phi_arr, z_arr, scale_arr, t0_arr, smooth_arr = theta[:-8].reshape(
            (6, numWaveforms))
        det.SetTemperature(temp)
        det.SetFields(pcRad, pcLen, impGrad)

        zeros = [zero_1, 0]
        poles = [
            pole_real + pole_imag * 1j, pole_real - pole_imag * 1j, pole_1
        ]
        det.SetTransferFunction(zeros, poles, 1E7)

        for wf_idx in range(wfs.size):
            wf_i = det.GetSimWaveform(r_arr[wf_idx], phi_arr[wf_idx],
                                      z_arr[wf_idx], scale_arr[wf_idx],
                                      t0_arr[wf_idx], fitSamples)
            simWfs[idx, wf_idx, :] = wf_i
            if wf_i is None:
                print "Waveform %d, %d is None" % (idx, wf_idx)

    residFig = plt.figure(4, figsize=(20, 15))
    helpers.plotManyResidual(simWfs, wfs, figure=residFig)
    plt.savefig("emcee_mpi_waveforms_%dwfs.png" % numWaveforms)
Example #25
def run(N, p00=None, nwalkers=500):
    fn = chainDirRel+'.pickle'
    ndim =  17

    if p00 is not None:
        p0 = [p00*(1.0+0.001*np.random.randn( ndim )) for i in range(nwalkers)]
    else:
        p0 = [sampleFromPrior() for i in range(nwalkers)]

    restart = {}
    restart['currentPosition'] = p0
    restart['chain'] = None
    restart['state'] = None
    restart['prob'] = None
    restart['iterationCounter'] = 0
    restart['mcmcRunCounter'] = 0

    # Read in our past progress UNLESS we've been given a new starting location.
    if p00 is None:
        updateRestart(fn,restart)

    if restart['chain'] is not None:
        # This may save some time if you change something and forget to delete the .pickle file.
        restartedShape = np.shape(restart['chain'])
        print restartedShape, nwalkers, ndim
        assert restartedShape[0] == nwalkers
        assert restartedShape[2] == ndim

    global runNumber
    runNumber = restart['mcmcRunCounter']

    restart['iterationCounter'] += N
    restart['mcmcRunCounter'] += 1

    pool = MPIPool(comm=comm, loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnProb, pool=pool)
    #pos, prob, state = sampler.run_mcmc(restart['currentPosition'], N, rstate0=restart['state'], lnprob0=restart['prob'])

    for result in sampler.sample(restart['currentPosition'], iterations=N, lnprob0=restart['prob'], rstate0=restart['state']):

        pos, prob, state = result

        restart['acor'] = sampler.acor[:] # autocorr length for each param (ndim)
        restart['accept'] = sampler.acceptance_fraction[:]  # acceptance frac for each walker.
        restart['currentPosition'] = pos # same shape as p0: nwalkers x ndim
        restart['state'] = state # random number generator state
        restart['prob'] = prob # log-probability for each walker (length nwalkers)
        if restart['chain'] is None:
            restart['chain'] = np.expand_dims(sampler.chain[:,0,:],1) # nwalkers x niterations x ndim
            restart['allProbs'] = np.expand_dims(prob,1)  # nwalkers x niterations
        else:
            print np.shape(restart['chain']), np.shape(sampler.chain[:,-1,:]), np.shape(sampler.chain)
            print restart['mcmcRunCounter'], restart['iterationCounter']
            #restart['chain'] = np.concatenate((restart['chain'], sampler.chain[:,-1,:]), axis=1)
            print "dbg1: ",np.shape(restart['chain']), np.shape(np.zeros((nwalkers, 1, ndim))), np.shape(np.expand_dims(pos,1))
            restart['chain'] = np.concatenate((restart['chain'], np.expand_dims(pos, 1)),axis=1)
            restart['allProbs'] = np.concatenate((restart['allProbs'], np.expand_dims(prob, 1)),axis=1)

        
        saveRestart(fn,restart)

    pool.close()
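run() leans on saveRestart and updateRestart, which live elsewhere in this project; a plausible pickle-based sketch of what they do (an assumption, not the project's actual code):

import os
import pickle

def saveRestart(fn, restart):
    # Dump the restart dict so an interrupted run can be resumed later.
    with open(fn, 'wb') as f:
        pickle.dump(restart, f, pickle.HIGHEST_PROTOCOL)

def updateRestart(fn, restart):
    # Overwrite the default restart dict with any previously saved progress.
    if os.path.isfile(fn):
        with open(fn, 'rb') as f:
            restart.update(pickle.load(f))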
Example #26
0
def mcmc_mpi(Nwalkers, Niters, Mr, prior_name = 'first_try', pois = False): 
    '''
    Parameters
    -----------
    - Nwalkers : 
        Number of walkers
    - Niters : 
        Number of MCMC iterations
    '''
    #data and covariance matrix
    fake_obs_icov = Data.load_covariance(Mr , pois = False)
    fake_obs = Data.load_data(Mr)
        
    # True HOD parameters
    data_hod = Data.load_dechod_random_guess(Mr)
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name , Mr)
    prior_range = np.zeros((len(prior_min),2))
    prior_range[:,0] = prior_min
    prior_range[:,1] = prior_max
    
    # mcmc chain output file 
    chain_file_name = ''.join([util.mcmc_dir(),'group_nopoisson_mcmc_chain_Mr',str(Mr),'.hdf5'])
 

    if os.path.isfile(chain_file_name) and continue_chain:   # continue_chain: module-level flag
        print 'Continuing previous MCMC chain!'
        with h5py.File(chain_file_name, "r") as sample:
            chain = sample["mcmc"][...]
        # Iterations already completed = rows of the chain that are not still all-zero
        start_step = int(np.sum(np.any(chain.reshape(chain.shape[0], -1) != 0., axis=1)))
        Nchains = Niters - start_step # Number of iterations left to finish
        if Nchains <= 0:
            raise ValueError('chain already has %i iterations' % start_step)
        print Nchains, ' iterations left to finish'

        # Initializing Walkers from the end of the chain 
        pos0 = chain[start_step - 1]
    else:
        # new chain
        start_step = 0
        print "chain_file_name=" , chain_file_name
 
        sample_file = h5py.File(chain_file_name , 'w')
        sample_file.create_dataset("mcmc",(Niters, Nwalkers, Ndim), data = np.zeros((Niters, Nwalkers , Ndim)))
        sample_file.close()
         
        # Initializing Walkers
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
                         5.e-2 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)
    print "initial position of the walkers = " , pos0.shape
    # Initializing MPIPool
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
            'prior_range': prior_range, 
            'data': fake_obs, 
            'data_icov': fake_obs_icov, 
            'Mr': Mr
            }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost, pool=pool, kwargs=hod_kwargs)

    cnt = start_step

    # Sample, writing each iteration's walker positions straight to the HDF5 file
    for result in sampler.sample(pos0, iterations=Niters - start_step, storechain=False):
        position = result[0]
        sample_file = h5py.File(chain_file_name, 'r+')
        sample_file["mcmc"][cnt] = position
        sample_file.close()
        print "iteration=" , cnt
        cnt += 1
    pool.close()
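Once mcmc_mpi has run, the HDF5 chain can be read back and summarized; a short sketch, assuming h5py and the file layout created above:

import h5py
import numpy as np

with h5py.File(chain_file_name, 'r') as f:   # chain_file_name as built above
    chain = f['mcmc'][...]                   # shape (Niters, Nwalkers, Ndim)
flat = chain[chain.shape[0] // 2:].reshape(-1, chain.shape[-1])  # crude burn-in cut
print('posterior medians: %s' % np.median(flat, axis=0))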
Example #27
0
    def sample(self):
        '''
        Run the MCMC.
        '''
        # First make sure that the maximum likelihood params are fitted
        if not self.minimized:
            self.approximate_ml()
        # print(self.params_all)

        ndim, nwalkers = len(self.params_vary), self.config['NWALKERS']
        p0 = np.zeros((nwalkers, len(self.params_vary)))
        pml = [self.params_all[pname] for pname in self.params_vary]

        for pnum, pname in enumerate(self.params_vary):
            p0[:, pnum] = (np.random.randn(nwalkers)\
            * self.config['SAMPLE_BALL']+1.)*pml[pnum]

        plist = []

        for key in self.params_vary.keys():
            plist.append(key)

        args = (self.freqs, self.tb_meas, self.var_tb, self.params_all, plist,
                self.params_vary, self.fg_model, self.sig_model)

        if self.config['MPI']:
            from emcee.utils import MPIPool
            pool = MPIPool()

            if not pool.is_master():
                pool.wait()
                sys.exit(0)

            self.sampler = emcee.EnsembleSampler(nwalkers,
                                                 ndim,
                                                 lnprob,
                                                 args=args,
                                                 pool=pool)

            self.sampler.run_mcmc(p0, self.config['NBURN'])  # burn in

            p0 = self.sampler.chain[:, -1, :].squeeze()

            self.sampler.reset()
            self.sampler.run_mcmc(p0, self.config['NSTEPS'])
            pool.close()
        else:
            if self.config['SAMPLER'] == 'PARALLELTEMPERING':
                logl = lambda x: lnlike(
                    x, self.freqs, self.tb_meas, self.var_tb, self.params_all,
                    self.params_vary, self.fg_model, self.sig_model)

                logp = lambda x: lnprior(x, self.params_vary.keys(),
                                         self.params_vary)

                self.sampler = ptemcee.Sampler(
                    ntemps=self.config['NTEMPS'],
                    nwalkers=self.config['NWALKERS'],
                    dim=self.ndim,
                    logl=logl,
                    logp=logp)
            else:
                self.sampler = emcee.EnsembleSampler(
                    nwalkers=self.config['NWALKERS'],
                    ndim=ndim,
                    log_prob_fn=lnprob,
                    args=args,
                    threads=self.config['THREADS'])

            # If we use PT sampling, we need a further dimension of
            # start parameters for the different temperatures
            if self.config['SAMPLER'] == 'PARALLELTEMPERING':
                p0 = np.array([p0 for m in range(self.config['NTEMPS'])])

            # Run the MCMC for the burn-in
            self.sampler.run_mcmc(p0,
                                  self.config['NBURN'],
                                  thin=self.config['NTHIN'])

            # Reset after burn-in and run the full chain
            if self.config['SAMPLER'] == 'PARALLELTEMPERING':
                p0 = self.sampler.chain[:, :, -1, :]
            else:
                p0 = self.sampler.chain[:, -1, :].squeeze()
            self.sampler.reset()
            self.sampler.run_mcmc(p0,
                                  self.config['NSTEPS'],
                                  thin=self.config['NTHIN'])

        # Create output directory
        if not os.path.exists(self.config['PROJECT_NAME']):
            os.makedirs(self.config['PROJECT_NAME'])

        # Save output and configuration
        with open(os.path.join(self.config['PROJECT_NAME'], 'config.yaml'),
                  'w') as f:
            yaml.dump(self.config, f, default_flow_style=False)

        with open(os.path.join(self.config['PROJECT_NAME'], 'ml_params.yaml'),
                  'w') as f:
            yaml.dump(self.params_all, f, default_flow_style=False)

        self.sampled = True

        # Collect result parameters
        ###########################
        resultdict = {}

        # Chain
        #######
        resultdict['chain'] = self.sampler.chain

        # Conservative evidence
        #######################
        if (self.config['COMPUTECOVARIANCE'] &
            (self.config['SAMPLER'] == 'ENSEMBLESAMPLER')):

            # Estimate autocorrelation
            self.acors = self.sampler.acor.astype(int)
            resultdict['autocorrs'] = self.acors

            # Estimate covariance
            self.cov_samples = np.zeros(
                (len(self.params_vary), len(self.params_vary)))
            resultdict['cov_samples'] = self.cov_samples

            for i in range(len(self.params_vary)):
                for j in range(len(self.params_vary)):
                    stepsize = np.max([self.acors[i], self.acors[j]])
                    csample_i = self.sampler.chain[:, ::stepsize, i].flatten()
                    csample_j = self.sampler.chain[:, ::stepsize, j].flatten()
                    self.cov_samples[i, j] = np.mean(
                        (csample_i - csample_i.mean()) *
                        (csample_j - csample_j.mean()))

            # Compute conservative evidence without prior factor
            self.conservative_evidence = np.exp(self.ln_ml) / np.sqrt(
                np.linalg.det(self.cov_samples))
            resultdict['conservative_evidence'] = self.conservative_evidence

        # Evidence from thermodynamic integration from the PT sampler
        #############################################################
        if self.config['SAMPLER'].lower() == 'paralleltempering':
            self.logz, self.dlogz = self.sampler.log_evidence_estimate(
                fburnin=0.)

            resultdict['log_thd_evidence'] = self.logz
            resultdict['dlog_thd_evidence'] = self.dlogz

        # Posterior mean
        # The posterior mean values of the parameters
        ###############
        post_mean_vals = np.mean(self.sampler.flatchain, axis=0)
        resultdict['post_mean_vals'] = post_mean_vals

        # Likelihood
        # The value of the posterior for the best-fit results
        ############
        logL = self.sampler.log_prob_fn(post_mean_vals)
        resultdict['logL'] = logL

        # Save as .npz
        np.savez(os.path.join(self.config['PROJECT_NAME'], 'output.npz'),
                 **resultdict)
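The covariance estimate above thins by the autocorrelation time; the same idea applied to the whole chain, as a sketch (assuming an emcee 2.x-style sampler exposing .chain and .acor):

import numpy as np

def thinned_flatchain(sampler):
    # Keep one sample per autocorrelation time so the draws are roughly independent.
    step = max(1, int(np.max(sampler.acor)))
    return sampler.chain[:, ::step, :].reshape(-1, sampler.chain.shape[-1])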
Example #28
0
def rerunPosteriorPredictive():
    ''' Rerun the posterior predictive distribution. This can be used to e.g. increase the resolution
        spatially or in terms of the age of stellar populations, or vary some parameter systematically.
        The mandatory argument func is a user-provided function that specifies how a model with known
        parameters should be modified and (re) run.'''
    pool = MPIPool(comm=comm, loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    output = readoutput.Experiment(chainDirRel+'-ppd') # read in the posterior predictive distribution.
    output.read(paramsOnly=True,keepStars=False)
    emcee_params = []
    print "output.models: ",len(output.models)
    # For each model, take the parameters we have read in and construct the corresponding emcee parameters.
    for model in output.models:
        #eta, epsff, fg0, muNorm, muMhScaling, fixedQ, accScaleLength, fcool, Mh0, fscatter, x0, x1, x2, x3, obsScale, conRF, muHgScaling = emceeParams
        eta = model.p['eta']
        epsff = model.p['epsff']
        fg0 = model.p['fg0']
        muNorm = model.p['muNorm']
        muMhScaling = model.p['muMhScaling']
        fixedQ = model.p['fixedQ']
        accScaleLength = model.p['accScaleLength']
        fcool = model.p['fcool']
        Mh0 = model.p['Mh0']
        fscatter = model.p['fscatter']
        x0 = model.p['x0']
        x1 = model.p['x1']
        x2 = model.p['x2']
        x3 = model.p['x3']
        obsScale = 1.0 # doesn't matter.. see below
        conRF = model.p['concentrationRandomFactor']
        muHgScaling = model.p['muHgScaling']
        # We have everything except obsScale, but actually that doesn't matter,
        # since it only affects the model in post-processing, i.e. in comparing to the data,
        # not the running of the model itself. So..... we good!
        theList = [ eta, epsff, fg0, muNorm, muMhScaling, fixedQ, accScaleLength, fcool, Mh0, fscatter, x0, x1, x2, x3, obsScale, conRF, muHgScaling]
        try:
            assert eta>0 and epsff>0 and fg0>0 and fg0<=1 and fixedQ>0 and muNorm>=0 and fcool>=0 and fcool<=1 and Mh0>0
        except AssertionError:
            print 'Unexpected ppd params: ',theList
        emcee_params.append( copy.deepcopy(theList) )
    # OK, from here on out, we just need to emulate parts of the run() function to trick emcee into running a single iteration of the algorithm with this IC.


    ndim =  17

    restart = {}
    restart['currentPosition'] = emcee_params
    restart['chain'] = None
    restart['state'] = None
    restart['prob'] = None
    restart['iterationCounter'] = 0
    restart['mcmcRunCounter'] = 0



    nwalkers = len(emcee_params) # Need one walker per sample from posterior predictive distribution
    print "Starting up the ensemble sampler!"
    sampler = emcee.EnsembleSampler(nwalkers, ndim, fakeProb, pool=pool)
    #pos, prob, state = sampler.run_mcmc(restart['currentPosition'], N, rstate0=restart['state'], lnprob0=restart['prob'])
    print "Take a step with the ensemble sampler"

    # Take a single evaluation pass with the ensemble sampler.
    print np.shape(restart['currentPosition']), np.shape(np.random.uniform(0,1,nwalkers))
    # _get_lnprob is a private emcee 2.x method; calling it farms one round of
    # lnprob evaluations (i.e. model runs) out to the MPI pool without moving the walkers.
    sampler._get_lnprob(pos = restart['currentPosition'])

    #result = sampler.sample(restart['currentPosition'], iterations=1, lnprob0=None, rstate0=None)

    #pos, prob, state = result
    print "Close the pool"

    pool.close()
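fakeProb itself is defined elsewhere; since the goal here is only to make emcee evaluate each posterior-predictive sample once, a stub along these lines would do (hypothetical; run_the_model stands in for whatever actually runs the model):

def fakeProb(emceeParams):
    # Run the model for these parameters as a side effect...
    run_the_model(emceeParams)  # hypothetical stand-in for the project's model call
    # ...and return a constant so emcee has a finite "probability" to hand back.
    return 0.0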
Example #29
0
File: mcmc.py Project: Jonasori/astrocail
def run_emcee_simple(run_name,
                     nsteps,
                     nwalkers,
                     lnprob,
                     to_vary,
                     burn_in=0,
                     pool=False,
                     resume=False):
    """A new version of run_emcee.

    Args:
        run_name (str):
        nsteps (int):
        nwalkers (int):
        lnprob (callable): the log-posterior function handed to emcee
                           (equivalent to lnpostfn in the emcee docs).
        to_vary (list of lists): parameter specifications; see run_emcee below.
        burn_in (int): number of steps to trim from the front of the chain.
        pool (bool): whether to parallelize with an MPIPool.
        resume (bool): whether to resume a previous run of this name.
    """
    # Set up parallelization
    if pool:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)

    start = time.time()
    # initiate sampler chain
    ndim = len(to_vary)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)

    # Name the chain we're looking for
    chain_filename = run_name + '/' + run_name + '_chain.csv'

    # Note: when resume=False, any existing run directory of this name is wiped below.
    if resume:
        chain = pd.read_csv(chain_filename)
        start_step = chain.index[-1] // nwalkers
        print('Resuming {} at step {}'.format(run_name, start_step))

        with open(chain_filename, 'a') as f:
            f.write('\n')
        pos = np.array(chain.iloc[-nwalkers:, :-1])

    else:
        sp.call('rm -rf ' + run_name, shell=True)
        sp.call(['mkdir', run_name])
        print('Starting {}'.format(run_name))
        start_step = 0

        with open(chain_filename, 'w') as f:
            f.write(','.join([param[0]
                              for param in to_vary] + ['lnprob']) + '\n')
        pos = [[param[1] + param[2] * np.random.randn() for param in to_vary]
               for i in range(nwalkers)]

    # Run the sampler and then query it
    run = sampler.sample(pos, iterations=nsteps, storechain=False)
    """Note that sampler.sample returns:
            pos: list of the walkers' current positions in an object of shape
                    [nwalkers, ndim]
            lnprob: The list of log posterior probabilities for the walkers at
                    positions given by pos; the shape of this object is
                    (nwalkers,)
            rstate: The current state of the random number generator.
            blobs (optional): The metadata "blobs" associated with the current
                              position. The value is only returned if
                              lnpostfn returns blobs too.
            """
    for i, result in enumerate(run):
        print "Step {}".format(start_step + i)
        # sampler.sample yields (pos, lnprob, rstate); write each walker's
        # position and lnprob as one CSV row.
        pos, lnprobs, rstate = result
        with open(run_name + '/' + run_name + '_chain.csv', 'a') as f:
            for k in range(nwalkers):
                f.write(','.join(map(str, np.append(pos[k], lnprobs[k]))) +
                        '\n')
    print('{} samples in {:.1f} seconds'.format(nsteps * nwalkers,
                                                time.time() - start))

    if pool:
        pool.close()

    return MCMCrun(run_name, nwalkers, burn_in=burn_in)
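A run written this way can be summarized straight from the CSV; a short sketch, assuming pandas and the run_name/nwalkers/burn_in used above:

import pandas as pd

chain = pd.read_csv(run_name + '/' + run_name + '_chain.csv')
posterior = chain.iloc[burn_in * nwalkers:]  # drop the burn-in rows
print(posterior.median())                    # per-parameter medians (plus lnprob)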
Example #30
0
File: mcmc.py Project: Jonasori/astrocail
def run_emcee(run_name, nsteps, nwalkers, lnprob, to_vary):
    """The heart of it.

    Args:
        run_name (str): run name, used for the output directory and file names
        nsteps (int):
        nwalkers (int):
        lnprob (something):
        to_vary (list of lists): list of [param name,
                                          initial_position_center,
                                          initial_position_sigma,
                                          (prior low bound, prior high bound)]
                                for each parameter.
                                The second two values set the position & size
                                for a random Gaussian ball of initial positions
    """
    # Set up the MPI pool for parallelization
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # initiate sampler chain
    ndim = len(to_vary)
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    args=(run_name, to_vary),
                                    pool=pool)

    # Name the chain we're looking for
    chain_filename = run_name + '/' + run_name + '_chain.csv'
    # Try to resume an existing run of this name.
    try:
        # Read in an existing chain
        chain = pd.read_csv(chain_filename)
        start_step = chain.index[-1] // nwalkers
        print 'Resuming {} at step {}'.format(run_name, start_step)
        pos = np.array(chain.iloc[-nwalkers:, :-1])

        # If we're adding new steps, just put in a new line and get started.
        with open(chain_filename, 'a') as f:
            f.write('\n')
        end = np.array(chain.iloc[-nwalkers:, :])
        print 'Mean lnprob at restart: {}'.format(np.mean(end[:, -1]))

    # If there's no pre-existing run, set one up.
    except IOError:

        sp.call(['mkdir', run_name])
        sp.call(['mkdir', run_name + '/model_files'])

        print 'Starting {}'.format(run_name)

        start_step = 0
        # Start a new file for the chain
        # Set up a header line
        with open(chain_filename, 'w') as f:
            param_names = [param[0] for param in to_vary]
            np.savetxt(f, (np.append(param_names, 'lnprob'), ),
                       delimiter=',',
                       fmt='%s')

        # Set up initial positions: a Gaussian ball around each parameter's
        # starting value, scaled by its sigma (np.random.randn draws from a
        # standard normal, not uniform [0,1]).
        pos = [[param[1] + param[2] * np.random.randn() for param in to_vary]
               for i in range(nwalkers)]

    # Initialize the lnprob list
    lnprobs = []
    first_sample = sampler.sample(pos, iterations=nsteps, storechain=False)

    for i, result in enumerate(first_sample):
        # enumerate yields (counter, element) pairs, so i counts the steps
        old_lnprobs = np.copy(lnprobs)
        pos, lnprobs, blob = result
        print "Step {}: {}".format(start_step + i, np.mean(lnprobs))
        # print('Acceptances: {}'.format([lnprob for lnprob in lnprobs if lnprob not in old_lnprobs]))
        # print('')
        # print(lnprobs)
        # print(np.mean(pos))

        # Log out the new positions
        with open(chain_filename, 'a') as f:
            new_step = [np.append(pos[k], lnprobs[k]) for k in range(nwalkers)]
            np.savetxt(f, new_step, delimiter=',')

    pool.close()
Example #31
0
def LensModelMCMC(data,lens,source,
                  xmax=30.,highresbox=[-3.,3.,-3.,3.],emitres=None,fieldres=None,
                  sourcedatamap=None, scaleamp=False, shiftphase=False,
                  modelcal=True,cosmo=Planck15,
                  nwalkers=1e3,nburn=1e3,nstep=1e3,pool=None,nthreads=1,mpirun=False):
      """
      Wrapper function which takes what the user wants and turns it into the
      format needed for the actual MCMC lens modeling.
      
      Inputs:
      data:
            One or more visdata objects; if multiple datasets are being
            fit to, should be a list of visdata objects.
      lens:
            Any of the currently implemented lens objects or ExternalShear.
      source:
            One or more of the currently implemented source objects; if more than
            one source to be fit, should be a list of multiple sources.
      xmax:
            (Half-)Grid size, in arcseconds; the grid will span +/-xmax in x&y
      highresbox:
            The region to model at higher resolution (to account for high-magnification
            and differential lensing effects), as [xmin, xmax, ymin, ymax]. 
            Note the sign convention is: +x = West, +y = North, like the lens
            positions.
      sourcedatamap:
            A list of length the number of datasets which tells which source(s)
            are to be fit to which dataset(s). Eg, if two sources are to be fit
            to two datasets jointly, should be [[0,1],[0,1]]. If we have four
            sources and three datasets, could be [[0,1],[0,1],[2,3]] to say that the
            first two sources should both be fit to the first two datasets, while the
            second two should be fit to the third dataset. If None, will assume
            all sources should be fit to all datasets.
      scaleamp:
            A list of length the number of datasets which tells whether a flux
            rescaling is allowed and which dataset the scaling should be relative to.
            False indicates no scaling should be done, while True indicates that
            amplitude scaling should be allowed.
      shiftphase:
            Similar to scaleamp above, but allowing for positional/astrometric offsets.
      modelcal:
            Whether or not to perform the pseudo-selfcal procedure of H+13
      cosmo:
            The cosmology to use, as an astropy object, e.g.,
            from astropy.cosmology import WMAP9; cosmo=WMAP9
            Default is Planck15.
      nwalkers:
            Number of walkers to use in the mcmc process; see dan.iel.fm/emcee/current
            for more details.
      nburn:
            Number of burn-in steps to take with the chain.
      nstep:
            Number of actual steps to take in the mcmc chains after the burn-in
      nthreads:
            Number of threads (read: cores) to use during the fitting, default 1.
      mpirun:
            Whether to parallelize using MPI instead of multiprocessing. If True,
            nthreads has no effect, and your script should be run with, eg,
            mpirun -np 16 python lensmodel.py.

      Returns:
      mcmcresult:
            A nested dict containing the chains requested. Will have all the MCMC
            chain results, plus metadata about the run (initial params, data used,
            etc.). Formatting still a work in progress (esp. for modelcal phases).
      chains:
            The raw chain data, for testing.
      blobs:
            Everything else returned by the likelihood function; will have
            magnifications and any modelcal phase offsets at each step; eventually
            will remove this once get everything packaged up for mcmcresult nicely.
      colnames:
            Basically all the keys to the mcmcresult dict; eventually won't need
            to return this once mcmcresult is packaged up nicely.
      """

      if pool: nthreads = 1
      elif mpirun:
            nthreads = 1
            from emcee.utils import MPIPool
            pool = MPIPool(debug=False,loadbalance=True)
            if not pool.is_master():
                  pool.wait()
                  sys.exit(0)
      else: pool = None

      # Making these lists just makes later stuff easier since we now know the dtype
      lens = list(np.array([lens]).flatten())
      source = list(np.array([source]).flatten()) # Ensure source(s) are a list
      data = list(np.array([data]).flatten())     # Same for dataset(s)
      scaleamp = list(np.array([scaleamp]).flatten())
      shiftphase = list(np.array([shiftphase]).flatten())
      modelcal = list(np.array([modelcal]).flatten())
      if len(scaleamp)==1 and len(scaleamp)<len(data): scaleamp *= len(data)
      if len(shiftphase)==1 and len(shiftphase)<len(data): shiftphase *= len(data)
      if len(modelcal)==1 and len(modelcal)<len(data): modelcal *= len(data)
      if sourcedatamap is None: sourcedatamap = [None]*len(data)

      # emcee isn't very flexible in terms of how it gets initialized; start by
      # assembling the user-provided info into a form it likes
      ndim, p0, colnames = 0, [], []
      # Lens(es) first
      for i,ilens in enumerate(lens):
            if ilens.__class__.__name__=='SIELens':
                  for key in ['x','y','M','e','PA']:
                        if not vars(ilens)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(ilens)[key]['value'])
                              colnames.append(key+'L'+str(i))
            elif ilens.__class__.__name__=='ExternalShear':
                  for key in ['shear','shearangle']:
                        if not vars(ilens)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(ilens)[key]['value'])
                              colnames.append(key)
      # Then source(s)
      for i,src in enumerate(source):
            if src.__class__.__name__=='GaussSource':
                  for key in ['xoff','yoff','flux','width']:
                        if not vars(src)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(src)[key]['value'])
                              colnames.append(key+'S'+str(i))
            elif src.__class__.__name__=='SersicSource':
                  for key in ['xoff','yoff','flux','majax','index','axisratio','PA']:
                        if not vars(src)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(src)[key]['value'])
                              colnames.append(key+'S'+str(i))
            elif src.__class__.__name__=='PointSource':
                  for key in ['xoff','yoff','flux']:
                        if not vars(src)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(src)[key]['value'])
                              colnames.append(key+'S'+str(i))
      # Then flux rescaling; only matters if >1 dataset
      for i,t in enumerate(scaleamp[1:]):
            if t:
                  ndim += 1
                  p0.append(1.) # Assume 1.0 scale factor to start
                  colnames.append('ampscale_dset'+str(i+1))
      # Then phase/astrometric shift; each has two vals for a shift in x&y
      for i,t in enumerate(shiftphase[1:]):
            if t:
                  ndim += 2
                  p0.append(0.); p0.append(0.) # Assume zero initial offset
                  colnames.append('astromshift_x_dset'+str(i+1))
                  colnames.append('astromshift_y_dset'+str(i+1))

      # Get any model-cal parameters set up. The process involves some expensive
      # matrix inversions, but these only need to be done once, so we'll do them
      # now and pass the results as arguments to the likelihood function. See docs
      # in calc_likelihood.model_cal for more info.
      for i,dset in enumerate(data):
            if modelcal[i]:
                  uniqant = np.unique(np.asarray([dset.ant1,dset.ant2]).flatten())
                  dPhi_dphi = np.zeros((uniqant.size-1,dset.u.size))
                  for j in range(1,uniqant.size):
                        dPhi_dphi[j-1,:]=(dset.ant1==uniqant[j])-1*(dset.ant2==uniqant[j])
                  C = scipy.sparse.diags((dset.sigma/dset.amp)**-2.,0)
                  F = np.dot(dPhi_dphi,C*dPhi_dphi.T)
                  Finv = np.linalg.inv(F)
                  FdPC = np.dot(-Finv,dPhi_dphi*C)
                  modelcal[i] = [dPhi_dphi,FdPC]


      # Create our lensing grid coordinates now, since those shouldn't be
      # recalculated with every call to the likelihood function
      xmap,ymap,xemit,yemit,indices = GenerateLensingGrid(data,xmax,highresbox,
                                                fieldres,emitres)

      # Calculate the uv coordinates we'll interpolate onto; only need to calculate
      # this once, so do it here.
      kmax = 0.5/((xmap[0,1]-xmap[0,0])*arcsec2rad)
      ug = np.linspace(-kmax,kmax,xmap.shape[0])

      # Calculate some distances; we only need to calculate these once.
      # This assumes multiple sources are all at same z; should be this
      # way anyway or else we'd have to deal with multiple lensing planes
      if cosmo is None: cosmo = Planck15
      Dd = cosmo.angular_diameter_distance(lens[0].z).value
      Ds = cosmo.angular_diameter_distance(source[0].z).value
      Dds= cosmo.angular_diameter_distance_z1z2(lens[0].z,source[0].z).value

      p0 = np.array(p0)
      # Create a ball of starting points for the walkers, gaussian ball of 
      # 10% width; if initial value is 0 (eg, astrometric shift), give a small sigma
      # for angles, generally need more spread than 10% to sample well, do 30% for those cases [~0.5% >180deg for p0=100deg]
      isangle = np.array([0.30 if 'PA' in s or 'angle' in s else 0.1 for s in colnames])
      initials = emcee.utils.sample_ball(p0,np.asarray([isangle[i]*x if x else 0.05 for i,x in enumerate(p0)]),int(nwalkers))

      # All the lens objects know if their parameters have been altered since the last time
      # we calculated the deflections. If all the lens pars are fixed, we only need to do the
      # deflections once. This step ensures that the lens object we create the sampler with
      # has these initial deflections.
      for i,ilens in enumerate(lens):
            if ilens.__class__.__name__ == 'SIELens': ilens.deflect(xemit,yemit,Dd,Ds,Dds)
            elif ilens.__class__.__name__ == 'ExternalShear': ilens.deflect(xemit,yemit,lens[0])

      # Create the sampler object; uses calc_likelihood function defined elsewhere
      lenssampler = emcee.EnsembleSampler(nwalkers,ndim,calc_vis_lnlike,
            args = [data,lens,source,Dd,Ds,Dds,ug,
                    xmap,ymap,xemit,yemit,indices,
                    sourcedatamap,scaleamp,shiftphase,modelcal],
            threads=nthreads,pool=pool)

      
      # Run burn-in phase
      print "Running burn-in... "
      #pos,prob,rstate,mus = lenssampler.run_mcmc(initials,nburn,storechain=False)
      for i,result in enumerate(lenssampler.sample(initials,iterations=nburn,storechain=False)):
            if i%20==0: print 'Burn-in step ',i,'/',nburn
            pos,prob,rstate,blob = result
      
      
      lenssampler.reset()
      
      # Run actual chains
      print "Done. Running chains... "
      for i,result in enumerate(lenssampler.sample(pos,rstate0=rstate,iterations=nstep,storechain=True)):
            if i%20==0: print 'Chain step ',i,'/',nstep
      
      #lenssampler.run_mcmc(pos,nstep,rstate0=rstate)
      if mpirun: pool.close()
      print "Mean acceptance fraction: ",np.mean(lenssampler.acceptance_fraction)

      #return lenssampler.flatchain,lenssampler.blobs,colnames
      
      # Package up the magnifications and modelcal phases; disregards nan points (where
      # we failed the prior, usu. because a periodic angle wrapped).
      blobs = lenssampler.blobs
      mus = np.asarray([[a[0] for a in l] for l in blobs]).flatten(order='F')
      bad = np.where(np.asarray([np.any(np.isnan(m)) for m in mus],dtype=bool))[0]
      for k in bad: mus[k] = np.array([np.nan]*len(source))
      mus = np.asarray(list(mus),dtype=float).reshape((-1,len(source)),order='F') # list round-trip forces a uniform float array
      bad = np.isnan(mus)[:,0]
      #bad = bad.reshape((-1,len(source)),order='F')[:,0]
      #mus = np.atleast_2d(np.asarray([mus[i] if not bad[i] else [np.nan]*len(source) for i in range(mus.size)])).T
      colnames.extend(['mu{0:.0f}'.format(i) for i in range(len(source))])

      
      # Assemble the output. Want to return something that contains both the MCMC chains
      # themselves, but also metadata about the run.
      mcmcresult = {}

      # keep track of git revision, for reproducibility's sake
      # if run under mpi, this will spew some scaremongering warning text,
      # but it's fine. use --mca mpi_warn_on_fork 0 in the mpirun statement to disable
      try: 
            import subprocess
            gitd = os.path.abspath(os.path.join(os.path.dirname(__file__),os.pardir))
            mcmcresult['githash'] = subprocess.check_output('git --git-dir={0:s} --work-tree={1:s} '\
                  'rev-parse HEAD'.format(gitd+'/.git',gitd),shell=True).rstrip()
      except:
            mcmcresult['githash'] = 'No repo found'
      
      
      mcmcresult['datasets'] = [dset.filename for dset in data] # Data files used

      mcmcresult['lens_p0'] = lens      # Initial params for lens,src(s),shear; also tells if fixed, priors, etc.
      mcmcresult['source_p0'] = source
      
      if sourcedatamap: mcmcresult['sourcedatamap'] = sourcedatamap
      mcmcresult['xmax'] = xmax
      mcmcresult['highresbox'] = highresbox
      mcmcresult['fieldres'] = fieldres
      mcmcresult['emitres'] = emitres
      if any(scaleamp): mcmcresult['scaleamp'] = scaleamp
      if any(shiftphase): mcmcresult['shiftphase'] = shiftphase

      mcmcresult['chains'] = np.core.records.fromarrays(np.hstack((lenssampler.flatchain[~bad],mus[~bad])).T,names=colnames)
      mcmcresult['lnlike'] = lenssampler.flatlnprobability[~bad]
      
      # Keep track of best-fit params, derived from chains.
      c = copy.deepcopy(mcmcresult['chains'])
      mcmcresult['best-fit'] = {}
      pbest = []
      # Calculate the best fit values as medians of each param
      lens,source = copy.deepcopy(mcmcresult['lens_p0']), copy.deepcopy(mcmcresult['source_p0'])
      for i,ilens in enumerate(lens):
            if ilens.__class__.__name__ == 'SIELens':
                  ilens.__dict__['_altered'] = True
                  for key in ['x','y','M','e','PA']:
                        if not vars(ilens)[key]['fixed']:
                              ilens.__dict__[key]['value'] = np.median(c[key+'L'+str(i)])
                              pbest.append(np.median(c[key+'L'+str(i)]))
            elif ilens.__class__.__name__ == 'ExternalShear':
                  for key in ['shear','shearangle']:
                        if not vars(ilens)[key]['fixed']:
                              ilens.__dict__[key]['value'] = np.median(c[key])
                              pbest.append(np.median(c[key]))
      
      mcmcresult['best-fit']['lens'] = lens

      # now do the source(s)
      for i,src in enumerate(source): # Source is a list of source objects
            if src.__class__.__name__ == 'GaussSource':
                  for key in ['xoff','yoff','flux','width']:
                        if not vars(src)[key]['fixed']:
                              src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)])
                              pbest.append(np.median(c[key+'S'+str(i)]))
            elif src.__class__.__name__ == 'SersicSource':
                  for key in ['xoff','yoff','flux','majax','index','axisratio','PA']:
                        if not vars(src)[key]['fixed']:
                              src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)])
                              pbest.append(np.median(c[key+'S'+str(i)]))
            elif src.__class__.__name__ == 'PointSource':
                  for key in ['xoff','yoff','flux']:
                        if not vars(src)[key]['fixed']:
                              src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)])
                              pbest.append(np.median(c[key+'S'+str(i)]))

      mcmcresult['best-fit']['source'] = source
      mcmcresult['best-fit']['magnification'] = np.median(mus[~bad],axis=0)

      # Any amplitude scaling or astrometric shifts
      bfscaleamp = np.ones(len(data))
      if 'scaleamp' in mcmcresult.keys():
            for i,t in enumerate(mcmcresult['scaleamp']): # only matters if >1 datasets
                  if i==0: pass
                  elif t: 
                        bfscaleamp[i] = np.median(c['ampscale_dset'+str(i)])
                        pbest.append(np.median(c['ampscale_dset'+str(i)]))
                  else: pass
      mcmcresult['best-fit']['scaleamp'] = bfscaleamp
      
      bfshiftphase = np.zeros((len(data),2))
      if 'shiftphase' in mcmcresult.keys():
            for i,t in enumerate(mcmcresult['shiftphase']):
                  if i==0: pass # only matters if >1 datasets
                  elif t:
                        bfshiftphase[i][0] = np.median(c['astromshift_x_dset'+str(i)])
                        bfshiftphase[i][1] = np.median(c['astromshift_y_dset'+str(i)])
                        pbest.append(np.median(c['astromshift_x_dset'+str(i)]))
                        pbest.append(np.median(c['astromshift_y_dset'+str(i)]))
                  else: pass # no shifting
      mcmcresult['best-fit']['shiftphase'] = bfshiftphase
      
      mcmcresult['best-fit']['lnlike'] = calc_vis_lnlike(pbest,data,mcmcresult['best-fit']['lens'],
            mcmcresult['best-fit']['source'],
            Dd,Ds,Dds,ug,xmap,ymap,xemit,yemit,indices,
            sourcedatamap,scaleamp,shiftphase,modelcal)[0]
      
      # Calculate the deviance information criterion, using the Spiegelhalter+02 definition (cf Gelman+04)
      mcmcresult['best-fit']['DIC'] = -4*np.mean(mcmcresult['lnlike']) + 2*mcmcresult['best-fit']['lnlike']
      
      # If we did any modelcal stuff, keep the antenna phase offsets here
      if any(modelcal): 
            mcmcresult['modelcal'] = [True if j else False for j in modelcal]
            dp = np.squeeze(np.asarray([[a[1] for a in l if ~np.any(np.isnan(a[0]))] for l in blobs]))
            a = [x for l in dp for x in l] # flatten, working around any NaN entries
            dphases = np.squeeze(np.reshape(a,(nwalkers*nstep-bad.sum(),len(data),-1),order='F'))
            if len(data) > 1: 
                  for i in range(len(data)):
                        if modelcal[i]: mcmcresult['calphases_dset'+str(i)] = np.vstack(dphases[:,i])
            else: 
                  if any(modelcal): mcmcresult['calphases_dset0'] = dphases
      
      return mcmcresult
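The returned record array makes per-parameter summaries one-liners; a sketch, assuming a finished mcmcresult from a call like the one above:

import numpy as np

chains = mcmcresult['chains']  # record array keyed by the colnames above
for name in chains.dtype.names:
    lo, med, hi = np.percentile(chains[name], [16, 50, 84])
    print('%s: %.4g +%.4g/-%.4g' % (name, med, hi - med, med - lo))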
Example #32
0
File: d3sb_main.py Project: tripathi/D3SB
def emceeinit(w0, inclin, nbins, nthreads, nsteps, savename, data, dbins, MPI=0, allbinseq=0):
    """Emcee driver function"""

#HARDCODED - Warning: the inclination (and the bin edges below) are shared via globals.
    global incl
    incl = inclin

    #Initialize the MPI-based pool used for parallelization.
    if MPI:
        print MPI
        pool = MPIPool()
        if not pool.is_master():
                # Wait for instructions from the master process.
                pool.wait()
                sys.exit(0)

    #Setup
    ndim = nbins #Removing inclination as a variable.
    nwalkers = 4*ndim
    p0 = np.zeros((nwalkers, ndim))
    print 'Nbins is now', nbins

    #Needed for fixing unresolved starting balls
    global b1
    global rin
    rin, b1 = dbins


    #Initialize walkers
    radii = np.arange(nbins)
    sizecorr = 1 #Currently Hardcoded; Scaling factor to treat different radii differently
    scale = 0.2 #Currently hardcoded; Fraction of parameter by which it can vary
    for walker in range(nwalkers):
        for rs in radii:
            rand = np.random.uniform(-(w0[rs]*scale*sizecorr), (w0[rs]*scale*sizecorr))
            if (b1[rs] <= res) and (allbinseq <1) :
                rand = np.random.uniform(0, 2.*w0[rs])
            p0[walker][rs] = w0[rs] + rand #Make it rs+2, if a & l vary
        # #Initialize a & l
#        p0[walker][0] = np.random.uniform(.0001, .5) #When adding back in, make prev statement rs+1
#        while True:
#            p0[walker][1] = np.random.gamma(2., 2.)*np.amax(dbins[1:])/20. + np.amin(np.diff(dbins[1:]))
#            if (p0[walker][1]>=np.amin(dbins[1:]) or p0[walker][1]<=np.amax(dbins[1:])):
#                break

        #THIS IS A PROBLEM FOR THE 1st BIN WITH rin. Also the normalization
#        p0[walker][0] = incl+np.random.uniform(0.85*incl,1.15*incl) #When adding back in, make prev statement rs+1


    #Write emcee perturbation params to log file
    f = open('emceerand.log', 'a')
    FORMAT = '%m-%d-%Y-%H%M'
    f.write(savename+', '+str(nbins)+', '+str(nsteps)+', '+str(scale)+', '+str(sizecorr)+', '+datetime.now().strftime(FORMAT))

    #Model initialization
    u, v, dreal, dimag, dwgt = data
    udeproj = u * np.cos(incl) #Deproject
    rho  =  1e3*np.sqrt(udeproj**2+v**2)
    indices = np.arange(b1.size)
    global gpbins
    gpbins = dbins
#rin, indices
    global rbin
    rbin = np.concatenate([np.array([rin]), b1])
    jarg = np.outer(2.*np.pi*rbin, rho/206264.806427)
    global jinc
    jinc = sc.j1(jarg)/jarg
#    pool = mp.Pool(nthreads-1)

    #Initialize sampler using MPI if necessary
    if MPI:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)
    else:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=nthreads)
    print 'Nbins, Ndim', nbins, ndim
    print 'Dbins', dbins

    #Run emcee, and time it
    tic = time.time()
    print "I'm line 110, before the threads"
    sampler.run_mcmc(p0, nsteps)
    print "I'm line 112, after the threads"
    toc = time.time()

    #Display and record run information
    print 'Elapsed emcee run time:', ((toc-tic)/60.)
    print 'Acceptance:', sampler.acceptance_fraction
    f.write(' ,'+str(round((toc-tic)/60., 2))+', '+str(np.round(np.mean(sampler.acceptance_fraction),2))+'\n')
    f.close()


    #Save the results in a binary file
    np.save('mc_'+savename,sampler.chain)

    if MPI:
        #Close the processes.
        pool.close()

    print 'Done with this emcee run'

    #Allow user interaction at end, if not using MPI
#    if not MPI:
#        pdb.set_trace()

    return sampler.chain
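The chain saved with np.save can be reloaded later to get the per-bin surface brightness posterior; a sketch, assuming the savename used above:

import numpy as np

chain = np.load('mc_' + savename + '.npy')  # shape (nwalkers, nsteps, nbins)
flat = chain[:, chain.shape[1] // 2:, :].reshape(-1, chain.shape[-1])  # crude burn-in cut
print('median brightness per bin: %s' % np.median(flat, axis=0))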
Example #33
0
def main(runmpi=True, nw=100, th=6, bi=10, fr=10):

    if runmpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool = None

    #ldfileloc = '/Users/tom/Projects/koi2133/code/'
    ldfileloc = '/nobackup/tsbarcl2/Projects/koi2133/code/'
    #codedir = '/Users/tom/Projects/koi2133/code'
    codedir = '/nobackup/tsbarcl2/Projects/koi2133/code'

    koi = 2133
    cadence = 1625.3
    teff = 4550.
    teff_unc = 75.
    feh = 0.11
    feh_unc = 0.07
    logg = 2.943
    logg_unc = 0.007
    rho = 0.0073
    rho_unc = 0.0001
    nplanets = 1

    dil = 0.0

    period = 6.24672336
    impact = 0.7
    T0 = 136.383880
    rprs = 0.02067
    alb = 30.
    occ = 40.
    ell = 30.
    rvamp = 79.0
    ecosw = 0.048
    esinw = -0.045
    planet_guess = np.array(
        [T0, period, impact, rprs, ecosw, esinw, rvamp, occ, ell, alb])

    rvtime, rvval, rverr = get_rv()
    time, flux, ferr = get_lc()

    rho_prior = True
    ldp_prior = False

    nwalkers = nw
    threads = th
    burnin = bi
    fullrun = fr
    thin = 1

    n_ldparams = 2

    toffset_lc = 0
    toffset_rv = 0

    zpt_0 = 1.E-10

    M = tmod.transitemcee_koi2133(nplanets,
                                  cadence,
                                  ldfileloc=ldfileloc,
                                  codedir=codedir)

    M.get_stellar(teff, logg, feh, n_ldparams, ldp_prior=ldp_prior)

    M.already_open(time,
                   flux,
                   ferr,
                   rvtime,
                   rvval,
                   rverr,
                   timeoffset=toffset_lc,
                   rvtimeoffset=toffset_rv,
                   normalize=False)

    rho_vals = np.array([rho, rho_unc])

    M.get_rho(rho_vals, rho_prior)
    M.get_zpt(zpt_0)

    noise_model = [3.0E-4, 3.7E-2, 2.E-4, 3.]

    if dil is not None:
        M.get_sol(*planet_guess, dil=dil, noise_model=noise_model)
    else:
        M.get_sol(*planet_guess, noise_model=noise_model)

    outfile = 'koi{0}_np{1}_prior{2}_dil{3}GP.hdf5'.format(
        koi, nplanets, rho_prior, dil)

    p0 = M.get_guess(nwalkers)

    #dirty hack!! drop columns 7 and 8 of the initial guess
    qwe = np.r_[np.arange(0, 7), np.arange(9, 21)]
    p0 = p0[:, qwe]

    l_var = np.shape(p0)[1]

    N = len([indval for indval in xrange(fullrun) if indval % thin == 0])
    with h5py.File(outfile, u"w") as f:
        f.create_dataset("time", data=M.time)
        f.create_dataset("flux", data=M.flux)
        f.create_dataset("err", data=M.err)
        f.create_dataset("rvtime", data=M.rvtime)
        f.create_dataset("rvval", data=M.rvval)
        f.create_dataset("rverr", data=M.rverr)
        f.create_dataset("itime", data=M._itime)
        f.create_dataset("ntt", data=M._ntt)
        f.create_dataset("tobs", data=M._tobs)
        f.create_dataset("omc", data=M._omc)
        f.create_dataset("datatype", data=M._datatype)
        f.attrs["rho_0"] = M.rho_0
        f.attrs["rho_0_unc"] = M.rho_0_unc
        f.attrs["nplanets"] = M.nplanets
        f.attrs["ld1"] = M.ld1
        f.attrs["ld2"] = M.ld2
        f.attrs["koi"] = koi
        f.attrs["dil"] = dil
        g = f.create_group("mcmc")
        g.attrs["nwalkers"] = nwalkers
        g.attrs["burnin"] = burnin
        g.attrs["iterations"] = fullrun
        g.attrs["thin"] = thin
        g.attrs["rho_prior"] = rho_prior
        g.attrs["ldp_prior"] = ldp_prior
        g.attrs["onlytransits"] = M.onlytransits
        g.attrs["tregion"] = M.tregion
        g.attrs["ldfileloc"] = M.ldfileloc
        g.attrs["n_ldparams"] = M.n_ldparams
        g.create_dataset("fixed_sol", data=M.fixed_sol)
        g.create_dataset("fit_sol_0", data=M.fit_sol_0)

        c_ds = g.create_dataset("chain", (nwalkers, N, l_var),
                                dtype=np.float64)
        lp_ds = g.create_dataset("lnprob", (nwalkers, N), dtype=np.float64)

        #I don't like the default LDP unc
        #I'm changing them
        M.ld1_unc = 0.8
        M.ld2_unc = 0.8

        args = [
            M.nplanets, M.rho_0, M.rho_0_unc, M.rho_prior, M.ld1, M.ld1_unc,
            M.ld2, M.ld2_unc, M.ldp_prior, M.flux, M.err, M.fixed_sol, M.time,
            M._itime, M._ntt, M._tobs, M._omc, M._datatype, M.rvtime, M.rvval,
            M.rverr, M._rvitime, M.n_ldparams, M.ldfileloc, M.onlytransits,
            M.tregion
        ]

        tom = tmod.logchi2_rv_phaseGP2

        if runmpi:
            sampler = emcee.EnsembleSampler(nwalkers,
                                            l_var,
                                            tom,
                                            args=args,
                                            pool=pool)
        else:
            sampler = emcee.EnsembleSampler(nwalkers,
                                            l_var,
                                            tom,
                                            args=args,
                                            threads=th)

        time1 = thetime.time()
        p2, prob, state = sampler.run_mcmc(p0, burnin, storechain=False)
        sampler.reset()
        with h5py.File(outfile, u"a") as f:
            g = f["mcmc"]
            g.create_dataset("burnin_pos", data=p2)
            g.create_dataset("burnin_prob", data=prob)

        time2 = thetime.time()
        print('burn-in took ' + str((time2 - time1) / 60.) + ' min')
        time1 = thetime.time()
        for i, (pos, lnprob, state) in enumerate(
                sampler.sample(p2,
                               iterations=fullrun,
                               rstate0=state,
                               storechain=False)):

            #do the thinning in the loop here
            if i % thin == 0:
                ind = i / thin
                with h5py.File(outfile, u"a") as f:
                    g = f["mcmc"]
                    c_ds = g["chain"]
                    lp_ds = g["lnprob"]
                    c_ds[:, ind, :] = pos
                    lp_ds[:, ind] = lnprob

        time2 = thetime.time()
        print('MCMC run took ' + str((time2 - time1) / 60.) + ' min')
        print('')
        print("Mean acceptance: " + str(np.mean(sampler.acceptance_fraction)))
        print('')

        if runmpi:
            pool.close()
        else:
            sampler.pool.close()

        return sampler
Example #34
0
File: ensemble.py Project: dessn/sn-bhm
class EnsembleSampler(GenericSampler):

    def __init__(self, num_walkers=None, num_steps=5000, num_burn=2000,
                 temp_dir=None, save_interval=300):
        """ Uses ``emcee`` and the `EnsembleSampler
        <http://dan.iel.fm/emcee/current/api/#emcee.EnsembleSampler>`_ to fit the supplied
        model.

        This method sets an emcee run using the ``EnsembleSampler`` and manual
        chain management to allow for low to medium dimensional models. MPI running
        is detected automatically for less hassle, and chain progress is serialised
        to disk automatically for convenience.

        Parameters
        ----------
        num_walkers : int, optional
            The number of walkers to run. If not supplied, it defaults to eight times the
            framework dimensionality
        num_steps : int, optional
            The number of steps to run
        num_burn : int, optional
            The number of steps to discard for burn in
        temp_dir : str
            If set, specifies a directory in which to save temporary results, like the emcee chain
        save_interval : float
            The amount of seconds between saving the chain to file. Setting to ``None``
            disables serialisation.
        """

        self.logger = logging.getLogger(__name__)
        import emcee
        self.chain = None
        self.pool = None
        self.master = True
        self.num_steps = num_steps
        self.num_burn = num_burn
        self.temp_dir = temp_dir
        if temp_dir is not None and not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        self.save_interval = save_interval
        self.num_walkers = num_walkers

    def fit(self, kwargs):
        """ Runs the sampler over the model and returns the flat chain of results

        Parameters
        ----------
        kwargs : dict
            Containing the following information at a minimum:

            - log_posterior : function
                A function which takes a list of parameters and returns
                the log posterior
            - start : function|list|ndarray
                Either a starting position, or a function that can be called
                to generate a starting position
            - save_dims : int, optional
                Only return values for the first ``save_dims`` parameters.
                Useful to remove numerous marginalisation parameters if running
                low on memory or hard drive space.
            - uid : str, optional
                A unique identifier used to differentiate different fits
                if two fits both serialise their chains and use the
                same temporary directory
        Returns
        -------
        dict
            A dictionary with key "chains" containing the final
            flattened chain of dimensions
             ``(num_dimensions, num_walkers * (num_steps - num_burn))``
        """
        log_posterior = kwargs.get("log_posterior")
        start = kwargs.get("start")
        save_dims = kwargs.get("save_dims")
        uid = kwargs.get("uid")
        assert log_posterior is not None
        assert start is not None
        from emcee.utils import MPIPool
        import emcee
        try:  # pragma: no cover
            self.pool = MPIPool()
            if not self.pool.is_master():
                self.logger.info("Slave waiting")
                self.master = False
                self.pool.wait()
                sys.exit(0)
            else:
                self.logger.info("MPIPool successful initialised and master found. "
                                 "Running with %d cores." % self.pool.size)
        except ImportError:
            self.logger.info("mpi4py is not installed or not configured properly. "
                             "Ignore if running through python, not mpirun")
        except ValueError as e:  # pragma: no cover
            self.logger.info("Unable to start MPI pool, expected normal python execution")
            self.logger.info(str(e))

        if callable(start):
            num_dim = start().size
        else:
            num_dim = start.size
        if self.num_walkers is None:
            self.num_walkers = num_dim * 4
            self.num_walkers = max(self.num_walkers, 20)

        self.logger.debug("Fitting framework with %d dimensions" % num_dim)

        self.logger.info("Using Ensemble Sampler")
        sampler = emcee.EnsembleSampler(self.num_walkers, num_dim,
                                        log_posterior,
                                        pool=self.pool, live_dangerously=True)

        emcee_wrapper = EmceeWrapper(sampler)
        flat_chain = emcee_wrapper.run_chain(self.num_steps, self.num_burn,
                                             self.num_walkers, num_dim,
                                             start=start,
                                             save_dim=save_dims,
                                             temp_dir=self.temp_dir,
                                             uid=uid,
                                             save_interval=self.save_interval)
        self.logger.debug("Fit finished")
        if self.pool is not None:  # pragma: no cover
            self.pool.close()
            self.logger.debug("Pool closed")

        return {"chain": flat_chain}
Example #35
0
def main(argv):
##################
#These change a lot
  numWaveforms = 16
  numThreads = 12
  
  ndim = 6*numWaveforms + 8
  nwalkers = 2*ndim
  
  niter = 50
  burnIn = 40
  wfPlotNumber = 10
  
######################


#  plt.ion()

  fitSamples = 200
  
  #Prepare detector
  zero_1 = -5.56351644e+07
  pole_1 = -1.38796386e+04
  pole_real = -2.02559385e+07
  pole_imag = 9885315.37450211
  
  zeros = [zero_1,0 ]
  poles = [ pole_real+pole_imag*1j, pole_real-pole_imag*1j, pole_1]
  system = signal.lti(zeros, poles, 1E7 )
  
  tempGuess = 77.89
  gradGuess = 0.0483
  pcRadGuess = 2.591182
  pcLenGuess = 1.613357

  #Create a detector model
  detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05,2.5, 1.65)
  det =  Detector(detName, temperature=tempGuess, timeStep=1., numSteps=fitSamples*10, tfSystem=system)
  det.LoadFields("P42574A_fields_v3.npz")
  det.SetFields(pcRadGuess, pcLenGuess, gradGuess)
  
  
  tempIdx = -8
  gradIdx = -7
  pcRadIdx = -6
  pcLenIdx = -5
  #and the remaining 4 are for the transfer function
  
  fig_size = (20,10)
  
  
  #Create a decent start guess by fitting waveform-by-waveform
  
  wfFileName = "P42574A_512waveforms_%drisetimeculled.npz" % numWaveforms
  if os.path.isfile(wfFileName):
    data = np.load(wfFileName)
    results = data['results']
    wfs = data['wfs']
    numWaveforms = wfs.size
  else:
    print "No saved waveforms available.  Loading from Data"
    exit(0)

  #prep holders for each wf-specific param
  r_arr = np.empty(numWaveforms)
  phi_arr = np.empty(numWaveforms)
  z_arr = np.empty(numWaveforms)
  scale_arr = np.empty(numWaveforms)
  t0_arr = np.empty(numWaveforms)
  smooth_arr = np.ones(numWaveforms)*7.
  simWfArr = np.empty((1,numWaveforms, fitSamples))

  #Prepare the initial value arrays
  for (idx, wf) in enumerate(wfs):
    wf.WindowWaveformTimepoint(fallPercentage=.99)
    r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx]  = results[idx]['x']
    t0_arr[idx] += 10 #because i had a different windowing offset back in the day


  #Plot the waveforms to take a look at the initial guesses
  if False:
    fig = plt.figure()
    for (idx,wf) in enumerate(wfs):
      
      print "WF number %d:" % idx
      print "  >>r: %f\n  >>phi %f\n  >>z %f\n  >>e %f\n  >>t0 %f\n >>smooth %f" % (r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx])
      ml_wf = det.GetSimWaveform(r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx]*100, t0_arr[idx], fitSamples, smoothing = smooth_arr[idx])
      plt.plot(ml_wf, color="b")
      plt.plot(wf.windowedWf, color="r")
    value = raw_input('  --> Press q to quit, any other key to continue\n')
    if value == 'q': exit(0)

  #Initialize this thread's globals
  initializeDetectorAndWaveforms(det, wfs)

  #Initialize the multithreading
  pool = MPIPool()
  if not pool.is_master():
    pool.wait()
    sys.exit(0)

  #Do the MCMC
  mcmc_startguess = np.hstack((r_arr[:], phi_arr[:], z_arr[:], scale_arr[:]*100., t0_arr[:],smooth_arr[:],        # waveform-specific params
                              tempGuess, gradGuess,pcRadGuess, pcLenGuess, zero_1, pole_1, pole_real, pole_imag)) # detector-specific

  #number of walkers _must_ be even
  if nwalkers % 2:
    nwalkers +=1

  #Initialize walkers with a random, narrow ball around the start guess
  pos0 = [mcmc_startguess + 1e-2*np.random.randn(ndim)*mcmc_startguess for i in range(nwalkers)]

  #Make sure everything in the initial guess is within bounds
  for pos in pos0:
    pos[:numWaveforms] = np.clip( pos[:numWaveforms], 0, np.floor(det.detector_radius*10.)/10.)
    pos[numWaveforms:2*numWaveforms] = np.clip(pos[numWaveforms:2*numWaveforms], 0, np.pi/4)
    pos[2*numWaveforms:3*numWaveforms] = np.clip(pos[2*numWaveforms:3*numWaveforms], 0, np.floor(det.detector_length*10.)/10.)
    pos[4*numWaveforms:5*numWaveforms] = np.clip(pos[4*numWaveforms:5*numWaveforms], 0, fitSamples)
    pos[5*numWaveforms:6*numWaveforms] = np.clip(pos[5*numWaveforms:6*numWaveforms], 0, 20.)

    pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
    pos[gradIdx] = np.clip(pos[gradIdx], det.gradList[0], det.gradList[-1])
    pos[pcRadIdx] = np.clip(pos[pcRadIdx], det.pcRadList[0], det.pcRadList[-1])
    pos[pcLenIdx] = np.clip(pos[pcLenIdx], det.pcLenList[0], det.pcLenList[-1])

    prior = lnprior(pos,)
    if not np.isfinite(prior) :
      print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
      print pos
      exit(0)
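  # Sanity check above: refuse to start the chain if any walker's clipped
  # start position still has zero prior probability.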

  #Initialize, run the MCMC
  sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)

  #w/ progress bar, & time the thing
  start = timer()
  for result in sampler.sample(pos0, iterations=iter, storechain=True):
    pass
  end = timer()

  pool.close()

  print "Elapsed time: " + str(end-start)

  print "Dumping chain to file..."
  np.save("sampler_mpi_%dwfs.npy" % numWaveforms, sampler.chain)


  print "Making MCMC steps figure..."

  #########  Plots for Waveform params
  stepsFig = plt.figure(2, figsize=fig_size)
  plt.clf()
  ax0 = stepsFig.add_subplot(611)
  ax1 = stepsFig.add_subplot(612, sharex=ax0)
  ax2 = stepsFig.add_subplot(613, sharex=ax0)
  ax3 = stepsFig.add_subplot(614, sharex=ax0)
  ax4 = stepsFig.add_subplot(615, sharex=ax0)
  ax5 = stepsFig.add_subplot(616, sharex=ax0)

  ax0.set_ylabel('r')
  ax1.set_ylabel('phi')
  ax2.set_ylabel('z')
  ax3.set_ylabel('scale')
  ax4.set_ylabel('t0')
  ax5.set_ylabel('smoothing')

  for i in range(nwalkers):
    for j in range(wfs.size):
      ax0.plot(sampler.chain[i,:,0+j], alpha=0.3)                 # r
      ax1.plot(sampler.chain[i,:,numWaveforms + j], alpha=0.3)    # phi
      ax2.plot(sampler.chain[i,:,2*numWaveforms + j], alpha=0.3)  #z
      ax3.plot(sampler.chain[i,:,3*numWaveforms + j],  alpha=0.3) #energy
      ax4.plot(sampler.chain[i,:,4*numWaveforms + j],  alpha=0.3) #t0
      ax5.plot(sampler.chain[i,:,5*numWaveforms + j],  alpha=0.3) #smoothing

  plt.savefig("emcee_mpi_wfchain_%dwfs.png" % numWaveforms)


  #########  Plots for Detector params
  stepsFigDet = plt.figure(3, figsize=fig_size)
  plt.clf()
  ax0 = stepsFigDet.add_subplot(411)
  ax1 = stepsFigDet.add_subplot(412, sharex=ax0)
  ax2 = stepsFigDet.add_subplot(413, sharex=ax0)
  ax3 = stepsFigDet.add_subplot(414, sharex=ax0)
  
  ax0.set_ylabel('temp')
  ax1.set_ylabel('grad')
  ax2.set_ylabel('pcRad')
  ax3.set_ylabel('pcLen')

  for i in range(nwalkers):
    ax0.plot(sampler.chain[i,:,tempIdx], "b", alpha=0.3) #temp
    ax1.plot(sampler.chain[i,:,gradIdx], "b", alpha=0.3) #grad
    ax2.plot(sampler.chain[i,:,pcRadIdx], "b", alpha=0.3) #pcrad
    ax3.plot(sampler.chain[i,:,pcLenIdx], "b", alpha=0.3) #pclen
    
  plt.savefig("emcee_mpi_detchain_%dwfs.png" % numWaveforms)


  #and for the transfer function
  stepsFigTF = plt.figure(4, figsize=fig_size)
  plt.clf()
  tf0 = stepsFigTF.add_subplot(411)
  tf1 = stepsFigTF.add_subplot(412, sharex=tf0)
  tf2 = stepsFigTF.add_subplot(413, sharex=tf0)
  tf3 = stepsFigTF.add_subplot(414, sharex=tf0)
  tf0.set_ylabel('zero_1')
  tf1.set_ylabel('pole_1')
  tf2.set_ylabel('pole_real')
  tf3.set_ylabel('pole_imag')

  for i in range(nwalkers):
    tf0.plot(sampler.chain[i,:,-4], "b", alpha=0.3) #zero_1
    tf1.plot(sampler.chain[i,:,-3], "b", alpha=0.3) #pole_1
    tf2.plot(sampler.chain[i,:,-2], "b", alpha=0.3) #pole_real
    tf3.plot(sampler.chain[i,:,-1], "b", alpha=0.3) #pole_imag

  plt.savefig("emcee_mpi_tfchain_%dwfs.png" % numWaveforms)


  samples = sampler.chain[:, burnIn:, :].reshape((-1, ndim))

  print "temp is %f" % np.median(samples[:,tempIdx])
  print "grad is %f" % np.median(samples[:,gradIdx])
  print "pcrad is %f" % np.median(samples[:,pcRadIdx])
  print "pclen is %f" % np.median(samples[:,pcLenIdx])
  print "zero_1 is %f" % np.median(samples[:,-4])
  print "pole_1 is %f" % np.median(samples[:,-3])
  print "pole_real is %f" % np.median(samples[:,-2])
  print "pole_imag is %f" % np.median(samples[:,-1])

  #TODO: Aaaaaaand plot some waveforms..
  simWfs = np.empty((wfPlotNumber,numWaveforms, fitSamples))

  for idx, (theta) in enumerate(samples[np.random.randint(len(samples), size=wfPlotNumber)]):
    temp, impGrad, pcRad, pcLen = theta[tempIdx], theta[gradIdx], theta[pcRadIdx], theta[pcLenIdx]
    zero_1, pole_1, pole_real, pole_imag = theta[-4:]
    r_arr, phi_arr, z_arr, scale_arr, t0_arr, smooth_arr = theta[:-8].reshape((6, numWaveforms))
    det.SetTemperature(temp)
    det.SetFields(pcRad, pcLen, impGrad)
    
    zeros = [zero_1,0 ]
    poles = [ pole_real+pole_imag*1j, pole_real-pole_imag*1j, pole_1]
    det.SetTransferFunction(zeros, poles, 1E7)

    for wf_idx in range(wfs.size):
      wf_i = det.GetSimWaveform(r_arr[wf_idx], phi_arr[wf_idx], z_arr[wf_idx], scale_arr[wf_idx], t0_arr[wf_idx], fitSamples)
      simWfs[idx, wf_idx, :] = wf_i
      if wf_i is None:
        print "Waveform %d, %d is None" % (idx, wf_idx)

  residFig = plt.figure(5, figsize=(20, 15))
  helpers.plotManyResidual(simWfs, wfs, figure=residFig)
  plt.savefig("emcee_mpi_waveforms_%dwfs.png" % numWaveforms)
Example #36
0
def main(runmpi=False, nw=100, th=6, bi=10, fr=10):

    if runmpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool = None


    time, flux, ferr  = get_lc()

    toi = 175
    cadence = 120

    rho = 18
    rho_unc = 1
    nplanets = 3

    ld1 = 0.1642
    ld2 = 0.4259

    dil=0.0

    periods = [2.25321888449, 3.6906274382, 7.45131144274]
    impacts = [0.26, 0.21, 0.89]
    T0s = [1354.90455205, 1356.203624274, 1355.2866249]
    rprss = [0.02011, 0.038564, 0.0438550698]

    planet_guess = np.array([])
    for i in range(nplanets):
        planet_guess = np.r_[planet_guess,
                             T0s[i], periods[i], impacts[i], rprss[i],
                             0.0, 0.0
                             ]

    nwalkers = nw
    threads = th
    burnin = bi
    fullrun = fr
    thin = 1

    M = tmod.transitmc2(
        nplanets, cadence)

    M.get_ld(ld1, ld2)

    M.already_open(time,
        flux, ferr)

    M.get_rho([rho, rho_unc])
    M.get_zpt(0.0)

    M.get_sol(*planet_guess)

    outfile = 'koi{0}_np{1}.hdf5'.format(
            toi, nplanets)

    p0 = M.get_guess(nwalkers)
    l_var = np.shape(p0)[1]

    tom = tmod.logchi2
    args = [M.nplanets, M.rho_0, M.rho_0_unc,
            M.ld1, M.ld1_unc, M.ld2, M.ld2_unc,
            M.flux, M.err, 
            M.fixed_sol,
            M.time, M._itime, M._ntt,
            M._tobs, M._omc, M._datatype]

    N = len([indval for indval in range(fullrun)
            if indval%thin == 0])
    with h5py.File(outfile, u"w") as f:
        f.create_dataset("time", data=M.time)
        f.create_dataset("flux", data=M.flux)
        f.create_dataset("err", data=M.err)
        f.attrs["rho_0"] = M.rho_0
        f.attrs["rho_0_unc"] = M.rho_0_unc
        f.attrs["nplanets"] = M.nplanets
        f.attrs["ld1"] = M.ld1
        f.attrs["ld2"] = M.ld2
        g = f.create_group("mcmc")
        g.attrs["nwalkers"] = nwalkers
        g.attrs["burnin"] = burnin
        g.attrs["iterations"] = fullrun
        g.attrs["thin"] = thin
        g.create_dataset("fixed_sol", data= M.fixed_sol)
        g.create_dataset("fit_sol_0", data= M.fit_sol_0)


        c_ds = g.create_dataset("chain",
            (nwalkers, N, l_var),
            dtype=np.float64)
        lp_ds = g.create_dataset("lnprob",
            (nwalkers, N),
            dtype=np.float64)

    if runmpi:
        sampler = emcee.EnsembleSampler(nwalkers, l_var, tom,
            args=args,pool=pool)
    else:
        sampler = emcee.EnsembleSampler(nwalkers, l_var, tom,
            args=args,threads=th)

    time1 = thetime.time()
    p2, prob, state = sampler.run_mcmc(p0, burnin,
                storechain=False)
    sampler.reset()

    with h5py.File(outfile, u"a") as f:
        g = f["mcmc"]
        g.create_dataset("burnin_pos", data=p2)
        g.create_dataset("burnin_prob", data=prob)
    time2 = thetime.time()

    print('burn-in took ' + str((time2 - time1)/60.) + ' min')
    time1 = thetime.time()
    for i, (pos, lnprob, state) in enumerate(tqdm(sampler.sample(p2,
        iterations=fullrun, rstate0=state,
        storechain=False), total=fullrun)):

        #do the thinning in the loop here
        if i % thin == 0:
            ind = i // thin
            with h5py.File(outfile, u"a") as f:
                g = f["mcmc"]
                c_ds = g["chain"]
                lp_ds = g["lnprob"]
                c_ds[:, ind, :] = pos
                lp_ds[:, ind] = lnprob

    time2 = thetime.time()
    print('MCMC run took ' + str((time2 - time1)/60.) + ' min')
    print('')
    print("Mean acceptance: "
        + str(np.mean(sampler.acceptance_fraction)))
    print('')

    if runmpi:
        pool.close()
    else:
        sampler.pool.close()

    return sampler
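The incremental HDF5 checkpointing used above, condensed to its essentials. A sketch assuming emcee 2.x (storechain=False keeps the in-memory chain empty while each step is streamed to disk) and h5py; the toy posterior and file name are illustrative:

import emcee
import h5py
import numpy as np


def log_prob(theta):
    return -0.5 * np.sum(theta ** 2)

nwalkers, ndim, nsteps = 32, 3, 200
p0 = np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)

with h5py.File("chain.h5", "w") as f:
    f.create_dataset("chain", (nwalkers, nsteps, ndim), dtype=np.float64)
    f.create_dataset("lnprob", (nwalkers, nsteps), dtype=np.float64)

for i, (pos, lnp, state) in enumerate(
        sampler.sample(p0, iterations=nsteps, storechain=False)):
    # write each step as it arrives, so a crashed run keeps its samples
    with h5py.File("chain.h5", "a") as f:
        f["chain"][:, i, :] = pos
        f["lnprob"][:, i] = lnp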
Example #37
0
	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool
	try:
		pool = MPIPool()
	except:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	test_components = json.loads(options.get("pca","num_components"))
	for n_components_collection in test_components:
		main(n_components_collection,cmd_args,pool)


	#Close MPI Pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")
Example #38
0
def mcmc_mpi(
    Nwalkers,
    Nchains,
    observables=["nbar", "xi"],
    data_dict={"Mr": 21, "b_normal": 0.25},
    prior_name="first_try",
    mcmcrun=None,
):
    """
    Standard MCMC implementaion
    
    Parameters
    -----------
    - Nwalker : 
        Number of walkers
    - Nchains : 
        Number of MCMC chains   
    - observables : 
        list of observables. Options are: ['nbar','xi'],['nbar','gmf'],['xi']
    - data_dict : dictionary that specifies the observation keywords
    """
    # Initializing the vector of observables and inverse covariance matrix
    if observables == ["xi"]:
        fake_obs = Data.data_xi(**data_dict)
        # fake_obs_icov = Data.data_inv_cov('xi', **data_dict)
        fake_obs_icov = Data.data_cov(inference="mcmc", **data_dict)[1:16, 1:16]
    if observables == ["nbar", "xi"]:
        fake_obs = np.hstack([Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_icov = Data.data_cov(inference="mcmc", **data_dict)[:16, :16]
    if observables == ["nbar", "gmf"]:
        ##### FIRST BIN OF GMF DROPPED ###############
        # CAUTION: hardcoded
        fake_obs = np.hstack([Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)[1:]])
        fake_obs_icov = np.zeros((10, 10))
        # print Data.data_cov(**data_dict)[17: , 17:].shape

        # Covariance matrix being adjusted accordingly
        fake_obs_icov[1:, 1:] = Data.data_cov(inference="mcmc", **data_dict)[17:, 17:]
        fake_obs_icov[0, 1:] = Data.data_cov(inference="mcmc", **data_dict)[0, 17:]
        fake_obs_icov[1:, 0] = Data.data_cov(inference="mcmc", **data_dict)[17:, 0]
        fake_obs_icov[0, 0] = Data.data_cov(inference="mcmc", **data_dict)[0, 0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict["Mr"])
    data_hod = np.array(
        [
            data_hod_dict["logM0"],  # log M0
            np.log(data_hod_dict["sigma_logM"]),  # log(sigma)
            data_hod_dict["logMmin"],  # log Mmin
            data_hod_dict["alpha"],  # alpha
            data_hod_dict["logM1"],  # log M1
        ]
    )
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file = "".join([util.mcmc_dir(), util.observable_id_flag(observables), ".", mcmcrun, ".mcmc_chain.dat"])
    # print chain_file

    if os.path.isfile(chain_file) and continue_chain:
        print "Continuing previous MCMC chain!"
        sample = np.loadtxt(chain_file)
        Nchain = Niter - (len(sample) // Nwalkers)  # Number of iterations left to finish
        if Nchain <= 0:
            raise ValueError("no iterations left to run")
        print Nchain, " iterations left to finish"

        # Initializing Walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        f = open(chain_file, "w")
        f.close()
        Nchain = Niter

        # Initializing Walkers
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + 5.0e-2 * np.random.randn(
            Ndim * Nwalkers
        ).reshape(Nwalkers, Ndim)
        # print pos0.shape
    # Initializing MPIPool
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        "prior_range": prior_range,
        "data": fake_obs,
        "data_icov": fake_obs_icov,
        "observables": observables,
        "Mr": data_dict["Mr"],
    }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost, pool=pool, kwargs=hod_kwargs)

    # Initializing Walkers
    for result in sampler.sample(pos0, iterations=Nchain, storechain=False):
        position = result[0]
        # print position
        f = open(chain_file, "a")
        for k in range(position.shape[0]):
            output_str = "\t".join(position[k].astype("str")) + "\n"
            f.write(output_str)
        f.close()

    pool.close()
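The resume logic above, distilled: because each iteration appends one line per walker to the chain file, the last Nwalkers lines always hold the current ensemble. A minimal sketch (the helper name and perturbation scale are illustrative):

import os

import numpy as np


def initial_positions(chain_file, nwalkers, ndim, start_guess):
    # resume from the last ensemble if a non-empty chain file exists
    if os.path.isfile(chain_file) and os.path.getsize(chain_file) > 0:
        sample = np.loadtxt(chain_file)
        return sample[-nwalkers:]
    # otherwise start a fresh file and scatter walkers around the guess
    open(chain_file, "w").close()
    return start_guess + 5.e-2 * np.random.randn(nwalkers, ndim)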
Example #39
0
p0 = [
    paramGuesses + p0agitators * np.random.randn(nDim) for i in range(nWalkers)
]
#p0 = [e0 +  100*np.random.randn(nDim) for i in range(nWalkers)]
#p0 = np.random.uniform(600.0, 1300.0, size=nWalkers)

if useMPI:
    # initialize the MPI pool
    if debugging:
        processPool = MPIPool(debug=True, loadbalance=True)
#        processPool = pathos.multiprocessing.ProcessingPool(nodes=nMPInodes)
    else:
        processPool = MPIPool(loadbalance=True)
#        processPool = pathos.multiprocessing.ProcessingPool(nodes=nMPInodes)
# if not the master, wait for instruction
    if not processPool.is_master():
        processPool.wait()
        sys.exit(0)
#
    sampler = emcee.EnsembleSampler(nWalkers,
                                    nDim,
                                    lnprob,
                                    kwargs={
                                        'observables': observedTOF,
                                        'standoffDists': standoffs,
                                        'tofRanges': tof_range,
                                        'nTOFbins': tofRunBins
                                    },
                                    pool=processPool)
#else:
#    processPool = pathos.multiprocessing.ProcessingPool(ncpus=nThreads)
Example #40
0
def run(n, p00=None, nwalkers=500, prior=True, pikfilename=None):
    # accScaleLength, muNorm, muMassScaling, muFgScaling, muColScaling, accCeiling, eta, fixedQ, Qlim, conRF, kappaNormalization, kappaMassScaling = emceeparams

    fn = chaindirrel + '.pickle'
    ndim = 24

    if p00 is not None:
        p0 = [
            p00 * (1.0 + 0.01 * np.random.randn(ndim)) for i in range(nwalkers)
        ]
    else:
        if prior:
            p0 = [globalPrior.sample() for i in range(nwalkers)]
        else:
            assert pikfilename is not None

            arr = pickle.load(open(pikfilename, 'r'))
            assert nwalkers > len(arr[:, 0])
            print "Simulating cases for ", pikfilename, "with ", nwalkers, "walkers and ", len(
                arr[:, 0]), "samples"
            p0 = []
            for i in range(nwalkers):
                j = i % len(arr[:, 0])
                ### not sure if this is the best strategy, but...
                thisMass = np.power(
                    10.0,
                    np.log10(haloMassMin) +
                    np.random.random() * np.log10(haloMassMax / haloMassMin))
                p0.append([thisMass] + list(arr[j, :-1]))

    restart = {}
    restart['currentPosition'] = p0
    restart['chain'] = None
    restart['state'] = None
    restart['prob'] = None
    restart['iterationCounter'] = 0
    restart['mcmcRunCounter'] = 0

    # read in our past progress unless we've been given a new starting location.
    if p00 is None:
        updaterestart(fn, restart)

    if restart['chain'] is not None:
        # this may save some time if you change something and forget to delete the .pickle file.
        restartedshape = np.shape(restart['chain'])
        print restartedshape, nwalkers, ndim
        assert restartedshape[0] == nwalkers
        assert restartedshape[2] == ndim

    global runnumber
    runnumber = restart['mcmcRunCounter']

    restart['iterationCounter'] += n
    restart['mcmcRunCounter'] += 1

    pool = None
    if False:
        pool = MPIPool(comm=comm, loadbalance=True)
        if not pool.is_master():
            pool.wait()
            sys.exit(0)

    #sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
    #pos, prob, state = sampler.run_mcmc(restart['currentPosition'], n, rstate0=restart['state'], lnprob0=restart['prob'])

    for result in sampler.sample(restart['currentPosition'],
                                 iterations=n,
                                 lnprob0=restart['prob'],
                                 rstate0=restart['state']):

        pos, prob, state = result

        restart['acor'] = sampler.acor[:]  # autocorrelation length for each param (ndim)
        restart['accept'] = sampler.acceptance_fraction[:]  # acceptance fraction for each walker
        restart['currentPosition'] = pos  # same shape as p0: nwalkers x ndim
        restart['state'] = state  # random number generator state
        restart['prob'] = prob  # nwalkers x __
        if restart['chain'] is None:
            restart['chain'] = np.expand_dims(sampler.chain[:, 0, :], 1)  # nwalkers x niterations x ndim
            restart['allProbs'] = np.expand_dims(prob, 1)  # nwalkers x niterations
        else:
            print np.shape(restart['chain']), np.shape(sampler.chain[:, -1, :]), np.shape(sampler.chain)
            print restart['mcmcRunCounter'], restart['iterationCounter']
            #restart['chain'] = np.concatenate((restart['chain'], sampler.chain[:,-1,:]), axis=1)
            print "dbg1: ", np.shape(restart['chain']), np.shape(np.zeros((nwalkers, 1, ndim))), np.shape(np.expand_dims(pos, 1))
            restart['chain'] = np.concatenate(
                (restart['chain'], np.expand_dims(pos, 1)), axis=1)
            restart['allProbs'] = np.concatenate(
                (restart['allProbs'], np.expand_dims(prob, 1)), axis=1)

        saverestart(fn, restart)

    if pool is not None:
        pool.close()
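The updaterestart/saverestart helpers called above are not shown in this example; plausible minimal implementations of that pickle-based checkpointing (the temp-file rename detail is an assumption, not taken from the original) might look like:

import os
import pickle


def updaterestart(fn, restart):
    # merge previously saved state into the restart dict, if any exists
    if os.path.isfile(fn):
        with open(fn, 'rb') as f:
            restart.update(pickle.load(f))


def saverestart(fn, restart):
    # write to a temporary file first so a crash cannot corrupt the checkpoint
    with open(fn + '.tmp', 'wb') as f:
        pickle.dump(restart, f)
    os.replace(fn + '.tmp', fn)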
Example #41
0
File: darts.py Project: cpankow/dart_board
    def throw_darts(self, nburn=1000, nsteps=1000, method='emcee'):
        """
        Run the sampler.

        Args:
            nburn : int (default: 1000), number of burn-in steps.
            nsteps : int (default: 1000), number of steps to be saved.
        """

        # To allow for PT sampling
        if self.ntemps is not None: method = 'emcee_PT'

        if method == 'emcee':

            # Define sampler
            if self.mpi:
                pool = MPIPool()
                if not pool.is_master():
                    pool.wait()
                    sys.exit(0)
                sampler = emcee.EnsembleSampler(
                    nwalkers=self.nwalkers,
                    dim=self.dim,
                    lnpostfn=self.posterior_function,
                    args=[self],
                    pool=pool)

            elif self.threads != 1:
                sampler = emcee.EnsembleSampler(
                    nwalkers=self.nwalkers,
                    dim=self.dim,
                    lnpostfn=self.posterior_function,
                    args=[self],
                    threads=self.threads)
            else:
                sampler = emcee.EnsembleSampler(
                    nwalkers=self.nwalkers,
                    dim=self.dim,
                    lnpostfn=self.posterior_function,
                    args=[self])

            # Burn-in
            print("Starting burn-in...")
            pos, prob, state, binary_data = sampler.run_mcmc(self.p0, N=nburn)
            print("...finished running burn-in")

            # Full run
            print("Starting full run...")
            sampler.reset()
            pos, prob, state, binary_data = sampler.run_mcmc(pos, N=nsteps)
            print("...full run finished")

            # Save only every thin-th sample
            self.chains = sampler.chain[:, ::self.thin, :]
            self.derived = np.swapaxes(np.array(sampler.blobs), 0,
                                       1)[:, ::self.thin, 0, :]
            self.lnprobability = sampler.lnprobability[:, ::self.thin]

            self.sampler = sampler

        elif method == 'emcee_PT':

            # THIS DOES NOT YET WORK #

            # Define sampler
            if self.mpi:
                pool = MPIPool()
                if not pool.is_master():
                    pool.wait()
                    sys.exit(0)
                sampler = emcee.PTSampler(ntemps=self.ntemps,
                                          nwalkers=self.nwalkers,
                                          dim=self.dim,
                                          logl=posterior.ln_likelihood,
                                          logp=priors.ln_prior,
                                          loglargs=(self, ),
                                          logpargs=(self, ),
                                          pool=pool)

            elif self.threads != 1:
                sampler = emcee.PTSampler(ntemps=self.ntemps,
                                          nwalkers=self.nwalkers,
                                          dim=self.dim,
                                          logl=posterior.ln_likelihood,
                                          logp=priors.ln_prior,
                                          loglargs=(self, ),
                                          logpargs=(self, ),
                                          threads=self.threads)
            else:
                sampler = emcee.PTSampler(ntemps=self.ntemps,
                                          nwalkers=self.nwalkers,
                                          dim=self.dim,
                                          logl=posterior.ln_likelihood,
                                          logp=priors.ln_prior,
                                          loglargs=(self, ),
                                          logpargs=(self, ))

            # Burn-in
            print("Starting burn-in...")
            for pos, prob, state in sampler.sample(self.p0, iterations=nburn):
                pass
            print("...finished running burn-in")

            # Full run
            print("Starting full run...")
            sampler.reset()
            for pos, prob, state in sampler.sample(pos,
                                                   iterations=nsteps,
                                                   thin=self.thin):
                pass
            print("...full run finished")

            self.chains = sampler.chain
            # self.derived = np.swapaxes(np.array(sampler.blobs), 0, 1)
            self.lnprobability = sampler.lnprobability
            self.sampler = sampler

        elif method == 'nestle':

            print("Nested sampling is not yet implemented.")

        else:
            print("Your chosen method is not supported by dart_board.")
Example #42
0
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_features",dest="save_features",action="store_true",default=False,help="save features profiles")
	parser.add_argument("-ss","--save",dest="save",action="store_true",default=False,help="save the best fits and corresponding chi2")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-l","--likelihood",dest="likelihood",action="store_true",default=False,help="save the likelihood cubes for the mocks")
	parser.add_argument("-o","--observation",dest="observation",action="store_true",default=False,help="append the actual observation results to the mock results for direct comparison")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool
	try:
		pool = MPIPool()
	except:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Use this model for the covariance matrix (from the new set of 50 N body simulations)
	covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
	#Load in the covariance matrix
	fiducial_feature_ensemble = feature_loader.load_features(covariance_model)
	fiducial_features = fiducial_feature_ensemble.mean()
	features_covariance = fiducial_feature_ensemble.covariance()

	#timestamp
	now = time.time()
	logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Treat the 50N-body simulation set as data
	observation = CFHTcov.getModels(root_path=feature_loader.options.get("observations","root_path"))
	logging.info("Measuring the observations from {0}".format(observation))
	
	#And load the observations
	observed_feature = feature_loader.load_features(observation)

	#timestamp
	now = time.time()
	logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#timestamp
	now = time.time()
	logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	####################################################################################################################
	######################################Compute the chi2 cube#########################################################
	####################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Set the points in parameter space on which to compute the chi2 (read from options)
	Om = np.ogrid[feature_loader.options.getfloat("Omega_m","min"):feature_loader.options.getfloat("Omega_m","max"):feature_loader.options.getint("Omega_m","num_points")*1j]
	w = np.ogrid[feature_loader.options.getfloat("w","min"):feature_loader.options.getfloat("w","max"):feature_loader.options.getint("w","num_points")*1j]
	si8 = np.ogrid[feature_loader.options.getfloat("sigma8","min"):feature_loader.options.getfloat("sigma8","max"):feature_loader.options.getint("sigma8","num_points")*1j]

	num_points = len(Om) * len(w) * len(si8) 

	points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()

	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

	#Allocate array for best fit
	first_realization = feature_loader.options.getint("mocks","first_realization")
	last_realization = feature_loader.options.getint("mocks","last_realization")

	if cmd_args.observation:
		best_fit_all = np.zeros((last_realization-first_realization+1 + 1,analysis.parameter_set.shape[1]))
		chi2_all = np.zeros(last_realization-first_realization+1 + 1)
		chi2_from_expected_all = np.zeros(last_realization-first_realization+1 + 1)
	else:
		best_fit_all = np.zeros((last_realization-first_realization+1,analysis.parameter_set.shape[1]))
		chi2_all = np.zeros(last_realization-first_realization+1)
		chi2_from_expected_all = np.zeros(last_realization-first_realization+1)

	#Cycle through the realizations and obtain a best fit for each one of them
	
	for nreal in range(first_realization-1,last_realization):
	
		chi_squared = analysis.chi2(points,observed_feature=observed_feature[nreal],features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

		now = time.time()
		logging.info("realization {0}, chi2 calculations completed in {1:.1f}s".format(nreal+1,now-last_timestamp))
		last_timestamp = now

		#After chi2, compute the likelihood
		likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

		#Maybe save the likelihood cube?
		if cmd_args.likelihood:
			likelihood_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","likelihood{0}_{1}.npy".format(nreal+1,output_string(feature_loader.feature_string)))
			logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
			np.save(likelihood_filename,likelihood_cube)

		#Maybe save the feature profiles?
		if cmd_args.save_features:
			features_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","features{0}_{1}.npy".format(nreal+1,output_string(feature_loader.feature_string)))
			logging.info("Saving features for realization {0} to {1}...".format(nreal+1,features_filename))
			np.save(features_filename,observed_feature[nreal])

		#Find the maximum of the likelihood using ContourPlot functionality
		contour = ContourPlot()
		contour.getLikelihood(likelihood_cube)
		contour.getUnitsFromOptions(feature_loader.options)
		parameters_maximum = contour.getMaximum()
		parameter_keys = sorted(parameters_maximum.keys(), key=contour.parameter_axes.get)

		#Display the new best fit before exiting
		best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
		best_fit_chi2 = analysis.chi2(best_fit_parameters,features_covariance=features_covariance,observed_feature=observed_feature[nreal])[0]
		chi2_from_expected = analysis.chi2(np.array([0.26,-1.0,0.800]),features_covariance=features_covariance,observed_feature=observed_feature[nreal])[0]

		logging.info("Best fit for realization {4} is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1],chi2_from_expected,nreal+1))

		#Update global array with best fit parameters and corresponding chi2
		best_fit_all[nreal-first_realization+1,:] = best_fit_parameters.copy()
		chi2_all[nreal-first_realization+1] = best_fit_chi2 
		chi2_from_expected_all[nreal-first_realization+1] = chi2_from_expected

	#######################################################################################################################################################################

	#If option was selected, append the observation results to the mock ones, for comparison
	if cmd_args.observation:

		observed_feature = feature_loader.load_features(CFHTLens(root_path=feature_loader.options.get("observations","root_path")))[0]

		chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

		now = time.time()
		logging.info("actual observation, chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now

		#After chi2, compute the likelihood
		likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

		#Maybe save the likelihood cube?
		if cmd_args.likelihood:
			likelihood_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","likelihood_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
			logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
			np.save(likelihood_filename,likelihood_cube)

		#Maybe save the feature profiles?
		if cmd_args.save_features:
			features_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","features_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
			logging.info("Saving observed features to {0}...".format(features_filename))
			np.save(features_filename,observed_feature)

		#Find the maximum of the likelihood using ContourPlot functionality
		contour = ContourPlot()
		contour.getLikelihood(likelihood_cube)
		contour.getUnitsFromOptions(feature_loader.options)
		parameters_maximum = contour.getMaximum()
		parameter_keys = sorted(parameters_maximum.keys(), key=contour.parameter_axes.get)

		#Display the new best fit before exiting
		best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
		best_fit_chi2 = analysis.chi2(best_fit_parameters,features_covariance=features_covariance,observed_feature=observed_feature)[0]
		chi2_from_expected = analysis.chi2(np.array([0.26,-1.0,0.800]),features_covariance=features_covariance,observed_feature=observed_feature)[0]
		
		logging.info("Best fit for observation is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1],chi2_from_expected))

		#Update global array with best fit parameters and corresponding chi2
		best_fit_all[-1,:] = best_fit_parameters.copy()
		chi2_all[-1] = best_fit_chi2
		chi2_from_expected_all[-1] = chi2_from_expected

	#######################################################################################################################################################################
	
	#Close MPI Pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")

	if cmd_args.save:

		#Save the best fit parameters for all realizations
		best_fit_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","best_fit_all_{0}.npy".format(output_string(feature_loader.feature_string)))
		logging.info("Saving best fit to {0}...".format(best_fit_filename))
		np.save(best_fit_filename,best_fit_all)

		#Save the best fit chi2 for all realizations
		chi2_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","chi2_all_{0}.npy".format(output_string(feature_loader.feature_string)))
		logging.info("Saving best fit chi2 to {0}...".format(chi2_filename))
		np.save(chi2_filename,chi2_all)

		#Save also the chi2 for the expected best fit
		chi2_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","chi2_all_expected_{0}.npy".format(output_string(feature_loader.feature_string)))
		logging.info("Saving expected chi2 to {0}...".format(chi2_filename))
		np.save(chi2_filename,chi2_from_expected_all)

	end = time.time()

	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))
Example #43
0
def run_emcee_seeded(light_curve, transit_params, spot_parameters, n_steps,
                     n_walkers, output_path, burnin=0.7,
                     n_extra_spots=1, skip_priors=False):
    """
    Fit for transit depth and spot parameters given initial guess informed by
    results from `peak_finder`

    Parameters
    ----------
    light_curve : `friedrich.lightcurve.TransitLightCurve`
        Light curve to fit
    transit_params : `~batman.TransitParams`
        Transit light curve parameters
    spot_parameters : list
        List of all spot parameters in [amp, t0, sig, amp, t0, sig, ...] order
    n_steps : int
        Number of MCMC steps to take
    n_walkers : int
        Number of MCMC walkers to initialize (must be even, more than twice the
        number of free params in fit)
    output_path : str
        Path to HDF5 archive output for storing results
    burnin : float
        Fraction of the chain to discard as burn-in: only the steps after the
        first `burnin` fraction of `n_steps` are saved to the output archive
    n_extra_spots : int
        Add `n_extra_spots` extra spots to the fit to soak up spots not
        predicted by `peak_finder`
    skip_priors : bool
        Should a prior be applied to the depth parameter?

    Returns
    -------
    sampler : `emcee.EnsembleSampler`
        Sampler object returned by `emcee`
    """

    times = light_curve.times.jd
    fluxes = light_curve.fluxes
    errors = light_curve.errors

    lower_t_bound, upper_t_bound = get_in_transit_bounds(times, transit_params)
    amps = spot_parameters[::3]
    init_depth = transit_params.rp**2

    extra_spot_params = [0.1*np.min(amps), np.mean(times),
                         0.05*(upper_t_bound-lower_t_bound)]
    fit_params = np.concatenate([spot_parameters,
                                 n_extra_spots*extra_spot_params])

    ndim, nwalkers = len(fit_params), n_walkers
    pos = []

    while len(pos) < nwalkers:
        realization = fit_params + 1e-5*np.random.randn(ndim)

        if not np.isinf(lnprior(realization, fluxes, lower_t_bound,
                                upper_t_bound, transit_params, skip_priors)):
            pos.append(realization)

    print('Begin MCMC...')

    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(times, fluxes, errors, lower_t_bound,
                                          upper_t_bound, transit_params,
                                          skip_priors),
                                    pool=pool)
    sampler.run_mcmc(pos, n_steps)
    print('Finished MCMC...')
    pool.close()

    burnin_len = int(burnin*n_steps)

    from .storage import create_results_archive

    create_results_archive(output_path, light_curve, sampler, burnin_len, ndim)

    return sampler
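The walker-seeding loop above, as a reusable pattern: perturb a best-fit vector and keep only draws whose prior is finite, so no walker starts at zero probability. A sketch with a toy box prior (the prior and perturbation scale are illustrative):

import numpy as np


def ln_prior(theta):
    return 0.0 if np.all(np.abs(theta) < 10) else -np.inf


def seed_walkers(best_fit, nwalkers, scale=1e-5):
    pos = []
    while len(pos) < nwalkers:
        trial = best_fit + scale * np.random.randn(len(best_fit))
        if np.isfinite(ln_prior(trial)):   # reject draws outside the prior
            pos.append(trial)
    return np.array(pos)

pos0 = seed_walkers(np.zeros(5), nwalkers=32)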
Example #44
0
def run_mcmc(cos_twol,sin_twol,sin_l,cos_l,sin_b,cos_b,data_pml,data_err_pml,\
             data_pmb,data_err_pmb,data_plx,data_err_plx,data_vlos,data_err_vlos,data_age,\
             age_number,distance_number,Number):

    distance = distance_list[distance_number]
    age = age_list[age_number]

    #define the objective function
    negativelnLikelihood = lambda *args: -lnlike(*args)[0]

    #initial guess for p
    p_0 = p0_list[age_number]

    #generate random starting positions: np.random.randn draws Gaussians with
    #mean 0 and standard deviation 1, so each walker is a perturbed copy of p_0
    pos = [p_0 + 1. * np.random.randn(ndim) for i in range(N_WALKERS)]

    #for multiprocessing
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    observables = cos_twol,sin_twol,sin_l,cos_l,sin_b,cos_b,data_pml,data_err_pml,data_pmb,\
                  data_err_pmb,data_plx,data_err_plx,data_vlos,data_err_vlos,data_age,age_number
    sampler = emcee.EnsembleSampler(N_WALKERS,
                                    ndim,
                                    lnprob,
                                    args=observables)

    #sampler = emcee.EnsembleSampler(N_WALKERS, ndim, lnprob, pool=pool, \
    #          args=(cos_twol,sin_twol,sin_l,cos_l,sin_b,cos_b,data_pml,data_err_pml,data_pmb,\
    #                data_err_pmb,data_plx,data_err_plx,data_vlos,data_err_vlos))
    sampler.run_mcmc(pos, Nrun)
    pool.close()

    print('Done.')

    #---
    # store the results
    burnin = Nburn
    samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))

    plt.clf()
    height_fig_inch = int((ndim + 1) * 3.0)
    fig, axes = plt.subplots(ndim + 1,
                             1,
                             sharex=True,
                             figsize=(8, height_fig_inch))
    for i in range(ndim):
        axes[i].plot(sampler.chain[:, :, i].T, color='k', alpha=0.5)
        axes[i].set_ylabel(_list_labels[i])
    # last panel shows the evolution of ln-likelihood for the ensemble of walkers
    axes[-1].plot(sampler.lnprobability.T, color='k', alpha=0.5)
    axes[-1].set_ylabel('ln(L)')
    maxlnlike = np.max(sampler.lnprobability)
    axes[-1].set_ylim(maxlnlike - 3 * ndim, maxlnlike)
    fig.tight_layout(h_pad=0.)

    filename_pre = 'newModel_1/'+distance+'/line-time_walker%dNrun%dNburn%d_withscatter_'\
                   +age+'Gyr_'+distance+'_%dstars_newModel_1'
    filename = filename_pre % (N_WALKERS, Nrun, Nburn, Number)
    fig.savefig(filename + '.png')

    # Make a triangle plot
    burnin = Nburn
    samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))

    #convert scatters to exp(scatters)
    #samples[:,-3] = np.exp(samples[:,-3])
    #samples[:,-2] = np.exp(samples[:,-2])
    #samples[:,-1] = np.exp(samples[:,-1])

    fig = corner.corner(
        samples[:, :-3],
        labels=_list_labels,
        label_kwargs={'fontsize': 20},
        # truths=_list_answer,
        quantiles=[0.16, 0.5, 0.84],
        plot_datapoints=True,
        show_titles=True,
        title_args={'fontsize': 20},
        title_fmt='.3f',
    )

    filename_pre = 'newModel_1/'+distance+'/trinagle_walker%dNrun%dNburn%d_withscatter_'\
                   +age+'Gyr_'+distance+'_%dstars_newModel_1'
    filename = filename_pre % (N_WALKERS, Nrun, Nburn, Number)
    fig.savefig(filename + '.png')
    #fig.savefig(filename+'.pdf')

    p = np.mean(samples, axis=0)
    e = np.var(samples, axis=0)**0.5
    filename = 'newModel_1/result_' + age + 'Gyr_' + distance + '_' + str(
        Number) + 'stars_newModel_1' + '.txt'
    np.savetxt(filename, (p, e), fmt="%.3f", delimiter=',')

    # lnlike returns lnL at index 0 and (va, vR2, sigmaphi, meanvR, sigmaR) at
    # indices 1-5; evaluate it once instead of five times
    like_out = lnlike(p,cos_twol,sin_twol,sin_l,cos_l,sin_b,cos_b,data_pml,data_err_pml,data_pmb,\
                      data_err_pmb,data_plx,data_err_plx,data_vlos,data_err_vlos,data_age,age_number)
    va, vR2, sigmaphi, meanvR, sigmaR = like_out[1:6]

    f = open('va_newModel_1.txt', 'a')
    printline = '%s, %f, %f, %f, %f, %f, %f\n' % (
        filename, np.mean(va), vR2, sigmaphi, np.mean(sigmaR) / 80., meanvR, sigmaR)
    f.write(printline)
    f.close()

    va_list = []
    sigmaR = []

    print(filename)

    return None
Example #45
0
def mcmc_mpi(Nwalkers,
             Nchains,
             observables=['nbar', 'xi'],
             data_dict={
                 'Mr': 21,
                 'b_normal': 0.25
             },
             prior_name='first_try',
             mcmcrun=None):
    '''
    Standard MCMC implementation

    Parameters
    -----------
    - Nwalkers :
        Number of walkers
    - Nchains :
        Number of MCMC chains
    - observables :
        list of observables. Options are: ['nbar','xi'], ['nbar','gmf'], ['xi']
    - data_dict : dictionary that specifies the observation keywords
    '''
    #Initializing the vector of observables and inverse covariance matrix
    if observables == ['xi']:
        fake_obs = Data.data_xi(**data_dict)
        #fake_obs_icov = Data.data_inv_cov('xi', **data_dict)
        fake_obs_icov = Data.data_cov(inference='mcmc', **data_dict)[1:16, 1:16]
    if observables == ['nbar', 'xi']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict),
             Data.data_xi(**data_dict)])
        fake_obs_icov = Data.data_cov(inference='mcmc', **data_dict)[:16, :16]
    if observables == ['nbar', 'gmf']:
        ##### FIRST BIN OF GMF DROPPED ###############
        # CAUTION: hardcoded
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict),
             Data.data_gmf(**data_dict)[1:]])
        fake_obs_icov = np.zeros((10, 10))
        #print Data.data_cov(**data_dict)[17: , 17:].shape

        # Covariance matrix being adjusted accordingly
        fake_obs_icov[1:, 1:] = Data.data_cov(inference='mcmc',
                                              **data_dict)[17:, 17:]
        fake_obs_icov[0, 1:] = Data.data_cov(inference='mcmc',
                                             **data_dict)[0, 17:]
        fake_obs_icov[1:, 0] = Data.data_cov(inference='mcmc',
                                             **data_dict)[17:, 0]
        fake_obs_icov[0, 0] = Data.data_cov(inference='mcmc', **data_dict)[0, 0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],  # log M0 
        np.log(data_hod_dict['sigma_logM']),  # log(sigma)
        data_hod_dict['logMmin'],  # log Mmin
        data_hod_dict['alpha'],  # alpha
        data_hod_dict['logM1']  # log M1
    ])
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file = ''.join([
        util.mcmc_dir(),
        util.observable_id_flag(observables), '.', mcmcrun, '.mcmc_chain.dat'
    ])
    #print chain_file

    if os.path.isfile(chain_file) and continue_chain:
        print 'Continuing previous MCMC chain!'
        sample = np.loadtxt(chain_file)
        Nchain = Niter - (len(sample) // Nwalkers)  # Number of iterations left to finish
        if Nchain <= 0:
            raise ValueError('no iterations left to run')
        print Nchain, ' iterations left to finish'

        # Initializing Walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        f = open(chain_file, 'w')
        f.close()
        Nchain = Niter

        # Initializing Walkers
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
                         5.e-2 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)
        #print pos0.shape
    # Initializing MPIPool
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        'prior_range': prior_range,
        'data': fake_obs,
        'data_icov': fake_obs_icov,
        'observables': observables,
        'Mr': data_dict['Mr']
    }
    sampler = emcee.EnsembleSampler(Nwalkers,
                                    Ndim,
                                    lnPost,
                                    pool=pool,
                                    kwargs=hod_kwargs)

    # Initializing Walkers
    for result in sampler.sample(pos0, iterations=Nchain, storechain=False):
        position = result[0]
        #print position
        f = open(chain_file, 'a')
        for k in range(position.shape[0]):
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()

    pool.close()
Example #46
0
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-r","--remove",dest="remove",action="store",type=int,default=24,help="model to remove from the analysis")
	parser.add_argument("-R","--random",dest="random",action="store",type=int,default=0,help="random seed initialization for realization picking")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool
	try:
		pool = MPIPool()
	except:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Get the names of all the simulated models available for the CFHT analysis, including smoothing scales and subfields
	all_simulated_models = CFHTemu1.getModels(root_path=feature_loader.options.get("simulations","root_path"))

	#Use this model for the covariance matrix
	covariance_model = all_simulated_models[feature_loader.options.getint("analysis","covariance_model")]
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	#Load in the covariance matrix
	features_covariance = feature_loader.load_features(covariance_model).covariance()

	#timestamp
	now = time.time()
	logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#timestamp
	now = time.time()
	logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	##################################################################################################################################################

	#Initialize random seed
	np.random.seed(cmd_args.random)
	realization = np.random.randint(0,1000)

	#Treat the removed model as data
	model_to_remove = all_simulated_models[cmd_args.remove]
	parameters_to_remove = model_to_remove.squeeze()
	logging.info("Treating model {0}, realization {1} as data, loading features...".format(model_to_remove,realization+1))
	observed_feature = feature_loader.load_features(model_to_remove)[realization]

	#Compute the chi2 for this observed feature without removing it from the emulator (must be close to 0)
	logging.info("Chi2 before removal: {0[0]:.3f} ({1} dof)".format(analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

	#Remove the model from the emulator
	remove_index = analysis.find(parameters_to_remove)[0]
	logging.info("Removing model {0} with parameters {1} from emulator...".format(remove_index,analysis.parameter_set[remove_index]))
	analysis.remove_model(remove_index)

	#Retrain without the removed model
	analysis.train()

	#Compute the chi2 for this observed feature after removing it from the emulator (likely it's not 0 anymore)
	logging.info("Chi2 after removal: {0[0]:.3f} ({1} dof)".format(analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

	####################################################################################################################
	######################################Compute the chi2 cube#########################################################
	####################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Set the points in parameter space on which to compute the chi2 (read from options)
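	#(a purely imaginary step in np.ogrid, num_points*1j, requests that many
	#evenly spaced samples over [min, max], exactly like np.linspace)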
	Om = np.ogrid[feature_loader.options.getfloat("Omega_m","min"):feature_loader.options.getfloat("Omega_m","max"):feature_loader.options.getint("Omega_m","num_points")*1j]
	w = np.ogrid[feature_loader.options.getfloat("w","min"):feature_loader.options.getfloat("w","max"):feature_loader.options.getint("w","num_points")*1j]
	si8 = np.ogrid[feature_loader.options.getfloat("sigma8","min"):feature_loader.options.getfloat("sigma8","max"):feature_loader.options.getint("sigma8","num_points")*1j]

	num_points = len(Om) * len(w) * len(si8) 

	points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()
	if cmd_args.save_points is not None:
		#str.rstrip strips a character set, not a suffix, so strip ".npy" explicitly
		points_root = cmd_args.save_points[:-4] if cmd_args.save_points.endswith(".npy") else cmd_args.save_points
		logging.info("Saving points to {0}.npy".format(points_root))
		np.save(points_root+".npy",points)

	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))
	
	chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

	#Close MPI Pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")

	now = time.time()
	logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	#Save output

	likelihood_file = "likelihood_remove{0}_{1}.npy".format(cmd_args.remove,output_string(feature_loader.feature_string))
	chi2_file = "chi2_remove{0}_{1}.npy".format(cmd_args.remove,output_string(feature_loader.feature_string))

	logging.info("Saving chi2 to {0}".format(chi2_file))
	np.save(chi2_file,chi_squared.reshape(Om.shape + w.shape + si8.shape))

	logging.info("Saving full likelihood to {0}".format(likelihood_file))
	likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))
	np.save(likelihood_file,likelihood_cube)

	#Find the maximum of the likelihood using ContourPlot functionality
	contour = ContourPlot()
	contour.getLikelihood(likelihood_cube)
	contour.getUnitsFromOptions(feature_loader.options)
	parameters_maximum = contour.getMaximum()
	parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

	#Display the new best fit before exiting
	best_fit_parameters = [ parameters_maximum[par_key] for par_key in parameter_keys ]
	logging.info("New best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}".format(best_fit_parameters,analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature)))

	#End
	end = time.time()
	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))
        jj = 0
        while jj < len(y):
            print('icount, jj', icount, jj)
            iipix_mask,iipix = WLanalysis.coords2grid(x[jj:jj+istep], y[jj:jj+istep], idata.flatten().reshape(1,-1)[:,jj:jj+istep], size=sizes[Wx-1])
            ipix_mask += iipix_mask
            ipix += iipix
            jj += istep
    print(icount, 'W%i done coords2grid %s' % (Wx, icount))  #,time.strftime("%Y-%m-%d %H:%M")
    
    save(mask_dir+'smaller/weight0_W%i_%i_numpix'%(Wx,icount), ipix)
    save(mask_dir+'smaller/weight0_W%i_%i_nummask'%(Wx,icount), ipix_mask)
    #ipix is the number of pixels that fall in each big pixel; ipix_mask is the mask
    return ipix, ipix_mask

p = MPIPool()    
if not p.is_master():
    p.wait()
    sys.exit(0)

#p.map(partialdata2grid, range(63))
ismall_map=p.map(partialdata2grid, range(63))
small_map = sum(array(ismall_map),axis=0)
save(mask_dir+'weight0_W%i_smaller_mask.npy'%(Wx),small_map)
weight=1-small_map[1]/small_map[0]
weight[isnan(weight)]=0
save(mask_dir+'ludoweight_weight0_W%i.npy'%Wx, weight)
mask=weight/weight
mask[isnan(mask)]=0
save(mask_dir+'ludomask_weight0_W%i.npy'%Wx, mask)

p.close()
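A note on the pattern above: emcee v2's MPIPool exposes a plain map interface, so the master scatters one task per call of a module-level function while every other rank blocks in wait(). A minimal self-contained sketch of that idiom follows; the function body and the task count are placeholders, not taken from the fragment above, and in newer stacks the same pool class lives in the schwimmbad package.

import sys
import numpy as np
from emcee.utils import MPIPool  # emcee v2 location of the pool

def process_chunk(icount):
    # stand-in for per-chunk work such as partialdata2grid(); it must be
    # defined at module level so worker processes can unpickle it
    return np.full(4, icount, dtype=float)

if __name__ == "__main__":
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()       # workers execute incoming tasks until the pool closes
        sys.exit(0)
    results = pool.map(process_chunk, range(63))  # master farms out 63 tasks
    total = np.sum(np.array(results), axis=0)     # reduce, as the script above does
    pool.close()
    print(total)

Launched as, e.g., mpirun -np 8 python script.py; with a single process MPIPool raises ValueError, which is why other examples in this collection wrap its construction in a try/except.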
Example #48
File: stats.py Project: astroJeff/XRB
def run_emcee_2(M2_d, P_orb_obs, ecc_obs, ra, dec, M2_d_err=1.0,
    P_orb_obs_err=1.0, ecc_obs_err=0.05, nwalkers=80, nburn=1000,
    nsteps=1000,
    threads=1, mpi=False):
    """ Run the emcee function

    Parameters
    ----------
    M2_d : float
        Observed secondary mass
    P_orb_obs : float
        Observed orbital period
    ecc_obs : float
        Observed orbital eccentricity
    ra : float
        Observed right ascension
    dec : float
        Observed declination
    threads : int
        Number of threads to use for parallelization
    mpi : bool
        If true, use MPIPool for parallelization

    Returns
    -------
    sampler : emcee object
    """

    # First thing is to load the sse data and SF_history data
    load_sse.load_sse()
    sf_history.load_sf_history()

    # Get initial values
    initial_vals = get_initial_values(M2_d, nwalkers=nwalkers)

    # Define sampler
    args = [[M2_d, M2_d_err, P_orb_obs, P_orb_obs_err, ecc_obs, ecc_obs_err, ra, dec]]

    if mpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
        sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=10, lnpostfn=ln_posterior, args=args, pool=pool)

    elif threads != 1:
        sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=10, lnpostfn=ln_posterior, args=args, threads=threads)
    else:
        sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=10, lnpostfn=ln_posterior, args=args)

    # Assign initial values (set_walkers builds the full (nwalkers, 10) array)
    p0 = set_walkers(initial_vals, args[0], nwalkers=nwalkers)

    # Burn-in 1
    pos,prob,state = sampler.run_mcmc(p0, N=nburn)
    sampler1 = copy.copy(sampler)

    # TESTING BEGIN - Get limiting ln_prob for worst 10 chains
    prob_lim = (np.sort(prob)[9] + np.sort(prob)[10])/2.0
    index_best = np.argmax(prob)
    for i in np.arange(len(prob)):
        # if sampler1.acceptance_fraction[i] == 0.0: pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
        if prob[i] < prob_lim:  pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
    # TESTING END

    print "Burn-in 1 finished."
    print "Starting burn-in 2..."

    # Burn-in 2
    sampler.reset()
    pos,prob,state = sampler.run_mcmc(pos, N=nburn)
    sampler2 = copy.copy(sampler)

    # TESTING BEGIN - Get limiting ln_prob for worst 10 chains
    prob_lim = (np.sort(prob)[9] + np.sort(prob)[10])/2.0
    index_best = np.argmax(prob)
    for i in np.arange(len(prob)):
        # if sampler2.acceptance_fraction[i] == 0.0: pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
        if prob[i] < prob_lim:  pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
    # TESTING END

    print "Burn-in 2 finished."
    print "Starting burn-in 3..."

    # Burn-in 3
    sampler.reset()
    pos,prob,state = sampler.run_mcmc(pos, N=nburn)
    sampler3 = copy.copy(sampler)

    # TESTING BEGIN - Get limiting ln_prob for worst 10 chains
    prob_lim = (np.sort(prob)[9] + np.sort(prob)[10])/2.0
    index_best = np.argmax(prob)
    for i in np.arange(len(prob)):
        # if sampler3.acceptance_fraction[i] == 0.0: pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
        if prob[i] < prob_lim:  pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
    # TESTING END

    print "Burn-in 3 finished."
    print "Starting burn-in 4..."

    # Burn-in 4
    sampler.reset()
    pos,prob,state = sampler.run_mcmc(pos, N=nburn)
    sampler4 = copy.copy(sampler)

    print "Burn-in 4 finished."
    print "Starting production run..."

    # Full run
    sampler.reset()
    pos,prob,state = sampler.run_mcmc(pos, N=nsteps)

    print "Finished production run"

    if mpi: pool.close()


    return sampler1, sampler2, sampler3, sampler4, sampler
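The three copy-pasted "TESTING" blocks above all do the same thing: after each burn-in stage, walkers whose ln-probability falls below a cutoff (halfway between the 10th and 11th lowest values) are re-seeded in a small Gaussian ball around the best walker. A hedged refactoring into a single helper; the function name and the n_worst/scatter defaults are mine, chosen to match the hard-coded 10 and 0.005 above.

import numpy as np

def reseed_worst_walkers(pos, prob, n_worst=10, scatter=0.005):
    # cutoff halfway between the n_worst-th and (n_worst+1)-th lowest lnprobs
    srt = np.sort(prob)
    prob_lim = 0.5 * (srt[n_worst - 1] + srt[n_worst])
    index_best = np.argmax(prob)
    ndim = pos.shape[1]
    for i in range(len(prob)):
        if prob[i] < prob_lim:
            pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, scatter, size=ndim)
    return pos

Each burn-in stage then reduces to pos = reseed_worst_walkers(pos, prob) followed by sampler.reset(). Culling low-probability walkers like this biases the ensemble, which is presumably why the author keeps it out of the production run.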
Example #49
def main(runmpi=True,nw=100,th=6,bi=10,fr=10):

    if runmpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool=None

    #ldfileloc = '/Users/tom/Projects/koi2133/code/'
    ldfileloc = '/nobackup/tsbarcl2/Projects/koi2133/code/'
    #codedir = '/Users/tom/Projects/koi2133/code'
    codedir = '/nobackup/tsbarcl2/Projects/koi2133/code'

    koi = 2133
    cadence=1625.3
    teff = 4550.
    teff_unc = 75.
    feh = 0.11
    feh_unc = 0.07
    logg = 2.943
    logg_unc = 0.007
    rho = 0.0073
    rho_unc = 0.0001
    nplanets = 1

    dil=0.0

    period=6.24672336
    impact=0.7
    T0=136.383880
    rprs=0.02067
    alb=30.
    occ=40.
    ell=30.
    rvamp=79.0
    ecosw=0.048
    esinw=-0.045
    planet_guess = np.array([
        T0,period,impact,rprs,ecosw,esinw,
        rvamp,occ,ell,alb])


    rvtime, rvval, rverr = get_rv()
    time,flux,ferr = get_lc()

    rho_prior = True
    ldp_prior = False

    nwalkers = nw
    threads = th
    burnin = bi
    fullrun = fr
    thin = 1

    n_ldparams = 2

    toffset_lc = 0
    toffset_rv = 0

    zpt_0 = 1.E-10

    M = tmod.transitemcee_koi2133(
        nplanets,cadence,
        ldfileloc=ldfileloc,codedir=codedir)

    M.get_stellar(teff,
        logg,
        feh,
        n_ldparams,ldp_prior=ldp_prior)

    M.already_open(time,
        flux,ferr,
        rvtime,rvval,
        rverr,
        timeoffset=toffset_lc,rvtimeoffset=toffset_rv,
        normalize=False)

    rho_vals = np.array([rho,rho_unc])

    M.get_rho(rho_vals,rho_prior)
    M.get_zpt(zpt_0)

    noise_model = [3.0E-4, 3.7E-2, 2.E-4, 3.]


    if dil is not None:
        M.get_sol(*planet_guess,dil=dil,noise_model=noise_model)
    else:
        M.get_sol(*planet_guess,noise_model=noise_model)


    outfile = 'koi{0}_np{1}_prior{2}_dil{3}GP.hdf5'.format(
            koi,nplanets,rho_prior,dil)

    p0 = M.get_guess(nwalkers)

    #dirty hack!!
    qwe = np.r_[np.arange(0,7),np.arange(9,21)]
    p0 = p0[:,qwe]

    l_var = np.shape(p0)[1]

    N = len([indval for indval in range(fullrun)
            if indval%thin == 0])
    with h5py.File(outfile, u"w") as f:
        f.create_dataset("time", data=M.time)
        f.create_dataset("flux", data=M.flux)
        f.create_dataset("err", data=M.err)
        f.create_dataset("rvtime", data=M.rvtime)
        f.create_dataset("rvval", data=M.rvval)
        f.create_dataset("rverr", data=M.rverr)
        f.create_dataset("itime", data=M._itime)
        f.create_dataset("ntt", data = M._ntt)
        f.create_dataset("tobs", data = M._tobs)
        f.create_dataset("omc",data = M._omc)
        f.create_dataset("datatype",data = M._datatype)
        f.attrs["rho_0"] = M.rho_0
        f.attrs["rho_0_unc"] = M.rho_0_unc
        f.attrs["nplanets"] = M.nplanets
        f.attrs["ld1"] = M.ld1
        f.attrs["ld2"] = M.ld2
        f.attrs["koi"] = koi
        f.attrs["dil"] = dil
        g = f.create_group("mcmc")
        g.attrs["nwalkers"] = nwalkers
        g.attrs["burnin"] = burnin
        g.attrs["iterations"] = fullrun
        g.attrs["thin"] = thin
        g.attrs["rho_prior"] = rho_prior
        g.attrs["ldp_prior"] = ldp_prior
        g.attrs["onlytransits"] = M.onlytransits
        g.attrs["tregion"] = M.tregion
        g.attrs["ldfileloc"] = M.ldfileloc
        g.attrs["n_ldparams"] = M.n_ldparams
        g.create_dataset("fixed_sol", data= M.fixed_sol)
        g.create_dataset("fit_sol_0", data= M.fit_sol_0)


        c_ds = g.create_dataset("chain",
            (nwalkers, N, l_var),
            dtype=np.float64)
        lp_ds = g.create_dataset("lnprob",
            (nwalkers, N),
            dtype=np.float64)

        #I don't like the default LDP unc
        #I'm changing them
        M.ld1_unc = 0.8
        M.ld2_unc = 0.8

        args = [M.nplanets,M.rho_0,M.rho_0_unc,M.rho_prior,
            M.ld1,M.ld1_unc,M.ld2,M.ld2_unc,M.ldp_prior,
            M.flux,M.err,M.fixed_sol,M.time,M._itime,M._ntt,
            M._tobs,M._omc,M._datatype,
            M.rvtime,M.rvval,M.rverr,M._rvitime,
            M.n_ldparams,M.ldfileloc,
            M.onlytransits,M.tregion]


        tom = tmod.logchi2_rv_phaseGP2


        if runmpi:
            sampler = emcee.EnsembleSampler(nwalkers, l_var, tom,
                args=args,pool=pool)
        else:
            sampler = emcee.EnsembleSampler(nwalkers, l_var, tom,
                args=args,threads=th)


        time1 = thetime.time()
        p2, prob, state = sampler.run_mcmc(p0, burnin,
            storechain=False)
        sampler.reset()
        with h5py.File(outfile, u"a") as f:
            g = f["mcmc"]
            g.create_dataset("burnin_pos", data=p2)
            g.create_dataset("burnin_prob", data=prob)

        time2 = thetime.time()
        print('burn-in took ' + str((time2 - time1)/60.) + ' min')
        time1 = thetime.time()
        for i, (pos, lnprob, state) in enumerate(sampler.sample(p2,
            iterations=fullrun, rstate0=state,
            storechain=False)):

            #do the thinning in the loop here; c_ds and lp_ds were created
            #above in the still-open outfile, so write through them directly
            if i % thin == 0:
                ind = i // thin
                c_ds[:, ind, :] = pos
                lp_ds[:, ind] = lnprob

        time2 = thetime.time()
        print('MCMC run took ' + str((time2 - time1)/60.) + ' min')
        print('')
        print("Mean acceptance: "
            + str(np.mean(sampler.acceptance_fraction)))
        print('')

        if runmpi:
            pool.close()
        else:
            sampler.pool.close()

        return sampler
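Examples #49 (above) and #51 (below) share a storage idiom worth isolating: pre-allocate fixed-size HDF5 datasets for the chain and ln-probability, run the sampler with storechain=False, and write every thin-th step into its slot so memory stays flat no matter how long the run is. A stripped-down, runnable sketch; the random-walk update stands in for an emcee step and the file name is illustrative.

import h5py
import numpy as np

nwalkers, ndim, fullrun, thin = 16, 3, 100, 5
nstored = (fullrun + thin - 1) // thin         # number of thinned steps kept

with h5py.File("chain_demo.hdf5", "w") as f:
    g = f.create_group("mcmc")
    c_ds = g.create_dataset("chain", (nwalkers, nstored, ndim), dtype=np.float64)
    lp_ds = g.create_dataset("lnprob", (nwalkers, nstored), dtype=np.float64)

    pos = np.random.randn(nwalkers, ndim)
    for i in range(fullrun):
        pos = pos + 0.1 * np.random.randn(nwalkers, ndim)  # stand-in for one MCMC move
        lnprob = -0.5 * np.sum(pos**2, axis=1)
        if i % thin == 0:
            c_ds[:, i // thin, :] = pos        # incremental write, nothing buffered
            lp_ds[:, i // thin] = lnprob

Writing through the already-open handle, as in the corrected loop above, also sidesteps HDF5 file locking, which can refuse to open a file that is already open for writing.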
Example #50
def LensModelMCMC(data,lens,source,
                  xmax=30.,highresbox=[-3.,3.,-3.,3.],emitres=None,fieldres=None,
                  sourcedatamap=None, scaleamp=False, shiftphase=False,
                  modelcal=True,cosmo=Planck15,
                  nwalkers=1000,nburn=1000,nstep=1000,pool=None,nthreads=1,mpirun=False):
      """
      Wrapper function which basically takes what the user wants and turns it into the
      format needed for the actual MCMC lens modeling.
      
      Inputs:
      data:
            One or more visdata objects; if multiple datasets are being
            fit to, should be a list of visdata objects.
      lens:
            Any of the currently implemented lens objects or ExternalShear.
      source:
            One or more of the currently implemented source objects; if more than
            one source to be fit, should be a list of multiple sources.
      xmax:
            (Half-)Grid size, in arcseconds; the grid will span +/-xmax in x&y
      highresbox:
            The region to model at higher resolution (to account for high-magnification
            and differential lensing effects), as [xmin, xmax, ymin, ymax]. 
            Note the sign convention is: +x = West, +y = North, like the lens
            positions.
      sourcedatamap:
            A list of length the number of datasets which tells which source(s)
            are to be fit to which dataset(s). Eg, if two sources are to be fit
            to two datasets jointly, should be [[0,1],[0,1]]. If we have four
            sources and three datasets, could be [[0,1],[0,1],[2,3]] to say that the
            first two sources should both be fit to the first two datasets, while the
            second two should be fit to the third dataset. If None, will assume
            all sources should be fit to all datasets.
      scaleamp:
            A list of length the number of datasets which tells whether a flux
            rescaling is allowed and which dataset the scaling should be relative to.
            False indicates no scaling should be done, while True indicates that
            amplitude scaling should be allowed.
      shiftphase:
            Similar to scaleamp above, but allowing for positional/astrometric offsets.
      modelcal:
            Whether or not to perform the pseudo-selfcal procedure of H+13
      cosmo:
            The cosmology to use, as an astropy object, e.g.,
            from astropy.cosmology import WMAP9; cosmo=WMAP9
            Default is Planck15.
      nwalkers:
            Number of walkers to use in the mcmc process; see dan.iel.fm/emcee/current
            for more details.
      nburn:
            Number of burn-in steps to take with the chain.
      nstep:
            Number of actual steps to take in the mcmc chains after the burn-in
      nthreads:
            Number of threads (read: cores) to use during the fitting, default 1.
      mpirun:
            Whether to parallelize using MPI instead of multiprocessing. If True,
            nthreads has no effect, and your script should be run with, eg,
            mpirun -np 16 python lensmodel.py.

      Returns:
      mcmcresult:
            A nested dict containing the chains requested. Will have all the MCMC
            chain results, plus metadata about the run (initial params, data used,
            etc.). Formatting still a work in progress (esp. for modelcal phases).
      chains:
            The raw chain data, for testing.
      blobs:
            Everything else returned by the likelihood function; will have
            magnifications and any modelcal phase offsets at each step; eventually
            will remove this once get everything packaged up for mcmcresult nicely.
      colnames:
            Basically all the keys to the mcmcresult dict; eventually won't need
            to return this once mcmcresult is packaged up nicely.
      """

      if pool: nthreads = 1
      elif mpirun:
            nthreads = 1
            from emcee.utils import MPIPool
            pool = MPIPool(debug=False,loadbalance=True)
            if not pool.is_master():
                  pool.wait()
                  sys.exit(0)
      else: pool = None

      # Making these lists just makes later stuff easier since we now know the dtype
      lens = list(np.array([lens]).flatten())
      source = list(np.array([source]).flatten()) # Ensure source(s) are a list
      data = list(np.array([data]).flatten())     # Same for dataset(s)
      scaleamp = list(np.array([scaleamp]).flatten())
      shiftphase = list(np.array([shiftphase]).flatten())
      modelcal = list(np.array([modelcal]).flatten())
      if len(scaleamp)==1 and len(scaleamp)<len(data): scaleamp *= len(data)
      if len(shiftphase)==1 and len(shiftphase)<len(data): shiftphase *= len(data)
      if len(modelcal)==1 and len(modelcal)<len(data): modelcal *= len(data)
      if sourcedatamap is None: sourcedatamap = [None]*len(data)

      # emcee isn't very flexible in terms of how it gets initialized; start by
      # assembling the user-provided info into a form it likes
      ndim, p0, colnames = 0, [], []
      # Lens(es) first
      for i,ilens in enumerate(lens):
            if ilens.__class__.__name__=='SIELens':
                  for key in ['x','y','M','e','PA']:
                        if not vars(ilens)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(ilens)[key]['value'])
                              colnames.append(key+'L'+str(i))
            elif ilens.__class__.__name__=='ExternalShear':
                  for key in ['shear','shearangle']:
                        if not vars(ilens)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(ilens)[key]['value'])
                              colnames.append(key)
      # Then source(s)
      for i,src in enumerate(source):
            if src.__class__.__name__=='GaussSource':
                  for key in ['xoff','yoff','flux','width']:
                        if not vars(src)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(src)[key]['value'])
                              colnames.append(key+'S'+str(i))
            elif src.__class__.__name__=='SersicSource':
                  for key in ['xoff','yoff','flux','majax','index','axisratio','PA']:
                        if not vars(src)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(src)[key]['value'])
                              colnames.append(key+'S'+str(i))
            elif src.__class__.__name__=='PointSource':
                  for key in ['xoff','yoff','flux']:
                        if not vars(src)[key]['fixed']:
                              ndim += 1
                              p0.append(vars(src)[key]['value'])
                              colnames.append(key+'S'+str(i))
      # Then flux rescaling; only matters if >1 dataset
      for i,t in enumerate(scaleamp[1:]):
            if t:
                  ndim += 1
                  p0.append(1.) # Assume 1.0 scale factor to start
                  colnames.append('ampscale_dset'+str(i+1))
      # Then phase/astrometric shift; each has two vals for a shift in x&y
      for i,t in enumerate(shiftphase[1:]):
            if t:
                  ndim += 2
                  p0.append(0.); p0.append(0.) # Assume zero initial offset
                  colnames.append('astromshift_x_dset'+str(i+1))
                  colnames.append('astromshift_y_dset'+str(i+1))

      # Get any model-cal parameters set up. The process involves some expensive
      # matrix inversions, but these only need to be done once, so we'll do them
      # now and pass the results as arguments to the likelihood function. See docs
      # in calc_likelihood.model_cal for more info.
      for i,dset in enumerate(data):
            if modelcal[i]:
                  uniqant = np.unique(np.asarray([dset.ant1,dset.ant2]).flatten())
                  dPhi_dphi = np.zeros((uniqant.size-1,dset.u.size))
                  for j in range(1,uniqant.size):
                        dPhi_dphi[j-1,:]=(dset.ant1==uniqant[j])-1*(dset.ant2==uniqant[j])
                  C = scipy.sparse.diags((dset.sigma/dset.amp)**-2.,0)
                  F = np.dot(dPhi_dphi,C*dPhi_dphi.T)
                  Finv = np.linalg.inv(F)
                  FdPC = np.dot(-Finv,dPhi_dphi*C)
                  modelcal[i] = [dPhi_dphi,FdPC]


      # Create our lensing grid coordinates now, since those shouldn't be
      # recalculated with every call to the likelihood function
      xmap,ymap,xemit,yemit,indices = GenerateLensingGrid(data,xmax,highresbox,
                                                fieldres,emitres)

      # Calculate the uv coordinates we'll interpolate onto; only need to calculate
      # this once, so do it here.
      kmax = 0.5/((xmap[0,1]-xmap[0,0])*arcsec2rad)
      ug = np.linspace(-kmax,kmax,xmap.shape[0])

      # Calculate some distances; we only need to calculate these once.
      # This assumes multiple sources are all at same z; should be this
      # way anyway or else we'd have to deal with multiple lensing planes
      if cosmo is None: cosmo = Planck15
      Dd = cosmo.angular_diameter_distance(lens[0].z).value
      Ds = cosmo.angular_diameter_distance(source[0].z).value
      Dds= cosmo.angular_diameter_distance_z1z2(lens[0].z,source[0].z).value

      p0 = np.array(p0)
      # Create a ball of starting points for the walkers, gaussian ball of 
      # 10% width; if initial value is 0 (eg, astrometric shift), give a small sigma
      # for angles, generally need more spread than 10% to sample well, do 30% for those cases [~0.5% >180deg for p0=100deg]
      isangle = np.array([0.30 if 'PA' in s or 'angle' in s else 0.1 for s in colnames])
      initials = emcee.utils.sample_ball(p0,np.asarray([isangle[i]*x if x else 0.05 for i,x in enumerate(p0)]),int(nwalkers))

      # All the lens objects know if their parameters have been altered since the last time
      # we calculated the deflections. If all the lens pars are fixed, we only need to do the
      # deflections once. This step ensures that the lens object we create the sampler with
      # has these initial deflections.
      for i,ilens in enumerate(lens):
            if ilens.__class__.__name__ == 'SIELens': ilens.deflect(xemit,yemit,Dd,Ds,Dds)
            elif ilens.__class__.__name__ == 'ExternalShear': ilens.deflect(xemit,yemit,lens[0])

      # Create the sampler object; uses calc_likelihood function defined elsewhere
      lenssampler = emcee.EnsembleSampler(nwalkers,ndim,calc_vis_lnlike,
            args = [data,lens,source,Dd,Ds,Dds,ug,
                    xmap,ymap,xemit,yemit,indices,
                    sourcedatamap,scaleamp,shiftphase,modelcal],
            threads=nthreads,pool=pool)

      
      # Run burn-in phase
      print("Running burn-in... ")
      #pos,prob,rstate,mus = lenssampler.run_mcmc(initials,nburn,storechain=False)
      for i,result in enumerate(lenssampler.sample(initials,iterations=nburn,storechain=False)):
            if i%20==0: print('Burn-in step ',i,'/',nburn)
            pos,prob,rstate,blob = result
      
      
      lenssampler.reset()
      
      # Run actual chains
      print("Done. Running chains... ")
      for i,result in enumerate(lenssampler.sample(pos,rstate0=rstate,iterations=nstep,storechain=True)):
            if i%20==0: print('Chain step ',i,'/',nstep)
      
      #lenssampler.run_mcmc(pos,nstep,rstate0=rstate)
      if mpirun: pool.close()
      print("Mean acceptance fraction: ",np.mean(lenssampler.acceptance_fraction))

      #return lenssampler.flatchain,lenssampler.blobs,colnames
      
      # Package up the magnifications and modelcal phases; disregards nan points (where
      # we failed the prior, usu. because a periodic angle wrapped).
      blobs = lenssampler.blobs
      mus = np.asarray([[a[0] for a in l] for l in blobs]).flatten(order='F')
      bad = np.where(np.asarray([np.any(np.isnan(m)) for m in mus],dtype=bool))[0]
      for k in bad: mus[k] = np.array([np.nan]*len(source))
      mus = np.asarray(list(mus),dtype=float).reshape((-1,len(source)),order='F') # stupid-ass hack
      bad = np.isnan(mus)[:,0]
      #bad = bad.reshape((-1,len(source)),order='F')[:,0]
      #mus = np.atleast_2d(np.asarray([mus[i] if not bad[i] else [np.nan]*len(source) for i in range(mus.size)])).T
      colnames.extend(['mu{0:.0f}'.format(i) for i in range(len(source))])

      
      # Assemble the output. Want to return something that contains both the MCMC chains
      # themselves, but also metadata about the run.
      mcmcresult = {}

      # keep track of git revision, for reproducibility's sake
      # if run under mpi, this will spew some scaremongering warning text,
      # but it's fine. use --mca mpi_warn_on_fork 0 in the mpirun statement to disable
      try:
            import subprocess
            gitd = os.path.abspath(os.path.join(os.path.dirname(__file__),os.pardir))
            mcmcresult['githash'] = subprocess.check_output('git --git-dir={0:s} --work-tree={1:s} '\
                  'rev-parse HEAD'.format(gitd+'/.git',gitd),shell=True).decode().rstrip()
      except Exception:
            mcmcresult['githash'] = 'No repo found'
      
      
      mcmcresult['datasets'] = [dset.filename for dset in data] # Data files used

      mcmcresult['lens_p0'] = lens      # Initial params for lens,src(s),shear; also tells if fixed, priors, etc.
      mcmcresult['source_p0'] = source
      
      if sourcedatamap: mcmcresult['sourcedatamap'] = sourcedatamap
      mcmcresult['xmax'] = xmax
      mcmcresult['highresbox'] = highresbox
      mcmcresult['fieldres'] = fieldres
      mcmcresult['emitres'] = emitres
      if any(scaleamp): mcmcresult['scaleamp'] = scaleamp
      if any(shiftphase): mcmcresult['shiftphase'] = shiftphase

      mcmcresult['chains'] = np.core.records.fromarrays(np.hstack((lenssampler.flatchain[~bad],mus[~bad])).T,names=colnames)
      mcmcresult['lnlike'] = lenssampler.flatlnprobability[~bad]
      
      # Keep track of best-fit params, derived from chains.
      c = copy.deepcopy(mcmcresult['chains'])
      mcmcresult['best-fit'] = {}
      pbest = []
      # Calculate the best fit values as medians of each param
      lens,source = copy.deepcopy(mcmcresult['lens_p0']), copy.deepcopy(mcmcresult['source_p0'])
      for i,ilens in enumerate(lens):
            if ilens.__class__.__name__ == 'SIELens':
                  ilens.__dict__['_altered'] = True
                  for key in ['x','y','M','e','PA']:
                        if not vars(ilens)[key]['fixed']:
                              ilens.__dict__[key]['value'] = np.median(c[key+'L'+str(i)])
                              pbest.append(np.median(c[key+'L'+str(i)]))
            elif ilens.__class__.__name__ == 'ExternalShear':
                  for key in ['shear','shearangle']:
                        if not vars(ilens)[key]['fixed']:
                              ilens.__dict__[key]['value'] = np.median(c[key])
                              pbest.append(np.median(c[key]))
      
      mcmcresult['best-fit']['lens'] = lens

      # now do the source(s)
      for i,src in enumerate(source): # Source is a list of source objects
            if src.__class__.__name__ == 'GaussSource':
                  for key in ['xoff','yoff','flux','width']:
                        if not vars(src)[key]['fixed']:
                              src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)])
                              pbest.append(np.median(c[key+'S'+str(i)]))
            elif src.__class__.__name__ == 'SersicSource':
                  for key in ['xoff','yoff','flux','majax','index','axisratio','PA']:
                        if not vars(src)[key]['fixed']:
                              src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)])
                              pbest.append(np.median(c[key+'S'+str(i)]))
            elif src.__class__.__name__ == 'PointSource':
                  for key in ['xoff','yoff','flux']:
                        if not vars(src)[key]['fixed']:
                              src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)])
                              pbest.append(np.median(c[key+'S'+str(i)]))

      mcmcresult['best-fit']['source'] = source
      mcmcresult['best-fit']['magnification'] = np.median(mus[~bad],axis=0)

      # Any amplitude scaling or astrometric shifts
      bfscaleamp = np.ones(len(data))
      if 'scaleamp' in mcmcresult.keys():
            for i,t in enumerate(mcmcresult['scaleamp']): # only matters if >1 datasets
                  if i==0: pass
                  elif t: 
                        bfscaleamp[i] = np.median(c['ampscale_dset'+str(i)])
                        pbest.append(np.median(c['ampscale_dset'+str(i)]))
                  else: pass
      mcmcresult['best-fit']['scaleamp'] = bfscaleamp
      
      bfshiftphase = np.zeros((len(data),2))
      if 'shiftphase' in mcmcresult.keys():
            for i,t in enumerate(mcmcresult['shiftphase']):
                  if i==0: pass # only matters if >1 datasets
                  elif t:
                        bfshiftphase[i][0] = np.median(c['astromshift_x_dset'+str(i)])
                        bfshiftphase[i][1] = np.median(c['astromshift_y_dset'+str(i)])
                        pbest.append(np.median(c['astromshift_x_dset'+str(i)]))
                        pbest.append(np.median(c['astromshift_y_dset'+str(i)]))
                  else: pass # no shifting
      mcmcresult['best-fit']['shiftphase'] = bfshiftphase
      
      mcmcresult['best-fit']['lnlike'] = calc_vis_lnlike(pbest,data,mcmcresult['best-fit']['lens'],
            mcmcresult['best-fit']['source'],
            Dd,Ds,Dds,ug,xmap,ymap,xemit,yemit,indices,
            sourcedatamap,scaleamp,shiftphase,modelcal)[0]
      
      # Calculate the deviance information criterion, using the Spiegelhalter+02 definition (cf Gelman+04)
      mcmcresult['best-fit']['DIC'] = -4*np.mean(mcmcresult['lnlike']) + 2*mcmcresult['best-fit']['lnlike']
      
      # If we did any modelcal stuff, keep the antenna phase offsets here
      if any(modelcal): 
            mcmcresult['modelcal'] = [True if j else False for j in modelcal]
            dp = np.squeeze(np.asarray([[a[1] for a in l if ~np.any(np.isnan(a[0]))] for l in blobs]))
            a = [x for l in dp for x in l] # Have to dick around with this if we had any nan's
            dphases = np.squeeze(np.reshape(a,(nwalkers*nstep-bad.sum(),len(data),-1),order='F'))
            if len(data) > 1: 
                  for i in range(len(data)):
                        if modelcal[i]: mcmcresult['calphases_dset'+str(i)] = np.vstack(dphases[:,i])
            else: 
                  if any(modelcal): mcmcresult['calphases_dset0'] = dphases
      
      return mcmcresult
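The DIC line near the end uses the Spiegelhalter et al. (2002) decomposition: with deviance D = -2 ln L, DIC = Dbar + pD where pD = Dbar - D(theta_hat), which collapses to -4<ln L> + 2 ln L(theta_hat) exactly as coded (theta_hat is whatever point estimate one adopts; the function above uses the per-parameter medians). A tiny numerical check of that algebra on synthetic ln-likelihoods:

import numpy as np

lnlike_chain = np.random.uniform(-12.0, -10.0, size=5000)  # stand-in chain ln L values
lnlike_best = lnlike_chain.max()         # stand-in for ln L at the point estimate

Dbar = -2.0 * lnlike_chain.mean()        # mean deviance over the chain
Dhat = -2.0 * lnlike_best                # deviance at the point estimate
pD = Dbar - Dhat                         # effective number of parameters
dic = Dbar + pD

assert np.isclose(dic, -4.0 * lnlike_chain.mean() + 2.0 * lnlike_best)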
Example #51
def main(nw=1000,th=9,bi=500,fr=2000,thin=20,runmpi=True,local=False,
    dil=None,codedir='/Users/tom/Projects/doug_hz/code',
         ldfileloc='/Users/tom/Projects/doug_hz/code/'):
    if runmpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool=None

    #if not local:
        #sys.path.append('/u/tsbarcl2/svn_code/tom_code/')
        #ldfileloc = '/u/tsbarcl2/svn_code/tom_code/'
    #elif local:
        #sys.path.append('/Users/tom/svn_code/tom_code/')
        #ldfileloc = '/Users/tom/svn_code/tom_code/'


    if dil is None:
        dil = 0.0

    files = os.listdir('.')
    dat_d = get_data(files)

    rho_prior = True
    ldp_prior = False

    #mcmc params
    nwalkers = nw
    threads = th
    burnin = bi
    fullrun = fr

    #use quadratic or 4 parameter limb darkening
    n_ldparams = 2

    #lc time offset from BJD-2454833.
    toffset = (54832.5 + 67.)

    #photometric zeropoint
    zpt_0 = 1.E-10

    #plot?
    #doplot=False

    ################

    M = tmod.transitemcee_fitldp(dat_d['nplanets'],dat_d['cadence'],
        ldfileloc=ldfileloc)

    #M.get_stellar(dat_d['teff'],dat_d['logg'],dat_d['feh'],n_ldparams)

    M.get_stellar(dat_d['teff'],
        dat_d['logg'],
        dat_d['feh'],
        n_ldparams,ldp_prior=ldp_prior)

    M.already_open(dat_d['time'],
        dat_d['flux'],dat_d['err'],
        timeoffset=toffset,normalize=False)

    rho_vals = np.array([dat_d['rho'],dat_d['rho_unc']])
    M.get_rho(rho_vals,rho_prior)
    M.get_zpt(zpt_0)

    if dil is not None:
        M.get_sol(*dat_d['sol_guess'],dil=dil)
    else:
        M.get_sol(*dat_d['sol_guess'])

    M.cut_non_transit(8)

    ################
    stophere = False
    if not stophere:

    #for threadnum in np.arange(2,32,2):
        p0 = M.get_guess(nwalkers)
        l_var = np.shape(p0)[1]

        N = len([indval for indval in range(fullrun)
                if indval%thin == 0])
        outfile = 'koi{0}_np{1}_prior{2}_dil{3}.hdf5'.format(
            dat_d['koi'],dat_d['nplanets'],rho_prior,dil)
        with h5py.File(outfile, u"w") as f:
            f.create_dataset("time", data=M.time)
            f.create_dataset("flux", data=M.flux)
            f.create_dataset("err", data=M.err)
            f.create_dataset("itime", data=M._itime)
            f.create_dataset("ntt", data = M._ntt)
            f.create_dataset("tobs", data = M._tobs)
            f.create_dataset("omc",data = M._omc)
            f.create_dataset("datatype",data = M._datatype)
            f.attrs["rho_0"] = M.rho_0
            f.attrs["rho_0_unc"] = M.rho_0_unc
            f.attrs["nplanets"] = M.nplanets
            f.attrs["ld1"] = M.ld1
            f.attrs["ld2"] = M.ld2
            f.attrs["koi"] = dat_d['koi']
            f.attrs["dil"] = dil
            g = f.create_group("mcmc")
            g.attrs["nwalkers"] = nwalkers
            g.attrs["burnin"] = burnin
            g.attrs["iterations"] = fullrun
            g.attrs["thin"] = thin
            g.attrs["rho_prior"] = rho_prior
            g.attrs["ldp_prior"] = ldp_prior
            g.attrs["onlytransits"] = M.onlytransits
            g.attrs["tregion"] = M.tregion
            g.attrs["ldfileloc"] = M.ldfileloc
            g.attrs["n_ldparams"] = M.n_ldparams
            g.create_dataset("fixed_sol", data= M.fixed_sol)
            g.create_dataset("fit_sol_0", data= M.fit_sol_0)


            c_ds = g.create_dataset("chain", 
                (nwalkers, N, l_var),
                dtype=np.float64)
            lp_ds = g.create_dataset("lnprob", 
                (nwalkers, N),
                dtype=np.float64)

        #args = [M.nplanets,M.rho_0,M.rho_0_unc,M.rho_prior,
        #    M.Teff,M.Teff_unc,M.logg,M.logg_unc,M.FeH,M.FeH_unc,    
        #    M.flux,M.err,M.fixed_sol,M.time,M._itime,M._ntt,
        #    M._tobs,M._omc,M._datatype,M.n_ldparams,M.ldfileloc,
        #    M.onlytransits,M.tregion]

        args = [M.nplanets,M.rho_0,M.rho_0_unc,M.rho_prior,
            M.ld1,M.ld1_unc,M.ld2,M.ld2_unc,M.ldp_prior,
            M.flux,M.err,M.fixed_sol,M.time,M._itime,M._ntt,
            M._tobs,M._omc,M._datatype,M.n_ldparams,M.ldfileloc,
            M.onlytransits,M.tregion]

        tom = tmod.logchi2_fitldp
        if runmpi:
            sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, 
                args=args,pool=pool)
        else:
            sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, 
                args=args,threads=th)

        time1 = thetime.time()
        p2, prob, state = sampler.run_mcmc(p0, burnin,
            storechain=False)
        sampler.reset()
        with h5py.File(outfile, u"a") as f:
            g = f["mcmc"]
            g.create_dataset("burnin_pos", data=p2)
            g.create_dataset("burnin_prob", data=prob)


        time2 = thetime.time()
        print('burn-in took ' + str((time2 - time1)/60.) + ' min')
        time1 = thetime.time()
        for i, (pos, lnprob, state) in enumerate(sampler.sample(p2, 
            iterations=fullrun, rstate0=state,
            storechain=False)):

            #do the thinning in the loop here
            if i % thin == 0:
                ind = i // thin
                with h5py.File(outfile, u"a") as f:
                    g = f["mcmc"]
                    c_ds = g["chain"]
                    lp_ds = g["lnprob"]
                    c_ds[:, ind, :] = pos
                    lp_ds[:, ind] = lnprob

        time2 = thetime.time()
        print('MCMC run took ' + str((time2 - time1)/60.) + ' min')
        print('')
        print("Mean acceptance: "
            + str(np.mean(sampler.acceptance_fraction)))
        print('')

        #try:
        #    print("Autocorrelation time:", sampler.acor)
        #    print("Autocorrelation times sampled:", 
        #        fullrun / sampler.acor)
        #except RuntimeError:
        #    print("No Autocorrelation")

        if runmpi:
            pool.close()
        # if doplot:
        #     plt.ioff()
        #     import triangle
        #     labels=[r"rho", r"zpt"]
        #     for ij in xrange(dat_d['nplanets']):
        #         labels = np.r_[labels,[r"T0",
        #             r"per",r"b", r"rprs", r"ecosw",r"esinw"]]
        #     figure = triangle.corner(sampler.flatchain, labels=labels)
        #     figure.savefig("data.png")

        #savefile = 'koi%s_np%s_prior%s.dat' %(dat_d['koi'],
        #    dat_d['nplanets'],rho_prior)
        #savefile2 = 'koi%s_np%s_prior%s_prob.dat' %(dat_d['koi'],
        #    dat_d['nplanets'],rho_prior)
        #np.savetxt(savefile,sampler.flatchain)
        #np.savetxt(savefile2,sampler.flatlnprobability)
        return sampler
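One small reading aid for this example and #49: the list comprehension that sizes the chain datasets, len([i for i in range(fullrun) if i % thin == 0]), is just the ceiling of fullrun/thin, so an arithmetic form is equivalent and avoids building a throwaway list (a sketch, not from the source):

fullrun, thin = 2000, 20
N_listcomp = len([i for i in range(fullrun) if i % thin == 0])
N_ceil = (fullrun + thin - 1) // thin   # ceil(fullrun / thin) in pure integers
assert N_listcomp == N_ceil == 100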
Example #52
def mcmc(zmin , zmax , iteration):     

    ndim, nwalkers = 16, 50
    bounds = [(-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2),
              (1.5, 3.0), (0.7, 2.0), (0.3, 1.0),
              (-20.0, -1.0), (-20.0, -2.0), (-20.0, -2.0),
              (0, 1), (0.0, 4.0), (0, 2.0), (0, 1),
              (-7.2, 10.0), (-7.2, 10.0), (-7.2, 10.0)]
 
    p0 = np.array([0.0, 0.0, 0.0, 2.0, 1.0, 0.5 , np.log(0.1), np.log(0.1),np.log(0.1),
                   0.7,1.5,1.0,0.4,np.log(2.0),np.log(2.0),np.log(2.0)])

    p0 = [p0 + 1e-5 * np.random.randn(ndim) for k in range(nwalkers)]
    
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Set up the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob , pool=pool)
    #sampler.sample(p0, 1000)
    #print sampler.chain.shape
    it = 0
    for result in sampler.sample(p0, iterations = 1000):
        print(it)
        it += 1
    # Run a burn-in chain and save the final location.
    #pos, _, _, _ = sampler.run_mcmc(p0, 3000)
    #pos, _, _, _ = sampler.sample(p0, 3000)

    from matplotlib.ticker import MaxNLocator
    
    sample = sampler.chain
    npars = sample.shape[2]
    fig , axes = plt.subplots(npars , 1 , sharex=True, figsize=(10, 12))

    for i in range(npars):
        axes[i].plot(sample[:, :, i].T, color="b", alpha=.4 , lw = .5)
        axes[i].yaxis.set_major_locator(MaxNLocator(5))
        axes[i].set_ylim([bounds[i][0], bounds[i][1]])
        axes[i].set_xlim(0, 5000)
        #axes[i].set_ylabel(labels[i], fontsize=25)
    axes[-1].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig_file = "/home/vakili/public_html/files/redsequence_all_temp/"+str(zmin)+"_z_"+str(zmax)+"burn_iter"+str(iteration)+".png"
    plt.savefig(fig_file)
    plt.close()

    # Run the production chain.
    #sampler.reset()

    #sampler.run_mcmc(pos, 1000)
    """
    import corner
    labels = ["$m$", "$b$", "\ln f", "$Q$", "$M$", "$\ln V$"]
    #truths = true_params + [true_frac, true_outliers[0], np.log(true_outliers[1])]
    bounds = [(-0.2, 0.2), (0.7,2.0), (-20.0, -2.0), (0, 1), (0.0, 2.0), (-7.2, 5.2)]
    #corner.corner(sampler.flatchain, bins=35, range=bounds, labels=labels)
 
    #plt.savefig("/home/vakili/public_html/files/mcmc.png")
    #plt.close()
    """
    sample = sampler.chain

    pool.close()

    npars = sample.shape[2]
    fig , axes = plt.subplots(npars , 1 , sharex=True, figsize=(10, 12))


    for i in range(npars):
        axes[i].plot(sample[:, :, i].T, color="b", alpha=.4 , lw = .5)
        axes[i].yaxis.set_major_locator(MaxNLocator(5))
        axes[i].set_ylim([bounds[i][0], bounds[i][1]])
        axes[i].set_xlim(0, 1500)
        #axes[i].set_ylabel(labels[i], fontsize=25)
    axes[-1].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig_file = "/home/vakili/public_html/files/redsequence_all_temp/"+str(zmin)+"_z_"+str(zmax)+"chain_iter"+str(iteration)+".png"
    plt.savefig(fig_file)
    plt.close()
        
    """
    est = np.median(sampler.flatchain , axis = 0)
    est[2] = np.median(np.exp(sampler.flatchain)**.5 , axis = 0)[2] 

    est_err = np.std(sampler.flatchain , axis = 0)
    est_err[2] = np.std(np.exp(sampler.flatchain)**.5 , axis = 0)[2]

    xx = np.linspace(14.5 , 25.5 , 1000)
    pred = est[1] + est[0]*(xx - 19)
    """
    return None
    """
    norm = 0.0
    post_prob = np.zeros(len(x))
    for i in range(sampler.chain.shape[1]):
        for j in range(sampler.chain.shape[0]):
	        ll_fg, ll_bg = sampler.blobs[i][j]
		post_prob += np.exp(ll_fg - np.logaddexp(ll_fg, ll_bg))
	        norm += 1
    post_prob /= norm

    print post_prob
    """
    """
Example #53
def mcmc(tag=None,
         zbin=1,
         nwalkers=48,
         Nchains=4,
         minlength=600,
         likelihood='pseudo'):
    '''
    
    Parameters
    ---------- 

    Nchains : int 
        Number of independent chains to run for the gelman rubin convergence test
    
    '''
    if tag is None:
        raise ValueError("specify a tag, otherwise it's confusing")
    temperature = 2.e-3  # temperature

    # read in BOSS P(k) NGC
    pkay = Dat.Pk()
    k0, p0k_ngc = pkay.Observation(0, zbin, 'ngc')
    k2, p2k_ngc = pkay.Observation(2, zbin, 'ngc')
    k4, p4k_ngc = pkay.Observation(4, zbin, 'ngc')
    pk_ngc_list = [p0k_ngc, p2k_ngc, p4k_ngc]
    k_list = [k0, k2, k4]
    # read in BOSS P(k) SGC
    k0, p0k_sgc = pkay.Observation(0, zbin, 'sgc')
    k2, p2k_sgc = pkay.Observation(2, zbin, 'sgc')
    k4, p4k_sgc = pkay.Observation(4, zbin, 'sgc')
    pk_sgc_list = [p0k_sgc, p2k_sgc, p4k_sgc]

    if likelihood == 'pseudo':  # standard pseudo Gaussian likelihood
        # read in Covariance matrix
        # currently for testing purposes,
        # implemented to read in Florian's covariance matrix
        _, _, C_pk_ngc = Dat.beutlerCov(zbin, NorS='ngc', ell='all')
        _, _, C_pk_sgc = Dat.beutlerCov(zbin, NorS='sgc', ell='all')

        # calculate precision matrices (including the hartlap factor)
        Cinv_ngc = np.linalg.inv(C_pk_ngc)
        Cinv_sgc = np.linalg.inv(C_pk_sgc)
        # hartlap factor
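        # (the factor is f = (N_mocks - N_bins - 2) / (N_mocks - 1), the
        # Hartlap et al. 2007 correction that debiases the inverse of a
        # covariance matrix estimated from a finite number of mock catalogs)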
        n_mocks_ngc = 2045
        n_mocks_sgc = 2048
        f_hartlap_ngc = (float(n_mocks_ngc) - float(
            len(np.concatenate(pk_ngc_list))) - 2.) / (float(n_mocks_ngc) - 1.)
        f_hartlap_sgc = (float(n_mocks_sgc) - float(
            len(np.concatenate(pk_sgc_list))) - 2.) / (float(n_mocks_sgc) - 1.)
        Cinv_ngc *= f_hartlap_ngc
        Cinv_sgc *= f_hartlap_sgc

        # ln Posterior function
        lnPost = lnPost_pseudo
        # args for ln Posterior function
        # data ks, BOSS NGC P_l(k), BOSS SGC P_l(k), NGC precision matrix, SGC precision matrix
        lnpost_args = (k_list, pk_ngc_list, pk_sgc_list, Cinv_ngc, Cinv_sgc)
    elif likelihood in ['pca', 'ica']:
        # read in patchy mock P(k)s for ngc and sgc
        pk_ngc_list, pk_sgc_list = [], []
        for ell in [0, 2, 4]:
            if ell == 4: kmax = 0.1
            else: kmax = 0.15
            pk_ngc_list.append(
                NG.X_pk('patchy.z' + str(zbin),
                        krange=[0.01, kmax],
                        ell=ell,
                        NorS='ngc',
                        sys='fc'))
            pk_sgc_list.append(
                NG.X_pk('patchy.z' + str(zbin),
                        krange=[0.01, kmax],
                        ell=ell,
                        NorS='sgc',
                        sys='fc'))
        pk_ngc_mock = np.concatenate(pk_ngc_list, axis=1)
        pk_sgc_mock = np.concatenate(pk_sgc_list, axis=1)
    else:
        raise NotImplementedError

    if zbin == 1:  # 0.2 < z < 0.5
        # maximum likelihood value
        start = np.array([
            1.008, 1.001, 0.478, 1.339, 1.337, 1.16, 0.32, -1580., -930., 6.1,
            6.8
        ])
    else:
        raise NotImplementedError("starting values are only defined for zbin=1")
    ndim = len(start)

    # initialize MPI pool
    try:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    except ValueError:
        pool = None

    print("initializing ", Nchains, " independent emcee chains")
    pos, samplers = [], []
    for ichain in range(Nchains):
        pos.append([
            start + temperature * start *
            (2. * np.random.random_sample(ndim) - 1.) for i in range(nwalkers)
        ])
        samplers.append(
            emcee.EnsembleSampler(nwalkers,
                                  ndim,
                                  lnPost,
                                  args=lnpost_args,
                                  pool=pool))

    # Start MCMC
    print("Running MCMC...")
    withinchainvar = np.zeros((Nchains, ndim))
    meanchain = np.zeros((Nchains, ndim))
    scalereduction = np.repeat(2., ndim)

    # bunch of numbers for the mcmc run
    itercounter = 0
    chainstep = minlength
    loop = 1
    epsilon = 0.02  #0.02
    ichaincheck = 100
    rstate = np.random.get_state()

    while loop:
        itercounter += chainstep
        print("chain length =", itercounter)

        for jj in range(Nchains):
            for result in samplers[jj].sample(pos[jj],
                                              iterations=chainstep,
                                              rstate0=rstate,
                                              storechain=True):
                pos[jj] = result[0]
                chainchi2 = -2. * result[1]
                rstate = result[2]

                # append chain outputs to chain file
                chain_file = ''.join([
                    UT.dat_dir(), 'mcmc/', tag, '.chain',
                    str(jj), '.zbin',
                    str(zbin), '.dat'
                ])
                f = open(chain_file, 'a')
                for k in range(pos[jj].shape[0]):
                    output_str = '\t'.join(
                        pos[jj][k].astype('str')) + '\t' + str(
                            chainchi2[k]) + '\n'
                    f.write(output_str)
                f.close()

            # we do the convergence test on the second half of the current chain (itercounter//2)
            chainsamples = samplers[jj].chain[:, itercounter // 2:, :].reshape(
                (-1, ndim))
            withinchainvar[jj] = np.var(chainsamples, axis=0)
            meanchain[jj] = np.mean(chainsamples, axis=0)

        scalereduction = gelman_rubin_convergence(withinchainvar, meanchain,
                                                  itercounter // 2, Nchains,
                                                  ndim)
        print("scalereduction = ", scalereduction)

        loop = 0
        for jj in range(ndim):
            if np.abs(1 - scalereduction[jj]) > epsilon:
                loop = 1

        chainstep = ichaincheck

    if pool is not None:
        pool.close()
    return None
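gelman_rubin_convergence() is imported from elsewhere and not shown. Under the standard Gelman & Rubin (1992) definition it combines the within-chain variance W and the between-chain variance B into a potential scale reduction factor per parameter; a self-contained sketch under that assumption follows. The name and signature here are mine and differ from the function above, which takes precomputed per-chain variances and means.

import numpy as np

def gelman_rubin(chains):
    # chains: array of shape (m_chains, n_samples, ndim)
    m, n, ndim = chains.shape
    means = chains.mean(axis=1)                     # per-chain parameter means
    W = chains.var(axis=1, ddof=1).mean(axis=0)     # mean within-chain variance
    B = n * means.var(axis=0, ddof=1)               # between-chain variance
    var_hat = (n - 1.0) / n * W + B / n             # pooled variance estimate
    return np.sqrt(var_hat / W)                     # R-hat, -> 1 at convergence

demo = np.random.randn(4, 600, 3)   # four well-mixed chains
print(gelman_rubin(demo))           # values close to 1.0

The while-loop above keeps extending every chain until |1 - R-hat| < epsilon holds for all parameters.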
Example #54
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning(
            'No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'
            .format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        dbf = generate_parameters(phase_models, datasets, refdata,
                                  excess_model)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError(
                'Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'
                .format(tracefile))
        if os.path.exists(probfile):
            raise OSError(
                'Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'
                .format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'MPIPool':
            # check that cores is not an input setting
            if mcmc_settings.get('cores') is not None:
                logging.warning("MPI does not take the cores input setting.")
            from emcee.utils import MPIPool
            # code recommended by emcee: if not master, wait for instructions then exit
            client = MPIPool()
            if not client.is_master():
                logging.debug(
                    'MPIPool is not master. Waiting for instructions...')
                client.wait()
                sys.exit(0)
            logging.info("Using MPIPool on {} MPI ranks".format(client.size))
        elif mcmc_settings['scheduler'] == 'dask':
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            scheduler = LocalCluster(n_workers=cores,
                                     threads_per_worker=1,
                                     processes=True)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig,
                       level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" %
                         (scheduler, sum(client.ncores().values())))
            try:
                logging.info(
                    "bokeh server for dask scheduler at localhost:{}".format(
                        client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'emcee':
            from emcee.interruptible_pool import InterruptiblePool
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if cores > multiprocessing.cpu_count():
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            client = InterruptiblePool(processes=cores)
            logging.info("Using multiprocessing on {} cores".format(cores))
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info(
                "Not using a parallel scheduler. ESPEI is running MCMC on a single core."
            )
        else:
            # guard against scheduler values not handled above, which would
            # otherwise surface later as a confusing NameError on `client`
            raise ValueError(
                "Unknown scheduler '{}'; expected 'MPIPool', 'dask', 'emcee', or 'None'."
                .format(mcmc_settings['scheduler']))

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart chain if needed
        if mcmc_settings.get('restart_chain'):
            restart_chain = np.load(mcmc_settings.get('restart_chain'))
        else:
            restart_chain = None

        # load the remaining MCMC fitting parameters
        mcmc_steps = mcmc_settings.get('mcmc_steps')
        save_interval = mcmc_settings.get('mcmc_save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(
            dbf,
            datasets,
            scheduler=client,
            mcmc_steps=mcmc_steps,
            chains_per_parameter=chains_per_parameter,
            chain_std_deviation=chain_std_deviation,
            save_interval=save_interval,
            tracefile=tracefile,
            probfile=probfile,
            restart_chain=restart_chain,
            deterministic=deterministic,
        )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf
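
For reference, a minimal sketch of the settings structures this driver reads. The key names come straight from the lookups above; the concrete values are illustrative assumptions, not documented defaults:

system_settings = {'phase_models': 'phase_models.json'}
output_settings = {
    'output_db': 'out.tdb',    # written after parameter generation and after MCMC
    'tracefile': 'trace.npy',  # must not already exist
    'probfile': 'lnprob.npy',  # must not already exist
    'verbosity': 0,
}
mcmc_settings = {
    'scheduler': 'dask',       # one of 'MPIPool', 'dask', 'emcee', 'None'
    'cores': 4,                # ignored by the MPIPool scheduler
    'input_db': 'in.tdb',
    'mcmc_steps': 1000,
    'mcmc_save_interval': 100,
    'chains_per_parameter': 2,
    'chain_std_deviation': 0.1,
    'deterministic': True,
}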
Example #55
0
File: Nemcee.py Project: Samreay/abc
import sys
import pickle

import numpy
import emcee
from emcee.utils import MPIPool

# simulateData, lnprob, inputs, and uncertainties are assumed to be defined
# elsewhere in this project; they are not standard-library names.

def runModel():

    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    # pool=None

    observation = simulateData()
    nTrans = len(observation["spectype"])

    ndim, nwalkers = 8 + nTrans, 1000

    # mns = numpy.concatenate(([inputs.Om0, inputs.w0, inputs.rate_II_r, inputs.logL_snIa, inputs.sigma_snIa, \
    #             inputs.logL_snII,inputs.sigma_snII,inputs.Z], -.35*numpy.zeros(nTrans)))
    sigs = numpy.concatenate(
        (
            [
                0.1,
                0.2,
                0.1,
                uncertainties.logL_snIa,
                uncertainties.sigma_snIa,
                uncertainties.logL_snII,
                uncertainties.sigma_snII,
                uncertainties.Z,
            ],
            0.05 + numpy.zeros(nTrans),
        )
    )

    p0 = []
    for i in range(nwalkers):
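        # Coin-flip initialization: dum is 0 or 1 per transient, so lnL_init
        # starts each per-transient parameter at either log(0.5) or log(1) = 0.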
        dum = numpy.random.rand(nTrans)
        dum = numpy.array(numpy.round(dum), dtype="int")
        lnL_init = dum + (1 - dum) * 0.5
        lnL_init = numpy.log(lnL_init)

        mns = numpy.concatenate(
            (
                [
                    inputs.Om0,
                    inputs.w0,
                    inputs.rate_II_r,
                    inputs.logL_snIa,
                    inputs.sigma_snIa,
                    inputs.logL_snII,
                    inputs.sigma_snII,
                    inputs.Z,
                ],
                lnL_init,
            )
        )

        p0.append((numpy.random.rand(ndim) - 0.5) * sigs + mns)

    # p0 = [numpy.random.randn(ndim)*sigs + mns for i in range(nwalkers)]

    dco = 1e-11  # measurement error very small

    sampler = emcee.EnsembleSampler(
        nwalkers,
        ndim,
        lnprob,
        args=[
            observation["counts"],
            observation["specz"],
            numpy.zeros(nTrans) + dco,
            observation["zprob"],
            observation["spectype"],
        ],
        pool=pool,
    )
    sampler.run_mcmc(p0, 2000)
    pool.close()

    with open("data.pkl", "wb") as output:
        pickle.dump(sampler.chain, output)
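
To inspect the results later, a minimal sketch of loading the chain pickled above; the file name and array shape follow from the code, while the 500-step burn-in cut is an illustrative assumption:

import pickle

with open("data.pkl", "rb") as fp:
    chain = pickle.load(fp)  # shape: (nwalkers, nsteps, ndim) = (1000, 2000, 8 + nTrans)

# drop an assumed burn-in, then flatten walkers and steps together
samples = chain[:, 500:, :].reshape(-1, chain.shape[-1])
print(samples.shape)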
Example #56
0
      8.055,  4.645,  8.221,  0.655,  0.792,  0.911,  0.843, 18.924])
 
    ol_swig = fit_group.lnprob_one_group(beta_pic_group, star_params, use_swig=True, return_overlaps=True)
    ol_old  = fit_group.lnprob_one_group(beta_pic_group, star_params, use_swig=False, return_overlaps=True)

    using_mpi = True
    try:
        # Initialize the MPI-based pool used for parallelization.
        pool = MPIPool()
    except Exception:
        print("Either MPI doesn't seem to be installed or you aren't running with MPI...")
        using_mpi = False
        pool = None
    
    if using_mpi:
        if not pool.is_master():
            # Wait for instructions from the master process.
            pool.wait()
            sys.exit(0)
    else:
        print("Not using MPI. This code does support it - call it with e.g. mpirun -np 16 python test_betapic_TGAS.py")

    sampler = fit_group.fit_one_group(star_params, init_mod=beta_pic_group, \
                                      nwalkers=30, nchain=10000, nburn=1000, return_sampler=True, pool=pool, \
                                      init_sdev = np.array([1,1,1,1,1,1,1,1,1,.01,.01,.01,.1,.1]), background_density=1e-6, use_swig=True, \
                                      plotit=False)
    
    if using_mpi:
        # Close the processes.
        pool.close()
Example #57
0
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool
	try:
		pool = MPIPool()
	except Exception:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Use this model for the covariance matrix (from the new set of 50 N body simulations)
	covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
	#Load in the covariance matrix
	fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#Return
	return fiducial_feature_ensemble,analysis
Example #58
0
        ### skips if files do not exist for some reason
        print('Warning: file does not exist, cosmo, snap')
        return
    if os.path.isfile(out_fn):  ###### in case the code breaks
        return

    ######### read rockstar files
    print('Opening rockstar files:', rockstar_fn)
    reader = sim_manager.TabularAsciiReader(rockstar_fn, columns_to_keep_dict)
    rock_arr = reader.read_ascii()
    logM = log10(rock_arr['halo_mvir'])
    rock_arr = 0  ## release memory
    hmf = histogram(logM, bins=hist_bins)[0]
    save(out_fn, hmf)


all_snaps = []
for i in range(len(cosmo_arr)):
    for isnap in arange(30, nsnaps_arr[i]):
        all_snaps.append([cosmo_arr[i], int(isnap)])

pool = MPIPool()
if not pool.is_master():
    pool.wait()
    sys.exit(0)

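# Only the master rank reaches this point; map() farms one
# (cosmology, snapshot) task from all_snaps out to each worker.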
pool.map(Phm_gen, all_snaps)
#pool.map(Phh_gen, all_snaps)
#pool.map(hmf_gen, all_snaps)
pool.close()
Example #59
0
File: mcmc.py Project: psharda/gidget
def run(N):
    fn = chainDirRel + '.pickle'
    nwalkers = 500
    ndim = 9  # 15
    #eta, epsff, fg0, muNorm, muScaling, fixedQ, accScaleLength, xiREC, accNorm, accAlphaZ, accAlphaMh, accCeiling, fcool, kappaMetals, ZIGM = emceeParams

    #p00 = np.array([ .9, .1, -1., .08, .50959, .38, -.25, .7, .01 ])
    #p0 = [p00*(1.0+0.2*np.random.randn( ndim )) for i in range(nwalkers)]

    p0 = [sampleFromPrior() for i in range(nwalkers)]

    restart = {}
    restart['currentPosition'] = p0
    restart['chain'] = None
    restart['state'] = None
    restart['prob'] = None
    restart['iterationCounter'] = 0
    restart['mcmcRunCounter'] = 0

    updateRestart(fn, restart)

    global runNumber
    runNumber = restart['mcmcRunCounter']

    restart['iterationCounter'] += N
    restart['mcmcRunCounter'] += 1

    pool = MPIPool(comm=comm, loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnProb, pool=pool)
    #pos, prob, state = sampler.run_mcmc(restart['currentPosition'], N, rstate0=restart['state'], lnprob0=restart['prob'])

    counter = 0
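    # Step the sampler one iteration at a time so the restart dict can be
    # checkpointed to disk (via saveRestart) after every iteration.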
    for result in sampler.sample(restart['currentPosition'],
                                 iterations=N,
                                 lnprob0=restart['prob'],
                                 rstate0=restart['state']):
        print "Beginning iteration number ", counter, " of ", N

        pos, prob, state = result

        restart['acor'] = sampler.acor[:]  # autocorrelation length for each parameter (ndim)
        restart['accept'] = sampler.acceptance_fraction[:]  # acceptance fraction for each walker
        restart['currentPosition'] = pos  # same shape as p0: nwalkers x ndim
        restart['state'] = state  # random number generator state
        restart['prob'] = prob  # lnprob for each walker: shape (nwalkers,)
        if restart['chain'] is None:
            restart['chain'] = sampler.chain  # nwalkers x niterations x ndim
        else:
            print(np.shape(restart['chain']), np.shape(sampler.chain[:, -1, :]), np.shape(sampler.chain))
            print(restart['mcmcRunCounter'], restart['iterationCounter'])
            #restart['chain'] = np.concatenate((restart['chain'], sampler.chain[:,-1,:]), axis=1)
            print("dbg1: ", np.shape(restart['chain']), np.shape(np.zeros((nwalkers, 1, ndim))), np.shape(np.expand_dims(pos, 1)))
            # append only the newest walker positions to the stored chain
            restart['chain'] = np.concatenate((restart['chain'], np.expand_dims(pos, 1)), axis=1)

        saveRestart(fn, restart)
        counter += 1

    pool.close()
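
Because saveRestart checkpoints after every iteration, a later run can resume where this one stopped. A minimal sketch, assuming saveRestart simply pickles the restart dict to fn (the project's actual updateRestart/saveRestart helpers may differ) and using the same emcee 2.x API as above:

import pickle

with open(fn, "rb") as fp:  # fn = chainDirRel + '.pickle', as defined in run()
    restart = pickle.load(fp)

sampler = emcee.EnsembleSampler(nwalkers, ndim, lnProb, pool=pool)
# continue from the checkpointed walker positions, RNG state, and lnprobs
sampler.run_mcmc(restart['currentPosition'], N,
                 rstate0=restart['state'], lnprob0=restart['prob'])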