Example no. 1
def test_autocorr_multi_works():
    np.random.seed(42)
    xs = np.random.randn(16384, 2)

    # This throws an exception unconditionally in buggy implementations
    acls_multi = integrated_time(xs)
    acls_single = np.array(
        [integrated_time(xs[:, i]) for i in range(xs.shape[1])])

    assert np.all(np.abs(acls_multi - acls_single) < 2)
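
For context, integrated_time can be sanity-checked on a series with a known answer. A minimal, self-contained sketch, assuming the emcee 3 API (the AR(1) coefficient and chain length here are arbitrary choices): an AR(1) process with coefficient a has an integrated autocorrelation time of (1 + a) / (1 - a), so a = 0.9 should give roughly 19.

import numpy as np
from emcee.autocorr import integrated_time

np.random.seed(1234)
a, n = 0.9, 150000
x = np.empty(n)
x[0] = np.random.randn()
for i in range(1, n):
    # AR(1) recursion; the analytic tau is (1 + a) / (1 - a) = 19
    x[i] = a * x[i - 1] + np.sqrt(1.0 - a**2) * np.random.randn()

print(integrated_time(x))  # should come out close to 19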
Example no. 2
def plot_autocorr(sampler, nburn, itemp=0, outfile=None):
    nwalkers = sampler.chain.shape[1]

    samples_before = sampler.chain[itemp, :, :nburn]
    samples_after = sampler.chain[itemp, :, nburn:]

    a_before = [autocorr.function(samples_before[i]) for i in range(nwalkers)]
    a_int_before = max(
        np.max([
            autocorr.integrated_time(samples_before[i])
            for i in range(nwalkers)
        ], 0))

    fig, [ax1, ax2] = plt.subplots(2)
    for a in a_before:
        ax1.plot(a[:200], "k", alpha=0.1)
    ax1.axhline(0, color="k")
    ax1.set_xlim(0, 200)
    ax1.set_xlabel(r"$\tau$")
    ax1.set_ylabel(r"Autocorrelation during burn-in")
    ax1.text(0.9,
             0.9,
             '{}'.format(a_int_before),
             horizontalalignment='right',
             verticalalignment='top',
             transform=ax1.transAxes)

    a_after = [autocorr.function(samples_after[i]) for i in range(nwalkers)]
    a_int_after = max(
        np.max([
            autocorr.integrated_time(samples_after[i]) for i in range(nwalkers)
        ], 0))

    for a in a_after:
        ax2.plot(a[:200], "k", alpha=0.1)
    ax2.axhline(0, color="k")
    ax2.set_xlim(0, 200)
    ax2.set_xlabel(r"$\tau$")
    ax2.set_ylabel(r"Autocorrelation after burn-in")
    ax2.text(0.9,
             0.9,
             '{}'.format(a_int_after),
             horizontalalignment='right',
             verticalalignment='top',
             transform=ax2.transAxes)

    plt.suptitle('autocorrelation')
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    if outfile is not None:
        plt.savefig(outfile)
Example no. 3
def get_autocor(chainFile='chain.pkl'):
    '''
    Get the autocorrelation (AC) length across all iterations for each
    parameter, averaging over all the walkers.

    Returns
    -------
    idx: int
        maximum AC length among all parameters

    '''
    #     chainFile = 'chain_reconstructed.pkl'

    import cPickle as pickle
    with open(chainFile, 'rb') as f:
        chain = pickle.load(f)

    from emcee import autocorr
    import numpy as np

    ac = []
    for i in range(chain.shape[-1]):
        dum = autocorr.integrated_time(np.mean(chain[:, :, i], axis=0), axis=0, fast=False)
        ac.append(dum)
        autocorr_message = '{0:.2f}'.format(dum)
#    print(autocorr_message)
    try:
        idx = int(np.max(ac))
    except ValueError:
        idx = 150
    return idx
Example no. 4
 def show_autocorrelation_time(self, figure=None, labels=[], burn=0, thin=1, accepted_only=False, figsize=None,
                               **kwargs):
     from emcee.autocorr import integrated_time
     if figure:
         # print('autocorr', figure.number)
         figure.clf()
     else:
         figure = plt.figure(figsize=figsize)
     ax = figure.subplots()
     samples = self.get_samples(burn=burn, thin=thin, accepted_only=accepted_only)
     rep, last = len(samples) // 50, len(samples) % 50
     if rep == 0:
         warn('Too low number of iterations to calculate autocorrelation time (min 50 required).')
         return figure
     tau = np.empty((rep + 1, self.Ndim)) if last else np.empty((rep, self.Ndim))
     Niterate = np.arange(1, rep + 1) * 50
     # color = kwargs.pop('color',['r','g','b'])
     # if type(color)==str or len(color)==1: color = list(np.atleast_1d(color))*self.Ndim
     newkwargs = [{} for _ in range(self.Ndim)]
     for key in list(kwargs):  # iterate over a snapshot; kwargs is mutated below
         if isinstance(kwargs[key], (list, tuple)) and len(kwargs[key]) == self.Ndim:
             val = kwargs.pop(key)
             for i in range(self.Ndim): newkwargs[i].update({key: val[i]})
     for i in range(rep): tau[i] = integrated_time(samples[:50 * (i + 1)], tol=0)
     if last:
         tau[-1] = integrated_time(samples, tol=0)
         Niterate = np.append(Niterate, len(samples))
     # print(tau)
     for i in range(self.Ndim):
         newkwargs[i].update(kwargs.copy())
         ax.plot(Niterate, tau[:, i], **newkwargs[i])
         # print(tau[:,i])
     if np.any(np.isnan(tau[-1])): self.autocorr_nanlen = len(samples)
     ax.set_xlabel('Step number')
     ax.set_ylabel('Autocorrelation time ($\\tau$)')
     ax.xaxis.set_label_coords(0.5, -0.1)
     ax.yaxis.set_label_coords(-0.1, 0.5)
     figure.suptitle(f'Iteration: {self.sampler.iteration}')
     # figure.canvas.draw()
     # figure.canvas.flush_events()
     show_accto_backend(figure)
     # plt.pause(0.2)
     return figure
Example no. 5
 def remove_burn_in(self, chain):
     from emcee.autocorr import integrated_time
     nsteps = self.p.get("mcmc")["n_steps"]
     nwalkers = self.p.get("mcmc")["n_walkers"]
     # first dim should be time
     chain = chain.reshape((nsteps, nwalkers))
     tau = integrated_time(chain, tol=20, quiet=True)
     # remove burn-in elements from chain
     chain = chain[int(np.ceil(tau)):].flatten()
     return chain
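
The reshape-then-discard pattern above generalizes. Below is a rough, self-contained sketch of the equivalent workflow with the emcee 3 sampler API; the standard-normal target, walker count, and step counts are illustrative assumptions, not taken from the original project.

import numpy as np
import emcee

def log_prob(x):
    # illustrative standard-normal target
    return -0.5 * np.sum(x**2)

nwalkers, ndim = 32, 3
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)
sampler.run_mcmc(np.random.randn(nwalkers, ndim), 5000)

tau = sampler.get_autocorr_time(tol=0)   # per-parameter tau, skipping the length check
burnin = int(2 * np.max(tau))            # discard a few tau as burn-in
thin = max(1, int(0.5 * np.min(tau)))
flat = sampler.get_chain(discard=burnin, thin=thin, flat=True)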
Example no. 6
    def get_autocorr_time(self, window=50):
        """
        Compute an estimate of the autocorrelation time for each parameter
        (length: ``dim``).

        :param window: (optional)
            The size of the windowing function. This is equivalent to the
            maximum number of lags to use. (default: 50)

        """
        return autocorr.integrated_time(self.chain, axis=0, window=window)
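
Note that the window and axis keywords above belong to the emcee 2.x autocorr API; emcee 3 removed them in favor of c, tol, and quiet. A minimal sketch of the newer call, assuming emcee >= 3 (the white-noise stand-in chain is just an illustration; its tau is near 1):

import numpy as np
from emcee.autocorr import integrated_time, AutocorrError

x = np.random.randn(5000)  # stand-in chain
try:
    tau = integrated_time(x, c=5, tol=50)
except AutocorrError:
    # raised when the chain is shorter than tol * tau
    tau = integrated_time(x, tol=0)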
Example no. 7
def correlation_time(chain, window=None, c=10, fast=False):
    from emcee.autocorr import integrated_time
    nw, nstep, ndim = chain.shape
    x = np.mean(chain, axis=0)
    m = 0
    if window is None:
        # grow the window until the chain is at least c autocorrelation
        # times long and the estimate is positive
        for m in np.arange(10, nstep):
            tau = integrated_time(x, axis=0, fast=fast, window=m)
            if np.all(tau * c < m) and np.all(tau > 0):
                break
        window = m
    else:
        tau = integrated_time(x, axis=0, fast=fast, window=window)

    if m == (nstep - 1) or np.any(tau < 0):
        raise ValueError("autocorrelation time estimate did not converge")

    return tau, window
Example no. 8
File: gp_lc.py Project: aimalz/cpm
def get_kernel(t, x, f, ell_factor=5, tau_factor=2, amp_factor=10, K=10000):
    # Estimate hyperparameters and set up the kernel.
    i = np.random.randint(len(x), size=K)
    j = np.random.randint(len(x), size=K)
    r = np.sqrt(np.median(np.sum((x[i]-x[j])**2, axis=1)))
    amp = amp_factor * np.var(f)
    tau2 = (tau_factor * np.median(np.diff(t)) * integrated_time(f)) ** 2
    kernel = IsotropicKernel(amp, ell_factor * r, ndim=x.shape[1])

    print(amp, r, tau2)

    K = kernel(x, x) * np.exp(-0.5 * (t[None, :] - t[:, None])**2 / tau2)
    return K
Example no. 9
def get_kernel(t, x, f, ell_factor=5, tau_factor=2, amp_factor=10, K=10000):
    # Estimate hyperparameters and set up the kernel.
    i = np.random.randint(len(x), size=K)
    j = np.random.randint(len(x), size=K)
    r = np.sqrt(np.median(np.sum((x[i] - x[j])**2, axis=1)))
    amp = amp_factor * np.var(f)
    tau2 = (tau_factor * np.median(np.diff(t)) * integrated_time(f))**2
    kernel = IsotropicKernel(amp, ell_factor * r, ndim=x.shape[1])

    print(amp, r, tau2)

    K = kernel(x, x) * np.exp(-0.5 * (t[None, :] - t[:, None])**2 / tau2)
    return K
Example no. 10
def autocorrelation(chain,labels,plt_label):

	npars = chain.shape[1]
	maxlags = chain.shape[0]/5.
	nlags = 100
	lags = np.linspace(1,maxlags,nlags).astype(int)

	tau = np.zeros(shape=(lags.shape[0],npars))
	for l, lag in enumerate(lags):
		#print('maxlag:{}').format(lag)
		print l
		for i in xrange(npars):
			tau[l,i] = acor.acor(chain[:,i], maxlag=lag)[0]
			#print('\t '+labels[i]+': {0}'.format(tau[l,i]))

	### emcee version
	from emcee import autocorr
	c = 10
	good = False
	while good == False:
		try:
			emcee_tau = autocorr.integrated_time(chain, c=c)
			good = True
		except:
			if c > 2:
				c -= 0.5
			else:
				c = c ** 0.95
		if (c-1) < 1e-3:
			print 'FAILED TO CALCULATE AUTOCORRELATION TIMES'
			emcee_tau = np.zeros(len(labels))
			break

	print 'AUTOCORRELATION LENGTHS'
	for r, l in zip(emcee_tau,labels): print l+': '+"{:.2f}".format(r)

	### plotting
	fig, ax = plt.subplots(1,1, figsize=(8,8))
	cmap = get_cmap(npars)
	for i in xrange(npars):
		ax.plot(lags,tau[:,i],label=labels[i]+'='+"{:.2f}".format(emcee_tau[i]),color=cmap(i),lw=2)

	ax.set_xlabel('lag')
	ax.set_ylabel('autocorrelation')

	ax.legend(prop={'size':10},title='autocorrelation lengths',ncol=npars / 5,numpoints=1,markerscale=0.7)

	fig.tight_layout()
	plt.savefig('autocorrelation_time_'+plt_label+'.png',dpi=150)
	plt.close()
Example no. 11
    def get_tau(self, chains):
        from emcee.autocorr import integrated_time
        nsteps = self.p.get("mcmc")["n_steps"]
        nwalkers = self.p.get("mcmc")["n_walkers"]

        pars = list(chains.keys())
        npars = len(pars)
        nzbins = len(self.p.get("data_vectors"))
        taus = np.zeros((npars, nzbins))
        for i, par in enumerate(pars):
            for j, chain in enumerate(chains[par]):
                # first dim should be time
                chain = chain.reshape((nsteps, nwalkers))
                taus[i, j] = integrated_time(chain, tol=20, quiet=True)
        return taus
Example no. 12
def plot_acor(sampler, nmin=100, nsample=10, tol_length=50, ax=None, **kwargs):
    if ax is None:
        fig, ax = plt.subplots(**kwargs)

    chain = sampler.get_chain(discard=sampler.nburn)
    assert len(chain) > nmin, "Not enough samples in chain"

    n, tau = np.transpose([
        (nmax, np.mean(integrated_time(chain[:nmax], tol=0)))
        for nmax in tqdm(np.linspace(100, len(chain), nsample, dtype=int),
                         desc="Compute autocorrelation times")])

    ax.plot(n, tau)
    ax.plot(n, n/tol_length, linestyle="--", color="gray")

    ax.set_ylabel(r"$\tau$")

    return ax.get_figure(), ax
Example no. 13
def autocor_checks(sampler, nburn, itemp=0, outfile=None):
    print('Chains contain {} samples'.format(sampler.chain.shape[-2]),
          file=outfile)
    print('Specified burn-in is {} samples'.format(nburn), file=outfile)
    a_exp = sampler.acor[0]
    a_int = np.max([
        autocorr.integrated_time(sampler.chain[itemp, i, nburn:])
        for i in range(sampler.chain.shape[1])
    ], 0)
    a_exp = max(a_exp)
    a_int = max(a_int)
    print('A reasonable burn-in should be around '
          '{:d} steps'.format(int(10 * a_exp)),
          file=outfile)
    print('After burn-in, each chain produces one independent '
          'sample per {:d} steps'.format(int(a_int)),
          file=outfile)
    return a_exp, a_int
Example no. 14
 def autocorrelation(self, inputData, nMax):
     predictions = self.predict(inputData, n=1)
     output = np.squeeze(np.array(predictions)).T

     valFunc = 0
     accepted = 0

     for x in range(len(output)):
         temp = integrated_time(output[x], tol=5, quiet=True)
         if not math.isnan(temp):
             valFunc += np.array(function_1d(output[x]))
             accepted += 1

     valFunc = valFunc / accepted
     if nMax < len(valFunc):
         valFunc = valFunc[:nMax]

     return valFunc
Example no. 15
 def autoCorrelationLength(self, inputData, nMax):
     predictions = self.predict(inputData, n=1)
     output = np.squeeze(np.array(predictions)).T

     val = 0
     accepted = 0

     for x in range(len(output)):
         temp = integrated_time(output[x], tol=5, quiet=True)
         if not math.isnan(temp):
             val += temp
             accepted += 1

     val = val / accepted

     if val[0] > nMax:
         print("Correlation time is greater than maximum accepted value.")

     return val[0]
Example no. 16
    def __init__(self, lc, dist_factor=10.0, time_factor=0.1, matern=False):
        self.time = lc.time
        self.flux = lc.flux - 1.0
        self.ferr = lc.ferr

        # Convert to parts per thousand.
        self.flux *= 1e3
        self.ferr *= 1e3

        # Hackishly build a kernel.
        tau = np.median(np.diff(self.time)) * integrated_time(self.flux)
        tau = max(0.1, tau)  # Tau should be floored.
        amp = np.median((self.flux - np.median(self.flux))**2)
        self.kernel = amp * ExpSquaredKernel(tau**2)
        self.gp = george.GP(self.kernel, solver=george.HODLRSolver)
        self.gp.compute(self.time, self.ferr, seed=1234)

        # Compute the likelihood of the null model.
        self.ll0 = self.lnlike()
Example no. 17
def autocorrelation(mcmc_fit_instance,
                    correlations_to_plot=None,
                    flat_chain=None,
                    variable_labels=None):
    """
    Plots correlation function of defined parameters.

    :param mcmc_fit_instance: Union[elisa.analytics.binary_fit.lc_fit.LCFit, elisa.analytics.binary_fit.rv_fit.RVFit];
    :param correlations_to_plot: List; names of variables which autocorrelation function will be displayed
    :param flat_chain: numpy.array; flattened chain of all parameters
    :param variable_labels: List; list of variables during a MCMC run, which is used
                                  to identify columns in `flat_chain`
    """
    autocorr_plot_kwargs = dict()

    flat_chain = deepcopy(mcmc_fit_instance.flat_chain) if flat_chain is None else deepcopy(flat_chain)
    variable_labels = mcmc_fit_instance.variable_labels if variable_labels is None else variable_labels
    correlations_to_plot = variable_labels if correlations_to_plot is None else correlations_to_plot

    if flat_chain is None:
        raise ValueError('You can use the autocorrelation plot only in case of mcmc method '
                         'or for some reason the flat chain was not found.')

    labels = serialize_plot_labels(variable_labels)

    autocorr_fns = np.empty((flat_chain.shape[0], len(variable_labels)))
    autocorr_time = np.empty((len(variable_labels),))

    for i, lbl in enumerate(variable_labels):
        autocorr_fns[:, i] = function_1d(flat_chain[:, i])
        autocorr_time[i] = integrated_time(flat_chain[:, i], quiet=True)

    autocorr_plot_kwargs.update({
        'correlations_to_plot': correlations_to_plot,
        'autocorr_fns': autocorr_fns,
        'autocorr_time': autocorr_time,
        'variable_labels': variable_labels,
        'labels': labels
    })

    MCMCPlot.autocorr(**autocorr_plot_kwargs)
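
Several of these examples pair integrated_time with function_1d, which returns the normalized autocorrelation function of a 1-D series. A small sketch, assuming emcee >= 3 (the noisy sine input is purely illustrative):

import numpy as np
from emcee.autocorr import function_1d

x = np.sin(np.linspace(0, 20, 1000)) + 0.1 * np.random.randn(1000)
rho = function_1d(x)   # normalized autocorrelation; rho[0] == 1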
Example no. 18
def main(config_file, mpi=False, threads=None, overwrite=False, continue_sampler=False):
    """ TODO: """

    # get a pool object given the configuration parameters
    # -- This needs to go here so I don't read in the particle file for each thread. --
    pool = get_pool(mpi=mpi, threads=threads)

    # read configuration from a YAML file
    config = io.read_config(config_file)
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    if not os.path.exists(config['streams_path']):
        raise IOError("Specified streams path '{}' doesn't exist!".format(config['streams_path']))
    logger.debug("Path to streams project: {}".format(config['streams_path']))

    # the path to write things to
    output_path = config["output_path"]
    logger.debug("Will write data to:\n\t{}".format(output_path))
    cache_output_path = os.path.join(output_path, "cache")

    # get a StreamModel from a config dict
    model = si.StreamModel.from_config(config)
    logger.info("Model has {} parameters".format(model.nparameters))

    if os.path.exists(cache_output_path) and overwrite:
        logger.info("Writing over output path '{}'".format(cache_output_path))
        logger.debug("Deleting files: '{}'".format(os.listdir(cache_output_path)))
        shutil.rmtree(cache_output_path)

    # emcee parameters
    # read in the number of walkers to use
    nwalkers = config["walkers"]
    nsteps = config["steps"]
    output_every = config.get("output_every", None)
    nburn = config.get("burn_in", 0)
    start_truth = config.get("start_truth", False)
    a = config.get("a", 2.) # emcee tuning param

    if not os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' doesn't exist, running inference..."\
                    .format(cache_output_path))
        os.mkdir(cache_output_path)

        # sample starting positions
        p0 = model.sample_priors(size=nwalkers,
                                 start_truth=start_truth)
        logger.debug("Priors sampled...")

        if nburn > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)

            time0 = time.time()
            logger.info("Burning in sampler for {} steps...".format(nburn))
            pos, xx, yy = sampler.run_mcmc(p0, nburn)

            pos = fix_whack_walkers(pos, sampler.acceptance_fraction,
                                    sampler.flatlnprobability, sampler.flatchain,
                                    threshold=config.get("acceptance_threshold", None))

            t = time.time() - time0
            logger.debug("Spent {} seconds on burn-in...".format(t))

        else:
            pos = p0

        if nsteps > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
            sampler.run_inference(pos, nsteps, path=cache_output_path, first_step=0,
                                  output_every=output_every,
                                  output_file_fmt="inference_{:06d}.hdf5")

    elif os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' already exists, not running sampler..."\
                    .format(cache_output_path))

    elif os.path.exists(cache_output_path) and continue_sampler:
        if len(os.listdir(cache_output_path)) == 0:
            logger.error("No files in path: {}".format(cache_output_path))
            sys.exit(1)

        continue_files = glob.glob(os.path.join(cache_output_path, "inference_*.hdf5"))
        continue_file = config.get("continue_file", sorted(continue_files)[-1])
        continue_file = os.path.join(cache_output_path, continue_file)
        if not os.path.exists(continue_file):
            logger.error("File {} doesn't exist!".format(continue_file))
            sys.exit(1)

        with h5py.File(continue_file, "r") as f:
            old_chain = f["chain"].value
            old_flatchain = np.vstack(old_chain)
            old_lnprobability = f["lnprobability"].value
            old_flatlnprobability = np.vstack(old_lnprobability)
            old_acc_frac = f["acceptance_fraction"].value
            last_step = f["last_step"].value

        pos = old_chain[:,-1]
        pos = fix_whack_walkers(pos, old_acc_frac,
                                old_flatlnprobability,
                                old_flatchain,
                                threshold=config.get("acceptance_threshold", None))

        sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
        logger.info("Continuing sampler...running {} walkers for {} steps..."\
                .format(nwalkers, nsteps))
        sampler.run_inference(pos, nsteps, path=cache_output_path, first_step=last_step,
                              output_every=output_every,
                              output_file_fmt = "inference_{:07d}.hdf5")

    else:
        print("Unknown state.")
        sys.exit(1)

    pool.close() if hasattr(pool, 'close') else None

    #############################################################
    # Plotting
    #
    plot_config = config.get("plot", dict())
    plot_ext = plot_config.get("ext", "png")

    # glob properly orders the list
    for filename in sorted(glob.glob(os.path.join(cache_output_path,"inference_*.hdf5"))):
        logger.debug("Reading file {}...".format(filename))
        with h5py.File(filename, "r") as f:
            try:
                chain = np.hstack((chain,f["chain"].value))
            except NameError:
                chain = f["chain"].value

            acceptance_fraction = f["acceptance_fraction"].value

    try:
        acor = autocorr.integrated_time(np.mean(chain, axis=0), axis=0,
                                        window=50) # 50 comes from emcee
    except:
        acor = []

    flatchain = np.vstack(chain)

    # thin chain
    if config.get("thin_chain", True):
        if len(acor) > 0:
            t_med = np.median(acor)
            thin_chain = chain[:,::int(t_med)]
            thin_flatchain = np.vstack(thin_chain)
            logger.info("Median autocorrelation time: {}".format(t_med))
        else:
            logger.warn("FAILED TO THIN CHAIN")
            thin_chain = chain
            thin_flatchain = flatchain
    else:
        thin_chain = chain
        thin_flatchain = flatchain

    # plot true_particles, true_satellite over the rest of the stream
    gc_particles = model.true_particles.to_frame(galactocentric)
    m = model.true_satellite.mass
    # HACK
    sgr = SgrSimulation("sgr_nfw/M2.5e+0{}".format(int(np.floor(np.log10(m)))), "SNAP113")
    all_gc_particles = sgr.particles(n=1000, expr="tub!=0").to_frame(galactocentric)

    fig,axes = plt.subplots(1,2,figsize=(16,8))
    axes[0].plot(all_gc_particles["x"].value, all_gc_particles["z"].value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[0].plot(gc_particles["x"].value, gc_particles["z"].value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    axes[1].plot(all_gc_particles["vx"].to(u.km/u.s).value,
                 all_gc_particles["vz"].to(u.km/u.s).value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[1].plot(gc_particles["vx"].to(u.km/u.s).value,
                 gc_particles["vz"].to(u.km/u.s).value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    fig.savefig(os.path.join(output_path, "xyz_vxvyvz.{}".format(plot_ext)))

    if plot_config.get("mcmc_diagnostics", False):
        logger.debug("Plotting MCMC diagnostics...")

        diagnostics_path = os.path.join(output_path, "diagnostics")
        if not os.path.exists(diagnostics_path):
            os.mkdir(diagnostics_path)

        # plot histogram of autocorrelation times
        if len(acor) > 0:
            fig,ax = plt.subplots(1,1,figsize=(12,6))
            ax.plot(acor, marker='o', linestyle='none') #model.nparameters//5)
            ax.set_xlabel("Parameter index")
            ax.set_ylabel("Autocorrelation time")
            fig.savefig(os.path.join(diagnostics_path, "acor.{}".format(plot_ext)))

        # plot histogram of acceptance fractions
        fig,ax = plt.subplots(1,1,figsize=(8,8))
        ax.hist(acceptance_fraction, bins=nwalkers//5)
        ax.set_xlabel("Acceptance fraction")
        fig.suptitle("Histogram of acceptance fractions for all walkers")
        fig.savefig(os.path.join(diagnostics_path, "acc_frac.{}".format(plot_ext)))

        # plot individual walkers
        plt.figure(figsize=(12,6))
        for k in range(model.nparameters):
            plt.clf()
            for ii in range(nwalkers):
                plt.plot(chain[ii,:,k], alpha=0.4, drawstyle='steps', color='k')

            plt.axhline(model.truths[k], color='r', lw=2., linestyle='-', alpha=0.5)
            plt.savefig(os.path.join(diagnostics_path, "param_{}.{}".format(k, plot_ext)))

        plt.close('all')

    if plot_config.get("posterior", False):
        logger.debug("Plotting posterior distributions...")

        flatchain_dict = model.label_flatchain(thin_flatchain)
        p0 = model.sample_priors(size=1000) # HACK HACK HACK
        p0_dict = model.label_flatchain(np.vstack(p0))
        potential_group = model.parameters.get('potential', None)
        particles_group = model.parameters.get('particles', None)
        satellite_group = model.parameters.get('satellite', None)
        flatchains = dict()

        if potential_group:
            this_flatchain = np.zeros((len(thin_flatchain),len(potential_group)))
            this_p0 = np.zeros((len(p0),len(potential_group)))
            this_truths = []
            this_extents = []
            for ii,pname in enumerate(potential_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['potential'][pname]

                this_flatchain[:,ii] = f(np.squeeze(flatchain_dict['potential'][pname]))
                this_p0[:,ii] = f(np.squeeze(p0_dict['potential'][pname]))
                this_truths.append(f(p.truth))
                this_extents.append((f(p._prior.a), f(p._prior.b)))

                print(pname, np.median(this_flatchain[:,ii]), np.std(this_flatchain[:,ii]))

            fig = triangle.corner(this_p0,
                        point_kwargs=dict(color='#2b8cbe',alpha=0.1),
                        hist_kwargs=dict(color='#2b8cbe',alpha=0.75,normed=True,bins=50),
                        plot_contours=False)

            fig = triangle.corner(this_flatchain,
                        fig=fig,
                        truths=this_truths,
                        labels=[_label_map[k] for k in potential_group.keys()],
                        extents=this_extents,
                        point_kwargs=dict(color='k',alpha=1.),
                        hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
            fig.savefig(os.path.join(output_path, "potential.{}".format(plot_ext)))

            flatchains['potential'] = this_flatchain

        nparticles = model.true_particles.nparticles
        if particles_group and len(particles_group) > 1:
            for jj in range(nparticles):
                this_flatchain = np.zeros((len(thin_flatchain),len(particles_group)))
                this_p0 = np.zeros((len(p0),len(particles_group)))
                this_truths = []
                this_extents = None
                for ii,pname in enumerate(particles_group.keys()):
                    f = _unit_transform[pname]
                    p = model.parameters['particles'][pname]

                    this_flatchain[:,ii] = f(np.squeeze(flatchain_dict['particles'][pname][:,jj]))
                    this_p0[:,ii] = f(np.squeeze(p0_dict['particles'][pname][:,jj]))
                    this_truths.append(f(p.truth[jj]))
                    #this_extents.append((f(p._prior.a), f(p._prior.b)))

                fig = triangle.corner(this_p0,
                            point_kwargs=dict(color='#2b8cbe',alpha=0.1),
                            hist_kwargs=dict(color='#2b8cbe',alpha=0.75,normed=True,bins=50),
                            plot_contours=False)

                fig = triangle.corner(this_flatchain,
                            fig=fig,
                            truths=this_truths,
                            labels=[_label_map[k] for k in particles_group.keys()],
                            extents=this_extents,
                            point_kwargs=dict(color='k',alpha=1.),
                            hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
                fig.savefig(os.path.join(output_path, "particle{}.{}".format(jj,plot_ext)))

        # plot the posterior for the satellite parameters
        if satellite_group and len(satellite_group) > 1:
            jj = 0
            this_flatchain = np.zeros((len(thin_flatchain),len(satellite_group)))
            this_p0 = np.zeros((len(p0),len(satellite_group)))
            this_truths = []
            this_extents = None
            for ii,pname in enumerate(satellite_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['satellite'][pname]

                this_flatchain[:,ii] = f(np.squeeze(flatchain_dict['satellite'][pname][:,jj]))
                this_p0[:,ii] = f(np.squeeze(p0_dict['satellite'][pname][:,jj]))
                try:
                    this_truths.append(f(p.truth[jj]))
                except: # IndexError:
                    this_truths.append(f(p.truth))
                #this_extents.append((f(p._prior.a), f(p._prior.b)))

            fig = triangle.corner(this_p0,
                        point_kwargs=dict(color='#2b8cbe',alpha=0.1),
                        hist_kwargs=dict(color='#2b8cbe',alpha=0.75,normed=True,bins=50),
                        plot_contours=False)

            fig = triangle.corner(this_flatchain,
                        fig=fig,
                        truths=this_truths,
                        labels=[_label_map[k] for k in satellite_group.keys()],
                        extents=this_extents,
                        point_kwargs=dict(color='k',alpha=1.),
                        hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
            fig.savefig(os.path.join(output_path, "satellite.{}".format(plot_ext)))

            flatchains['satellite'] = this_flatchain

        if flatchains.has_key('potential') and flatchains.has_key('satellite'):
            this_flatchain = np.hstack((flatchains['potential'],flatchains['satellite']))
            labels = [_label_map[k] for k in potential_group.keys()+satellite_group.keys()]
            fig = triangle.corner(this_flatchain,
                        labels=labels,
                        point_kwargs=dict(color='k',alpha=1.),
                        hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
            fig.savefig(os.path.join(output_path, "suck-it-up.{}".format(plot_ext)))
Example no. 19
def test_nd(seed=1234, ndim=3, N=150000):
    x = get_chain(seed=seed, ndim=ndim, N=N)
    tau = integrated_time(x)
    assert np.all(np.abs(tau - 19.0) / 19.0 < 0.2)
Example no. 20
def _plot_chain_func(sampler, p, last_step=False):
    chain = sampler.chain
    label = sampler.labels[p]

    import matplotlib.pyplot as plt

    from scipy import stats
    if len(chain.shape) > 2:
        traces = chain[:, :, p]
        if last_step:
            # keep only last step
            dist = traces[:, -1]
        else:
            # convert chain to flatchain
            dist = traces.flatten()
    else:
        log.warning(
            'we need the full chain to plot the traces, not a flatchain!')
        return None

    nwalkers = traces.shape[0]
    nsteps = traces.shape[1]

    f = plt.figure()

    ax1 = f.add_subplot(221)
    ax2 = f.add_subplot(122)

    f.subplots_adjust(left=0.1, bottom=0.15, right=0.95, top=0.9)

    # plot five percent of the traces darker

    if nwalkers < 60:
        thresh = 1 - 3. / nwalkers
    else:
        thresh = 0.95
    red = np.arange(nwalkers) / float(nwalkers) >= thresh

    ax1.set_rasterization_zorder(1)
    for t in traces[~red]:  # range(nwalkers):
        ax1.plot(t, color=(0.1,) * 3, lw=1.0, alpha=0.25, zorder=0)
    for t in traces[red]:
        ax1.plot(t, color=color_cycle[0], lw=1.5, alpha=0.75, zorder=0)
    ax1.set_xlabel('step number')
    # [l.set_rotation(45) for l in ax1.get_yticklabels()]
    ax1.set_ylabel(label)
    ax1.yaxis.set_label_coords(-0.15, 0.5)
    ax1.set_title('Walker traces')

    nbins = min(max(25, int(len(dist) / 100.)), 100)
    xlabel = label
    n, x, _ = ax2.hist(
        dist,
        nbins,
        histtype='stepfilled',
        color=color_cycle[0],
        lw=0,
        normed=1)
    kde = stats.kde.gaussian_kde(dist)
    ax2.plot(x, kde(x), color='k', label='KDE')
    quant = [16, 50, 84]
    xquant = np.percentile(dist, quant)
    quantiles = dict(six.moves.zip(quant, xquant))

    ax2.axvline(
        quantiles[50],
        ls='--',
        color='k',
        alpha=0.5,
        lw=2,
        label='50% quantile')
    ax2.axvspan(
        quantiles[16],
        quantiles[84],
        color=(0.5,) * 3,
        alpha=0.25,
        label='68% CI',
        lw=0)
    # ax2.legend()
    for l in ax2.get_xticklabels():
        l.set_rotation(45)
    ax2.set_xlabel(xlabel)
    ax2.xaxis.set_label_coords(0.5, -0.1)
    ax2.set_title('posterior distribution')
    ax2.set_ylim(top=n.max() * 1.05)

    # Print distribution parameters on lower-left

    try:
        try:
            ac = sampler.get_autocorr_time()[p]
        except AttributeError:
            ac = autocorr.integrated_time(
                np.mean(
                    chain, axis=0), axis=0, fast=False)[p]
        autocorr_message = '{0:.1f}'.format(ac)
    except autocorr.AutocorrError:
        # Raised when chain is too short for meaningful auto-correlation
        # estimation
        autocorr_message = None

    if last_step:
        clen = 'last ensemble'
    else:
        clen = 'whole chain'

    chain_props = 'Walkers: {0} \nSteps in chain: {1} \n'.format(nwalkers,
                                                                 nsteps)
    if autocorr_message is not None:
        chain_props += 'Autocorrelation time: {0}\n'.format(autocorr_message)
    chain_props += 'Mean acceptance fraction: {0:.3f}\n'.format(
                        np.mean(sampler.acceptance_fraction)) +\
                   'Distribution properties for the {clen}:\n \
    $-$ median: ${median}$, std: ${std}$ \n \
    $-$ median with uncertainties based on \n \
      the 16th and 84th percentiles ($\sim$1$\sigma$):\n'.format(
              median=_latex_float(quantiles[50]),
              std=_latex_float(np.std(dist)), clen=clen)

    info_line = ' ' * 10 + label + ' = ' + _latex_value_error(
        quantiles[50], quantiles[50] - quantiles[16], quantiles[84] -
        quantiles[50])

    chain_props += info_line

    if 'log10(' in label or 'log(' in label:
        nlabel = label.split('(')[-1].split(')')[0]
        ltype = label.split('(')[0]
        if ltype == 'log10':
            new_dist = 10**dist
        elif ltype == 'log':
            new_dist = np.exp(dist)

        quant = [16, 50, 84]
        quantiles = dict(six.moves.zip(quant, np.percentile(new_dist, quant)))

        label_template = '\n' + ' ' * 10 + '{{label:>{0}}}'.format(len(label))

        new_line = label_template.format(label=nlabel)
        new_line += ' = ' + _latex_value_error(quantiles[50], quantiles[50] -
                                               quantiles[16], quantiles[84] -
                                               quantiles[50])

        chain_props += new_line
        info_line += new_line

    log.info('{0:-^50}\n'.format(label) + info_line)
    f.text(0.05, 0.45, chain_props, ha='left', va='top')

    return f
Example no. 21
        print("Diffs: max:{}, low:{}, high:{}, dbin:{}".format(
            vals["max"], vals["minus"], vals["plus"], vals["dbin"]))
        print()

    fig.subplots_adjust(hspace=0.5, bottom=0.05, top=0.99)
    fig.savefig(fname)


if args.hdis:
    plot_hdis(flatchain)

# Make the triangle plot
if args.tri:
    import corner
    figure = corner.corner(flatchain,
                           bins=30,
                           labels=labels,
                           quantiles=[0.16, 0.5, 0.84],
                           plot_contours=True,
                           plot_datapoints=True,
                           show_titles=True)
    figure.savefig("triangle.png")
else:
    print("Not plotting triangle, no --tri flag.")

# Compute the autocorrelation time, following emcee
# Notes here: http://dfm.io/posts/autocorr/
print("Integrated autocorrelation time")
from emcee import autocorr
print(autocorr.integrated_time(chain))
Example no. 22
def mcmc_std(vals: np.ndarray) -> tuple:
    # Standard error of the chain mean, sqrt(tau / N * var), and tau itself.
    tau_f = integrated_time(vals)[0]
    # print(tau_f, vals.size, np.var(vals))
    return np.sqrt(tau_f / vals.size * np.var(vals)), tau_f
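
This estimator follows the standard Monte Carlo error formula: with integrated autocorrelation time tau, the effective sample size is N / tau, so the standard error of the chain mean is sqrt(tau * var / N). A usage sketch of the mcmc_std defined above, on an AR(1) chain with known tau (the coefficient and length are arbitrary assumptions):

import numpy as np
from emcee.autocorr import integrated_time

np.random.seed(0)
a, n = 0.9, 200000
x = np.empty(n)
x[0] = 0.0
for i in range(1, n):
    x[i] = a * x[i - 1] + np.sqrt(1.0 - a**2) * np.random.randn()

se, tau = mcmc_std(x)   # tau should be near (1 + a) / (1 - a) = 19
print(se, tau)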
Example no. 23
        pl.plot(lc[0], lc[1], ".", ms=3)
    pl.savefig("raw_data.png")

    # Set up the initial system.
    system = transit.System(transit.Central(radius=0.95))
    planet = transit.Body(r=2.03 * 0.01, period=period, t0=t0, b=0.9)
    system.add_body(planet)
    texp = kplr.EXPOSURE_TIMES[1] / 60. / 60. / 24.
    mean_function = partial(system.light_curve, texp=texp)

    # Set up the Gaussian processes.
    pl.clf()
    offset = 0.001
    models = []
    for i, lc in enumerate(light_curves):
        dt = np.median(np.diff(lc[0])) * integrated_time(lc[1])
        kernel = np.var(lc[1]) * kernels.Matern32Kernel(dt ** 2)
        gp = george.GP(kernel, mean=mean_function, solver=george.HODLRSolver)
        gp.compute(lc[0], lc[2])
        models.append((gp, lc[1]))

        t = (lc[0]-t0+hp) % period-hp
        pl.plot(t, lc[1] + i * offset, ".k", ms=3)
        pl.plot(t, gp.predict(lc[1], lc[0], mean_only=True) + i*offset, "b")

    pl.savefig("initial.png")
    pl.xlim(-5, 5)
    pl.savefig("initial_zoom.png")

    model = ProbabilisticModel(system, planet, models)
    p0 = model.get_parameters()
Example no. 24
            lp = lq
            acc += 1

        chain[i] = theta

    return chain, acc / niter


if __name__ == "__main__":
    import corner
    from emcee import autocorr
    import matplotlib.pyplot as plt

    # Run the sampler.
    chain, acc_frac = mh(log_p_func, np.random.randn(2), 400000)
    tau = autocorr.integrated_time(chain)
    print("Acceptance fraction: {0:.3f}".format(acc_frac))
    print("Autocorrelation times: {0}, {1}".format(
        *(map("{0:.0f}".format, tau))))
    with open("numbers-mh.tex", "w") as f:
        f.write("% Automatically generated\n")
        f.write("\\newcommand{{\\accfrac}}{{{0:.2f}}}\n".format(acc_frac))
        f.write("\\newcommand{{\\taua}}{{{0:.0f}}}\n".format(tau[0]))
        f.write("\\newcommand{{\\taub}}{{{0:.0f}}}\n".format(tau[1]))

    # Plot the traces and corner plot.
    fig, axes = plt.subplots(2, 1, figsize=SQUARE_FIGSIZE, sharex=True)
    axes[0].plot(chain[:5000, 0], "k")
    axes[1].plot(chain[:5000, 1], "k")
    axes[0].set_ylabel(r"$\theta_1$")
    axes[1].set_ylabel(r"$\theta_2$")
Example no. 25
def main(potential_name, results_path=None, split_ix=None, overwrite=False):
    all_ophdata = OphiuchusData()

    # top-level output path for saving (this will create a subdir within output_path)
    if results_path is None:
        top_path = RESULTSPATH
    else:
        top_path = os.path.abspath(os.path.expanduser(results_path))

    if top_path is None:
        raise ValueError("If $PROJECTSPATH is not set, you must provide a path to save "
                         "the results in with the --results_path argument.")

    output_path = os.path.join(top_path, potential_name, "orbitfit")
    logger.debug("Output path: {}".format(output_path))

    w0_filename = os.path.join(output_path, "w0.npy")
    if os.path.exists(w0_filename) and overwrite:
        os.remove(w0_filename)

    if os.path.exists(w0_filename):
        logger.debug("File {} exists".format(w0_filename))
        return

    with open(os.path.join(output_path, "sampler.pickle"), 'rb') as f:
        sampler = pickle.load(f)

    # default is to split in half
    if split_ix is None:
        split_ix = sampler.chain.shape[1] // 2

    # measure the autocorrelation time for each parameter
    tau = np.median(acor.integrated_time(np.mean(sampler.chain[:,split_ix:], axis=0)))
    logger.debug("Autocorrelation time: {:.1f}".format(tau))
    every = int(tau)
    logger.debug("Taking every {} sample".format(every))

    if every == 0 or tau > sampler.chain.shape[1]:
        logger.warning("Autocorrelation time is too long! Run your MCMC for longer...")
        raise ValueError("Autocorrelation time is too long to thin chains")

    _x0 = np.vstack(sampler.chain[:,split_ix::every,:5])
    np.random.shuffle(_x0)
    w0 = all_ophdata._mcmc_sample_to_w0(_x0.T).T

    mean_w0 = all_ophdata._mcmc_sample_to_w0(np.mean(_x0, axis=0)).T
    w0 = np.vstack((mean_w0, w0))

    logger.info("{} initial conditions after thinning chains".format(w0.shape[0]))

    # convert to w0 and save
    np.save(w0_filename, w0)

    potential = op.load_potential(potential_name)

    # plot orbit fits
    ix = np.random.randint(len(sampler.flatchain), size=64)
    fig = plot_data_orbit(all_ophdata)
    for sample in sampler.flatchain[ix]:
        sample_w0 = all_ophdata._mcmc_sample_to_w0(sample[:5])[:,0]
        tf,tb = (5.,-5.)
        w = integrate_forward_backward(potential, sample_w0, t_forw=tf, t_back=tb)
        fig = plot_data_orbit(all_ophdata, orbit=w, data_style=dict(marker=None),
                              orbit_style=dict(color='#2166AC', alpha=0.1), fig=fig)
    fig.savefig(os.path.join(output_path, "orbits.png"), dpi=300)
Example no. 26
nuts = sampler.run_mcmc(q, 10000)

# In[14]:

plt.plot(nuts[0][:, 0])

# In[18]:

import corner
corner.corner(nuts[0][:, -4:])

# In[19]:

chain = nuts[0]

# In[20]:

from emcee.autocorr import integrated_time
tau_nuts = integrated_time(chain[:, None, :])
neff_nuts = len(chain) / np.mean(tau_nuts)
tau_nuts, neff_nuts

# In[24]:

nu_max_value = np.exp(get_value_for_param(nuts[0][:, 0], *log_numax_range))
dnu_value = np.exp(get_value_for_param(nuts[0][:, 3], *log_dnu_range))

corner.corner(np.vstack((nu_max_value, dnu_value)).T)

# In[ ]:
Example no. 27
def plot_autocorr(trace_name, db, save=False):
    """
    Plot autocorrelation diagrams for a given traced quantity. For ensemble
    (multi-walker) database data, the mean of all walkers for the given traced
    quantity is used to estimate autocorrelation (same as emcee)

    :param trace_name: Name of traced quantity, including all with priors, as
        well as derived quantities: magdiff, centerdist, sbeff, and axisratio.
    :param db: Filename of psfMC database
    :param save: If True, plots will not be displayed but will be saved to disk
        in pdf format.
    """
    disp_name, db, model = _load_db_and_model(db, None)

    trace = _get_trace(trace_name, db)

    n_walkers = db['walker'].max() + 1
    n_samples = trace.shape[0] // n_walkers

    for col in range(trace.shape[1]):

        fig_acorr = pp.figure()
        ax_acorr = fig_acorr.add_subplot(111)

        trace_walkers = trace[:, col].reshape((n_samples, n_walkers),
                                              order='F')

        lags = np.arange(n_samples)
        acorr_all = autocorr.function(trace_walkers)

        trace_avg = np.mean(trace_walkers, axis=1)
        acorr_avg = autocorr.function(trace_avg)
        tau = autocorr.integrated_time(trace_avg, c=1)
        eff_samples = n_samples / tau

        maxlag = np.argmin(acorr_avg > 0)

        for walk in range(n_walkers):
            ax_acorr.plot(lags,
                          acorr_all[:, walk],
                          marker=None,
                          ls='solid',
                          lw=1,
                          color='black',
                          alpha=0.3,
                          drawstyle='steps-mid')

        ax_acorr.plot(lags,
                      acorr_avg,
                      marker=None,
                      ls='solid',
                      lw=2,
                      drawstyle='steps-mid')

        neff_label = '$n_{{eff}}$ = {:0.1f}'.format(eff_samples)

        trace_label = trace_name
        if 'xy' in trace_label:
            trace_label = trace_label.replace('xy', 'xy'[col])
        disp_name = ' '.join([disp_name, _axis_label(trace_label)])
        fig_acorr.suptitle(disp_name)
        ax_acorr.set_xlim(0, maxlag * 1.01)
        ax_acorr.axhline(0.0, color='black')
        ax_acorr.set_xlabel('Lag Length (Samples)')
        ax_acorr.set_ylabel('Autocorrelation (Normalized)')

        ax_acorr.text(0.95,
                      0.95,
                      neff_label,
                      va='top',
                      ha='right',
                      transform=ax_acorr.transAxes)

        if save:
            fig_acorr.savefig('_'.join([disp_name, trace_name, 'acorr.pdf']))
        else:
            pp.show()
        pp.close(fig_acorr)
Example no. 28
model = pickle.load(open(args.model))
# print(model.parameters)

# Load the samples.
with h5py.File(args.chainfile, "r") as f:
    i = f.attrs["iteration"]+1
    chain = f["samples"][:, args.skip:i:args.thin, :]
    lnprob = f["lnprob"][:, args.skip:i:args.thin]

# Get the dimensions.
nwalkers, nsteps, ndim = chain.shape
flatchain = chain.reshape((nwalkers*nsteps, ndim))

# Get the autocorrelation time.
from emcee.autocorr import integrated_time
print(integrated_time(np.mean(chain, axis=0)))
# assert 0


# Get some basic results.
def print_constraint(nm, s):
    print("{0} = {1} +/- {2}".format(nm, np.mean(s), np.std(s)))


if ndim == 53:
    print("Circular orbit")
    columns = [("\ln a", None), ("r/R", None)]
    columns += [("t_{{{0}}}".format(j+1), None) for j in range(21)]
    columns += [("b_{{{0}}}".format(j+1), None) for j in range(21)]
    columns += [("q_1", None), ("q_2", None),
                (r"\ln \alpha_\mathrm{LC}", None),
Example no. 29
for i in range(hyper.shape[1]):
    pl.clf()
    pl.plot(hyper[:, i])
    pl.savefig(os.path.join(bp, "time-hyper-{0:03d}.png".format(i)))

pl.clf()
pl.plot(lnprob)
pl.savefig(os.path.join(bp, "time-lnprob.png"))

nstar = 42557.0
ntot = 200000
samples = samples[-ntot:, :]  # [::50, :]

# Reformat the samples and save the samples.
thin_by = int(np.min(integrated_time(samples, axis=0)))
thinned = samples[::thin_by, :]
grids = thinned.reshape((len(thinned), pop.shape[0], pop.shape[1]))
print(grids.shape)
print([b.shape for b in pop.bins])


def xmap(f, i):
    return (f(*x) for x in i)

print("Hyper:")

h_mu = np.mean(hyper[-ntot:][::thin_by, :2], axis=0)
h_std = np.std(hyper[-ntot:][::thin_by, :2], axis=0)
print("\n".join(xmap("{0} ± {1}".format, zip(h_mu, h_std))))
Example no. 30
def test_too_short(seed=1234, ndim=3, N=100):
    x = get_chain(seed=seed, ndim=ndim, N=N)
    with pytest.raises(AutocorrError):
        integrated_time(x)
    tau = integrated_time(x, quiet=True)  # NOQA
Example no. 31
def sample_emcee(logpdf_tt,
                 sampler,
                 start,
                 timers,
                 time_grid_ms,
                 n_grid,
                 n_walkers_min=50,
                 thin=100,
                 data_scale=None,
                 ball_size=1e-6):
    '''Use a default thin of 100 since otherwise sampling is too fast and
    could blow out memory with samples at a high time limit.'''
    assert (start.ndim == 1)
    D, = start.shape
    data_scale = np.ones(D) if data_scale is None else data_scale
    assert (data_scale.shape == (D, ))

    n_walkers = max(2 * D + 2, n_walkers_min)
    ball = (ball_size * data_scale[None, :]) * np.random.randn(n_walkers, D)
    start = ball + start[None, :]

    # emcee does not need gradients so we could pass np only implemented
    # version if that is less overhead, but not that is not clear. So, just
    # compile the theano version.
    x_tt = T.vector('x')
    x_tt.tag.test_value = np.zeros(D)
    logpdf_val = logpdf_tt(x_tt)
    logpdf_f = theano.function([x_tt], logpdf_val)

    print 'running emcee with %d, %d' % (n_walkers, D)
    sampler_obj = BUILD_STEP_MC[sampler](n_walkers, D, logpdf_f)

    print 'doing init'
    # Might want to consider putting save chain to false since emcee uses
    # np.concat to grow chain. Might be less overhead to append to list in the
    # loop below.
    sample_gen = sampler_obj.sample(start,
                                    iterations=(MAX_N * thin) / n_walkers,
                                    thin=thin,
                                    storechain=True)

    time_grid_s = 1e-3 * time_grid_ms
    TC = time_chunker(sample_gen, time_grid_s, timers, n_grid=n_grid)

    print 'starting to sample'
    # This could all go in a list comp if we get rid of the assert check
    cum_size = 0
    meta = []
    for trace, metarow in TC:
        meta.append(metarow)
        cum_size += metarow[CHUNK_SIZE]
        # assert(sampler_obj.chain.shape == (n_walkers, MAX_N, D))
    # Build rep for trace data
    # Same as:
    # np.concatenate([X[ii, :, :] for ii in xrange(X.shape[0])], axis=0)
    # EnsembleSampler.flatchain does this too but doesn't truncate at cum_size
    trace = np.reshape(sampler_obj.chain[:, :cum_size, :], (-1, D))
    # TODO
    # assert(trace.shape == (cum_size * n_walkers, D))

    # Log the emcee version of autocorr for future ref
    try:
        tau = integrated_time(trace, axis=0)
        print 'flat auto-corr'
        print tau
    except Exception as err:
        print 'emcee autocorr est failed'
        print str(err)

    return trace, meta
Example no. 32
def main(config_file,
         mpi=False,
         threads=None,
         overwrite=False,
         continue_sampler=False):
    """ TODO: """

    # get a pool object given the configuration parameters
    # -- This needs to go here so I don't read in the particle file for each thread. --
    pool = get_pool(mpi=mpi, threads=threads)

    # read configuration from a YAML file
    config = io.read_config(config_file)
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    if not os.path.exists(config['streams_path']):
        raise IOError("Specified streams path '{}' doesn't exist!".format(
            config['streams_path']))
    logger.debug("Path to streams project: {}".format(config['streams_path']))

    # the path to write things to
    output_path = config["output_path"]
    logger.debug("Will write data to:\n\t{}".format(output_path))
    cache_output_path = os.path.join(output_path, "cache")

    # get a StreamModel from a config dict
    model = si.StreamModel.from_config(config)
    logger.info("Model has {} parameters".format(model.nparameters))

    if os.path.exists(cache_output_path) and overwrite:
        logger.info("Writing over output path '{}'".format(cache_output_path))
        logger.debug("Deleting files: '{}'".format(
            os.listdir(cache_output_path)))
        shutil.rmtree(cache_output_path)

    # emcee parameters
    # read in the number of walkers to use
    nwalkers = config["walkers"]
    nsteps = config["steps"]
    output_every = config.get("output_every", None)
    nburn = config.get("burn_in", 0)
    start_truth = config.get("start_truth", False)
    a = config.get("a", 2.)  # emcee tuning param

    if not os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' doesn't exist, running inference..."\
                    .format(cache_output_path))
        os.mkdir(cache_output_path)

        # sample starting positions
        p0 = model.sample_priors(size=nwalkers, start_truth=start_truth)
        logger.debug("Priors sampled...")

        if nburn > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)

            time0 = time.time()
            logger.info("Burning in sampler for {} steps...".format(nburn))
            pos, _, _ = sampler.run_mcmc(p0, nburn)

            pos = fix_whack_walkers(pos,
                                    sampler.acceptance_fraction,
                                    sampler.flatlnprobability,
                                    sampler.flatchain,
                                    threshold=config.get(
                                        "acceptance_threshold", None))

            t = time.time() - time0
            logger.debug("Spent {} seconds on burn-in...".format(t))

        else:
            pos = p0

        if nsteps > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
            sampler.run_inference(pos,
                                  nsteps,
                                  path=cache_output_path,
                                  first_step=0,
                                  output_every=output_every,
                                  output_file_fmt="inference_{:06d}.hdf5")

    elif os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' already exists, not running sampler..."\
                    .format(cache_output_path))

    elif os.path.exists(cache_output_path) and continue_sampler:
        if len(os.listdir(cache_output_path)) == 0:
            logger.error("No files in path: {}".format(cache_output_path))
            sys.exit(1)

        continue_files = glob.glob(
            os.path.join(cache_output_path, "inference_*.hdf5"))
        continue_file = config.get("continue_file", sorted(continue_files)[-1])
        continue_file = os.path.join(cache_output_path, continue_file)
        if not os.path.exists(continue_file):
            logger.error("File {} doesn't exist!".format(continue_file))
            sys.exit(1)

        with h5py.File(continue_file, "r") as f:
            # h5py >= 3 removed Dataset.value; index with [()] instead
            old_chain = f["chain"][()]
            old_flatchain = np.vstack(old_chain)
            old_lnprobability = f["lnprobability"][()]
            old_flatlnprobability = np.vstack(old_lnprobability)
            old_acc_frac = f["acceptance_fraction"][()]
            last_step = f["last_step"][()]

        pos = old_chain[:, -1]
        pos = fix_whack_walkers(pos,
                                old_acc_frac,
                                old_flatlnprobability,
                                old_flatchain,
                                threshold=config.get("acceptance_threshold",
                                                     None))

        sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
        logger.info("Continuing sampler...running {} walkers for {} steps..."\
                .format(nwalkers, nsteps))
        sampler.run_inference(pos,
                              nsteps,
                              path=cache_output_path,
                              first_step=last_step,
                              output_every=output_every,
                              output_file_fmt="inference_{:07d}.hdf5")

    else:
        print("Unknown state.")
        sys.exit(1)

    if hasattr(pool, 'close'):
        pool.close()

    #############################################################
    # Plotting
    #
    plot_config = config.get("plot", dict())
    plot_ext = plot_config.get("ext", "png")

    # glob properly orders the list
    for filename in sorted(
            glob.glob(os.path.join(cache_output_path, "inference_*.hdf5"))):
        logger.debug("Reading file {}...".format(filename))
        with h5py.File(filename, "r") as f:
            try:
                chain = np.hstack((chain, f["chain"][()]))
            except NameError:
                chain = f["chain"][()]

            acceptance_fraction = f["acceptance_fraction"][()]

    try:
        acor = autocorr.integrated_time(np.mean(chain, axis=0),
                                        axis=0,
                                        window=50)  # 50 comes from emcee
    except Exception:
        acor = []

    flatchain = np.vstack(chain)

    # thin chain
    if config.get("thin_chain", True):
        if len(acor) > 0:
            t_med = np.median(acor)
            thin_chain = chain[:, ::int(t_med)]
            thin_flatchain = np.vstack(thin_chain)
            logger.info("Median autocorrelation time: {}".format(t_med))
        else:
            logger.warning("FAILED TO THIN CHAIN")
            thin_chain = chain
            thin_flatchain = flatchain
    else:
        thin_chain = chain
        thin_flatchain = flatchain

    # plot true_particles, true_satellite over the rest of the stream
    gc_particles = model.true_particles.to_frame(galactocentric)
    m = model.true_satellite.mass
    # HACK
    sgr = SgrSimulation("sgr_nfw/M2.5e+0{}".format(int(np.floor(np.log10(m)))),
                        "SNAP113")
    all_gc_particles = sgr.particles(n=1000,
                                     expr="tub!=0").to_frame(galactocentric)

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    axes[0].plot(all_gc_particles["x"].value,
                 all_gc_particles["z"].value,
                 markersize=10.,
                 marker='.',
                 linestyle='none',
                 alpha=0.25)
    axes[0].plot(gc_particles["x"].value,
                 gc_particles["z"].value,
                 markersize=10.,
                 marker='o',
                 linestyle='none',
                 alpha=0.75)
    axes[1].plot(all_gc_particles["vx"].to(u.km / u.s).value,
                 all_gc_particles["vz"].to(u.km / u.s).value,
                 markersize=10.,
                 marker='.',
                 linestyle='none',
                 alpha=0.25)
    axes[1].plot(gc_particles["vx"].to(u.km / u.s).value,
                 gc_particles["vz"].to(u.km / u.s).value,
                 markersize=10.,
                 marker='o',
                 linestyle='none',
                 alpha=0.75)
    fig.savefig(os.path.join(output_path, "xyz_vxvyvz.{}".format(plot_ext)))

    if plot_config.get("mcmc_diagnostics", False):
        logger.debug("Plotting MCMC diagnostics...")

        diagnostics_path = os.path.join(output_path, "diagnostics")
        if not os.path.exists(diagnostics_path):
            os.mkdir(diagnostics_path)

        # plot histogram of autocorrelation times
        if len(acor) > 0:
            fig, ax = plt.subplots(1, 1, figsize=(12, 6))
            ax.plot(acor, marker='o', linestyle='none')
            ax.set_xlabel("Parameter index")
            ax.set_ylabel("Autocorrelation time")
            fig.savefig(
                os.path.join(diagnostics_path, "acor.{}".format(plot_ext)))

        # plot histogram of acceptance fractions
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        ax.hist(acceptance_fraction, bins=nwalkers // 5)
        ax.set_xlabel("Acceptance fraction")
        fig.suptitle("Histogram of acceptance fractions for all walkers")
        fig.savefig(
            os.path.join(diagnostics_path, "acc_frac.{}".format(plot_ext)))

        # plot individual walkers
        plt.figure(figsize=(12, 6))
        for k in range(model.nparameters):
            plt.clf()
            for ii in range(nwalkers):
                plt.plot(chain[ii, :, k],
                         alpha=0.4,
                         drawstyle='steps',
                         color='k')

            plt.axhline(model.truths[k],
                        color='r',
                        lw=2.,
                        linestyle='-',
                        alpha=0.5)
            plt.savefig(
                os.path.join(diagnostics_path,
                             "param_{}.{}".format(k, plot_ext)))

        plt.close('all')

    if plot_config.get("posterior", False):
        logger.debug("Plotting posterior distributions...")

        flatchain_dict = model.label_flatchain(thin_flatchain)
        p0 = model.sample_priors(size=1000)  # HACK HACK HACK
        p0_dict = model.label_flatchain(np.vstack(p0))
        potential_group = model.parameters.get('potential', None)
        particles_group = model.parameters.get('particles', None)
        satellite_group = model.parameters.get('satellite', None)
        flatchains = dict()

        if potential_group:
            this_flatchain = np.zeros(
                (len(thin_flatchain), len(potential_group)))
            this_p0 = np.zeros((len(p0), len(potential_group)))
            this_truths = []
            this_extents = []
            for ii, pname in enumerate(potential_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['potential'][pname]

                this_flatchain[:, ii] = f(
                    np.squeeze(flatchain_dict['potential'][pname]))
                this_p0[:, ii] = f(np.squeeze(p0_dict['potential'][pname]))
                this_truths.append(f(p.truth))
                this_extents.append((f(p._prior.a), f(p._prior.b)))

                print(pname, np.median(this_flatchain[:, ii]),
                      np.std(this_flatchain[:, ii]))

            fig = triangle.corner(this_p0,
                                  point_kwargs=dict(color='#2b8cbe',
                                                    alpha=0.1),
                                  hist_kwargs=dict(color='#2b8cbe',
                                                   alpha=0.75,
                                                   normed=True,
                                                   bins=50),
                                  plot_contours=False)

            fig = triangle.corner(
                this_flatchain,
                fig=fig,
                truths=this_truths,
                labels=[_label_map[k] for k in potential_group.keys()],
                extents=this_extents,
                point_kwargs=dict(color='k', alpha=1.),
                hist_kwargs=dict(color='k', alpha=0.75, normed=True, bins=50))
            fig.savefig(
                os.path.join(output_path, "potential.{}".format(plot_ext)))

            flatchains['potential'] = this_flatchain

        nparticles = model.true_particles.nparticles
        if particles_group and len(particles_group) > 1:
            for jj in range(nparticles):
                this_flatchain = np.zeros(
                    (len(thin_flatchain), len(particles_group)))
                this_p0 = np.zeros((len(p0), len(particles_group)))
                this_truths = []
                this_extents = None
                for ii, pname in enumerate(particles_group.keys()):
                    f = _unit_transform[pname]
                    p = model.parameters['particles'][pname]

                    this_flatchain[:, ii] = f(
                        np.squeeze(flatchain_dict['particles'][pname][:, jj]))
                    this_p0[:, ii] = f(
                        np.squeeze(p0_dict['particles'][pname][:, jj]))
                    this_truths.append(f(p.truth[jj]))
                    #this_extents.append((f(p._prior.a), f(p._prior.b)))

                fig = triangle.corner(this_p0,
                                      point_kwargs=dict(color='#2b8cbe',
                                                        alpha=0.1),
                                      hist_kwargs=dict(color='#2b8cbe',
                                                       alpha=0.75,
                                                       normed=True,
                                                       bins=50),
                                      plot_contours=False)

                fig = triangle.corner(
                    this_flatchain,
                    fig=fig,
                    truths=this_truths,
                    labels=[_label_map[k] for k in particles_group.keys()],
                    extents=this_extents,
                    point_kwargs=dict(color='k', alpha=1.),
                    hist_kwargs=dict(color='k',
                                     alpha=0.75,
                                     normed=True,
                                     bins=50))
                fig.savefig(
                    os.path.join(output_path,
                                 "particle{}.{}".format(jj, plot_ext)))

        # plot the posterior for the satellite parameters
        if satellite_group and len(satellite_group) > 1:
            jj = 0
            this_flatchain = np.zeros(
                (len(thin_flatchain), len(satellite_group)))
            this_p0 = np.zeros((len(p0), len(satellite_group)))
            this_truths = []
            this_extents = None
            for ii, pname in enumerate(satellite_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['satellite'][pname]

                this_flatchain[:, ii] = f(
                    np.squeeze(flatchain_dict['satellite'][pname][:, jj]))
                this_p0[:, ii] = f(
                    np.squeeze(p0_dict['satellite'][pname][:, jj]))
                try:
                    this_truths.append(f(p.truth[jj]))
                except (IndexError, TypeError):  # truth may be a scalar
                    this_truths.append(f(p.truth))
                #this_extents.append((f(p._prior.a), f(p._prior.b)))

            fig = triangle.corner(this_p0,
                                  point_kwargs=dict(color='#2b8cbe',
                                                    alpha=0.1),
                                  hist_kwargs=dict(color='#2b8cbe',
                                                   alpha=0.75,
                                                   normed=True,
                                                   bins=50),
                                  plot_contours=False)

            fig = triangle.corner(
                this_flatchain,
                fig=fig,
                truths=this_truths,
                labels=[_label_map[k] for k in satellite_group.keys()],
                extents=this_extents,
                point_kwargs=dict(color='k', alpha=1.),
                hist_kwargs=dict(color='k', alpha=0.75, normed=True, bins=50))
            fig.savefig(
                os.path.join(output_path, "satellite.{}".format(plot_ext)))

            flatchains['satellite'] = this_flatchain

        if 'potential' in flatchains and 'satellite' in flatchains:
            this_flatchain = np.hstack(
                (flatchains['potential'], flatchains['satellite']))
            labels = [
                _label_map[k] for k in
                list(potential_group.keys()) + list(satellite_group.keys())
            ]
            fig = triangle.corner(this_flatchain,
                                  labels=labels,
                                  point_kwargs=dict(color='k', alpha=1.),
                                  hist_kwargs=dict(color='k',
                                                   alpha=0.75,
                                                   normed=True,
                                                   bins=50))
            fig.savefig(
                os.path.join(output_path, "suck-it-up.{}".format(plot_ext)))
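
The thinning step in the example above estimates one autocorrelation time per
parameter from the walker-averaged chain and thins by the median. A minimal
standalone sketch of that pattern, assuming a chain of shape
(nwalkers, nsteps, ndim) and the modern emcee.autocorr API (the function name
is illustrative):

import numpy as np
from emcee import autocorr

def thin_by_median_acor(chain):
    # emcee 3 expects (nsteps, nwalkers, ndim) and returns one tau per dim
    x = np.swapaxes(chain, 0, 1)
    tau = autocorr.integrated_time(x, tol=0)
    step = max(int(np.median(tau)), 1)
    return chain[:, ::step], step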
Esempio n. 37
0
for i in range(hyper.shape[1]):
    pl.clf()
    pl.plot(hyper[:, i])
    pl.savefig(os.path.join(bp, "time-hyper-{0:03d}.png".format(i)))

pl.clf()
pl.plot(lnprob)
pl.savefig(os.path.join(bp, "time-lnprob.png"))

nstar = 42557.0
ntot = 200000
samples = samples[-ntot:, :]  # [::50, :]

# Reformat the samples and save the samples.
thin_by = int(np.min(integrated_time(samples, axis=0)))
thinned = samples[::thin_by, :]
grids = thinned.reshape((len(thinned), pop.shape[0], pop.shape[1]))
print(grids.shape)
print([b.shape for b in pop.bins])


def xmap(f, i):
    return (f(*x) for x in i)


print("Hyper:")

h_mu = np.mean(hyper[-ntot:][::thin_by, :2], axis=0)
h_std = np.std(hyper[-ntot:][::thin_by, :2], axis=0)
print("\n".join(xmap("{0} ± {1}".format, zip(h_mu, h_std))))
Esempio n. 38
0
def autocorr_plot(outfile, skip_step=100, **kwargs):
    """Autocorrelation plots.
    0 is good, 1 is bad
    extra keyword arguments get passed into emcee.autocorr.intergrated_time
    
    Parameters
    ----------
    outfile : str
        hdf5 file name
    skip_step : int, optional
        number of steps to skip to thin the flattened chain

    Returns
    -------
    fig : matplotlib.figure.Figure
    axarr : ndarray
        2d array of matplotlib.axes._subplots.AxesSubplot instances
    """
    chain = io.read_dataset(outfile, "chain")
    model = io.read_model(outfile)
    nwalkers, niterations, ndim = chain.shape
    assert ndim == len(model.params)
    labels = []
    for i, name in enumerate(model.params.names):
        if name in label_map:
            labels.append(label_map[name])
        else:
            labels.append(name)
    # lower integrated autocorrelation times are better
    flatchain = chain.reshape((-1, ndim))
    nsamples = flatchain.shape[0]
    acorr = autocorr_function(flatchain)
    try:
        acorr_times = integrated_time(flatchain, **kwargs)
    except Exception:
        acorr_times = np.zeros(ndim)
    n = skip_step
    steps = n * np.arange(1, flatchain.shape[0] // n + 1)

    ncols = int(np.sqrt(ndim))
    nrows = int(np.ceil(ndim / ncols))
    label_str = r'$\sqrt{\tau_\mathrm{int} / n} = $'
    fig, axarr = plt.subplots(nrows,
                              ncols,
                              sharex="col",
                              sharey="row",
                              figsize=(4.8 * ncols, 2.4 * nrows))
    # fig.tight_layout()
    for i in range(ndim):
        col = i % ncols
        row = int(np.floor((i - col) / ncols))
        unc = np.sqrt(acorr_times[i] / nsamples)
        axarr[row][col].plot(steps, acorr[::n, i], alpha=0.5)
        axarr[row][col].annotate(labels[i],
                                 xy=(0.1, 0.7),
                                 xycoords="axes fraction",
                                 bbox={
                                     "fc": "w",
                                     "ec": "k",
                                     "pad": 4.0,
                                     "alpha": 0.5
                                 })
        axarr[row][col].annotate(label_str + '{:.1e}'.format(unc),
                                 xy=(0.5, 0.7),
                                 xycoords="axes fraction",
                                 fontsize=10,
                                 bbox={
                                     "fc": "w",
                                     "ec": "k",
                                     "pad": 4.0,
                                     "alpha": 0.5
                                 })
    for col in range(ncols):
        axarr[-1][col].set_xlabel('Iterations')
    return fig, axarr
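
A hypothetical call to autocorr_plot (the file name and the tol keyword,
forwarded to integrated_time, are assumptions, not from the snippet):

fig, axarr = autocorr_plot("run.hdf5", skip_step=200, tol=0)
fig.savefig("autocorr_diagnostics.png")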
Esempio n. 39
0
    def MCMC(self,
             niter=500,
             nburn=200,
             nwalkers=200,
             threads=1,
             fit_partial=False,
             width=3,
             savedir=None,
             refit=False,
             thin=10,
             conf=0.95,
             maxslope=MAXSLOPE,
             debug=False,
             p0=None):
        """
        Fit transit signal to trapezoid model using MCMC

        .. note:: As currently implemented, this method creates a
            bunch of attributes relevant to the MCMC fit; the plan is
            to refactor those attributes into properties so their
            creation is not hidden away here.
        """
        if fit_partial:
            wok = np.where((np.absolute(self.ts - self.center) <
                            (width * self.dur)) & ~np.isnan(self.fs))
        else:
            wok = np.where(~np.isnan(self.fs))

        if savedir is not None:
            if not os.path.exists(savedir):
                os.mkdir(savedir)

        alreadydone = True
        alreadydone &= savedir is not None
        alreadydone &= os.path.exists('%s/ts.npy' % savedir)
        alreadydone &= os.path.exists('%s/fs.npy' % savedir)

        if savedir is not None and alreadydone:
            ts_done = np.load('%s/ts.npy' % savedir)
            fs_done = np.load('%s/fs.npy' % savedir)
            alreadydone &= np.all(ts_done == self.ts[wok])
            alreadydone &= np.all(fs_done == self.fs[wok])

        if alreadydone and not refit:
            logging.info('MCMC fit already done for %s.  Loading chains.' %
                         self.name)
            Ts = np.load('%s/duration_chain.npy' % savedir)
            ds = np.load('%s/depth_chain.npy' % savedir)
            slopes = np.load('%s/slope_chain.npy' % savedir)
            tcs = np.load('%s/tc_chain.npy' % savedir)
        else:
            logging.info(
                'Fitting data to trapezoid shape with MCMC for %s....' %
                self.name)
            if p0 is None:
                p0 = self.trapfit.copy()
                p0[0] = np.absolute(p0[0])
                if p0[2] < 2:
                    p0[2] = 2.01
                if p0[1] < 0:
                    p0[1] = 1e-5
            logging.debug('p0 for MCMC = {}'.format(p0))
            sampler = traptransit_MCMC(self.ts[wok],
                                       self.fs[wok],
                                       self.dfs[wok],
                                       niter=niter,
                                       nburn=nburn,
                                       nwalkers=nwalkers,
                                       threads=threads,
                                       p0=p0,
                                       return_sampler=True,
                                       maxslope=maxslope)

            Ts, ds, slopes, tcs = (sampler.flatchain[:, 0],
                                   sampler.flatchain[:, 1],
                                   sampler.flatchain[:, 2],
                                   sampler.flatchain[:, 3])

            self.sampler = sampler
            if savedir is not None:
                np.save('%s/duration_chain.npy' % savedir, Ts)
                np.save('%s/depth_chain.npy' % savedir, ds)
                np.save('%s/slope_chain.npy' % savedir, slopes)
                np.save('%s/tc_chain.npy' % savedir, tcs)
                np.save('%s/ts.npy' % savedir, self.ts[wok])
                np.save('%s/fs.npy' % savedir, self.fs[wok])

        if debug:
            print(Ts)
            print(ds)
            print(slopes)
            print(tcs)

        N = len(Ts)
        try:
            self.Ts_acor = integrated_time(Ts)
            self.ds_acor = integrated_time(ds)
            self.slopes_acor = integrated_time(slopes)
            self.tcs_acor = integrated_time(tcs)
            self.fit_converged = True
        except AutocorrError:
            self.fit_converged = False

        ok = (Ts > 0) & (ds > 0) & (slopes > 0) & (slopes < self.maxslope)
        logging.debug('trapezoidal fit has {} good sample points'.format(
            ok.sum()))
        if ok.sum() == 0:
            if (Ts > 0).sum() == 0:
                #logging.debug('{} points with Ts > 0'.format((Ts > 0).sum()))
                logging.debug('{}'.format(Ts))
                raise MCMCError('{}: 0 points with Ts > 0'.format(self.name))
            if (ds > 0).sum() == 0:
                #logging.debug('{} points with ds > 0'.format((ds > 0).sum()))
                logging.debug('{}'.format(ds))
                raise MCMCError('{}: 0 points with ds > 0'.format(self.name))
            if (slopes > 0).sum() == 0:
                #logging.debug('{} points with slopes > 0'.format((slopes > 0).sum()))
                logging.debug('{}'.format(slopes))
                raise MCMCError('{}: 0 points with slopes > 0'.format(
                    self.name))
            if (slopes < self.maxslope).sum() == 0:
                #logging.debug('{} points with slopes < maxslope ({})'.format((slopes < self.maxslope).sum(),self.maxslope))
                logging.debug('{}'.format(slopes))
                raise MCMCError('{} points with slopes < maxslope ({})'.format(
                    (slopes < self.maxslope).sum(), self.maxslope))

        durs, deps, logdeps, slopes = (Ts[ok], ds[ok], np.log10(ds[ok]),
                                       slopes[ok])

        inds = (np.arange(len(durs) // thin) * thin).astype(int)
        durs, deps, logdeps, slopes = (durs[inds], deps[inds], logdeps[inds],
                                       slopes[inds])

        self.durs, self.deps, self.logdeps, self.slopes = (durs, deps, logdeps,
                                                           slopes)

        self._make_kde(conf=conf)

        self.hasMCMC = True
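
The try/except around integrated_time in the method above is a common
convergence check: emcee raises AutocorrError when the chain is shorter than
tol autocorrelation times (tol defaults to 50). A standalone sketch of the
pattern (the function name is illustrative):

import numpy as np
from emcee.autocorr import integrated_time, AutocorrError

def converged(samples, tol=50):
    try:
        return True, integrated_time(samples, tol=tol)
    except AutocorrError as err:
        # in emcee 3 the exception carries the (unreliable) estimate
        return False, getattr(err, "tau", None)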
Esempio n. 40
0
def convergenceVals(algor, ndim, varIdxs, chains_nruns, bi_steps):
    """
    Convergence statistics.
    """
    if algor == 'emcee':
        from emcee import autocorr

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        if algor == 'ptemcee':
            # Mean Tau across chains, shape: (post-bi steps, ndims)
            x = np.mean(chains_nruns.T, axis=1).T
            tau_autocorr = []
            j = 10  # Here in case the line below is skipped
            for j in np.arange(50, x.shape[0], 50):
                # tau.shape: ndim
                tau = util.autocorr_integrated_time(x[:j])
                # Autocorrelation time. Mean across dimensions.
                tau_autocorr.append([bi_steps + j, np.mean(tau)])
            # Add one last point with the entire chain.
            if j < x.shape[0]:
                tau = util.autocorr_integrated_time(x)
                tau_autocorr.append([bi_steps + x.shape[0], np.mean(tau)])
            tau_autocorr = np.array(tau_autocorr).T
        elif algor == 'emcee':
            tau_autocorr = None

        # Autocorrelation time for each parameter, mean across chains.
        if algor == 'emcee':
            acorr_t = autocorr.integrated_time(chains_nruns, tol=0, quiet=True)
        elif algor == 'ptemcee':
            x = np.mean(chains_nruns.transpose(1, 0, 2), axis=0)
            acorr_t = util.autocorr_integrated_time(x)

        # Autocorrelation time for each chain for each parameter.
        logger = logging.getLogger()
        logger.disabled = True
        at = []
        # For each parameter/dimension
        for p in chains_nruns.T:
            at_p = []
            # For each chain for this parameter/dimension
            for c in p:
                if algor == 'emcee':
                    at_p.append(autocorr.integrated_time(c, quiet=True)[0])
                elif algor == 'ptemcee':
                    at_p.append(util.autocorr_integrated_time(c))
            at.append(at_p)
        logger.disabled = False
        # IAT for all chains and all parameters.
        all_taus = [item for subl in at for item in subl]

        # # Worst chain: chain with the largest acorr time.
        # max_at_c = [np.argmax(a) for a in at]
        # # Best chain: chain with the smallest acorr time.
        # min_at_c = [np.argmin(a) for a in at]
        # Chain with the closest IAT to the median
        med_at_c = [np.argmin(np.abs(np.median(a) - a)) for a in at]

        # Mean Geweke z-scores and autocorrelation functions for all chains.
        geweke_z = [[] for _ in range(ndim)]
        acorr_function = [[] for _ in range(ndim)]
        for i, p in enumerate(chains_nruns.T):
            for c in p:
                try:
                    geweke_z[i].append(geweke(c))
                except ZeroDivisionError:
                    geweke_z[i].append([np.nan, np.nan])
                try:
                    if algor == 'emcee':
                        acorr_function[i].append(autocorr.function_1d(c))
                    elif algor == 'ptemcee':
                        acorr_function[i].append(util.autocorr_function(c))
                except FloatingPointError:
                    acorr_function[i].append([np.nan])
        # Mean across chains
        geweke_z = np.nanmean(geweke_z, axis=1)
        acorr_function = np.nanmean(acorr_function, axis=1)

        # # Cut the autocorrelation function just after *all* the parameters
        # # have crossed the zero line.
        # try:
        #     lag_zero = max([np.where(_ < 0)[0][0] for _ in acorr_function])
        # except IndexError:
        #     # Could not obtain zero lag
        #     lag_zero = acorr_function.shape[-1]
        # acorr_function = acorr_function[:, :int(lag_zero + .2 * lag_zero)]

        # # Approx IAT
        # lag_iat = 1. + 2. * np.sum(acorr_function, axis=1)
        # print("  Approx (zero lag) IAT: ", lag_iat)

        # Effective Sample Size (per param) = (nsteps / tau) * nchains
        mcmc_ess = (chains_nruns.shape[0] / acorr_t) * chains_nruns.shape[1]

        # TODO fix this function
        # # Minimum effective sample size (ESS), and multi-variable ESS.
        # minESS, mESS = fminESS(ndim), multiESS(chains_nruns)
        # # print("mESS: {}".format(mESS))
        # mESS_epsilon = [[], [], []]
        # for alpha in [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95]:
        #     mESS_epsilon[0].append(alpha)
        #     mESS_epsilon[1].append(fminESS(ndim, alpha=alpha, ess=minESS))
        #     mESS_epsilon[2].append(fminESS(ndim, alpha=alpha, ess=mESS))

    return (tau_autocorr, acorr_t, med_at_c, all_taus, geweke_z,
            acorr_function, mcmc_ess)
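
The effective-sample-size line near the end of convergenceVals is worth
isolating. A minimal sketch, assuming chains_nruns has shape
(nsteps, nchains, ndim), acorr_t holds one tau per parameter, and the helper
name is illustrative:

def effective_sample_size(nsteps, nchains, tau):
    # independent samples per chain, times the number of chains
    return (nsteps / tau) * nchains

# e.g. ess = effective_sample_size(chains_nruns.shape[0],
#                                  chains_nruns.shape[1], acorr_t)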
Esempio n. 41
0
def test_nd(seed=1234, ndim=3, N=150000):
    x = get_chain(seed=seed, ndim=ndim, N=N)
    tau = integrated_time(x)
    assert np.all(np.abs(tau - 19.0) / 19. < 0.2)
Esempio n. 42
0
def test_too_short(seed=1234, ndim=3, N=100):
    x = get_chain(seed=seed, ndim=ndim, N=N)
    with pytest.raises(AutocorrError):
        integrated_time(x)
    tau = integrated_time(x, quiet=True)  # NOQA
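
Both tests above rely on a get_chain fixture that is not shown. A plausible
sketch, assuming it draws an AR(1) series: for x[i] = a * x[i-1] + noise the
integrated autocorrelation time is (1 + a) / (1 - a), which equals the 19
asserted in test_nd when a = 0.9:

import numpy as np

def get_chain(seed=1234, ndim=3, N=100000):
    np.random.seed(seed)
    a = 0.9  # tau = (1 + a) / (1 - a) = 19
    x = np.empty((N, ndim))
    x[0] = np.zeros(ndim)
    for i in range(1, N):
        x[i] = a * x[i - 1] + np.random.randn(ndim)
    return x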
Esempio n. 43
0
        pl.plot(lc[0], lc[1], ".", ms=3)
    pl.savefig("raw_data.png")

    # Set up the initial system.
    system = transit.System(transit.Central(radius=0.95))
    planet = transit.Body(r=2.03 * 0.01, period=period, t0=t0, b=0.9)
    system.add_body(planet)
    texp = kplr.EXPOSURE_TIMES[1] / 60. / 60. / 24.
    mean_function = partial(system.light_curve, texp=texp)

    # Set up the Gaussian processes.
    pl.clf()
    offset = 0.001
    models = []
    for i, lc in enumerate(light_curves):
        dt = np.median(np.diff(lc[0])) * integrated_time(lc[1])
        kernel = np.var(lc[1]) * kernels.Matern32Kernel(dt**2)
        gp = george.GP(kernel, mean=mean_function, solver=george.HODLRSolver)
        gp.compute(lc[0], lc[2])
        models.append((gp, lc[1]))

        t = (lc[0] - t0 + hp) % period - hp
        pl.plot(t, lc[1] + i * offset, ".k", ms=3)
        pl.plot(t, gp.predict(lc[1], lc[0], mean_only=True) + i * offset, "b")

    pl.savefig("initial.png")
    pl.xlim(-5, 5)
    pl.savefig("initial_zoom.png")

    model = ProbabilisticModel(system, planet, models)
    p0 = model.get_parameters()