def contour_transform(x0, x1, joint_pdf):
    """
    Convert a 2D likelihood contour from modified to original space
    along with marginalized statistics

    ** THERE IS A BUG IN THIS CODE - DON'T DO SPLINE INTERPOLATION OVER
    CHI2 CONTOUTS, INSTEAD DO THEM OVER THE LIKELIHOOD SPACE **

    Paramters:
    x0 : grid points in first dimension
    x1 : grid points in second dimension
    joint_pdf : posterior probability over the grid
    """
    mu_x0, sig_x0, mu_x1, sig_x1 = marg_estimates(x0, x1, joint_pdf)

    # Convert the corner points to the original space
    corners = np.array([[mu_x0 - 5 * sig_x0, mu_x1 - 5 * sig_x1],
                        [mu_x0 - 5 * sig_x0, mu_x1 + 5 * sig_x1],
                        [mu_x0 + 5 * sig_x0, mu_x1 - 5 * sig_x1],
                        [mu_x0 + 5 * sig_x0, mu_x1 + 5 * sig_x1]])

    extents = xfm(corners, shift, tilt, direction='down')

    extent_t0 = [extents[:, 0].min(), extents[:, 0].max()]
    extent_gamma = [extents[:, 1].min(), extents[:, 1].max()]

    # suitable ranges for spline interpolation in modified space
    range_stats = np.array([
        mu_x0 - 5 * sig_x0, mu_x0 + 5 * sig_x0, mu_x1 - 5 * sig_x1,
        mu_x1 + 5 * sig_x1
    ])

    x0_line, x1_line = x0[:, 0], x1[0]
    mask_x0 = np.where((x0_line > range_stats[0])
                       & (x0_line < range_stats[1]))[0]
    mask_x1 = np.where((x1_line > range_stats[2])
                       & (x1_line < range_stats[3]))[0]

    # create a RectBivariateSpline over the log-likelihood in modified space
    # (not over the confidence contours; see the note in the docstring)
    _b = RectBivariateSpline(x0_line[mask_x0], x1_line[mask_x1],
                             joint_pdf[mask_x0[:, None], mask_x1])

    # Rectangular grid in original space
    tau0, gamma = np.mgrid[extent_t0[0]:extent_t0[1]:250j,
                           extent_gamma[0]:extent_gamma[1]:250j]

    _point_orig = np.vstack([tau0.ravel(), gamma.ravel()]).T
    _grid_in_mod = xfm(_point_orig, shift, tilt, direction='up')

    values_orig = _b.ev(_grid_in_mod[:, 0], _grid_in_mod[:, 1])
    values_orig = values_orig.reshape(tau0.shape)

    return tau0, gamma, values_orig
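
# The helpers above assume two module-level utilities: `xfm`, an affine map
# between the original (ln tau0, gamma) basis and the decorrelated (x0, x1)
# basis, and `cts`, which converts a log-likelihood grid into enclosed
# confidence levels for contouring. Minimal sketches follow, assuming a pure
# shift-plus-rotation and standard highest-density contouring; these are
# illustrative stand-ins, not the project's definitions.
def xfm_sketch(points, shift, tilt, direction='up'):
    """'up' maps original -> modified, 'down' maps modified -> original."""
    points = np.atleast_2d(points)
    if direction == 'up':
        return (points - shift) @ tilt.T
    return points @ np.linalg.inv(tilt).T + shift


def cts_sketch(logL):
    """Map a log-likelihood grid to the probability mass enclosed by the
    iso-likelihood contour through each cell, so that contour levels
    0.683 and 0.955 trace the usual confidence regions."""
    p = np.exp(logL - logL.max())
    p /= p.sum()
    order = np.argsort(p.ravel())[::-1]
    enclosed = np.empty(p.size)
    enclosed[order] = np.cumsum(p.ravel()[order])
    return enclosed.reshape(p.shape)
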
def addLogs(fname,
            npix=200,
            sfx_lst=None,
            mod_ax=None,
            get_est=False,
            basis='orig',
            orig_ax=None,
            orig_space=True,
            mycolor='k',
            plotit=False,
            mylabel='temp',
            ls='solid',
            individual=False,
            save=True,
            model=False,
            force=False,
            plot_marg=True,
            **kwargs):
    """
    Plots the log-likelihood surface for each skewer in a given folder

    Parameters:
    -----------
        fname : the path to the folder containing the files
        npix : # of grid points in modified space
        suffix_list : indices of the skewers to plot, None for all
        mod_ax : axes over which to draw the contours in modified space
        orig_ax : axes over which to draw the contours in original space
        orig_space : do conversions to original space?
        mycolor : edgecolor of the joint pdf contour (JPC)
        mylabel : label of the JPC
        ls : linestyle of JPC
        individual : whether to draw contours for individual skewers

    Returns:
    --------
        None
    """

    import glob
    import os
    from scipy.interpolate import RectBivariateSpline

    if not os.path.exists(fname):
        print('Oops! There is no such folder:', fname)
        return None

    currdir = os.getcwd()
    os.chdir(fname)

    try:
        if get_est:
            if basis == "orig":
                n_dim = 3
                e_lst = glob.glob('tg_est*')
                labels = [r"$f_0$", r"$\ln \tau_0$", r"$\gamma$"]
            else:
                n_dim = 2
                e_lst = glob.glob('xx_est*')
                labels = [r"$x_0$", r"$x_1$"]

            sfx = []

            # pull the skewer indices from the file names and read the data
            e_cube = np.empty((len(e_lst), n_dim, 3))
            for ct, ele in enumerate(e_lst):
                temp = str.split(ele, '_')
                sfx.append(int(temp[2][:-4]))
                e_cube[ct] = np.loadtxt(ele)

            # Sort the data according to the skewer index
            e_cube = np.array([ele for _, ele in sorted(zip(sfx, e_cube))])
            sfx = np.array(sfx)
            sfx.sort()

            # Plotting
            if plotit:
                fig, axs = plt.subplots(nrows=n_dim,
                                        sharex=True,
                                        figsize=(9, 5))

                for i in range(n_dim):
                    axs[i].errorbar(sfx,
                                    e_cube[:, i, 0],
                                    yerr=[e_cube[:, i, 2], e_cube[:, i, 1]],
                                    fmt='.-',
                                    color='k',
                                    lw=0.6)
                    axs[i].set_ylabel(labels[i])
                plt.tight_layout()
                plt.show()

            # Best-fit after modeling correlation matrix
            results = None
            if model:
                # Cannot handle asymmetric error bars, so take the average
                err0 = (e_cube[:, -2, 1] + e_cube[:, -2, 2]) / 2.

                res0 = utils.get_corrfunc(e_cube[:, -2, 0],
                                          err0,
                                          model=True,
                                          est=True,
                                          sfx="x0_corr",
                                          scale_factor=2.24,
                                          viz=False)

                print("W/o correlations: %.5f pm %.5f" % (res0[0], res0[1]))
                print("With correlations: %.5f pm %.5f" % (res0[2], res0[3]))

                err1 = (e_cube[:, -1, 1] + e_cube[:, -1, 2]) / 2.
                res1 = utils.get_corrfunc(e_cube[:, -1, 0],
                                          err1,
                                          model=True,
                                          est=True,
                                          sfx="x1_corr",
                                          scale_factor=3.4,
                                          viz=False)

                print("W/o correlations: %.5f pm %.5f" % (res1[0], res1[1]))
                print("With correlations: %.5f pm %.5f" % (res1[2], res1[3]))

                results = [res0, res1]

            os.chdir(currdir)
            return sfx, e_cube, results

        # Joint PDF from the combined likelihoods
        # Read data from the files
        if not os.path.isfile('joint_pdf.dat') or force:
            f_lst = glob.glob('gridlnlike_*')

            d_cube = np.empty((len(f_lst), npix, npix))

            # Read the skewer number from file itself for now
            sfx = []
            for ct, ele in enumerate(f_lst):
                d_cube[ct] = np.loadtxt(ele)

                temp = str.split(ele, '_')
                sfx.append(int(temp[1][:-4]))

            # sort the data for visualization
            d_cube = np.array([ele for _, ele in sorted(zip(sfx, d_cube))])

            sfx = np.array(sfx)
            sfx.sort()

            # choose a specific subset of the skewers
            if sfx_lst is not None:
                ind = [(ele in sfx_lst) for ele in sfx]
                d_cube = d_cube[ind]
                sfx = sfx[ind]

            # joint pdf #######################################################
            joint_pdf = d_cube.sum(0)
            joint_pdf -= joint_pdf.max()
            if save:
                np.savetxt('joint_pdf.dat', joint_pdf)
        else:
            print("****** File already exists. Reading from it *******")
            joint_pdf = np.loadtxt('joint_pdf.dat')

        # simple point statistics in modified space
        if mod_ax is None:
            fig, mod_ax = plt.subplots(1)

        print("Modified space estimates:")
        res = utils.marg_estimates(x0_line,
                                   x1_line,
                                   joint_pdf,
                                   mod_ax,
                                   plot_marg,
                                   labels=["x_0", "x_1"],
                                   **kwargs)
        mu_x0, sig_x0, mu_x1, sig_x1, _ = res

        # Plotting individual + joint contour in likelihood space
        if individual:
            colormap = plt.cm.rainbow
            colors = [colormap(i) for i in np.linspace(0, 1, len(sfx))]
            for i in range(len(sfx)):
                CS = mod_ax.contour(x0,
                                    x1,
                                    cts(d_cube[i]),
                                    levels=[0.68],
                                    colors=(colors[i], ))
                CS.collections[0].set_label(sfx[i])

        mod_ax.legend(loc='upper center', ncol=6)
        mod_ax.set_xlabel('$x_0$')
        mod_ax.set_ylabel('$x_1$')

        # 1. Find the appropriate ranges in tau0-gamma space
        corners = np.array([[mu_x0 - 5 * sig_x0, mu_x1 - 5 * sig_x1],
                            [mu_x0 - 5 * sig_x0, mu_x1 + 5 * sig_x1],
                            [mu_x0 + 5 * sig_x0, mu_x1 - 5 * sig_x1],
                            [mu_x0 + 5 * sig_x0, mu_x1 + 5 * sig_x1]])
        extents = utils.xfm(corners, shift, tilt, direction='down')

        extent_t0 = [extents[:, 0].min(), extents[:, 0].max()]
        extent_gamma = [extents[:, 1].min(), extents[:, 1].max()]

        # suitable ranges for spline interpolation in modified space
        range_stats = np.array([
            mu_x0 - 5 * sig_x0, mu_x0 + 5 * sig_x0, mu_x1 - 5 * sig_x1,
            mu_x1 + 5 * sig_x1
        ])

        mask_x0 = np.where((x0_line > range_stats[0])
                           & (x0_line < range_stats[1]))[0]
        mask_x1 = np.where((x1_line > range_stats[2])
                           & (x1_line < range_stats[3]))[0]

        # create a RectBivariateSpline over the log-likelihood (logP)
        # in the modified space
        _b = RectBivariateSpline(x0_line[mask_x0], x1_line[mask_x1],
                                 joint_pdf[mask_x0[:, None], mask_x1])

        # Rectangular grid in original space
        _tau0, _gamma = np.mgrid[extent_t0[0]:extent_t0[1]:500j,
                                 extent_gamma[0]:extent_gamma[1]:501j]

        _point_orig = np.vstack([_tau0.ravel(), _gamma.ravel()]).T
        _grid_in_mod = utils.xfm(_point_orig, shift, tilt, direction='up')

        values_orig = _b.ev(_grid_in_mod[:, 0], _grid_in_mod[:, 1])
        values_orig = values_orig.reshape(_tau0.shape)

        # Best fit + statistical errors
        print("Original space estimates:")
        if orig_ax is None:
            fig, orig_ax = plt.subplots(1)
        utils.marg_estimates(_tau0[:, 0],
                             _gamma[0],
                             values_orig,
                             orig_ax,
                             plot_marg,
                             labels=[r"\ln \tau_0", "\gamma"],
                             **kwargs)

        plt.show()
        os.chdir(currdir)
        return res
    except Exception:
        os.chdir(currdir)
        raise
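
# A hedged usage sketch for addLogs: combine the per-skewer likelihood grids
# saved in a folder and draw the joint contours in both bases. The folder
# path is illustrative.
def _demo_addLogs(folder='./skewer_fits/'):
    fig, (ax_mod, ax_orig) = plt.subplots(1, 2, figsize=(10, 4))
    return addLogs(folder, mod_ax=ax_mod, orig_ax=ax_orig,
                   mycolor='C0', mylabel='joint')
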
def mcmcSkewer(bundleObj,
               logdef=3,
               binned=False,
               niter=2500,
               do_mcmc=True,
               return_sampler=False,
               evalgrid=True,
               in_axes=None,
               viz=False,
               VERBOSITY=False,
               seed=None,
               truths=[0.002, 3.8]):
    """
    Script to fit simple flux model on each restframe wavelength skewer

    Parameters:
    -----------
        bundleObj : A list of [z, f, ivar] with the skewer_index
        logdef : Which model to use
        niter : The number of iterations to run the mcmc (40% for burn-in)
        do_mcmc : Flag whether to perform mcmc
        plt_pts : Plot the data along with best fit from scipy and mcmc
        return_sampler : Whether to return the raw sampler  without flatchaining
        triangle : Display triangle plot of the parameters
        evalgrid : Whether to compute loglikelihood on a specified grid
        in_axes : axes over which to draw the plots
        xx_viz : draw marginalized contour in modifed space
        VERBOSITY : print extra information
        seed : how to seed the random state
        truths : used with logdef=4, best-fit values of tau0 and gamma

    Returns:
        mcmc_chains if return_sampler, else None
    """

    z, f, ivar = bundleObj[0].T

    ind = (ivar > 0) & (np.isfinite(f))
    z, f, sigma = z[ind], f[ind], 1.0 / np.sqrt(ivar[ind])
    # -------------------------------------------------------------------------
    # continuum flux estimate given a value of (tau0, gamma)
    if logdef == 4:
        if VERBOSITY:
            print('Continuum estimates using optical depth parameters:',
                  truths)
        def chisq4(*args):
            return -outer(*truths)(*args)

        opt_res = minimize(chisq4,
                           1.5,
                           args=(z, f, sigma),
                           method='Nelder-Mead')
        return opt_res['x']

    if VERBOSITY:
        print('Carrying analysis for skewer', bundleObj[1])

    if logdef == 1:
        nll, names, labels, guess = chisq1, names1, labels1, guess1
        ndim, kranges, lnlike = 4, kranges1, lnlike1

    elif logdef == 2:
        nll, names, labels, guess = chisq2, names2, labels2, guess2
        ndim, kranges, lnlike = 5, kranges2, lnlike2

    elif logdef == 3:
        nll, names, labels, guess = chisq3, names3, labels3, guess3
        ndim, kranges, lnlike = 3, kranges3, lnlike3

    else:
        raise ValueError('logdef must be 1, 2, 3 or 4')

    # Try to fit with scipy optimize routine
    opt_res = minimize(nll, guess, args=(z, f, sigma), method='Nelder-Mead')
    print('Scipy optimize results:')
    print('Success =', opt_res['success'], 'params =', opt_res['x'], '\n')

    if viz:
        if in_axes is None:
            fig, in_axes = plt.subplots(1)
        in_axes.errorbar(z, f, sigma, fmt='o', color='gray', alpha=0.2)
        in_axes.plot(
            zline, opt_res['x'][0] * np.exp(-np.exp(opt_res['x'][1]) *
                                            (1 + zline)**opt_res['x'][2]))

    if binned:
        mu = binned_statistic(z, f, bins=binx).statistic
        sig = binned_statistic(z, f, bins=binx, statistic=sig_func).statistic

        ixs = sig > 0
        z, f, sigma = centers[ixs], mu[ixs], sig[ixs]

        if viz:
            in_axes.errorbar(z, f, sigma, fmt='o', color='r')

        nll, names, labels, guess = lsq, names3, labels3, guess3
        ndim, kranges, lnlike = 3, kranges3, simpleln

    # --------------------------------------------------------------------------
    if do_mcmc:
        np.random.seed(seed)

        nwalkers = 100
        p0 = [guess + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

        # configure the sampler
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnlike,
                                        args=(z, f, sigma))

        # burn-in time - Is this enough?
        p0, __, __ = sampler.run_mcmc(p0, 500)
        sampler.reset()

        # Production step
        sampler.run_mcmc(p0, niter)
        print("Burn-in and production completed \n")

        if return_sampler:
            return sampler.chain
        else:
            # pruning 40 percent of the samples as extra burn-in
            lInd = int(niter * 0.4)
            samps = sampler.chain[:, lInd:, :].reshape((-1, ndim))

            # using percentiles as confidence intervals
            CenVal = np.median(samps, axis=0)

            # print BIC at the best estimate point, BIC = - 2 * ln(L_0) + k ln(n)
            print('CHISQ_R',
                  -2 * lnlike(CenVal, z, f, sigma) / (len(z) - ndim))
            print('BIC:',
                  -2 * lnlike(CenVal, z, f, sigma) + ndim * np.log(len(z)))

            # Rotate the points to the other basis and 1D estimates
            # and write them to the file

            # Format : center, top error, bottom error
            tg_est = list(
                map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(samps, [16, 50, 84], axis=0))))

            xx = xfm(samps[:, 1:], shift, tilt, direction='up')
            xx_est = list(
                map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                    zip(*np.percentile(xx, [16, 50, 84], axis=0))))

            f_name2 = 'tg_est_' + str(bundleObj[1]) + '.dat'
            np.savetxt(f_name2, tg_est)
            f_name3 = 'xx_est_' + str(bundleObj[1]) + '.dat'
            np.savetxt(f_name3, xx_est)

            if viz:
                in_axes.plot(
                    zline, CenVal[0] * np.exp(-np.exp(CenVal[1]) *
                                              (1 + zline)**CenVal[2]), '-g')

            # instantiate a getdist object
            MC = MCSamples(samples=samps,
                           names=names,
                           labels=labels,
                           ranges=kranges)

            # MODIFY THIS TO BE PRETTIER
            if viz:
                g = plots.getSubplotPlotter()
                g.triangle_plot(MC)

            # Evaluate the pdf on a rotated grid for better estimation
            if evalgrid:
                print('Evaluating on the grid specified \n')
                pdist = MC.get2DDensity('t0', 'gamma')

                # Evaluate the density on a grid
                pgrid = np.array([pdist.Prob(*ele) for ele in modPos])
                # Floor negative densities from the KDE
                pgrid[pgrid < 0] = 1e-50

                # Convert to logLikelihood
                logP = np.log(pgrid)
                logP -= logP.max()
                logP = logP.reshape(x0.shape)

                # Visualize the contour in modified space per skewer
                if viz:
                    fig, ax2 = plt.subplots(1)
                    ax2.contour(x0,
                                x1,
                                cts(logP),
                                levels=[
                                    0.683,
                                    0.955,
                                ],
                                colors='k')
                    ax2.axvline(xx_est[0][0] + xx_est[0][1])
                    ax2.axvline(xx_est[0][0] - xx_est[0][2])
                    ax2.axhline(xx_est[1][0] + xx_est[1][1])
                    ax2.axhline(xx_est[1][0] - xx_est[1][2])
                    ax2.set_xlabel(r'$x_0$')
                    ax2.set_ylabel(r'$x_1$')
                    plt.show()

                # fileName1: the log-probability evaluated in the tilted grid
                f_name1 = 'gridlnlike_' + str(bundleObj[1]) + '.dat'
                np.savetxt(f_name1, logP)
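
# A hedged driver for mcmcSkewer on one synthetic skewer. The flux model
# mirrors the best-fit curve plotted above, f = A * exp(-tau0 * (1+z)**gamma),
# with tau0 = 0.002 and gamma = 3.8; the amplitude and noise level are made
# up for illustration.
def _demo_mcmcSkewer(seed=0):
    rng = np.random.RandomState(seed)
    z = np.linspace(2.1, 3.3, 150)
    f = np.exp(-0.002 * (1 + z) ** 3.8) + 0.05 * rng.randn(z.size)
    ivar = np.full_like(z, 1 / 0.05 ** 2)
    bundle = [np.column_stack([z, f, ivar]), 0]   # [z, f, ivar] + index
    return mcmcSkewer(bundle, logdef=3, niter=2500, viz=False)
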
def marg_estimates(xx, yy, logL, levels=None, par_labels=["x_0", "x_1"],
                   ax=None, plot_marg=True, label='temp', **kwargs):
    """
    Marginalized statistics that follows from a jont likelihood.
    Simple mean and standard deviation estimates.

    Parameters:
        x0 : vector in x-direction of the grid
        x1 : vector in y-direction of the grid
        joint_pdf : posterior log probability on the 2D grid

    Returns:
        [loc_x0, sig_x0, loc_x1, sig_x1, sig_x0_x1]
    """
    if levels is None:
        levels = [0.683, 0.955]

    pdf = np.exp(logL)

    # normalize the pdfs (not strictly necessary for the mean
    # and the standard deviation)
    x0_pdf = np.sum(pdf, axis=1)
    x0_pdf /= x0_pdf.sum() * (xx[1] - xx[0])

    x1_pdf = np.sum(pdf, axis=0)
    x1_pdf /= x1_pdf.sum() * (yy[1] - yy[0])

    mu_x0 = (xx * x0_pdf).sum() / x0_pdf.sum()
    mu_x1 = (yy * x1_pdf).sum() / x1_pdf.sum()

    sig_x0 = np.sqrt((xx ** 2 * x0_pdf).sum() / x0_pdf.sum() - mu_x0 ** 2)
    sig_x1 = np.sqrt((yy ** 2 * x1_pdf).sum() / x1_pdf.sum() - mu_x1 ** 2)

    sig_x0_x1 = ((xx - mu_x0) * (yy[:, None] - mu_x1) * pdf.T).sum() / pdf.sum()

    print("param1 = %.4f pm %.4f" % (mu_x0, sig_x0))
    print("param2 = %.4f pm %.4f\n" % (mu_x1, sig_x1))

    if ax is None:
        ax = plt.axes()
    CS = ax.contour(xx, yy, cts(logL.T),
                    levels=levels, **kwargs)
    CS.collections[0].set_label(label)

    ax.set_xlim(mu_x0 - 4 * sig_x0, mu_x0 + 4 * sig_x0)
    ax.set_ylim(mu_x1 - 4 * sig_x1, mu_x1 + 4 * sig_x1)

    if plot_marg:
        xx_extent = 8 * sig_x0
        yy_extent = 8 * sig_x1

        pdf_xx_ext = x0_pdf.max() - x0_pdf.min()
        pdf_yy_ext = x1_pdf.max() - x1_pdf.min()

        ax.plot(xx, 0.2 * (x0_pdf - x0_pdf.min()) * yy_extent / pdf_xx_ext
                + ax.get_ylim()[0])
        ax.axvline(mu_x0 - sig_x0)
        ax.axvline(mu_x0 + sig_x0)
        ax.plot(0.2 * (x1_pdf - x1_pdf.min()) * xx_extent / pdf_yy_ext +
                ax.get_xlim()[0], yy)
        ax.axhline(mu_x1 - sig_x1)
        ax.axhline(mu_x1 + sig_x1)

        plt.title(r"$%s = %.3f \pm %.3f, %s = %.3f \pm %.3f$" %
                  (par_labels[0], mu_x0, sig_x0, par_labels[1], mu_x1, sig_x1))
    plt.legend()
    plt.tight_layout()
    plt.show()

    return mu_x0, sig_x0, mu_x1, sig_x1, sig_x0_x1
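
# A quick self-check of marg_estimates on a synthetic uncorrelated Gaussian
# grid; the recovered moments should match the injected values. It relies on
# the module's `cts` helper for contouring (see the cts_sketch stand-in near
# the top for an assumed definition).
def _demo_marg_estimates():
    xx = np.linspace(-1.0, 1.0, 201)
    yy = np.linspace(-2.5, 2.0, 201)
    # log-likelihood of a Gaussian centred on (0.1, -0.3), sigmas (0.2, 0.5)
    logL = -0.5 * (((xx[:, None] - 0.1) / 0.2) ** 2
                   + ((yy[None, :] + 0.3) / 0.5) ** 2)
    # expect roughly (0.1, 0.2, -0.3, 0.5, ~0)
    return marg_estimates(xx, yy, logL, plot_marg=False)
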
def marg_estimates(x0_line,
                   x1_line,
                   joint_pdf,
                   ax=None,
                   plot_marg=True,
                   labels=None):
    """
    Marginalized statistics that follows from a jont likelihood.
    Simple mean and standard deviation estimates.

    Parameters:
        x0 : vector in x-direction of the grid
        x1 : vector in y-direction of the grid
        joint_pdf : posterior log probability on the 2D grid

    Returns:
        [loc_x0, sig_x0, loc_x1, sig_x1]
    """
    x0_pdf = np.sum(np.exp(joint_pdf), axis=1)
    x0_pdf /= x0_pdf.sum() * (x0_line[1] - x0_line[0])
    x1_pdf = np.sum(np.exp(joint_pdf), axis=0)
    x1_pdf /= x1_pdf.sum() * (x1_line[1] - x1_line[0])

    mu_x0 = (x0_line * x0_pdf).sum() / x0_pdf.sum()
    mu_x1 = (x1_line * x1_pdf).sum() / x1_pdf.sum()
    sig_x0 = np.sqrt((x0_line**2 * x0_pdf).sum() / x0_pdf.sum() - mu_x0**2)
    sig_x1 = np.sqrt((x1_line**2 * x1_pdf).sum() / x1_pdf.sum() - mu_x1**2)

    print("param1 = %.4f pm %.4f" % (mu_x0, sig_x0))
    print("param2 = %.4f pm %.4f\n" % (mu_x1, sig_x1))

    if labels is None:
        labels = ["p0", "p1"]

    if ax is None:
        fig, ax = plt.subplots(1)
    ax.contour(x0_line,
               x1_line,
               cts(joint_pdf.T),
               colors=('k', ),
               levels=[0.683, 0.955])
    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])
    ax.set_xlim(mu_x0 - 4 * sig_x0, mu_x0 + 4 * sig_x0)
    ax.set_ylim(mu_x1 - 4 * sig_x1, mu_x1 + 4 * sig_x1)

    if plot_marg:
        xx_extent = 8 * sig_x0
        yy_extent = 8 * sig_x1

        pdf_xx_ext = x0_pdf.max() - x0_pdf.min()
        pdf_yy_ext = x1_pdf.max() - x1_pdf.min()

        ax.plot(
            x0_line, 0.2 * (x0_pdf - x0_pdf.min()) * yy_extent / pdf_xx_ext +
            ax.get_ylim()[0])
        ax.axvline(mu_x0 - sig_x0)
        ax.axvline(mu_x0 + sig_x0)
        ax.plot(
            0.2 * (x1_pdf - x1_pdf.min()) * xx_extent / pdf_yy_ext +
            ax.get_xlim()[0], x1_line)
        ax.axhline(mu_x1 - sig_x1)
        ax.axhline(mu_x1 + sig_x1)

        plt.title(r"$%s = %.3f \pm %.3f, %s = %.3f \pm %.3f$" %
                  (labels[0], mu_x0, sig_x0, labels[1], mu_x1, sig_x1))
    plt.tight_layout()
    plt.show()

    return mu_x0, sig_x0, mu_x1, sig_x1
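
# A hedged usage sketch for this simpler variant: feed it the joint grid
# saved by addLogs. The file name matches the save logic above; the axis
# vectors are placeholders for the module-level grid lines.
def _demo_marg_from_file(path='joint_pdf.dat'):
    joint_pdf = np.loadtxt(path)
    n0, n1 = joint_pdf.shape
    x0_line = np.linspace(-0.5, 0.5, n0)   # placeholder grid axes
    x1_line = np.linspace(-0.5, 0.5, n1)
    return marg_estimates(x0_line, x1_line, joint_pdf,
                          labels=[r"$x_0$", r"$x_1$"])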