Beispiel #1
0
    def test_get_lnp_grid_vals(self):
        """
        Check get_lnp_grid_vals against cached lnp and SED grid files.

        The cached lnp file (self.lnp_fname_cache) and the requested columns
        of the cached SED grid file (self.seds_trim_fname_cache) are read,
        combined with get_lnp_grid_vals, and a small slice of every requested
        parameter is compared with hard-coded reference values.
        """
        ldata = read_lnp_data(self.lnp_fname_cache)

        requested_params = [
            "Av", "Rv", "f_A", "M_ini", "logA", "Z", "distance"
        ]
        sdata = read_sed_data(self.seds_trim_fname_cache,
                              param_list=requested_params)

        lgvals_data = get_lnp_grid_vals(sdata, ldata)

        # reference values for lgvals_data[cname][0:5, 10], i.e. the first
        # five entries along axis 0 at index 10 of axis 1
        expected_values = {
            "Av": [0.0, 0.0, 0.0, 0.0, 0.0],
            "Rv": [2.0, 2.0, 2.0, 2.0, 2.0],
            "f_A": [1.0, 1.0, 1.0, 1.0, 1.0],
            "M_ini":
            [3.89416909, 3.92726111, 3.95603228, 2.04966068, 2.04999995],
            "logA": [6.0, 6.0, 6.0, 9.0, 9.0],
            "Z": [0.03, 0.03, 0.03, 0.004, 0.004],
            "distance": [
                783429.64276621,
                783429.64276621,
                783429.64276621,
                783429.64276621,
                783429.64276621,
            ],
        }
        for cname in requested_params:
            # every requested parameter must be present in the result ...
            assert cname in lgvals_data, (
                f"requested parameter {cname} not in lnp grid values")
            # ... and must match the reference slice (assert_allclose uses
            # its default tolerances)
            np.testing.assert_allclose(
                lgvals_data[cname][0:5, 10],
                expected_values[cname],
                err_msg=f"expected value of {cname} is not found",
            )
Beispiel #2
0
def fit_ensemble(beast_data,
                 lnp_filename,
                 beast_priormodel,
                 nstars_expected=None):
    """
    Run the MegaBEAST on a single set of BEAST results.

    The sparse likelihoods are read from `lnp_filename`, the BEAST grid
    values at the same grid points are looked up, the BEAST dust prior
    weights are built, and the negative of `lnprob` is minimized with the
    Nelder-Mead method starting from a fixed initial guess.

    Parameters
    ----------
    beast_data : dict
        information about the BEAST runs including SED grid and noise model

    lnp_filename : string
        file with posteriors from BEAST fitting

    beast_priormodel : dict
        dictionary of the BEAST prior model information; the "AV", "RV"
        and "fA" entries are used

    nstars_expected : int
        number of stars expected, used as a check (passed to read_lnp_data
        as `nstars`)

    Returns
    -------
    fit_results : array
        set of best fit parameters (the "x" entry of the minimizer result)
    """
    # get the saved sparse likelihoods
    lnp_data = read_lnp_data(lnp_filename,
                             nstars=nstars_expected,
                             shift_lnp=True)

    # get the completeness and BEAST model parameters for the
    #   same grid points as the sparse likelihoods
    lnp_grid_vals = get_lnp_grid_vals(beast_data, lnp_data)

    # compute the BEAST prior weights
    #  needed so the BEAST posteriors updated with the MegaBEAST model
    # ***currently only AV ensemble model supported***
    avs = lnp_grid_vals["Av"]
    # R(V) and f_A are fixed to single values rather than read from
    # beast_data while only the A(V) ensemble model is supported
    rvs = [3.1]
    fAs = [1.0]
    beast_dust_priors = PriorWeightsDust(
        avs,
        beast_priormodel["AV"],
        rvs,
        beast_priormodel["RV"],
        fAs,
        beast_priormodel["fA"],
    )

    # standard minimization to find initial values
    # The minimizer calls the objective as fun(x, *args) (the
    # scipy.optimize.minimize convention; assumes `op` is scipy.optimize),
    # so the objective must accept the parameter vector plus the three
    # extra arguments and forward all of them to lnprob.
    def chi2(*args):
        return -1.0 * lnprob(*args)

    result = op.minimize(
        chi2,
        [0.25, 2.0, 0.5, 0.5, 1],
        args=(beast_dust_priors, lnp_data, lnp_grid_vals),
        method="Nelder-Mead",
    )

    # next step would be to
    # run through MCMC to fully sample likelihood
    # maybe include option not to run MCMC

    return result["x"]
Beispiel #3
0
def megabeast(megabeast_input_file, verbose=True):
    """
    Run the MegaBEAST on each of the spatially-reordered BEAST outputs.

    For every pixel of the nstars image whose value is at least
    mb_settings["min_for_fit"], the sparse likelihoods of that pixel are
    read, the BEAST dust prior weights are built, and the negative of
    `lnprob` is minimized with the Nelder-Mead method.  The best-fit
    parameters are written to one FITS image per entry of
    mb_settings["fit_param_names"] in the directory
    "<projectname>_megabeast/"; pixels that were not fit are NaN.

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc

    verbose : boolean (default=True)
        print extra info

    """
    # read in the settings from the file
    mb_settings = read_megabeast_input(megabeast_input_file)

    # setup the megabeast model including defining the priors
    #   - dust distribution model
    #   - stellar populations model (later)

    # use nstars image to setup for each pixel
    nstars_image, nstars_header = fits.getdata(mb_settings["nstars_filename"],
                                               header=True)
    n_x, n_y = nstars_image.shape

    # read in the beast data that is needed by all the pixels
    beast_data = {}
    # - SED data (only Av for now; Rv and f_A are fixed below)
    beast_data.update(
        read_beast_data.read_sed_data(
            mb_settings["beast_seds_filename"],
            param_list=["Av"],
        ))
    # - max completeness
    beast_data.update(
        read_beast_data.read_noise_data(
            mb_settings["beast_noise_filename"],
            param_list=["completeness"],
        ))
    beast_data["completeness"] = np.max(beast_data["completeness"], axis=1)

    # setup for output
    # pixel_fit_status records which pixels were fit; it is not written
    # out by this function
    pixel_fit_status = np.full((n_x, n_y), False, dtype=bool)
    n_fit_params = len(mb_settings["fit_param_names"])
    # NaN marks pixels without a fit
    best_fit_images = np.full((n_x, n_y, n_fit_params), np.nan, dtype=float)

    # Objective for the minimizer.  The minimizer calls it as fun(x, *args)
    # (the scipy.optimize.minimize convention; assumes `op` is
    # scipy.optimize), so it must accept the parameter vector plus the
    # extra arguments and forward all of them to lnprob.  It does not
    # depend on the pixel, so it is defined once outside the loops.
    def chi2(*args):
        return -1.0 * lnprob(*args)

    # loop over the pixels with enough entries in the nstars image
    for i in trange(n_x, desc="x pixels"):
        for j in trange(n_y, desc="y pixels", leave=False):
            if verbose:
                print("working on (%i,%i)" % (i, j))
            if not (nstars_image[i, j] >= mb_settings["min_for_fit"]):
                continue

            pixel_fit_status[i, j] = True
            # get the saved sparse likelihoods
            # (the file name carries the second image index first)
            lnp_filename = mb_settings[
                "lnp_file_prefix"] + "_{0}_{1}_lnp.hd5".format(j, i)
            lnp_data = read_beast_data.read_lnp_data(
                lnp_filename,
                nstars=nstars_image[i, j],
                shift_lnp=True,
            )

            # get the completeness and BEAST model parameters for the
            #   same grid points as the sparse likelihoods
            lnp_grid_vals = read_beast_data.get_lnp_grid_vals(
                beast_data, lnp_data)

            # initialize the ensemble model with the parameters used
            # for the saved BEAST model run results
            #   currently only dust parameters allowed
            #   for testing -> only Av, with Rv and f_A fixed
            avs = lnp_grid_vals["Av"]
            rvs = [3.1]
            fAs = [1.0]
            beast_dust_priors = PriorWeightsDust(
                avs,
                mb_settings["av_prior_model"],
                rvs,
                mb_settings["rv_prior_model"],
                fAs,
                mb_settings["fA_prior_model"],
            )

            # standard minimization to find initial values
            result = op.minimize(
                chi2,
                [0.25, 2.0, 0.5, 0.5, 1],
                args=(beast_dust_priors, lnp_data, lnp_grid_vals),
                method="Nelder-Mead",
            )
            best_fit_images[i, j, :] = result["x"]

            # then run through MCMC to fully sample likelihood
            #    include option not to run MCMC

    # output results
    #    - best fit
    #    - megabeast parameter 1D pPDFs
    #    - MCMC chain

    master_header = nstars_header
    projectname = mb_settings["projectname"]

    # make sure the output directory exists (no error if it already does)
    os.makedirs("./" + projectname + "_megabeast/", exist_ok=True)

    # Now, write the maps to disk, one FITS file per fit parameter
    for k, cname in enumerate(mb_settings["fit_param_names"]):

        hdu = fits.PrimaryHDU(best_fit_images[:, :, k], header=master_header)

        # Save to FITS file
        hdu.writeto(
            "%s_megabeast/%s_%s_bestfit.fits" %
            (projectname, projectname, cname),
            overwrite=True,
        )
Beispiel #4
0
def _plot_completeness_and_collect_best_av(mb_settings, beast_data,
                                           nstars_image, min_stars,
                                           n_subset=20):
    """
    Draw completeness versus A_V on the current axes and return, for every
    pixel, the best-fit A_V and the matching chi2 (-2 * max lnP) per star.
    """
    y_dimen, x_dimen = nstars_image.shape[0], nstars_image.shape[1]

    # one (initially empty) list of per-star values for every pixel
    best_av = [[[] for _ in range(x_dimen)] for _ in range(y_dimen)]
    best_av_chi2 = [[[] for _ in range(x_dimen)] for _ in range(y_dimen)]

    for i in tqdm(range(y_dimen), desc="y pixels"):
        for j in tqdm(range(x_dimen), desc="x pixels"):

            if not (nstars_image[i, j] > min_stars):
                continue

            # get info about the fits
            # (the file name carries the x index first, then the y index)
            lnp_filename = mb_settings[
                "lnp_file_prefix"] + "_{0}_{1}_lnp.hd5".format(j, i)
            lnp_data = read_beast_data.read_lnp_data(
                lnp_filename,
                nstars=nstars_image[i, j],
                shift_lnp=True,
            )

            # get the completeness and BEAST model parameters for the
            #   same grid points as the sparse likelihoods
            lnp_grid_vals = read_beast_data.get_lnp_grid_vals(
                beast_data, lnp_data)

            # grab the things we want to plot
            plot_av = lnp_grid_vals["Av"]
            plot_comp = lnp_grid_vals["completeness"]

            # only every third pixel in each direction is drawn; the
            # best-fit values are collected for all pixels
            draw_pixel = (i % 3 == 0) and (j % 3 == 0)

            # int() so that a float-typed nstars image also works
            for n in range(int(nstars_image[i, j])):

                # plot a random subset of the AVs and completenesses
                if draw_pixel:
                    n_points = plot_av[:, n].size
                    # sampling without replacement cannot draw more
                    # points than exist
                    plot_these = np.random.choice(n_points,
                                                  size=min(n_subset, n_points),
                                                  replace=False)
                    plt.plot(
                        plot_av[plot_these, n] +
                        np.random.normal(scale=0.02, size=plot_these.size),
                        plot_comp[plot_these, n],
                        marker=".",
                        c="black",
                        ms=3,
                        mew=0,
                        linestyle="None",
                        alpha=0.05,
                    )

                # best fit = first grid point with the maximum lnP
                star_lnp = lnp_data["vals"][:, n]
                max_ind = np.argmax(star_lnp)
                best_av[i][j].append(plot_av[max_ind, n])
                best_av_chi2[i][j].append(-2 * star_lnp[max_ind])

                # also overplot the values for the best fit
                if draw_pixel:
                    plt.plot(
                        plot_av[max_ind, n] + np.random.normal(scale=0.01),
                        plot_comp[max_ind, n],
                        marker=".",
                        c="magenta",
                        ms=2,
                        mew=0,
                        linestyle="None",
                        alpha=0.3,
                        zorder=9999,
                    )

    return best_av, best_av_chi2


def _av_histogram_binning(flat_av, log_scale):
    """
    Return (n_bins, (low, high)) shared by all per-pixel histograms, or
    None when there are no A_V values at all.
    """
    if len(flat_av) == 0:
        return None

    if log_scale:
        # NOTE(review): A_V = 0 gives -inf under log10; log_scale presumably
        # expects strictly positive A_V - confirm
        uniq_av = np.unique(np.log10(flat_av))
        gap = (uniq_av[-1] - uniq_av[0]) / len(uniq_av)
    else:
        uniq_av = np.unique(flat_av)
        # bin width = smallest spacing between distinct A_V values
        gap = np.min(np.diff(uniq_av)) if uniq_av.size > 1 else 0.0

    if gap == 0:
        # all values identical: no spacing to derive a bin width from, so
        # use a single unit-width bin centered on the value
        return 1, (uniq_av[0] - 0.5, uniq_av[-1] + 0.5)

    n_bins = np.arange(uniq_av[0], uniq_av[-1], gap).size
    return n_bins, (uniq_av[0] - gap / 2, uniq_av[-1] + gap / 2)


def _plot_av_histogram_grid(best_av, best_av_chi2, nstars_image, min_stars,
                            binning, log_scale, chi2_cut=None):
    """
    Open a new figure with one best-fit A_V histogram per pixel; when
    chi2_cut is given only stars with chi2 below it are included.
    """
    y_dimen, x_dimen = nstars_image.shape[0], nstars_image.shape[1]

    # set up figure
    plt.figure(figsize=(x_dimen * 2, y_dimen * 2))

    for i in tqdm(range(y_dimen), desc="y pixels"):
        for j in tqdm(range(x_dimen), desc="x pixels"):

            if not (nstars_image[i, j] > min_stars):
                continue

            # set up the subplot (pixel row 0 goes in the bottom row)
            plt.subplot(y_dimen, x_dimen,
                        (y_dimen - i - 1) * (x_dimen) + j + 1)

            if not best_av[i][j]:
                continue

            values = np.array(best_av[i][j])
            if chi2_cut is not None:
                values = values[np.array(best_av_chi2[i][j]) < chi2_cut]
            if log_scale:
                values = np.log10(values)
            if len(values) == 0:
                continue

            # make a histogram
            n_bins, hist_range = binning
            plt.hist(
                values,
                bins=n_bins,
                range=hist_range,
                facecolor="xkcd:azure",
                linewidth=0.25,
                edgecolor="xkcd:azure",
            )


def plot_input_data(megabeast_input_file, chi2_plot=None, log_scale=False):
    """
    Make diagnostic plots of the BEAST results used as MegaBEAST input.

    A multi-page PDF is written to
    "<projectname>_megabeast/plot_input_data.pdf" (or
    "<projectname>_megabeast/plot_input_data_log.pdf" when log_scale is
    True) with

    * completeness versus A_V: a random subset of grid points and the
      best-fit point of every star, drawn for every third pixel in each
      direction
    * a grid of histograms of the best-fit A_V, one per pixel
    * one further histogram grid per entry of chi2_plot, restricted to
      stars whose chi2 (-2 * max lnP) is below that value

    Only pixels with more than 20 stars in the nstars image are used.

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc

    chi2_plot : list of floats or None (default=None)
        Make A_V histogram(s) with chi2 less than each of the values in this
        list.  None is treated as an empty list.

    log_scale : boolean (default=False)
        If True, make the histogram x-axis a log scale (to visualize log-normal
        A_V distribution)

    """
    # None instead of a mutable [] default
    if chi2_plot is None:
        chi2_plot = []

    # pixels need more than this many stars to be read and plotted
    min_stars = 20

    # read in the settings from the file
    mb_settings = read_input(megabeast_input_file)

    # get the project name
    projectname = mb_settings["projectname"]

    # read in the beast data that is needed by all the pixels
    beast_data = {}
    # - SED data
    beast_data.update(
        read_beast_data.read_sed_data(
            mb_settings["beast_seds_filename"],
            param_list=["Av"],
        ))
    # - max completeness
    beast_data.update(
        read_beast_data.read_noise_data(
            mb_settings["beast_noise_filename"],
            param_list=["completeness"],
        ))
    beast_data["completeness"] = np.max(beast_data["completeness"], axis=1)

    # read in the nstars image (the header is not needed here)
    nstars_image, _ = fits.getdata(mb_settings["nstars_filename"],
                                   header=True)

    # name of the multi-page figure
    if log_scale:
        pdf_name = "{0}_megabeast/plot_input_data_log.pdf".format(projectname)
    else:
        pdf_name = "{0}_megabeast/plot_input_data.pdf".format(projectname)

    # the context manager closes the PDF even if plotting fails part way
    with PdfPages(pdf_name) as pp:

        # -----------------
        # Completeness vs A_V
        # -----------------

        print("")
        print("Making completeness/Av plot")
        print("")

        # set up figure
        plt.figure(figsize=(6, 6))
        plt.subplot(1, 1, 1)

        # draw the points and save the best-fit A_V (and chi2) per pixel
        best_av, best_av_chi2 = _plot_completeness_and_collect_best_av(
            mb_settings, beast_data, nstars_image, min_stars)

        ax = plt.gca()
        ax.set_xlabel(r"$A_V$")
        ax.set_ylabel("Completeness")

        pp.savefig()

        # -----------------
        # histograms of AVs
        # -----------------

        print("")
        print("Making Av Histograms")
        print("")

        # flat list of A_V over all pixels, used to define common bins
        flat_av = [av for row in best_av for pixel in row for av in pixel]
        binning = _av_histogram_binning(flat_av, log_scale)

        _plot_av_histogram_grid(best_av, best_av_chi2, nstars_image,
                                min_stars, binning, log_scale)

        plt.suptitle(r"Best-fit $A_V$ for each pixel", fontsize=40)

        pp.savefig()

        # -----------------
        # histograms of AVs with a chi2 cut
        # -----------------

        if len(chi2_plot) > 0:
            print("")
            print("Making Av Histograms with chi^2 cut")
            print("")

        for chi2_cut in chi2_plot:

            _plot_av_histogram_grid(best_av, best_av_chi2, nstars_image,
                                    min_stars, binning, log_scale,
                                    chi2_cut=chi2_cut)

            plt.suptitle(
                r"Best-fit $A_V$ for each pixel, but only using sources with $\chi^2 < $"
                + str(chi2_cut),
                fontsize=40,
            )

            pp.savefig()