# NOTE (assumed imports): these examples reference names such as Parameters,
# PriorCatalog, DLAGPMAT, NullGPMAT, read_spec, etc. without showing their
# imports. The block below follows the gpy_dla_detection package layout;
# adjust the module paths to your local checkout if they differ.
import os
import time
from typing import List

import h5py
import numpy as np
import matplotlib.pyplot as plt

from gpy_dla_detection.set_parameters import Parameters
from gpy_dla_detection.model_priors import PriorCatalog
from gpy_dla_detection.null_gp import NullGPMAT
from gpy_dla_detection.dla_gp import DLAGPMAT
from gpy_dla_detection.dla_samples import DLASamplesMAT
from gpy_dla_detection.subdla_gp import SubDLAGPMAT
from gpy_dla_detection.subdla_samples import SubDLASamplesMAT
from gpy_dla_detection.read_spec import read_spec, retrieve_raw_spec
from gpy_dla_detection.effective_optical_depth import effective_optical_depth
from gpy_dla_detection.bayesian_model_selection import BayesModelSelect
from gpy_dla_detection.plottings.plot_model_examples import make_plots


def test_values():
    parameters = Parameters()

    assert abs(parameters.lya_wavelength - 1216) < 1
    assert abs(parameters.lyb_wavelength - 1025) < 1

    assert abs(parameters.kms_to_z(3000) - 0.01) < 1e-4

    wavelength = 1000
    z_qso = 3

    assert (abs(wavelength - parameters.emitted_wavelengths(
        parameters.observed_wavelengths(wavelength, z_qso), z_qso)) < 1e-4)
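
# A minimal sketch (not part of the original tests) of the relations the
# assertions above rely on, assuming the non-relativistic Doppler conversion
# with c = 299792.458 km/s; the actual Parameters implementation may differ
# slightly (e.g. by a relativistic correction).
_C_KMS = 299792.458  # speed of light in km/s

def _kms_to_z_sketch(v_kms: float) -> float:
    # z = v / c for velocities much smaller than c
    return v_kms / _C_KMS

def _observed_wavelengths_sketch(rest_wavelength: float, z: float) -> float:
    # lambda_obs = lambda_rest * (1 + z)
    return rest_wavelength * (1.0 + z)

assert abs(_kms_to_z_sketch(3000.0) - 0.01) < 1e-4
assert abs(_observed_wavelengths_sketch(1216.0, 3.0) - 4864.0) < 1e-10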

# Example 2

def test_prior():
    # test 1
    filename = "spec-5309-55929-0362.fits"

    if not os.path.exists(filename):
        retrieve_raw_spec(5309, 55929, 362)  # the spectrum shown in the paper

    z_qso = 3.166

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    dla_samples = DLASamplesMAT(param, prior,
                                "data/dr12q/processed/dla_samples_a03.mat")

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    # DLA GP Model
    dla_gp = DLAGPMAT(
        param,
        prior,
        dla_samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        True,
    )
    dla_gp.set_data(rest_wavelengths,
                    flux,
                    noise_variance,
                    pixel_mask,
                    z_qso,
                    build_model=True)

    log_priors = dla_gp.log_priors(z_qso, max_dlas=4)

    catalog_log_priors = np.array(
        [-2.53774598, -4.97413739, -7.40285925, -9.74851888])

    assert np.all(np.abs(log_priors - catalog_log_priors) < 1e-4)
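
# A sanity-check sketch (not in the original test): converting the catalog log
# priors above back to probabilities, P(k DLAs | zQSO) should decrease
# monotonically with k, since each additional DLA multiplies in another
# (small) prior factor. The values are copied from the assertion above.
_catalog_log_priors = np.array([-2.53774598, -4.97413739, -7.40285925, -9.74851888])
_p_k_dlas = np.exp(_catalog_log_priors)
assert np.all(np.diff(_p_k_dlas) < 0)  # strictly decreasing in k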

# Example 3

def prepare_subdla_model(plate: int = 5309,
                         mjd: int = 55929,
                         fiber_id: int = 362,
                         z_qso: float = 3.166) -> SubDLAGPMAT:
    """
    Return a SubDLAGP instance from an input SDSS DR12 spectrum.
    """
    filename = "spec-{}-{}-{}.fits".format(plate, mjd, str(fiber_id).zfill(4))

    if not os.path.exists(filename):
        retrieve_raw_spec(plate, mjd, fiber_id)  # by default, the spectrum shown in the paper

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    subdla_samples = SubDLASamplesMAT(
        param, prior, "data/dr12q/processed/subdla_samples.mat")

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    # SubDLA GP Model
    subdla_gp = SubDLAGPMAT(
        param,
        prior,
        subdla_samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        True,
    )
    subdla_gp.set_data(rest_wavelengths,
                       flux,
                       noise_variance,
                       pixel_mask,
                       z_qso,
                       build_model=True)

    return subdla_gp
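
# Usage sketch for the helper above (hedged: the data/ catalog and sample files
# must be prepared first, and the SubDLAGPMAT API is assumed to mirror the
# DLAGPMAT API used elsewhere in these examples, with the subDLA model
# considering a single absorber):
#
#   subdla_gp = prepare_subdla_model()
#   log_likelihoods_lls = subdla_gp.log_model_evidences(1)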

# Example 4

def test_prior_catalog():
    params = Parameters()
    prior = PriorCatalog(
        params,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )

    # thing_id = 94892842: z_dla = 2.0969, log_nhi = 20.0292
    ind = prior.thing_ids == 94892842
    assert np.all(prior.z_dlas[ind] == 2.0969)
    assert np.all(prior.log_nhis[ind] == 20.0292)

    # the P(DLA | zQSO) prior saturates at around 0.1
    zQSO = 5
    M, N = prior.less_ind(zQSO)
    p_dla_z = M / N
    assert 0.09 < p_dla_z < 0.11
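
# A sketch of the empirical prior being tested above: less_ind presumably
# returns the counts (M, N) of DLA sightlines and of all sightlines below zQSO,
# whose ratio estimates P(DLA | zQSO). (The names here are illustrative, not
# the actual PriorCatalog internals.)
def _empirical_p_dla_sketch(num_dla_los: int, num_all_los: int) -> float:
    # P(DLA | zQSO) ~ (# sightlines with a DLA below zQSO) / (# sightlines below zQSO)
    return num_dla_los / num_all_los

assert 0.09 < _empirical_p_dla_sketch(10, 100) < 0.11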

# Example 5

def test_effective_optical_depth():
    z_qso = 4
    rest_wavelengths = np.linspace(911, 1216, 500)
    wavelengths = Parameters.observed_wavelengths(rest_wavelengths, z_qso)

    total_optical_depth = effective_optical_depth(wavelengths, 3.65, 0.0023,
                                                  z_qso, 31, True)

    assert 0 < np.exp(-total_optical_depth.sum(axis=1)).min() < 1
    assert 0 < np.exp(-total_optical_depth.sum(axis=1)).max() < 1

    total_optical_depth_2 = effective_optical_depth(wavelengths, 3.65, 0.0023,
                                                    z_qso, 1, True)

    assert np.mean(np.exp(-total_optical_depth.sum(axis=1))) < np.mean(
        np.exp(-total_optical_depth_2.sum(axis=1)))

    total_optical_depth_3 = effective_optical_depth(wavelengths, 3.65, 0.0023,
                                                    2.2, 31, True)

    assert np.mean(np.exp(-total_optical_depth.sum(axis=1))) < np.mean(
        np.exp(-total_optical_depth_3.sum(axis=1)))
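
# A minimal sketch of the effective optical depth under test, for the Lyα line
# only (the real function also sums the higher Lyman-series lines with the
# oscillator-strength weighting quoted in process_qso's docstring below).
# τ₀ = 0.0023 and β = 3.65 follow Kim et al. (2007), as above.
def _lya_effective_optical_depth_sketch(wavelengths, tau_0=0.0023, beta=3.65):
    lambda_lya = 1215.6701  # Lyα rest wavelength in Å
    z = wavelengths / lambda_lya - 1.0  # absorber redshift at each pixel
    return tau_0 * (1.0 + z) ** beta

_wavelengths = Parameters.observed_wavelengths(np.linspace(911, 1216, 500), 4)
_tau = _lya_effective_optical_depth_sketch(_wavelengths)
assert np.all((np.exp(-_tau) > 0) & (np.exp(-_tau) < 1))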

# Example 6

def test_log_likelihood_no_dla():
    # test 1
    filename = "spec-5309-55929-0362.fits"

    if not os.path.exists(filename):
        retrieve_raw_spec(5309, 55929, 362)  # the spectrum shown in the paper

    z_qso = 3.166

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    gp = NullGPMAT(
        param,
        prior,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
    )

    gp.set_data(rest_wavelengths,
                flux,
                noise_variance,
                pixel_mask,
                z_qso,
                build_model=True)

    log_likelihood_no_dla = gp.log_model_evidence()
    print(
        "log p(  D  | z_QSO, no DLA ) : {:.5g}".format(log_likelihood_no_dla))

    # there is some numerical difference relative to the MATLAB result
    assert np.abs(log_likelihood_no_dla - (-889.04809017)) < 1

    plt.figure(figsize=(16, 5))
    plt.plot(gp.x, gp.y, label="observed flux")
    plt.plot(gp.rest_wavelengths, gp.mu, label="null GP")
    plt.plot(gp.x, gp.this_mu, label="interpolated null GP")
    plt.xlabel("rest wavelengths")
    plt.ylabel("normalised flux")
    plt.legend()
    plt.savefig("test1.pdf", format="pdf", dpi=300)
    plt.clf()
    plt.close()

    # test 2
    filename = "spec-3816-55272-0076.fits"
    z_qso = 3.68457627

    if not os.path.exists(filename):
        retrieve_raw_spec(3816, 55272, 76)  # the spectrum shown in the paper

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    gp.set_data(rest_wavelengths,
                flux,
                noise_variance,
                pixel_mask,
                z_qso,
                build_model=True)

    log_likelihood_no_dla = gp.log_model_evidence()
    print(
        "log p(  D  | z_QSO, no DLA ) : {:.5g}".format(log_likelihood_no_dla))

    assert np.abs(log_likelihood_no_dla - (-734.3727266)) < 1

    plt.figure(figsize=(16, 5))
    plt.plot(gp.x, gp.y, label="observed flux")
    plt.plot(gp.rest_wavelengths, gp.mu, label="null GP")
    plt.plot(gp.x, gp.this_mu, label="interpolated null GP")
    plt.xlabel("rest wavelengths")
    plt.ylabel("normalised flux")
    plt.legend()
    plt.savefig("test2.pdf", format="pdf", dpi=300)
    plt.clf()
    plt.close()
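
# What log_model_evidence computes, in sketch form: the null model treats the
# observed (masked) flux as a draw from a multivariate normal with the learned
# mean vector and a covariance built from the learned low-rank kernel plus
# noise terms, so the evidence is a Gaussian log-density. A toy, self-contained
# version (not the repo's optimized implementation):
from scipy.stats import multivariate_normal

def _null_log_evidence_sketch(y, mu, cov):
    return multivariate_normal.logpdf(y, mean=mu, cov=cov)

_toy_y = np.array([1.0, 0.9, 1.1])
_toy_mu = np.ones(3)
_toy_cov = 0.1 * np.eye(3)
assert np.isfinite(_null_log_evidence_sketch(_toy_y, _toy_mu, _toy_cov))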

# Example 7

def test_dla_model_evidences(broadening: bool = True):
    # test 1
    filename = "spec-5309-55929-0362.fits"

    if not os.path.exists(filename):
        retrieve_raw_spec(5309, 55929, 362)  # the spectrum shown in the paper

    z_qso = 3.166

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    dla_samples = DLASamplesMAT(param, prior,
                                "data/dr12q/processed/dla_samples_a03.mat")

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    # DLA GP Model
    dla_gp = DLAGPMAT(
        param,
        prior,
        dla_samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        broadening,
    )
    dla_gp.set_data(rest_wavelengths,
                    flux,
                    noise_variance,
                    pixel_mask,
                    z_qso,
                    build_model=True)

    tic = time.time()

    max_dlas = 4
    log_likelihoods_dla = dla_gp.log_model_evidences(max_dlas)

    toc = time.time()
    # very time consuming: ~4 mins for a single spectrum without parallelization.
    print("spent {} mins; {} seconds".format((toc - tic) // 60,
                                             (toc - tic) % 60))

    # log likelihood results from the catalog
    catalog_log_likelihoods_dla = np.array(
        [-688.91647288, -633.00070813, -634.08569242, -640.77120558])

    for i in range(max_dlas):
        print("log p(  D  | z_QSO, DLA{} ) : {:.5g}; MATLAB value: {:.5g}".
              format(i + 1, log_likelihoods_dla[i],
                     catalog_log_likelihoods_dla[i]))

    # agreement is only within 2.5 in log space; this needs to be investigated.
    assert np.all(
        np.abs(catalog_log_likelihoods_dla - log_likelihoods_dla) < 2.5)
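
# How the per-model evidences above are formed, in sketch form: each DLA(k)
# model marginalizes its likelihood over num_dla_samples quasi-Monte Carlo
# samples of (z_dla, logNHI), i.e. a log-mean-exp over the sampled log
# likelihoods (the actual implementation also re-uses samples across k via
# base_sample_inds; this is only the basic idea):
from scipy.special import logsumexp

def _log_evidence_from_samples_sketch(sample_log_likelihoods):
    n = len(sample_log_likelihoods)
    return logsumexp(sample_log_likelihoods) - np.log(n)

_toy_sample_lls = np.array([-690.0, -640.0, -655.0, -700.0])
_toy_evidence = _log_evidence_from_samples_sketch(_toy_sample_lls)
assert _toy_sample_lls.min() <= _toy_evidence <= _toy_sample_lls.max()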

# Example 8

def test_dla_model(broadening: bool = True):
    # test 1
    filename = "spec-5309-55929-0362.fits"

    if not os.path.exists(filename):
        retrieve_raw_spec(5309, 55929, 362)  # the spectrum shown in the paper

    z_qso = 3.166

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    dla_samples = DLASamplesMAT(param, prior,
                                "data/dr12q/processed/dla_samples_a03.mat")

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    # DLA GP Model
    dla_gp = DLAGPMAT(
        param,
        prior,
        dla_samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        broadening,
    )
    dla_gp.set_data(rest_wavelengths,
                    flux,
                    noise_variance,
                    pixel_mask,
                    z_qso,
                    build_model=True)

    # These are the MAPs from the paper
    z_dlas = np.array([2.52182382, 3.03175723])
    nhis = 10**np.array([20.63417494, 22.28420156])

    sample_log_likelihood_dla = dla_gp.sample_log_likelihood_k_dlas(
        z_dlas, nhis)
    print("log p(  D  | z_QSO, zdlas, nhis ) : {:.5g}".format(
        sample_log_likelihood_dla))

    # Build a Null model
    gp = NullGPMAT(
        param,
        prior,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
    )
    gp.set_data(rest_wavelengths,
                flux,
                noise_variance,
                pixel_mask,
                z_qso,
                build_model=True)

    log_likelihood_no_dla = gp.log_model_evidence()
    print(
        "log p(  D  | z_QSO, no DLA ) : {:.5g}".format(log_likelihood_no_dla))

    assert sample_log_likelihood_dla > log_likelihood_no_dla
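
# Putting the pieces together, in sketch form: Bayesian model selection
# combines each model's log prior and log evidence and normalizes over models
# (this is what BayesModelSelect does inside process_qso below; the helper
# here is only illustrative):
from scipy.special import logsumexp

def _model_posteriors_sketch(log_priors, log_evidences):
    log_posteriors = np.asarray(log_priors) + np.asarray(log_evidences)
    return np.exp(log_posteriors - logsumexp(log_posteriors))

_toy_posteriors = _model_posteriors_sketch([-0.1, -2.5], [-889.0, -884.0])
assert np.isclose(_toy_posteriors.sum(), 1.0)
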
def process_qso(
    qso_list: List,
    z_qso_list: List,
    read_spec=read_spec,  # default is the module-level read_spec imported above
    max_dlas: int = 4,
    broadening: bool = True,
    plot_figures: bool = False,
):
    """
    Read fits file from qso_list and process each QSO with Bayesian model selection.

    :param qso_list: a list of fits filenames
    :param z_qso_list: a list of zQSO corresponding to the spectra in the qso_list
    :param read_spec: a function to read the fits file.
    :param broadening: whether to implement the instrumental broadening in the SDSS.
    :param plot_figures: if True, plot the maximum a posteriori estimate of DLAs
        into a plot, with the likelihoods in the parameter space.

    A HDF5 File will be saved after running this function.

    (Comments from the MATLAB code)
    Run DLA detection algorithm on specified objects while using
    lower lognhi range (defined in set_lls_parameters.m) as an
    alternative model; 

    Note: model_posterior(quasar_ind, :) ... 
        = [p(no dla | D), p(lls | D), p(1 dla | D), p(2 dla | D), ...]
    also note that we should treat lls as no dla. 
    
    For higher order of DLA models, we consider the Occam's Razor effect due
    to normalisation in the higher dimensions of the parameter space.
    
    We implement exp(-optical_depth) to the mean-flux
        µ(z) := µ * exp( - τ (1 + z)^β ) ; 
        1 + z = lambda_obs / lambda_lya
    the prior of τ and β are taken from Kim, et al. (2007). 
    
    Nov 18, 2019: add all Lyman series to the effective optical depth
        effective_optical_depth := ∑ τ fi1 λi1 / ( f21 λ21 ) * ( 1 + z_i1 )^β
    where 
        1 + z_i1 =  λobs / λ_i1 = λ_lya / λ_i1 *  (1 + z_a)
    
    Dec 25, 2019: add Lyman series to the noise variance training
      s(z)     = 1 - exp(-effective_optical_depth) + c_0 
    
    March 8, 2019: add additional Occam's razor factor between DLA models and null model:
      P(DLAs | D) := P(DLAs | D) / num_dla_samples
    """

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    dla_samples = DLASamplesMAT(
        param, prior, "data/dr12q/processed/dla_samples_a03.mat"
    )

    subdla_samples = SubDLASamplesMAT(
        param, prior, "data/dr12q/processed/subdla_samples.mat"
    )

    # initialize the Bayesian model selection class, with a maximum of max_dlas
    # DLAs and 1 subDLA, the same setting as Ho-Bird-Garnett (2020).
    bayes = BayesModelSelect([0, 1, max_dlas], 2)  # 0 DLAs (null); 1 subDLA; up to max_dlas DLAs
    # the second argument (2) is the index of the DLA model in the model list

    num_quasars = len(qso_list)

    # allocate the arrays we want to save. Try to save similar variables as in
    # multi_dlas/process_qsos_multiple_dlas_meanflux.m
    # initialize results
    min_z_dlas = np.full((num_quasars,), np.nan)
    max_z_dlas = np.full((num_quasars,), np.nan)
    log_priors_no_dla = np.full((num_quasars,), np.nan)
    log_priors_dla = np.full((num_quasars, max_dlas), np.nan)
    log_likelihoods_no_dla = np.full((num_quasars,), np.nan)
    log_likelihoods_dla = np.full((num_quasars, max_dlas), np.nan)
    log_posteriors_no_dla = np.full((num_quasars,), np.nan)
    log_posteriors_dla = np.full((num_quasars, max_dlas), np.nan)

    sample_log_likelihoods_dla = np.full(
        (num_quasars, param.num_dla_samples, max_dlas), np.nan
    )
    base_sample_inds = np.zeros(
        (num_quasars, param.num_dla_samples, max_dlas - 1), dtype=np.int32
    )

    # initialize lls results
    log_likelihoods_lls = np.full((num_quasars,), np.nan)
    log_posteriors_lls = np.full((num_quasars,), np.nan)
    log_priors_lls = np.full((num_quasars,), np.nan)
    sample_log_likelihoods_lls = np.full((num_quasars, param.num_dla_samples), np.nan)

    # MAP estimates: initialize the arrays of MAP values,
    # shape: num_quasars * (1~k models) * (1~k MAP DLAs)
    MAP_z_dlas = np.full((num_quasars, max_dlas, max_dlas), np.nan)
    MAP_log_nhis = np.full((num_quasars, max_dlas, max_dlas), np.nan)
    MAP_inds = np.full((num_quasars, max_dlas, max_dlas), np.nan)  # note: not written to the HDF5 file below

    # save model_posteriors in linear scale, not log scale
    model_posteriors = np.full((num_quasars, 1 + 1 + max_dlas,), np.nan)
    p_dlas = np.full((num_quasars,), np.nan)
    p_no_dlas = np.full((num_quasars,), np.nan)

    for quasar_ind, (filename, z_qso) in enumerate(zip(qso_list, z_qso_list)):
        tic = time.time()

        np.random.seed(0)

        wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
        rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

        # Null model GP: a GP model without any intervening DLA; we also don't
        # need to run QMC sampling on this model.
        gp = NullGPMAT(
            param,
            prior,
            "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        )

        gp.set_data(
            rest_wavelengths, flux, noise_variance, pixel_mask, z_qso, build_model=True
        )

        # DLA model GP: the null model GP plus Voigt profiles, parameterised by
        # {(z_dla, logNHI)}_{i=1}^{k}, where k is the maximum number of DLAs we
        # want to model. We estimate log posteriors for the DLA(1), ..., DLA(k) models.
        dla_gp = DLAGPMAT(
            params=param,
            prior=prior,
            dla_samples=dla_samples,
            min_z_separation=3000.0,
            learned_file="data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
            broadening=broadening,
        )
        dla_gp.set_data(
            rest_wavelengths, flux, noise_variance, pixel_mask, z_qso, build_model=True
        )

        subdla_gp = SubDLAGPMAT(
            params=param,
            prior=prior,
            dla_samples=subdla_samples,
            min_z_separation=3000.0,
            learned_file="data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
            broadening=broadening,
        )
        subdla_gp.set_data(
            rest_wavelengths, flux, noise_variance, pixel_mask, z_qso, build_model=True
        )

        # run Bayesian model selection and get the log posteriors
        log_posteriors = bayes.model_selection([gp, subdla_gp, dla_gp], z_qso)
        # log_priors, log_likelihoods, and log_posteriors are also stored on
        # the bayes instance.

        # prepare to store the values to arrays
        min_z_dlas[quasar_ind] = dla_gp.params.min_z_dla(wavelengths, z_qso)
        max_z_dlas[quasar_ind] = dla_gp.params.max_z_dla(wavelengths, z_qso)

        log_priors_no_dla[quasar_ind] = bayes.log_priors[0]  # null model is index 0
        log_priors_dla[quasar_ind, :] = bayes.log_priors[-max_dlas:]
        log_priors_lls[quasar_ind] = bayes.log_priors[1]  # subDLA model is index 1

        # null model is index 0; subDLA model is index 1
        log_likelihoods_no_dla[quasar_ind] = bayes.log_likelihoods[0]
        log_likelihoods_dla[quasar_ind, :] = bayes.log_likelihoods[-max_dlas:]
        log_likelihoods_lls[quasar_ind] = bayes.log_likelihoods[1]

        # null model is index 0; subDLA model is index 1
        log_posteriors_no_dla[quasar_ind] = bayes.log_posteriors[0]
        log_posteriors_dla[quasar_ind, :] = bayes.log_posteriors[-max_dlas:]
        log_posteriors_lls[quasar_ind] = bayes.log_posteriors[1]

        sample_log_likelihoods_dla[quasar_ind, :, :] = dla_gp.sample_log_likelihoods[
            :, :
        ]
        base_sample_inds[quasar_ind, :, :] = dla_gp.base_sample_inds[:, :].T

        sample_log_likelihoods_lls[quasar_ind, :] = subdla_gp.sample_log_likelihoods[
            :, 0
        ]

        # store the MAP estimates,
        # shape: num_quasars * (1~k models) * (1~k MAP DLAs)
        MAP_z_dla, MAP_log_nhi = dla_gp.maximum_a_posteriori()

        MAP_z_dlas[quasar_ind, :, :] = MAP_z_dla[:, :]
        MAP_log_nhis[quasar_ind, :, :] = MAP_log_nhi[:, :]

        # save model_posteriors in linear scale, not log scale
        model_posteriors[quasar_ind, :] = bayes.model_posteriors[:]
        p_dlas[quasar_ind] = bayes.p_dla
        p_no_dlas[quasar_ind] = bayes.p_no_dla

        toc = time.time()
        # very time consuming: ~4 mins for a single spectrum without parallelization.
        print("spent {} mins; {} seconds".format((toc - tic) // 60, (toc - tic) % 60))

        # [make plots] plot the inference results:
        # GP mean model + sample-likelihood scatter plot
        if plot_figures:
            title = filename + "; zQSO: {:.2g};\n".format(z_qso) 
            out_filename = "spec-{}".format(str(quasar_ind).zfill(6))
            make_plots(
                dla_gp=dla_gp, bayes=bayes, filename=out_filename, sub_dir="images", title=title
            )
            plt.clf()
            plt.close()

    # write into HDF5 file
    with h5py.File("processed_qsos_multi_meanflux.h5", "w") as f:
        # storing default parameter settings for the model
        f.create_dataset(
            "prior_z_qso_increase", data=dla_gp.params.prior_z_qso_increase
        )
        f.create_dataset("k", data=dla_gp.params.k)
        f.create_dataset(
            "normalization_min_lambda", data=dla_gp.params.normalization_min_lambda
        )
        f.create_dataset(
            "normalization_max_lambda", data=dla_gp.params.normalization_max_lambda
        )
        f.create_dataset("min_z_cut", data=dla_gp.params.min_z_cut)
        f.create_dataset("max_z_cut", data=dla_gp.params.max_z_cut)
        f.create_dataset("num_dla_samples", data=dla_gp.params.num_dla_samples)
        f.create_dataset("num_lines", data=dla_gp.params.num_lines)
        f.create_dataset("num_forest_lines", data=dla_gp.params.num_forest_lines)

        # storing the sampling variables
        f.create_dataset("min_z_dlas", data=min_z_dlas)
        f.create_dataset("max_z_dlas", data=max_z_dlas)

        f.create_dataset("sample_log_likelihoods_dla", data=sample_log_likelihoods_dla)
        f.create_dataset("base_sample_inds", data=base_sample_inds)

        f.create_dataset("log_priors_no_dla", data=log_priors_no_dla)
        f.create_dataset("log_priors_lls", data=log_priors_lls)
        f.create_dataset("log_priors_dla", data=log_priors_dla)

        f.create_dataset("log_likelihoods_no_dla", data=log_likelihoods_no_dla)
        f.create_dataset("log_likelihoods_lls", data=log_likelihoods_lls)
        f.create_dataset("log_likelihoods_dla", data=log_likelihoods_dla)

        f.create_dataset("log_posteriors_no_dla", data=log_posteriors_no_dla)
        f.create_dataset("log_posteriors_lls", data=log_posteriors_lls)
        f.create_dataset("log_posteriors_dla", data=log_posteriors_dla)

        f.create_dataset("MAP_z_dlas", data=MAP_z_dlas)
        f.create_dataset("MAP_log_nhis", data=MAP_log_nhis)

        f.create_dataset("p_dlas", data=p_dlas)
        f.create_dataset("p_no_dlas", data=p_no_dlas)
        f.create_dataset("model_posteriors", data=model_posteriors)

        # also save zQSOs
        f.create_dataset("z_qsos", data=np.array(z_qso_list))
        # also save the filename list for reproducibility
        f.create_dataset("qso_list", data=np.array(qso_list, h5py.string_dtype(encoding="utf-8")))