Esempio n. 1
0
def split_grid(grid_fname, num_subgrids, overwrite=False):
    """
    Splits a spectral or sed grid (they are the same class actually)
    according to grid point index (so basically, arbitrarily).

    The subgrid files are named by inserting ``sub<i>`` between the base
    name and the extension of `grid_fname` (e.g. ``grid.hd5`` ->
    ``gridsub0.hd5``, ``gridsub1.hd5``, ...).

    Parameters
    ----------
    grid_fname: string
        file name of the existing grid to be split up

    num_subgrids: integer
        the number of parts the grid should be split into

    overwrite: bool
        any subgrids that already exist will be deleted if set to True.
        If set to False, skip over any grids that are already there.

    Returns
    -------
    list of string
        the names of the subgrid files, including the ones that were
        skipped because they already existed
    """

    g = SEDGrid(grid_fname, backend="disk")

    # Split the name on its real extension.  A plain str.replace(".hd5", ...)
    # returns the input name unchanged when the file has another extension,
    # which would make the "subgrid" name collide with the source grid (and
    # delete it when overwrite=True); it would also rewrite any ".hd5" that
    # happens to appear in a directory name.
    root, ext = os.path.splitext(grid_fname)

    fnames = []

    num_seds = len(g.seds)
    slices = uniform_slices(num_seds, num_subgrids)
    for i, slc in enumerate(slices):

        subgrid_fname = "{}sub{}{}".format(root, i, ext)
        fnames.append(subgrid_fname)
        if os.path.isfile(subgrid_fname):
            if overwrite:
                os.remove(subgrid_fname)
            else:
                print("{} already exists. Skipping.".format(subgrid_fname))
                continue

        print("constructing subgrid " + str(i))

        # Load a slice as a SEDGrid object
        sub_g = SEDGrid(
            g.lamb[:],
            seds=g.seds[slc],
            grid=Table(g.grid[slc]),
            backend="memory",
        )
        # carry the filter names over to the subgrid header
        if g.filters is not None:
            sub_g.header["filters"] = " ".join(g.filters)

        # Save it to a new file
        sub_g.write(subgrid_fname, append=False)

    return fnames
Esempio n. 2
0
def merge_grids(seds_fname, sub_names):
    """
    Combines a set of grids into one output grid file.

    Each input grid is read and appended to the output file, in the order
    given.  If the output file is already present, nothing is written and a
    notice is printed instead.  The grids need to have the same columns.

    Parameters
    ----------
    seds_fname: string
        path for the output file

    sub_names: list of strings
        paths for the input grids
    """

    # never touch an output file that is already there
    if os.path.isfile(seds_fname):
        print("{} already exists".format(seds_fname))
        return

    for sub_name in sub_names:
        print("Appending {} to {}".format(sub_name, seds_fname))
        SEDGrid(sub_name).write(seds_fname, append=True)
Esempio n. 3
0
def test_grid_warnings():
    """
    Checks the ValueError messages SEDGrid gives for unsupported or
    incomplete input, both at construction and when writing.
    """
    # unknown backend name
    with pytest.raises(ValueError) as exc:
        SEDGrid(backend="hdf")
    assert exc.value.args[0] == "hdf backend not supported"

    # unknown file extension
    with pytest.raises(ValueError) as exc:
        SEDGrid("test.txt")
    assert exc.value.args[0] == "txt file type not supported"

    # contents used to build the grids below
    n_models = 100
    n_bands = 3
    lamb = [1.0, 2.0, 3.0]
    seds = np.zeros((n_models, n_bands))
    header = {"Origin": "test_code"}
    cols = {"Av": [1.0, 1.1, 1.3], "Rv": [2.0, 3.0, 4.0]}
    gtable = Table(cols)
    gtable.meta = header

    # wavelengths alone are not enough to build a grid
    with pytest.raises(ValueError) as exc:
        SEDGrid(lamb)
    assert exc.value.args[0] == "seds or grid not passed"

    # values assigned to the `grid` attribute before writing, paired with
    # the message the failing write is expected to give
    bad_grids = (
        (cols, "Only astropy.Table are supported"),
        (None, "Full data set not specified (lamb, seds, grid)"),
    )
    for ftype in ("fits", "hdf"):
        for bad_grid, expected_msg in bad_grids:
            with pytest.raises(ValueError) as exc:
                a = SEDGrid(lamb, seds=seds, grid=gtable)
                a.grid = bad_grid
                a.write(f"testgridwriteerror.{ftype}")
            assert exc.value.args[0] == expected_msg
Esempio n. 4
0
def test_sedgrid(cformat, cback, copygrid):
    """
    Round-trip test of the SEDGrid class.

    An in-memory grid is built and its properties are checked.  It is then
    written to a temporary file with suffix `cformat`, read back with the
    `cback` backend, optionally copied (`copygrid`), and compared against
    the values it was built from.
    """
    filter_names = ["BAND1", "BAND2", "BAND3"]
    n_bands = 3
    n_models = 100
    n_offdiag = ((n_bands**2) - n_bands) // 2

    lamb = [1.0, 2.0, 3.0]
    seds = np.zeros((n_models, n_bands))
    cov_diag = np.full((n_models, n_bands), 0.1)
    cov_offdiag = np.full((n_models, n_offdiag), 1.0)

    header = {"Origin": "test_code"}
    cols = {"Av": [1.0, 1.1, 1.3], "Rv": [2.0, 3.0, 4.0]}
    gtable = Table(cols)
    gtable.meta = header

    tgrid = SEDGrid(
        lamb,
        seds=seds,
        grid=gtable,
        header=header,
        cov_diag=cov_diag,
        cov_offdiag=cov_offdiag,
        backend="memory",
    )
    tgrid.header["filters"] = " ".join(filter_names)

    # attributes every grid is expected to expose
    expected_props = (
        "lamb",
        "seds",
        "cov_diag",
        "cov_offdiag",
        "grid",
        "nbytes",
        "filters",
        "header",
        "keys",
    )
    for prop in expected_props:
        assert hasattr(tgrid, prop), f"missing {prop} property"

    # array-valued attributes of the in-memory grid
    memory_checks = (
        ("lamb", lamb, "lambdas not equal"),
        ("seds", seds, "seds not equal"),
        ("cov_diag", cov_diag, "covdiag not equal"),
        ("cov_offdiag", cov_offdiag, "covoffdiag not equal"),
    )
    for attr, expected, msg in memory_checks:
        np.testing.assert_allclose(getattr(tgrid, attr), expected, err_msg=msg)

    assert isinstance(
        tgrid.nbytes, (int, np.integer)
    ), "grid nbytes property not integer"
    compare_tables(tgrid.grid, gtable)
    assert tgrid.grid.keys() == list(cols.keys()), "colnames of grid not equal"
    assert tgrid.filters == filter_names, "filters of grid not equal"

    # write the grid to a temporary file of the requested format
    print(f"testing {cformat} file format")
    tfile = NamedTemporaryFile(suffix=cformat)
    tgrid.write(tfile.name)

    # the disk backend is not supported for fits files: nothing to read back
    if cback == "disk" and cformat == ".fits":
        return True

    # read the file back with the requested backend
    print(f"    testing {cback} backend")
    dgrid_in = SEDGrid(tfile.name, backend=cback)

    # optionally work on a copy of the grid that was read
    print(f"    testing copygrid={copygrid}")
    dgrid = dgrid_in.copy() if copygrid else dgrid_in
    print(dgrid)

    for prop in expected_props:
        assert hasattr(dgrid, prop), f"missing {prop} property"

    # The filter names of the grid read from file are deliberately not
    # compared with filter_names: that check had a problem in the online
    # travis ci (it somehow picked up HST filter names from another file)
    # while working fine offline.

    assert len(dgrid) == n_bands, f"{cformat} file len not equal"

    # array-valued attributes of the grid read from file
    file_checks = (
        ("lamb", lamb, f"{cformat} file grid lambdas not equal"),
        ("seds", seds, f"{cformat} file grid seds not equal"),
        ("cov_diag", cov_diag, f"{cformat} file grid cov_diag not equal"),
        ("cov_offdiag", cov_offdiag, f"{cformat} file grid cov_offdiag not equal"),
    )
    for attr, expected, msg in file_checks:
        np.testing.assert_allclose(getattr(dgrid, attr), expected, err_msg=msg)

    assert isinstance(
        dgrid.nbytes, (int, np.integer)
    ), f"{cformat} file grid nbytes property not integer"

    # for the disk backend with hdf files the grid attribute is first passed
    # through read_table_hdf5 before the comparison
    table_in = dgrid.grid
    if cback == "disk" and cformat == ".hdf":
        table_in = read_table_hdf5(dgrid.grid)
    compare_tables(table_in, gtable, otag=f"{cformat} file")

    assert table_in.keys() == list(
        cols.keys()
    ), f"{cformat} file colnames of grid not equal"

    assert dgrid.keys() == tgrid.keys(), f"{cformat} file colnames of grid not equal"

    # final copy - needed for disk backend to get the now defined variables
    print(dgrid)

    dgrid_fin = dgrid.copy()

    print(dgrid_fin)
Esempio n. 5
0
def remove_filters_from_files(
    catfile,
    physgrid=None,
    obsgrid=None,
    outbase=None,
    physgrid_outfile=None,
    rm_filters=None,
    beast_filt=None,
):
    """
    Remove filters from catalog, physics grid, and/or obsmodel grid.  This has
    two primary use cases:

    1. When making simulated observations, you want to test how your fit quality
       changes with different combinations of filters.  In that case, put in
       files for both `physgrid` and `obsgrid`.  Set `rm_filters` to the
       filter(s) you wish to remove, and they will be removed both from those
       and from the catalog file.  The three new files will be output with the
       name prefix set in `outbase`.

    2. When running the BEAST, you have a master physics model grid with all
       filters present in the survey, but some fields don't have observations in
       all of those filters.  In that case, put the master grid in `physgrid`
       and set `rm_filters` to None.  The catalog will be used to determine the
       filters to remove (if any).  `obsgrid` should be left as None, because in
       this use case, the obsmodel grid has not yet been generated.  The output
       physics model grid will be named using the filename in `physgrid_outfile`
       (if given) or with the prefix in `outbase`.


    Parameters
    ----------
    catfile : string
        file name of photometry catalog

    physgrid : string (default=None)
        If set, remove filters from this physics model grid

    obsgrid : string (default=None)
        If set, remove filters from this obsmodel grid.  The filters to
        remove are located through the physics model grid, so `physgrid`
        must be set as well.

    outbase : string (default=None)
        Path+file to prepend to all output file names.  Useful for case 1 above.

    physgrid_outfile : string (default=None)
        Path+name of the output physics model grid.  Useful for case 2 above.

    rm_filters : string or list of strings (default=None)
        If set, these are the filters to remove from all of the files
        (matched case-insensitively).  If not set, only the filters present
        in catfile will be retained in physgrid and/or obsgrid.

    beast_filt : list of strings
        Sometimes there is ambiguity in the filter name (e.g., the grid has
        both HST_ACS_WFC_F475W and HST_WFC3_F475W, and the filter name is
        F475W).  Set this to the BEAST filter name to resolve any
        ambiguities.  For example, ['HST_WFC3_F475W', 'HST_WFC3_F814W'] ensures
        that these are the names used for F475W and F814W.

    Raises
    ------
    ValueError
        If `obsgrid` is given without `physgrid`, or if `physgrid` is given
        without either `outbase` or `physgrid_outfile`.
    """

    # Validate the argument combination before any file is read or written.
    # The indices to drop from the obsmodel arrays are derived from the
    # filter list of the physics grid, so obsgrid cannot be handled alone.
    if obsgrid is not None and physgrid is None:
        raise ValueError("physgrid must be set to remove filters from obsgrid")
    if physgrid is not None and physgrid_outfile is None and outbase is None:
        raise ValueError("Need to set either outbase or physgrid_outfile")

    # read in the photometry catalog
    cat = Table.read(catfile)

    if rm_filters is not None:
        # remove the requested filters from the catalog
        rm_list = [str(f) for f in np.atleast_1d(rm_filters)]
        for cfilter in rm_list:
            colname = "{}_rate".format(cfilter)
            if colname.upper() in cat.colnames:
                cat.remove_column(colname.upper())
            elif colname.lower() in cat.colnames:
                cat.remove_column(colname.lower())
            else:
                print("{} not in catalog file".format(colname))
        # NOTE(review): with outbase=None this writes "None_cat.fits";
        # left as is because callers may rely on it - confirm intent.
        cat.write("{}_cat.fits".format(outbase), overwrite=True)

        # the grid short names are upper case, so compare in upper case
        # (the catalog columns above are matched in either case as well)
        rm_set = {f.upper() for f in rm_list}
        cat_filters = None
    else:
        # extract the (short, upper case) filter names present in the catalog
        rm_set = None
        cat_filters = {
            f[:-5].upper() for f in cat.colnames if f[-4:].lower() == "rate"
        }

    # if beast_filt is set, make a list of the short versions
    beast_filt_short = None
    if beast_filt is not None:
        beast_filt_short = [(f.split("_"))[-1].upper() for f in beast_filt]

    # indices (into the grid filter list) of the filters to remove
    rindxs = []

    # if physgrid set, process the SED grid
    if physgrid is not None:

        # read in the sed grid
        g0 = SEDGrid(physgrid, backend="cache")
        filters = g0.filters

        # determine which filters and which grid columns need deleting
        rgridcols = []
        for findx, cfilter in enumerate(filters):
            csfilter = (cfilter.split("_"))[-1].upper()
            if not _filter_is_removed(
                cfilter, csfilter, rm_set, cat_filters, beast_filt, beast_filt_short
            ):
                continue
            rindxs.append(findx)
            for grid_col in g0.grid.colnames:
                # a column is queued only once, otherwise the second
                # remove_column call on it would fail
                if cfilter in grid_col and grid_col not in rgridcols:
                    rgridcols.append(grid_col)

        # delete column(s)
        nseds = np.delete(g0.seds, rindxs, 1)
        nlamb = np.delete(g0.lamb, rindxs, 0)
        nfilters = np.delete(filters, rindxs, 0)
        for rcol in rgridcols:
            g0.grid.remove_column(rcol)

        print("orig filters: {}".format(" ".join(filters)))
        print(" new filters: {}".format(" ".join(nfilters)))

        # save the modified grid
        g = SEDGrid(np.array(nlamb), seds=nseds, grid=g0.grid, backend="memory")
        g.header["filters"] = " ".join(nfilters)
        if physgrid_outfile is not None:
            g.write(physgrid_outfile)
        else:
            # outbase is guaranteed to be set by the check at the top
            g.write("{}_seds.grid.hd5".format(outbase))

    # if obsgrid set, process the observation model
    if obsgrid is not None:
        noise_cat = noisemodel.get_noisemodelcat(obsgrid)
        # NOTE(review): with outbase=None this writes
        # "None_noisemodel.grid.hd5" - confirm intent.
        with tables.open_file("{}_noisemodel.grid.hd5".format(outbase), "w") as outfile:
            # drop the same filter axis entries as were dropped from the seds
            for key in ("bias", "error", "completeness"):
                outfile.create_array(
                    outfile.root, key, np.delete(noise_cat[key], rindxs, 1)
                )


def _filter_is_removed(
    cfilter, csfilter, rm_set, cat_filters, beast_filt, beast_filt_short
):
    """
    Decide whether one filter of the physics grid has to be removed.

    Parameters
    ----------
    cfilter : string
        full filter name as stored in the grid
    csfilter : string
        upper case short name of `cfilter` (the part after the last "_")
    rm_set : set of strings or None
        upper case names the user asked to remove; None when the filters to
        keep are taken from the catalog instead
    cat_filters : set of strings or None
        upper case short names present in the catalog (used when `rm_set`
        is None)
    beast_filt : list of strings or None
        full filter names that resolve ambiguous short names
    beast_filt_short : list of strings or None
        short names of `beast_filt`, in the same order

    Returns
    -------
    bool
        True if the filter should be removed from the grid
    """
    # full name the user tied to this short name, if any
    pinned = None
    if beast_filt is not None and csfilter in beast_filt_short:
        pinned = beast_filt[beast_filt_short.index(csfilter)]

    if rm_set is not None:
        # user-chosen removal: a listed filter goes, unless the short name
        # is tied to a different instrument than this one
        if csfilter not in rm_set:
            return False
        return pinned is None or pinned == cfilter

    # catalog-driven removal: filters missing from the catalog go; filters
    # in the catalog stay, unless the short name is tied to another instrument
    if csfilter not in cat_filters:
        return True
    return pinned is not None and pinned != cfilter
Esempio n. 6
0
def trim_models(
    sedgrid,
    sedgrid_noisemodel,
    obsdata,
    sed_outname,
    noisemodel_outname,
    sigma_fac=3.0,
    n_detected=4,
    inFlux=True,
    trunchen=False,
):
    """
    For a given set of observations, there will be models that are so
    bright or faint that they will always have ~0 probability of fitting
    the data.  This program trims those models out of the SED grid
    so that time is not spent calculating model points that are always
    zero probability.

    The trimmed grid (with an extra ``fullgrid_idx`` column holding the index
    of each kept model in the full grid) and the matching rows of the noise
    model are written to `sed_outname` and `noisemodel_outname`.

    Parameters
    ----------
    sedgrid : grid.SEDgrid instance
        model grid
    sedgrid_noisemodel : beast noisemodel instance
        noise model data
    obsdata : Observation object instance
        observation catalog
    sed_outname : str
        name for output sed file
    noisemodel_outname : str
        name for output noisemodel file
    sigma_fac : float, optional
        factor for trimming the upper and lower range of grid so that
        the model range cuts off sigma_fac above and below the brightest
        and faintest models, respectively (default: 3.)
    n_detected : int, optional
        minimum number of bands where ASTs yielded a detection for
        a given model, if fewer detections than n_detected this model
        gets eliminated (default: 4)
        NOTE(review): this argument is not read by the code below, which
        requires a non-zero completeness in all filters - confirm.
    inFlux : bool, optional
        if true data are in fluxes (default: True); otherwise the data
        values are converted with 10**(-0.4 * value) first
    trunchen : bool, optional
        if true use the trunchen noise model (default: False)

    Raises
    ------
    ValueError
        If no model has a non-zero completeness in all filters.
    """
    # Store the brightest and faintest fluxes of the data in each band
    n_filters = len(obsdata.filters)
    min_data = np.zeros(n_filters)
    max_data = np.zeros(n_filters)
    for k, filtername in enumerate(obsdata.filters):
        sfiltname = obsdata.filter_aliases[filtername]
        band_data = obsdata.data[sfiltname]
        if not inFlux:
            band_data = 10 ** (-0.4 * band_data)
        # scale once and reuse for both extrema
        band_flux = band_data * obsdata.vega_flux[k]
        min_data[k] = np.amin(band_flux)
        max_data[k] = np.amax(band_flux)

    # link to the noisemodel values
    model_bias = sedgrid_noisemodel["bias"]
    model_unc = sedgrid_noisemodel["error"]
    model_compl = sedgrid_noisemodel["completeness"]
    if trunchen:
        model_q_norm = sedgrid_noisemodel["q_norm"]
        model_icov_diag = sedgrid_noisemodel["icov_diag"]
        model_icov_offdiag = sedgrid_noisemodel["icov_offdiag"]

    # has to be complete in all filters - otherwise observation model not defined
    # toothpick model means that if compl = 0, then bias = 0, and sigma = 0 from ASTs
    above_ast = model_compl > 0
    sum_above_ast = np.sum(above_ast, axis=1)
    (indxs,) = np.where(sum_above_ast >= n_filters)
    n_ast_indxs = len(indxs)

    n_orig_models = len(sedgrid.seds[:, 0])
    print("number of original models = ", n_orig_models)
    print("number of ast trimmed models = ", n_ast_indxs)

    if n_ast_indxs <= 0:
        raise ValueError("no models are brighter than the minimum ASTs run")

    # Find models with fluxes (with margin) between faintest and brightest data
    for k in range(n_filters):
        print("working on filter # = ", k)

        # Get upper and lower values for the models given the noise model
        #  sigma_fac defaults to 3.
        model_val = sedgrid.seds[indxs, k] + model_bias[indxs, k]
        model_down = model_val - sigma_fac * model_unc[indxs, k]
        model_up = model_val + sigma_fac * model_unc[indxs, k]

        (nindxs,) = np.where(
            (model_up >= min_data[k]) & (model_down <= max_data[k])
        )
        # a filter in which no model overlaps the data leaves the current
        # selection untouched instead of emptying it
        if len(nindxs) > 0:
            indxs = indxs[nindxs]

    # NOTE(review): indxs starts non-empty and is only ever replaced by a
    # non-empty selection above, so this check looks unreachable - confirm
    # whether an empty per-filter match was meant to raise here.
    if len(indxs) == 0:
        raise ValueError("no models that are within the data range")

    print("number of original models = ", n_orig_models)
    print("number of ast trimmed models = ", n_ast_indxs)
    print("number of trimmed models = ", len(indxs))

    # Save the grid
    print("Writing trimmed sedgrid to disk into {0:s}".format(sed_outname))
    cols = {key: sedgrid.grid[key][indxs] for key in sedgrid.grid.keys()}

    # New column to save the index of the model in the full grid
    cols["fullgrid_idx"] = indxs.astype(int)
    g = SEDGrid(
        sedgrid.lamb, seds=sedgrid.seds[indxs], grid=Table(cols), backend="memory"
    )
    g.header["filters"] = " ".join(obsdata.filters)

    # trimmed grid name
    g.write(sed_outname)

    # save the trimmed noise model
    print("Writing trimmed noisemodel to disk into {0:s}".format(noisemodel_outname))
    with tables.open_file(noisemodel_outname, "w") as outfile:
        outfile.create_array(outfile.root, "bias", model_bias[indxs])
        outfile.create_array(outfile.root, "error", model_unc[indxs])
        outfile.create_array(outfile.root, "completeness", model_compl[indxs])
        if trunchen:
            outfile.create_array(outfile.root, "q_norm", model_q_norm[indxs])
            outfile.create_array(outfile.root, "icov_diag", model_icov_diag[indxs])
            outfile.create_array(
                outfile.root, "icov_offdiag", model_icov_offdiag[indxs]
            )