Example #1
    def load_datasets(self, hdf_file: h5py._hl.files.File) -> None:
        """
        Creates a dictionary of HDF5 datasets (`self.ds`) that were previously
        created (used for restart jobs only).

        Args:
        ----
            hdf_file (h5py._hl.files.File) : HDF file containing all the datasets.
        """
        self.ds = {}  # initialize dictionary of datasets

        # use the names of the datasets as the keys in `self.ds`
        for ds_name in self.dataset_names:
            self.ds[ds_name] = hdf_file.get(ds_name)
Example #2
    def create_datasets(self, hdf_file: h5py._hl.files.File) -> None:
        """
        Creates a dictionary of HDF5 datasets (`self.ds`).

        Args:
        ----
            hdf_file (h5py._hl.files.File) : HDF5 file which will contain datasets.
        """
        self.ds = {}  # initialize

        for ds_name in self.dataset_names:
            self.ds[ds_name] = hdf_file.create_dataset(
                ds_name,
                (self.total_n_subgraphs, *self.dims[ds_name]),
                chunks=True,  # must be True for resizing later
                dtype=np.dtype("int8"))
Example #3
def write_data(
    model: str,
    parameters: List[str],
    database: h5py._hl.files.File,
    data_sorted: List[np.ndarray],
) -> None:
    """
    Function for writing the model spectra and parameters to the
    database.

    Parameters
    ----------
    model : str
        Atmosphere model.
    parameters : list(str)
        Model parameters.
    database: h5py._hl.files.File
        Database.
    data_sorted : list(np.ndarray)
        Sorted model data with the parameter values, wavelength
        points (um), and flux densities (W m-2 um-1).

    Returns
    -------
    NoneType
        None
    """

    n_param = len(parameters)

    if f"models/{model}" in database:
        del database[f"models/{model}"]

    dset = database.create_group(f"models/{model}")

    dset.attrs["n_param"] = n_param

    for i, item in enumerate(parameters):
        dset.attrs[f"parameter{i}"] = item

        database.create_dataset(f"models/{model}/{item}", data=data_sorted[i])

    database.create_dataset(f"models/{model}/wavelength",
                            data=data_sorted[n_param])

    database.create_dataset(f"models/{model}/flux",
                            data=data_sorted[n_param + 1])
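A hedged usage sketch for write_data; the file name, model name, and grid values are assumptions. data_sorted is expected to hold one array of values per parameter, followed by the wavelength array and the flux array:

import h5py
import numpy as np

# hypothetical grid: 3 Teff values, 2 logg values, 100 wavelength points
teff = np.array([1000.0, 1200.0, 1400.0])
logg = np.array([4.0, 5.0])
wavelength = np.linspace(1.0, 5.0, 100)  # (um)
flux = np.full((3, 2, 100), 1e-15)  # (W m-2 um-1)

with h5py.File("species_database.hdf5", "a") as database:
    write_data(model="drift-phoenix",
               parameters=["teff", "logg"],
               database=database,
               data_sorted=[teff, logg, wavelength, flux])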
Example #4
def add_missing(model: str, parameters: List[str],
                database: h5py._hl.files.File) -> None:
    """
    Function for adding missing grid points with a linear
    interpolation.

    Parameters
    ----------
    model : str
        Atmosphere model.
    parameters : list(str)
        Model parameters.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """

    print("Number of grid points per parameter:")

    grid_shape = []
    param_data = []

    for i, item in enumerate(parameters):
        grid_shape.append(database[f"models/{model}/{item}"].shape[0])
        param_data.append(np.asarray(database[f"models/{model}/{item}"]))
        print(f"   - {item}: {grid_shape[i]}")

    flux = np.asarray(database[f"models/{model}/flux"])  # (W m-2 um-1)
    flux = np.log10(flux)

    count_total = 0
    count_interp = 0
    count_missing = 0

    if len(parameters) == 1:
        # Blackbody spectra
        pass

    elif len(parameters) == 2:
        find_missing = np.zeros(grid_shape, dtype=bool)

        values = []
        points = [[], []]
        new_points = [[], []]

        print("Fix missing grid points with a linear interpolation:")

        for i in range(grid_shape[0]):
            for j in range(grid_shape[1]):
                if np.isinf(np.sum(flux[i, j, ...])):
                    print("   - ", end="")
                    print(f"{parameters[0]} = {param_data[0][i]}, ", end="")
                    print(f"{parameters[1]} = {param_data[1][j]}")

                    if 0 < i < grid_shape[0] - 1:
                        check_low = np.isinf(np.sum(flux[i - 1, j, ...]))
                        check_up = np.isinf(np.sum(flux[i + 1, j, ...]))

                        # Linear scaling of the intermediate Teff point
                        scaling = (param_data[0][i] - param_data[0][i - 1]) / (
                            param_data[0][i + 1] - param_data[0][i - 1])

                        if not check_low and not check_up:
                            flux_low = flux[i - 1, j, ...]
                            flux_up = flux[i + 1, j, ...]
                            flux[i, j, ...] = (flux_low * (1.0 - scaling) +
                                               flux_up * scaling)
                            count_interp += 1

                        else:
                            find_missing[i, j] = True

                    else:
                        find_missing[i, j] = True

                else:
                    points[0].append(param_data[0][i])
                    points[1].append(param_data[1][j])

                    values.append(flux[i, j, ...])

                new_points[0].append(param_data[0][i])
                new_points[1].append(param_data[1][j])

                count_total += 1

        values = np.asarray(values)
        points = np.asarray(points)
        new_points = np.asarray(new_points)

        if np.sum(find_missing) > 0:
            flux_int = griddata(points.T,
                                values,
                                new_points.T,
                                method="linear",
                                fill_value=np.nan)

            count = 0

            for i in range(grid_shape[0]):
                for j in range(grid_shape[1]):
                    if np.isnan(np.sum(flux_int[count, :])):
                        count_missing += 1

                    elif np.isinf(np.sum(flux[i, j, ...])):
                        flux[i, j, :] = flux_int[count, :]
                        count_interp += 1

                    count += 1

            if count_missing > 0:
                print(
                    f"Could not interpolate {count_missing} grid points so storing "
                    f"zeros instead. [WARNING]\nThe grid points that are missing:"
                )

                for i in range(flux_int.shape[0]):
                    if np.isnan(np.sum(flux_int[i, :])):
                        print("   - ", end="")
                        print(f"{parameters[0]} = {new_points[0][i]}, ",
                              end="")
                        print(f"{parameters[1]} = {new_points[1][i]}")

    elif len(parameters) == 3:
        find_missing = np.zeros(grid_shape, dtype=bool)

        values = []
        points = [[], [], []]
        new_points = [[], [], []]

        print("Fix missing grid points with a linear interpolation:")

        for i in range(grid_shape[0]):
            for j in range(grid_shape[1]):
                for k in range(grid_shape[2]):
                    if np.isinf(np.sum(flux[i, j, k, ...])):
                        print("   - ", end="")
                        print(f"{parameters[0]} = {param_data[0][i]}, ",
                              end="")
                        print(f"{parameters[1]} = {param_data[1][j]}, ",
                              end="")
                        print(f"{parameters[2]} = {param_data[2][k]}")

                        if 0 < i < grid_shape[0] - 1:
                            check_low = np.isinf(np.sum(flux[i - 1, j, k,
                                                             ...]))
                            check_up = np.isinf(np.sum(flux[i + 1, j, k, ...]))

                            # Linear scaling of the intermediate Teff point
                            scaling = (param_data[0][i] - param_data[0][i - 1]
                                       ) / (param_data[0][i + 1] -
                                            param_data[0][i - 1])

                            if not check_low and not check_up:
                                flux_low = flux[i - 1, j, k, ...]
                                flux_up = flux[i + 1, j, k, ...]
                                flux[i, j, k,
                                     ...] = (flux_low * (1.0 - scaling) +
                                             flux_up * scaling)
                                count_interp += 1

                            else:
                                find_missing[i, j, k] = True

                        else:
                            find_missing[i, j, k] = True

                    else:
                        points[0].append(param_data[0][i])
                        points[1].append(param_data[1][j])
                        points[2].append(param_data[2][k])

                        values.append(flux[i, j, k, ...])

                    new_points[0].append(param_data[0][i])
                    new_points[1].append(param_data[1][j])
                    new_points[2].append(param_data[2][k])

                    count_total += 1

        values = np.asarray(values)
        points = np.asarray(points)
        new_points = np.asarray(new_points)

        if np.sum(find_missing) > 0:
            flux_int = griddata(points.T,
                                values,
                                new_points.T,
                                method="linear",
                                fill_value=np.nan)

            count = 0

            for i in range(grid_shape[0]):
                for j in range(grid_shape[1]):
                    for k in range(grid_shape[2]):
                        if np.isnan(np.sum(flux_int[count, :])):
                            count_missing += 1

                        elif np.isinf(np.sum(flux[i, j, k, ...])):
                            flux[i, j, k, :] = flux_int[count, :]
                            count_interp += 1

                        count += 1

            if count_missing > 0:
                print(
                    f"Could not interpolate {count_missing} grid points so storing "
                    f"zeros instead. [WARNING]\nThe grid points that are missing:"
                )

                for i in range(flux_int.shape[0]):
                    if np.isnan(np.sum(flux_int[i, :])):
                        print("   - ", end="")
                        print(f"{parameters[0]} = {new_points[0][i]}, ",
                              end="")
                        print(f"{parameters[1]} = {new_points[1][i]}, ",
                              end="")
                        print(f"{parameters[2]} = {new_points[2][i]}")

    elif len(parameters) == 4:
        find_missing = np.zeros(grid_shape, dtype=bool)

        values = []
        points = [[], [], [], []]
        new_points = [[], [], [], []]

        print("Fix missing grid points with a linear interpolation:")

        for i in range(grid_shape[0]):
            for j in range(grid_shape[1]):
                for k in range(grid_shape[2]):
                    for m in range(grid_shape[3]):
                        if np.isinf(np.sum(flux[i, j, k, m, ...])):
                            print("   - ", end="")
                            print(f"{parameters[0]} = {param_data[0][i]}, ",
                                  end="")
                            print(f"{parameters[1]} = {param_data[1][j]}, ",
                                  end="")
                            print(f"{parameters[2]} = {param_data[2][k]}, ",
                                  end="")
                            print(f"{parameters[3]} = {param_data[3][m]}")

                            if 0 < i < grid_shape[0] - 1:
                                check_low = np.isinf(
                                    np.sum(flux[i - 1, j, k, m, ...]))
                                check_up = np.isinf(
                                    np.sum(flux[i + 1, j, k, m, ...]))

                                # Linear scaling of the intermediate Teff point
                                scaling = (param_data[0][i] -
                                           param_data[0][i - 1]) / (
                                               param_data[0][i + 1] -
                                               param_data[0][i - 1])

                                if not check_low and not check_up:
                                    flux_low = flux[i - 1, j, k, m, ...]
                                    flux_up = flux[i + 1, j, k, m, ...]
                                    flux[i, j, k, m,
                                         ...] = (flux_low * (1.0 - scaling) +
                                                 flux_up * scaling)
                                    count_interp += 1

                                else:
                                    find_missing[i, j, k, m] = True

                            else:
                                find_missing[i, j, k, m] = True

                        else:
                            points[0].append(param_data[0][i])
                            points[1].append(param_data[1][j])
                            points[2].append(param_data[2][k])
                            points[3].append(param_data[3][m])

                            values.append(flux[i, j, k, m, ...])

                        new_points[0].append(param_data[0][i])
                        new_points[1].append(param_data[1][j])
                        new_points[2].append(param_data[2][k])
                        new_points[3].append(param_data[3][m])

                        count_total += 1

        values = np.asarray(values)
        points = np.asarray(points)
        new_points = np.asarray(new_points)

        if np.sum(find_missing) > 0:
            flux_int = griddata(points.T,
                                values,
                                new_points.T,
                                method="linear",
                                fill_value=np.nan)

            count = 0

            for i in range(grid_shape[0]):
                for j in range(grid_shape[1]):
                    for k in range(grid_shape[2]):
                        for m in range(grid_shape[3]):
                            if np.isnan(np.sum(flux_int[count, :])):
                                count_missing += 1

                            elif np.isinf(np.sum(flux[i, j, k, m, ...])):
                                flux[i, j, k, m, :] = flux_int[count, :]
                                count_interp += 1

                            count += 1

            if count_missing > 0:
                print(
                    f"Could not interpolate {count_missing} grid points so storing "
                    f"zeros instead. [WARNING]\nThe grid points that are missing:"
                )

                for i in range(flux_int.shape[0]):
                    if np.isnan(np.sum(flux_int[i, :])):
                        print("   - ", end="")
                        print(f"{parameters[0]} = {new_points[0][i]}, ",
                              end="")
                        print(f"{parameters[1]} = {new_points[1][i]}, ",
                              end="")
                        print(f"{parameters[2]} = {new_points[2][i]}, ",
                              end="")
                        print(f"{parameters[3]} = {new_points[3][i]}")

        # ran_par_0 = np.random.randint(grid_shape[0], size=1000)
        # ran_par_1 = np.random.randint(grid_shape[1], size=1000)
        # ran_par_2 = np.random.randint(grid_shape[2], size=1000)
        # ran_par_3 = np.random.randint(grid_shape[3], size=1000)
        #
        # for z in range(ran_par_0.shape[0]):
        #     i = ran_par_0[z]
        #     j = ran_par_1[z]
        #     k = ran_par_2[z]
        #     m = ran_par_3[z]
        #
        #     if 0 < i < grid_shape[0]-1:
        #         check_low = np.isinf(np.sum(flux[i-1, j, k, m, ...]))
        #         check_up = np.isinf(np.sum(flux[i+1, j, k, m, ...]))
        #
        #         # Linear scaling of the intermediate Teff point
        #         scaling = (param_data[0][i] - param_data[0][i-1]) / \
        #                   (param_data[0][i+1] - param_data[0][i-1])
        #
        #         if not check_low and not check_up:
        #             flux_low = flux[i-1, j, k, m, ...]
        #             flux_up = flux[i+1, j, k, m, ...]
        #             flux[i, j, k, m, ...] = flux_low*(1.-scaling) + flux_up*scaling

    elif len(parameters) == 5:
        find_missing = np.zeros(grid_shape, dtype=bool)

        values = []
        points = [[], [], [], [], []]
        new_points = [[], [], [], [], []]

        print("Fix missing grid points with a linear interpolation:")

        for i in range(grid_shape[0]):
            for j in range(grid_shape[1]):
                for k in range(grid_shape[2]):
                    for m in range(grid_shape[3]):
                        for n in range(grid_shape[4]):
                            if np.isinf(np.sum(flux[i, j, k, m, n, ...])):
                                print("   - ", end="")
                                print(
                                    f"{parameters[0]} = {param_data[0][i]}, ",
                                    end="")
                                print(
                                    f"{parameters[1]} = {param_data[1][j]}, ",
                                    end="")
                                print(
                                    f"{parameters[2]} = {param_data[2][k]}, ",
                                    end="")
                                print(
                                    f"{parameters[3]} = {param_data[3][m]}, ",
                                    end="")
                                print(f"{parameters[4]} = {param_data[4][n]}")

                                if 0 < i < grid_shape[0] - 1:
                                    check_low = np.isinf(
                                        np.sum(flux[i - 1, j, k, m, n, ...]))
                                    check_up = np.isinf(
                                        np.sum(flux[i + 1, j, k, m, n, ...]))

                                    # Linear scaling of the intermediate Teff point
                                    scaling = (param_data[0][i] -
                                               param_data[0][i - 1]) / (
                                                   param_data[0][i + 1] -
                                                   param_data[0][i - 1])

                                    if not check_low and not check_up:
                                        flux_low = flux[i - 1, j, k, m, n, ...]
                                        flux_up = flux[i + 1, j, k, m, n, ...]
                                        flux[i, j, k, m, n,
                                             ...] = (flux_low *
                                                     (1.0 - scaling) +
                                                     flux_up * scaling)
                                        count_interp += 1

                                    else:
                                        find_missing[i, j, k, m, n] = True

                                else:
                                    find_missing[i, j, k, m, n] = True

                            else:
                                points[0].append(param_data[0][i])
                                points[1].append(param_data[1][j])
                                points[2].append(param_data[2][k])
                                points[3].append(param_data[3][m])
                                points[4].append(param_data[4][n])

                                values.append(flux[i, j, k, m, n, ...])

                            new_points[0].append(param_data[0][i])
                            new_points[1].append(param_data[1][j])
                            new_points[2].append(param_data[2][k])
                            new_points[3].append(param_data[3][m])
                            new_points[4].append(param_data[4][n])

                            count_total += 1

        values = np.asarray(values)
        points = np.asarray(points)
        new_points = np.asarray(new_points)

        if np.sum(find_missing) > 0:
            flux_int = griddata(points.T,
                                values,
                                new_points.T,
                                method="linear",
                                fill_value=np.nan)

            count = 0

            for i in range(grid_shape[0]):
                for j in range(grid_shape[1]):
                    for k in range(grid_shape[2]):
                        for m in range(grid_shape[3]):
                            for n in range(grid_shape[4]):
                                if np.isnan(np.sum(flux_int[count, :])):
                                    count_missing += 1

                                elif np.isinf(np.sum(flux[i, j, k, m, n,
                                                          ...])):
                                    flux[i, j, k, m, n, :] = flux_int[count, :]
                                    count_interp += 1

                                count += 1

            if count_missing > 0:
                print(
                    f"Could not interpolate {count_missing} grid points so storing"
                    f"zeros instead. [WARNING]\nThe grid points that are missing:"
                )

                for i in range(flux_int.shape[0]):
                    if np.isnan(np.sum(flux_int[i, :])):
                        print("   - ", end="")
                        print(f"{parameters[0]} = {new_points[0][i]}, ",
                              end="")
                        print(f"{parameters[1]} = {new_points[1][i]}, ",
                              end="")
                        print(f"{parameters[2]} = {new_points[2][i]}, ",
                              end="")
                        print(f"{parameters[3]} = {new_points[3][i]}, ",
                              end="")
                        print(f"{parameters[4]} = {new_points[4][i]}")

    else:
        raise ValueError(
            "The add_missing function is currently not compatible "
            "with more than 5 model parameters.")

    print(f"Number of stored grid points: {count_total}")
    print(f"Number of interpolated grid points: {count_interp}")
    print(f"Number of missing grid points: {count_missing}")

    del database[f"models/{model}/flux"]
    database.create_dataset(f"models/{model}/flux", data=10.0**flux)
Example #5
def add_bonnefoy2014(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the SINFONI spectra of young, M- and L-type dwarfs from
    `Bonnefoy et al. (2014) <https://ui.adsabs.harvard.edu/abs/2014A%26A...562A.127B/abstract>`_
    to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """

    print_text = "spectra of young M/L type objects from Bonnefoy et al. 2014"

    data_url = "http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?J/A+A/562/A127/"
    data_file = os.path.join(input_path, "J_A+A_562_A127.tar.gz")
    data_folder = os.path.join(input_path, "bonnefoy+2014/")

    if not os.path.isfile(data_file):
        print(f"Downloading {print_text} (2.3 MB)...", end="", flush=True)
        urllib.request.urlretrieve(data_url, data_file)
        print(" [DONE]")

    if os.path.exists(data_folder):
        shutil.rmtree(data_folder)

    print(f"Unpacking {print_text} (2.3 MB)...", end="", flush=True)
    tar = tarfile.open(data_file)
    tar.extractall(data_folder)
    tar.close()
    print(" [DONE]")

    spec_dict = {}

    with gzip.open(os.path.join(data_folder, "stars.dat.gz"),
                   "r") as gzip_file:
        for line in gzip_file:
            name = line[:13].decode().strip()
            files = line[80:].decode().strip().split()
            sptype = line[49:56].decode().strip()

            if name == "NAME 2M1207A":
                name = "2M1207A"

            if len(sptype) == 0:
                sptype = None
            elif "." in sptype:
                sptype = sptype[:4]
            else:
                sptype = sptype[:2]

            if name == "Cha1109":
                sptype = "M9"
            elif name == "DH Tau B":
                sptype = "M9"
            elif name == "TWA 22A":
                sptype = "M6"
            elif name == "TWA 22B":
                sptype = "M6"
            elif name == "CT Cha b":
                sptype = "M9"

            spec_dict[name] = {"name": name, "sptype": sptype, "files": files}

    database.create_group("spectra/bonnefoy+2014")

    fits_folder = os.path.join(data_folder, "sp")

    print_message = ""

    for _, _, files in os.walk(fits_folder):
        for _, filename in enumerate(files):
            fname_split = filename.split("_")

            data = fits.getdata(os.path.join(fits_folder, filename))

            for name, value in spec_dict.items():
                if filename in value["files"]:
                    if name == "TWA 22AB":
                        # Binary spectrum
                        continue

                    if "JHK.fits" in fname_split:
                        spec_dict[name]["JHK"] = data

                    elif "J" in fname_split:
                        spec_dict[name]["J"] = data

                    elif "H+K" in fname_split or "HK" in fname_split:
                        spec_dict[name]["HK"] = data

    for name, value in spec_dict.items():
        empty_message = len(print_message) * " "
        print(f"\r{empty_message}", end="")

        print_message = f"Adding spectra... {name}"
        print(f"\r{print_message}", end="")

        if "JHK" in value:
            sp_data = value["JHK"]

        elif "J" in value and "HK" in value:
            sp_data = np.vstack((value["J"], value["HK"]))

        else:
            continue

        dset = database.create_dataset(f"spectra/bonnefoy+2014/{name}",
                                       data=sp_data)

        dset.attrs["name"] = str(name).encode()
        dset.attrs["sptype"] = str(value["sptype"]).encode()

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()
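A hedged usage sketch (the paths and the database file name are assumptions); the function closes the database itself, so the file is opened without a context manager:

import h5py

# assumes add_bonnefoy2014() from above is in scope
database = h5py.File("species_database.hdf5", "a")
add_bonnefoy2014(input_path="./data", database=database)
# no explicit close needed: add_bonnefoy2014() calls database.close()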
Example #6
def add_spex(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the SpeX Prism Spectral Library to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """

    parallax_url = "https://home.strw.leidenuniv.nl/~stolker/species/parallax.dat"
    parallax_file = os.path.join(input_path, "parallax.dat")

    if not os.path.isfile(parallax_file):
        urllib.request.urlretrieve(parallax_url, parallax_file)

    parallax_data = pd.pandas.read_csv(
        parallax_file,
        usecols=[0, 1, 2],
        names=["object", "parallax", "parallax_error"],
        delimiter=",",
        dtype={"object": str, "parallax": float, "parallax_error": float},
    )

    database.create_group("spectra/spex")

    data_path = os.path.join(input_path, "spex")

    if not os.path.exists(data_path):
        os.makedirs(data_path)

    url_all = "http://svo2.cab.inta-csic.es/vocats/v2/spex/cs.php?" \
              "RA=180.000000&DEC=0.000000&SR=180.000000&VERB=2"

    xml_file_spex = os.path.join(data_path, "spex.xml")

    if not os.path.isfile(xml_file_spex):
        urllib.request.urlretrieve(url_all, xml_file_spex)

    table = parse_single_table(xml_file_spex)
    # name = table.array['name']
    twomass = table.array["name2m"]
    url = table.array["access_url"]

    unique_id = []

    print_message = ""

    for i, item in enumerate(url):
        if twomass[i] not in unique_id:

            if isinstance(twomass[i], str):
                xml_file_1 = os.path.join(data_path, twomass[i] + ".xml")
            else:
                # Use decode for backward compatibility
                xml_file_1 = os.path.join(
                    data_path, twomass[i].decode("utf-8") + ".xml"
                )

            if not os.path.isfile(xml_file_1):
                if isinstance(item, str):
                    urllib.request.urlretrieve(item, xml_file_1)
                else:
                    urllib.request.urlretrieve(item.decode("utf-8"), xml_file_1)

            table = parse_single_table(xml_file_1)
            name = table.array["ID"]
            url = table.array["access_url"]

            if isinstance(name[0], str):
                name = name[0]
            else:
                name = name[0].decode("utf-8")

            empty_message = len(print_message) * " "
            print(f"\r{empty_message}", end="")

            print_message = f"Downloading SpeX Prism Spectral Library... {name}"
            print(f"\r{print_message}", end="")

            xml_file_2 = os.path.join(data_path, f"spex_{name}.xml")

            if not os.path.isfile(xml_file_2):
                if isinstance(url[0], str):
                    urllib.request.urlretrieve(url[0], xml_file_2)
                else:
                    urllib.request.urlretrieve(url[0].decode("utf-8"), xml_file_2)

            unique_id.append(twomass[i])

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Downloading SpeX Prism Spectral Library... [DONE]"
    print(f"\r{print_message}")

    h_twomass = photometry.SyntheticPhotometry("2MASS/2MASS.H")

    # 2MASS H band zero point for 0 mag (Cohen et al. 2003)
    h_zp = 1.133e-9  # (W m-2 um-1)

    for votable in os.listdir(data_path):
        if votable.startswith("spex_") and votable.endswith(".xml"):
            xml_file = os.path.join(data_path, votable)

            table = parse_single_table(xml_file)

            wavelength = table.array["wavelength"]  # (Angstrom)
            flux = table.array["flux"]  # Normalized units

            wavelength = np.array(wavelength * 1e-4)  # (um)
            flux = np.array(flux)  # (a.u.)
            error = np.full(flux.size, np.nan)

            # 2MASS magnitudes
            j_mag = table.get_field_by_id("jmag").value
            h_mag = table.get_field_by_id("hmag").value
            ks_mag = table.get_field_by_id("ksmag").value

            if not isinstance(j_mag, str):
                j_mag = j_mag.decode("utf-8")

            if not isinstance(h_mag, str):
                h_mag = h_mag.decode("utf-8")

            if not isinstance(ks_mag, str):
                ks_mag = ks_mag.decode("utf-8")

            if j_mag == "":
                j_mag = np.nan
            else:
                j_mag = float(j_mag)

            if h_mag == "":
                h_mag = np.nan
            else:
                h_mag = float(h_mag)

            if ks_mag == "":
                ks_mag = np.nan
            else:
                ks_mag = float(ks_mag)

            name = table.get_field_by_id("name").value

            if not isinstance(name, str):
                name = name.decode("utf-8")

            twomass_id = table.get_field_by_id("name2m").value

            if not isinstance(twomass_id, str):
                twomass_id = twomass_id.decode("utf-8")

            # Optical spectral type

            try:
                sptype_opt = table.get_field_by_id("optspty").value

                if not isinstance(sptype_opt, str):
                    sptype_opt = sptype_opt.decode("utf-8")

                sptype_opt = data_util.update_sptype(np.array([sptype_opt]))[0]

            except KeyError:
                sptype_opt = None

            # Near-infrared spectral type

            try:
                sptype_nir = table.get_field_by_id("nirspty").value

                if not isinstance(sptype_nir, str):
                    sptype_nir = sptype_nir.decode("utf-8")

                sptype_nir = data_util.update_sptype(np.array([sptype_nir]))[0]

            except KeyError:
                sptype_nir = None

            h_flux, _ = h_twomass.magnitude_to_flux(h_mag, error=None, zp_flux=h_zp)
            phot = h_twomass.spectrum_to_flux(wavelength, flux)  # Normalized units

            flux *= h_flux / phot[0]  # (W m-2 um-1)

            spdata = np.column_stack([wavelength, flux, error])

            simbad_id = query_util.get_simbad(f"2MASS {twomass_id}")

            if simbad_id is not None:
                if not isinstance(simbad_id, str):
                    simbad_id = simbad_id.decode("utf-8")

                par_select = parallax_data[parallax_data["object"] == simbad_id]

                if not par_select.empty:
                    parallax = (
                        par_select["parallax"].values[0],
                        par_select["parallax_error"].values[0],
                    )

                else:
                    parallax = (np.nan, np.nan)

            else:
                parallax = (np.nan, np.nan)

            print_message = f"Adding spectra... {name}"
            print(f"\r{print_message:<72}", end="")

            dset = database.create_dataset(f"spectra/spex/{name}", data=spdata)

            dset.attrs["name"] = str(name).encode()

            if sptype_opt is not None:
                dset.attrs["sptype"] = str(sptype_opt).encode()
            elif sptype_nir is not None:
                dset.attrs["sptype"] = str(sptype_nir).encode()
            else:
                dset.attrs["sptype"] = str("None").encode()

            dset.attrs["simbad"] = str(simbad_id).encode()
            dset.attrs["2MASS/2MASS.J"] = j_mag
            dset.attrs["2MASS/2MASS.H"] = h_mag
            dset.attrs["2MASS/2MASS.Ks"] = ks_mag
            dset.attrs["parallax"] = parallax[0]  # (mas)
            dset.attrs["parallax_error"] = parallax[1]  # (mas)

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message:<72}")

    database.close()
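The flux calibration above amounts to a single scaling: the normalized SpeX spectrum is multiplied by the ratio of the 2MASS H-band flux, derived from the catalogued magnitude and the zero point, to the synthetic H-band flux computed from the normalized spectrum. A numpy-only sketch of that step (the magnitude and the spectrum are made-up values):

import numpy as np

h_zp = 1.133e-9  # 2MASS H-band zero-point flux (W m-2 um-1)
h_mag = 10.5  # catalogued 2MASS H magnitude (assumed)
h_flux = h_zp * 10.0**(-0.4 * h_mag)  # magnitude -> flux density

flux_norm = np.ones(100)  # normalized SpeX spectrum (a.u.)
synth_h = 1.0  # synthetic H-band flux of the normalized spectrum (a.u.)

flux_cal = flux_norm * h_flux / synth_h  # (W m-2 um-1)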
Example #7
def add_kesseli2017(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the SDSS stellar spectra from Kesseli et al. (2017) to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """

    data_url = "https://cdsarc.unistra.fr/viz-bin/nph-Cat/tar.gz?J/ApJS/230/16"
    data_file = os.path.join(input_path, "J_ApJS_230_16.tar.gz")
    data_folder = os.path.join(input_path, "kesseli+2017/")

    if not os.path.isfile(data_file):
        print(
            "Downloading SDSS spectra from Kesseli et al. 2017 (145 MB)...",
            end="",
            flush=True,
        )
        urllib.request.urlretrieve(data_url, data_file)
        print(" [DONE]")

    if os.path.exists(data_folder):
        shutil.rmtree(data_folder)

    print(
        "Unpacking SDSS spectra from Kesseli et al. 2017 (145 MB)...",
        end="",
        flush=True,
    )
    tar = tarfile.open(data_file)
    tar.extractall(data_folder)
    tar.close()
    print(" [DONE]")

    database.create_group("spectra/kesseli+2017")

    fits_folder = os.path.join(data_folder, "fits")

    print_message = ""

    for _, _, files in os.walk(fits_folder):
        for _, filename in enumerate(files):
            with fits.open(os.path.join(fits_folder, filename)) as hdu_list:
                data = hdu_list[1].data

                wavelength = 1e-4 * 10.0**data["LogLam"]  # (um)
                flux = data["Flux"]  # Normalized units
                error = data["PropErr"]  # Normalized units

                name = filename[:-5].replace("_", " ")

                file_split = filename.split("_")
                file_split = file_split[0].split(".")

                sptype = file_split[0]

                spdata = np.column_stack([wavelength, flux, error])

                empty_message = len(print_message) * " "
                print(f"\r{empty_message}", end="")

                print_message = f"Adding spectra... {name}"
                print(f"\r{print_message}", end="")

                dset = database.create_dataset(f"spectra/kesseli+2017/{name}",
                                               data=spdata)

                dset.attrs["name"] = str(name).encode()
                dset.attrs["sptype"] = str(sptype).encode()

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()
Example #8
def add_optical_constants(input_path: str,
                          database: h5py._hl.files.File) -> None:
    """
    Function for adding the optical constants of crystalline and amorphous
    MgSiO3 and Fe to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """

    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = "https://home.strw.leidenuniv.nl/~stolker/species/optical_constants.zip"

    data_file = os.path.join(input_path, "optical_constants.zip")

    if not os.path.isfile(data_file):
        print("Downloading optical constants (87 kB)...", end="", flush=True)
        urllib.request.urlretrieve(url, data_file)
        print(" [DONE]")

    print("Unpacking optical constants...", end="", flush=True)

    with zipfile.ZipFile(data_file, "r") as zip_ref:
        zip_ref.extractall(input_path)

    print(" [DONE]")

    print("Adding optical constants of MgSiO3...", end="")

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/crystalline/"
        "mgsio3_jaeger_98_scott_96_axis1.dat",
    )

    data = np.loadtxt(nk_file)

    database.create_dataset("dust/mgsio3/crystalline/axis_1", data=data)

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/crystalline/"
        "mgsio3_jaeger_98_scott_96_axis2.dat",
    )

    data = np.loadtxt(nk_file)

    database.create_dataset("dust/mgsio3/crystalline/axis_2", data=data)

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/crystalline/"
        "mgsio3_jaeger_98_scott_96_axis3.dat",
    )

    data = np.loadtxt(nk_file)

    database.create_dataset("dust/mgsio3/crystalline/axis_3", data=data)

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/amorphous/"
        "mgsio3_jaeger_2003_reformat.dat",
    )

    data = np.loadtxt(nk_file)

    database.create_dataset("dust/mgsio3/amorphous", data=data)

    print(" [DONE]")

    print("Adding optical constants of Fe...", end="")

    nk_file = os.path.join(
        input_path, "optical_constants/fe/crystalline/fe_henning_1996.dat")
    data = np.loadtxt(nk_file)
    database.create_dataset("dust/fe/crystalline", data=data)

    nk_file = os.path.join(
        input_path, "optical_constants/fe/amorphous/fe_pollack_1994.dat")
    data = np.loadtxt(nk_file)
    database.create_dataset("dust/fe/amorphous", data=data)

    print(" [DONE]")
Example #9
def add_cross_sections(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the extinction cross section of
    crystalline MgSiO3 for a log-normal and power-law size
    distribution to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """

    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = "https://home.strw.leidenuniv.nl/~stolker/species/lognorm_mgsio3_c_ext.fits"

    data_file = os.path.join(input_path, "lognorm_mgsio3_c_ext.fits")

    print("Downloading log-normal dust cross sections (231 kB)...",
          end="",
          flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(" [DONE]")

    print("Adding log-normal dust cross sections:")

    with fits.open(os.path.join(input_path,
                                "lognorm_mgsio3_c_ext.fits")) as hdu_list:
        database.create_dataset(
            "dust/lognorm/mgsio3/crystalline/cross_section",
            data=hdu_list[0].data)

        print(
            f"   - Data shape (n_wavelength, n_radius, n_sigma): {hdu_list[0].data.shape}"
        )

        database.create_dataset("dust/lognorm/mgsio3/crystalline/wavelength",
                                data=hdu_list[1].data)

        data_range = f"{np.amin(hdu_list[1].data)} - {np.amax(hdu_list[1].data)}"
        print(f"   - Wavelength range: {data_range} um")

        database.create_dataset("dust/lognorm/mgsio3/crystalline/radius_g",
                                data=hdu_list[2].data)

        data_range = f"{np.amin(hdu_list[2].data)} - {np.amax(hdu_list[2].data)}"
        print(f"   - Mean geometric radius range: {data_range} um")

        database.create_dataset("dust/lognorm/mgsio3/crystalline/sigma_g",
                                data=hdu_list[3].data)

        data_range = f"{np.amin(hdu_list[3].data)} - {np.amax(hdu_list[3].data)}"
        print(f"   - Geometric standard deviation range: {data_range}")

    url = "https://home.strw.leidenuniv.nl/~stolker/species/powerlaw_mgsio3_c_ext.fits"

    data_file = os.path.join(input_path, "powerlaw_mgsio3_c_ext.fits")

    print("Downloading power-law dust cross sections (231 kB)...",
          end="",
          flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(" [DONE]")

    print("Adding power-law dust cross sections")

    with fits.open(os.path.join(input_path,
                                "powerlaw_mgsio3_c_ext.fits")) as hdu_list:
        database.create_dataset(
            "dust/powerlaw/mgsio3/crystalline/cross_section",
            data=hdu_list[0].data)

        print(
            f"   - Data shape (n_wavelength, n_radius, n_exponent): {hdu_list[0].data.shape}"
        )

        database.create_dataset("dust/powerlaw/mgsio3/crystalline/wavelength",
                                data=hdu_list[1].data)

        data_range = f"{np.amin(hdu_list[1].data)} - {np.amax(hdu_list[1].data)}"
        print(f"   - Wavelength range: {data_range} um")

        database.create_dataset("dust/powerlaw/mgsio3/crystalline/radius_max",
                                data=hdu_list[2].data)

        data_range = f"{np.amin(hdu_list[2].data)} - {np.amax(hdu_list[2].data)}"
        print(f"   - Maximum grain radius range: {data_range} um")

        database.create_dataset("dust/powerlaw/mgsio3/crystalline/exponent",
                                data=hdu_list[3].data)

        data_range = f"{np.amin(hdu_list[3].data)} - {np.amax(hdu_list[3].data)}"
        print(f"   - Power-law exponent range: {data_range}")
Example #10
def add_irtf(input_path: str,
             database: h5py._hl.files.File,
             sptypes: Optional[List[str]] = None) -> None:
    """
    Function for adding the IRTF Spectral Library to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        Database.
    sptypes : list(str), None
        List with the spectral types ('F', 'G', 'K', 'M', 'L', 'T'). All spectral types are
        included if set to ``None``.

    Returns
    -------
    NoneType
        None
    """

    if sptypes is None:
        sptypes = ['F', 'G', 'K', 'M', 'L', 'T']

    distance_url = 'https://people.phys.ethz.ch/~stolkert/species/distance.dat'
    distance_file = os.path.join(input_path, 'distance.dat')

    if not os.path.isfile(distance_file):
        urllib.request.urlretrieve(distance_url, distance_file)

    distance_data = pd.pandas.read_csv(distance_file,
                                       usecols=[0, 3, 4],
                                       names=['object', 'distance', 'distance_error'],
                                       delimiter=',',
                                       dtype={'object': str,
                                              'distance': float,
                                              'distance_error': float})

    datadir = os.path.join(input_path, 'irtf')

    if not os.path.exists(datadir):
        os.makedirs(datadir)

    data_file = {'F': os.path.join(input_path, 'irtf/F_fits_091201.tar'),
                 'G': os.path.join(input_path, 'irtf/G_fits_091201.tar'),
                 'K': os.path.join(input_path, 'irtf/K_fits_091201.tar'),
                 'M': os.path.join(input_path, 'irtf/M_fits_091201.tar'),
                 'L': os.path.join(input_path, 'irtf/L_fits_091201.tar'),
                 'T': os.path.join(input_path, 'irtf/T_fits_091201.tar')}

    data_folder = {'F': os.path.join(input_path, 'irtf/F_fits_091201'),
                   'G': os.path.join(input_path, 'irtf/G_fits_091201'),
                   'K': os.path.join(input_path, 'irtf/K_fits_091201'),
                   'M': os.path.join(input_path, 'irtf/M_fits_091201'),
                   'L': os.path.join(input_path, 'irtf/L_fits_091201'),
                   'T': os.path.join(input_path, 'irtf/T_fits_091201')}

    data_type = {'F': 'F stars (4.4 MB)',
                 'G': 'G stars (5.6 MB)',
                 'K': 'K stars (5.5 MB)',
                 'M': 'M stars (7.5 MB)',
                 'L': 'L dwarfs (850 kB)',
                 'T': 'T dwarfs (100 kB)'}

    url_root = 'http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/'

    url = {'F': url_root+'F_fits_091201.tar',
           'G': url_root+'G_fits_091201.tar',
           'K': url_root+'K_fits_091201.tar',
           'M': url_root+'M_fits_091201.tar',
           'L': url_root+'L_fits_091201.tar',
           'T': url_root+'T_fits_091201.tar'}

    for item in sptypes:
        if not os.path.isfile(data_file[item]):
            print(f'Downloading IRTF Spectral Library - {data_type[item]}...', end='', flush=True)
            urllib.request.urlretrieve(url[item], data_file[item])
            print(' [DONE]')

    print('Unpacking IRTF Spectral Library...', end='', flush=True)

    for item in sptypes:
        tar = tarfile.open(data_file[item])
        tar.extractall(path=datadir)
        tar.close()

    print(' [DONE]')

    database.create_group('spectra/irtf')

    for item in sptypes:
        for root, _, files in os.walk(data_folder[item]):

            for _, filename in enumerate(files):
                if filename[-9:] != '_ext.fits':
                    fitsfile = os.path.join(root, filename)

                    spdata, header = fits.getdata(fitsfile, header=True)

                    name = header['OBJECT']
                    sptype = header['SPTYPE']

                    if name[-2:] == 'AB':
                        name = name[:-2]
                    elif name[-3:] == 'ABC':
                        name = name[:-3]

                    spt_split = sptype.split()

                    if item in ['L', 'T'] or spt_split[1][0] == 'V':
                        print_message = f'Adding IRTF Spectral Library... {name}'
                        print(f'\r{print_message:<70}', end='')

                        simbad_id = query_util.get_simbad(name)

                        if simbad_id is not None:
                            simbad_id = simbad_id.decode('utf-8')

                            dist_select = distance_data.loc[distance_data['object'] == simbad_id]

                            if not dist_select.empty:
                                distance = (dist_select['distance'], dist_select['distance_error'])
                            else:
                                simbad_id, distance = query_util.get_distance(name)

                        else:
                            distance = (np.nan, np.nan)

                        sptype = data_util.update_sptype(np.array([sptype]))[0]

                        dset = database.create_dataset(f'spectra/irtf/{name}',
                                                       data=spdata)

                        dset.attrs['name'] = str(name).encode()
                        dset.attrs['sptype'] = str(sptype).encode()
                        dset.attrs['simbad'] = str(simbad_id).encode()
                        dset.attrs['distance'] = distance[0]
                        dset.attrs['distance_error'] = distance[1]

    print_message = 'Adding IRTF Spectral Library... [DONE]'
    print(f'\r{print_message:<70}')

    database.close()
Example #11
def add_allers2013(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the spectra of young, M- and L-type dwarfs from
    `Allers & Liu (2013) <https://ui.adsabs.harvard.edu/abs/2013ApJ...772...79A/abstract>`_  to
    the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """

    print_text = "spectra of young M/L type objects from Allers & Liu 2013"

    data_url = "https://home.strw.leidenuniv.nl/~stolker/species/allers_liu_2013.tgz"
    data_file = os.path.join(input_path, "allers_liu_2013.tgz")
    data_folder = os.path.join(input_path, "allers+2013/")

    if not os.path.isfile(data_file):
        print(f"Downloading {print_text} (173 kB)...", end="", flush=True)
        urllib.request.urlretrieve(data_url, data_file)
        print(" [DONE]")

    if os.path.exists(data_folder):
        shutil.rmtree(data_folder)

    print(f"Unpacking {print_text} (173 kB)...", end="", flush=True)
    tar = tarfile.open(data_file)
    tar.extractall(data_folder)
    tar.close()
    print(" [DONE]")

    sources = np.genfromtxt(
        os.path.join(data_folder, "sources.csv"),
        delimiter=",",
        dtype=None,
        encoding="ASCII",
    )

    source_names = sources[:, 0]
    source_sptype = sources[:, 7]

    database.create_group("spectra/allers+2013")

    print_message = ""

    for _, _, files in os.walk(data_folder):
        for _, filename in enumerate(files):
            if filename.endswith(".fits"):
                sp_data, header = fits.getdata(os.path.join(
                    data_folder, filename),
                                               header=True)

            else:
                continue

            sp_data = np.transpose(sp_data)

            # (erg s-1 cm-2 A-1) -> (W m-2 um-1)
            sp_data[:, 1:] *= 10.0

            name = header["OBJECT"]

            index = np.argwhere(source_names == name)

            if len(index) == 0:
                sptype = None
            else:
                sptype = source_sptype[index][0][0][:2]

            empty_message = len(print_message) * " "
            print(f"\r{empty_message}", end="")

            print_message = f"Adding spectra... {name}"
            print(f"\r{print_message}", end="")

            dset = database.create_dataset(f"spectra/allers+2013/{name}",
                                           data=sp_data)

            dset.attrs["name"] = str(name).encode()
            dset.attrs["sptype"] = str(sptype).encode()

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()
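The factor of 10 applied to the fluxes above is the unit conversion 1 erg s-1 cm-2 A-1 = 10 W m-2 um-1, which can be checked with astropy units:

import astropy.units as u

flux_unit = u.erg / u.s / u.cm**2 / u.AA
print(flux_unit.to(u.W / u.m**2 / u.um))  # 10.0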
Example #12
def add_optical_constants(input_path: str,
                          database: h5py._hl.files.File) -> None:
    """
    Function for adding the optical constants of crystalline and amorphous MgSiO3 and Fe to the
    database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """

    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = 'https://people.phys.ethz.ch/~stolkert/species/optical_constants.zip'

    data_file = os.path.join(input_path, 'optical_constants.zip')

    if not os.path.isfile(data_file):
        print('Downloading optical constants (87 kB)...', end='', flush=True)
        urllib.request.urlretrieve(url, data_file)
        print(' [DONE]')

    print('Unpacking optical constants...', end='', flush=True)

    with zipfile.ZipFile(data_file, 'r') as zip_ref:
        zip_ref.extractall(input_path)

    print(' [DONE]')

    print('Adding optical constants of MgSiO3...', end='')

    nk_file = os.path.join(
        input_path, 'optical_constants/mgsio3/crystalline/'
        'mgsio3_jaeger_98_scott_96_axis1.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/crystalline/axis_1/', data=data)

    nk_file = os.path.join(
        input_path, 'optical_constants/mgsio3/crystalline/'
        'mgsio3_jaeger_98_scott_96_axis2.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/crystalline/axis_2/', data=data)

    nk_file = os.path.join(
        input_path, 'optical_constants/mgsio3/crystalline/'
        'mgsio3_jaeger_98_scott_96_axis3.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/crystalline/axis_3/', data=data)

    nk_file = os.path.join(
        input_path, 'optical_constants/mgsio3/amorphous/'
        'mgsio3_jaeger_2003_reformat.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/amorphous', data=data)

    print(' [DONE]')

    print('Adding optical constants of Fe...', end='')

    nk_file = os.path.join(
        input_path, 'optical_constants/fe/crystalline/fe_henning_1996.dat')
    data = np.loadtxt(nk_file)
    database.create_dataset('dust/fe/crystalline', data=data)

    nk_file = os.path.join(
        input_path, 'optical_constants/fe/amorphous/fe_pollack_1994.dat')
    data = np.loadtxt(nk_file)
    database.create_dataset('dust/fe/amorphous', data=data)

    print(' [DONE]')
Example #13
def add_cross_sections(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the extinction cross section of crystalline MgSiO3 for a log-normal and
    power-law size distribution to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """

    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = 'https://people.phys.ethz.ch/~stolkert/species/lognorm_mgsio3_c_ext.fits'

    data_file = os.path.join(input_path, 'lognorm_mgsio3_c_ext.fits')

    print('Downloading log-normal dust cross sections (231 kB)...',
          end='',
          flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(' [DONE]')

    print('Adding log-normal dust cross sections...', end='')

    with fits.open(os.path.join(input_path,
                                'lognorm_mgsio3_c_ext.fits')) as hdu_list:
        database.create_dataset(
            'dust/lognorm/mgsio3/crystalline/cross_section/',
            data=hdu_list[0].data)

        database.create_dataset('dust/lognorm/mgsio3/crystalline/wavelength/',
                                data=hdu_list[1].data)

        database.create_dataset('dust/lognorm/mgsio3/crystalline/radius_g/',
                                data=hdu_list[2].data)

        database.create_dataset('dust/lognorm/mgsio3/crystalline/sigma_g/',
                                data=hdu_list[3].data)

    print(' [DONE]')

    url = 'https://people.phys.ethz.ch/~stolkert/species/powerlaw_mgsio3_c_ext.fits'

    data_file = os.path.join(input_path, 'powerlaw_mgsio3_c_ext.fits')

    print('Downloading power-law dust cross sections (231 kB)...',
          end='',
          flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(' [DONE]')

    print('Adding power-law dust cross sections...', end='')

    with fits.open(os.path.join(input_path,
                                'powerlaw_mgsio3_c_ext.fits')) as hdu_list:
        database.create_dataset(
            'dust/powerlaw/mgsio3/crystalline/cross_section/',
            data=hdu_list[0].data)

        database.create_dataset('dust/powerlaw/mgsio3/crystalline/wavelength/',
                                data=hdu_list[1].data)

        database.create_dataset('dust/powerlaw/mgsio3/crystalline/radius_max/',
                                data=hdu_list[2].data)

        database.create_dataset('dust/powerlaw/mgsio3/crystalline/exponent/',
                                data=hdu_list[3].data)

    print(' [DONE]')
Example #14
def add_irtf(input_path: str,
             database: h5py._hl.files.File,
             sptypes: Optional[List[str]] = None) -> None:
    """
    Function for adding the IRTF Spectral Library to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        Database.
    sptypes : list(str), None
        List with the spectral types ('F', 'G', 'K', 'M', 'L', 'T'). All spectral types are
        included if set to ``None``.

    Returns
    -------
    NoneType
        None
    """

    if sptypes is None:
        sptypes = ["F", "G", "K", "M", "L", "T"]

    parallax_url = "https://home.strw.leidenuniv.nl/~stolker/species/parallax.dat"
    parallax_file = os.path.join(input_path, "parallax.dat")

    if not os.path.isfile(parallax_file):
        urllib.request.urlretrieve(parallax_url, parallax_file)

    parallax_data = pd.pandas.read_csv(
        parallax_file,
        usecols=[0, 1, 2],
        names=["object", "parallax", "parallax_error"],
        delimiter=",",
        dtype={
            "object": str,
            "parallax": float,
            "parallax_error": float
        },
    )

    datadir = os.path.join(input_path, "irtf")

    if not os.path.exists(datadir):
        os.makedirs(datadir)

    data_file = {
        "F": os.path.join(input_path, "irtf/F_fits_091201.tar"),
        "G": os.path.join(input_path, "irtf/G_fits_091201.tar"),
        "K": os.path.join(input_path, "irtf/K_fits_091201.tar"),
        "M": os.path.join(input_path, "irtf/M_fits_091201.tar"),
        "L": os.path.join(input_path, "irtf/L_fits_091201.tar"),
        "T": os.path.join(input_path, "irtf/T_fits_091201.tar"),
    }

    data_folder = {
        "F": os.path.join(input_path, "irtf/F_fits_091201"),
        "G": os.path.join(input_path, "irtf/G_fits_091201"),
        "K": os.path.join(input_path, "irtf/K_fits_091201"),
        "M": os.path.join(input_path, "irtf/M_fits_091201"),
        "L": os.path.join(input_path, "irtf/L_fits_091201"),
        "T": os.path.join(input_path, "irtf/T_fits_091201"),
    }

    data_type = {
        "F": "F stars (4.4 MB)",
        "G": "G stars (5.6 MB)",
        "K": "K stars (5.5 MB)",
        "M": "M stars (7.5 MB)",
        "L": "L dwarfs (850 kB)",
        "T": "T dwarfs (100 kB)",
    }

    url_root = "http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/"

    url = {
        "F": url_root + "F_fits_091201.tar",
        "G": url_root + "G_fits_091201.tar",
        "K": url_root + "K_fits_091201.tar",
        "M": url_root + "M_fits_091201.tar",
        "L": url_root + "L_fits_091201.tar",
        "T": url_root + "T_fits_091201.tar",
    }

    for item in sptypes:
        if not os.path.isfile(data_file[item]):
            print(
                f"Downloading IRTF Spectral Library - {data_type[item]}...",
                end="",
                flush=True,
            )
            urllib.request.urlretrieve(url[item], data_file[item])
            print(" [DONE]")

    print("Unpacking IRTF Spectral Library...", end="", flush=True)

    for item in sptypes:
        tar = tarfile.open(data_file[item])
        tar.extractall(path=datadir)
        tar.close()

    print(" [DONE]")

    database.create_group("spectra/irtf")

    print_message = ""

    for item in sptypes:
        for root, _, files in os.walk(data_folder[item]):

            for _, filename in enumerate(files):
                if filename[-9:] != "_ext.fits":
                    fitsfile = os.path.join(root, filename)

                    spdata, header = fits.getdata(fitsfile, header=True)
                    spdata = np.transpose(spdata)

                    name = header["OBJECT"]
                    sptype = header["SPTYPE"]

                    if name[-2:] == "AB":
                        name = name[:-2]
                    elif name[-3:] == "ABC":
                        name = name[:-3]

                    spt_split = sptype.split()

                    if item in ["L", "T"] or spt_split[1][0] == "V":
                        empty_message = len(print_message) * " "
                        print(f"\r{empty_message}", end="")

                        print_message = f"Adding spectra... {name}"
                        print(f"\r{print_message}", end="")

                        simbad_id = query_util.get_simbad(name)

                        if simbad_id is not None:
                            # For backward compatibility
                            if not isinstance(simbad_id, str):
                                simbad_id = simbad_id.decode("utf-8")

                            par_select = parallax_data[parallax_data["object"]
                                                       == simbad_id]

                            if not par_select.empty:
                                parallax = (
                                    par_select["parallax"],
                                    par_select["parallax_error"],
                                )
                            else:
                                simbad_id, parallax = query_util.get_parallax(
                                    name)

                        else:
                            parallax = (np.nan, np.nan)

                        sptype = data_util.update_sptype(np.array([sptype]))[0]

                        dset = database.create_dataset(f"spectra/irtf/{name}",
                                                       data=spdata)

                        dset.attrs["name"] = str(name).encode()
                        dset.attrs["sptype"] = str(sptype).encode()
                        dset.attrs["simbad"] = str(simbad_id).encode()
                        dset.attrs["parallax"] = parallax[0]
                        dset.attrs["parallax_error"] = parallax[1]

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()