def load_datasets(self, hdf_file: h5py._hl.files.File) -> None:
    """
    Creates a dictionary of HDF datasets (`self.ds`) which have been
    previously created (for restart jobs only).

    Args:
    ----
        hdf_file (h5py._hl.files.File) : HDF file containing all the datasets.
    """
    self.ds = {}  # initialize dictionary of datasets

    # use the names of the datasets as the keys in `self.ds`
    for ds_name in self.dataset_names:
        self.ds[ds_name] = hdf_file.get(ds_name)

def create_datasets(self, hdf_file: h5py._hl.files.File) -> None:
    """
    Creates a dictionary of HDF5 datasets (`self.ds`).

    Args:
    ----
        hdf_file (h5py._hl.files.File) : HDF5 file which will contain the datasets.
    """
    self.ds = {}  # initialize

    for ds_name in self.dataset_names:
        self.ds[ds_name] = hdf_file.create_dataset(
            ds_name,
            (self.total_n_subgraphs, *self.dims[ds_name]),
            chunks=True,  # must be True for resizing later
            dtype=np.dtype("int8"))

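# Illustrative sketch (not part of the class above; the file name and dataset
# name are hypothetical): chunked storage is what allows HDF5 datasets to be
# resized, but h5py additionally requires `maxshape` to be set at creation
# time before `Dataset.resize` can grow an axis, e.g. on a restart job.

def _example_resizable_dataset(path: str = "example.h5") -> None:
    import h5py
    import numpy as np

    with h5py.File(path, "w") as hdf_file:
        ds = hdf_file.create_dataset(
            "nodes",
            shape=(100, 10),
            maxshape=(None, 10),  # first axis may grow later
            chunks=True,          # chunked layout is required for resizing
            dtype=np.dtype("int8"),
        )

        ds.resize(150, axis=0)    # grow from 100 to 150 rows
        ds[100:150] = np.zeros((50, 10), dtype=np.int8)
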
def write_data(
    model: str,
    parameters: List[str],
    database: h5py._hl.files.File,
    data_sorted: List[np.ndarray],
) -> None:
    """
    Function for writing the model spectra and parameters to the database.

    Parameters
    ----------
    model : str
        Atmosphere model.
    parameters : list(str)
        Model parameters.
    database : h5py._hl.files.File
        Database.
    data_sorted : list(np.ndarray)
        Sorted model data with the parameter values, wavelength points (um),
        and flux densities (W m-2 um-1).

    Returns
    -------
    NoneType
        None
    """
    n_param = len(parameters)

    if f"models/{model}" in database:
        del database[f"models/{model}"]

    dset = database.create_group(f"models/{model}")

    dset.attrs["n_param"] = n_param

    for i, item in enumerate(parameters):
        dset.attrs[f"parameter{i}"] = item

        database.create_dataset(f"models/{model}/{item}", data=data_sorted[i])

    database.create_dataset(f"models/{model}/wavelength",
                            data=data_sorted[n_param])

    database.create_dataset(f"models/{model}/flux",
                            data=data_sorted[n_param + 1])

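# Illustrative usage sketch with hypothetical values: for a two-parameter grid,
# `data_sorted` holds the parameter axes first, then the wavelength points (um),
# and finally the flux grid (W m-2 um-1), matching the indexing used above.

def _example_write_toy_grid(database: h5py._hl.files.File) -> None:
    teff = np.array([1000.0, 1500.0, 2000.0])  # (K)
    logg = np.array([4.0, 4.5])  # (dex)
    wavelength = np.linspace(1.0, 2.5, 100)  # (um)
    flux = np.ones((teff.size, logg.size, wavelength.size))  # (W m-2 um-1)

    write_data("toy-model", ["teff", "logg"], database,
               [teff, logg, wavelength, flux])
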
def add_missing(model: str, parameters: List[str],
                database: h5py._hl.files.File) -> None:
    """
    Function for adding missing grid points with a linear interpolation.

    Parameters
    ----------
    model : str
        Atmosphere model.
    parameters : list(str)
        Model parameters.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """
    print("Number of grid points per parameter:")

    grid_shape = []
    param_data = []

    for i, item in enumerate(parameters):
        grid_shape.append(database[f"models/{model}/{item}"].shape[0])
        param_data.append(np.asarray(database[f"models/{model}/{item}"]))
        print(f" - {item}: {grid_shape[i]}")

    flux = np.asarray(database[f"models/{model}/flux"])  # (W m-2 um-1)
    flux = np.log10(flux)

    count_total = 0
    count_interp = 0
    count_missing = 0

    if len(parameters) == 1:
        # Blackbody spectra
        pass

    elif 2 <= len(parameters) <= 5:
        n_param = len(parameters)

        find_missing = np.zeros(grid_shape, dtype=bool)

        values = []
        points = [[] for _ in range(n_param)]
        new_points = [[] for _ in range(n_param)]

        print("Fix missing grid points with a linear interpolation:")

        # Loop over all grid points, the first index being Teff
        for grid_idx in np.ndindex(*grid_shape):
            if np.isinf(np.sum(flux[grid_idx])):
                print(" - ", end="")
                print(", ".join(f"{parameters[d]} = {param_data[d][grid_idx[d]]}"
                                for d in range(n_param)))

                i = grid_idx[0]

                if 0 < i < grid_shape[0] - 1:
                    idx_low = (i - 1, ) + grid_idx[1:]
                    idx_up = (i + 1, ) + grid_idx[1:]

                    check_low = np.isinf(np.sum(flux[idx_low]))
                    check_up = np.isinf(np.sum(flux[idx_up]))

                    # Linear scaling of the intermediate Teff point
                    scaling = (param_data[0][i] - param_data[0][i - 1]) / (
                        param_data[0][i + 1] - param_data[0][i - 1])

                    if not check_low and not check_up:
                        flux_low = flux[idx_low]
                        flux_up = flux[idx_up]

                        flux[grid_idx] = (flux_low * (1.0 - scaling)
                                          + flux_up * scaling)

                        count_interp += 1

                    else:
                        find_missing[grid_idx] = True

                else:
                    find_missing[grid_idx] = True

            else:
                for d in range(n_param):
                    points[d].append(param_data[d][grid_idx[d]])

                values.append(flux[grid_idx])

            for d in range(n_param):
                new_points[d].append(param_data[d][grid_idx[d]])

            count_total += 1

        values = np.asarray(values)
        points = np.asarray(points)
        new_points = np.asarray(new_points)

        if np.sum(find_missing) > 0:
            flux_int = griddata(points.T, values, new_points.T,
                                method="linear", fill_value=np.nan)

            count = 0

            for grid_idx in np.ndindex(*grid_shape):
                if np.isnan(np.sum(flux_int[count, :])):
                    count_missing += 1

                elif np.isinf(np.sum(flux[grid_idx])):
                    flux[grid_idx] = flux_int[count, :]
                    count_interp += 1

                count += 1

            if count_missing > 0:
                print(f"Could not interpolate {count_missing} grid points "
                      f"so storing zeros instead. [WARNING]\n"
                      f"The grid points that are missing:")

                for i in range(flux_int.shape[0]):
                    if np.isnan(np.sum(flux_int[i, :])):
                        print(" - ", end="")
                        print(", ".join(
                            f"{parameters[d]} = {new_points[d][i]}"
                            for d in range(n_param)))

    else:
        raise ValueError("The add_missing function is currently not "
                         "compatible with more than 5 model parameters.")

    print(f"Number of stored grid points: {count_total}")
    print(f"Number of interpolated grid points: {count_interp}")
    print(f"Number of missing grid points: {count_missing}")

    del database[f"models/{model}/flux"]

    database.create_dataset(f"models/{model}/flux", data=10.0**flux)

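# Minimal sketch of the interpolation step used above, with hypothetical
# values: the fluxes of the available grid points are passed to
# scipy.interpolate.griddata as scattered data and the flux at a missing
# (teff, logg) point is estimated by linear interpolation; points outside
# the convex hull of the available points come back as NaN.

def _example_griddata_fill() -> np.ndarray:
    from scipy.interpolate import griddata

    # Available grid points (teff, logg) and their log10 fluxes
    points = np.array([[1000.0, 4.0], [1000.0, 5.0],
                       [2000.0, 4.0], [2000.0, 5.0]])
    values = np.array([-15.0, -15.2, -14.0, -14.1])

    # Missing grid point to be filled
    new_point = np.array([[1500.0, 4.5]])

    return griddata(points, values, new_point,
                    method="linear", fill_value=np.nan)
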
def add_bonnefoy2014(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the SINFONI spectra of young, M- and L-type dwarfs from
    `Bonnefoy et al. (2014) <https://ui.adsabs.harvard.edu/abs/2014A%26A...562A.127B/abstract>`_
    to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """
    print_text = "spectra of young M/L type objects from Bonnefoy et al. 2014"

    data_url = "http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?J/A+A/562/A127/"
    data_file = os.path.join(input_path, "J_A+A_562_A127.tar.gz")
    data_folder = os.path.join(input_path, "bonnefoy+2014/")

    if not os.path.isfile(data_file):
        print(f"Downloading {print_text} (2.3 MB)...", end="", flush=True)
        urllib.request.urlretrieve(data_url, data_file)
        print(" [DONE]")

    if os.path.exists(data_folder):
        shutil.rmtree(data_folder)

    print(f"Unpacking {print_text} (2.3 MB)...", end="", flush=True)
    tar = tarfile.open(data_file)
    tar.extractall(data_folder)
    tar.close()
    print(" [DONE]")

    spec_dict = {}

    with gzip.open(os.path.join(data_folder, "stars.dat.gz"), "r") as gzip_file:
        for line in gzip_file:
            name = line[:13].decode().strip()
            files = line[80:].decode().strip().split()
            sptype = line[49:56].decode().strip()

            if name == "NAME 2M1207A":
                name = "2M1207A"

            if len(sptype) == 0:
                sptype = None
            elif "." in sptype:
                sptype = sptype[:4]
            else:
                sptype = sptype[:2]

            if name == "Cha1109":
                sptype = "M9"
            elif name == "DH Tau B":
                sptype = "M9"
            elif name == "TWA 22A":
                sptype = "M6"
            elif name == "TWA 22B":
                sptype = "M6"
            elif name == "CT Cha b":
                sptype = "M9"

            spec_dict[name] = {"name": name, "sptype": sptype, "files": files}

    database.create_group("spectra/bonnefoy+2014")

    fits_folder = os.path.join(data_folder, "sp")

    print_message = ""

    for _, _, files in os.walk(fits_folder):
        for _, filename in enumerate(files):
            fname_split = filename.split("_")

            data = fits.getdata(os.path.join(fits_folder, filename))

            for name, value in spec_dict.items():
                if filename in value["files"]:
                    if name == "TWA 22AB":
                        # Binary spectrum
                        continue

                    if "JHK.fits" in fname_split:
                        spec_dict[name]["JHK"] = data

                    elif "J" in fname_split:
                        spec_dict[name]["J"] = data

                    elif "H+K" in fname_split or "HK" in fname_split:
                        spec_dict[name]["HK"] = data

    for name, value in spec_dict.items():
        empty_message = len(print_message) * " "
        print(f"\r{empty_message}", end="")

        print_message = f"Adding spectra... {name}"
        print(f"\r{print_message}", end="")

        if "JHK" in value:
            sp_data = value["JHK"]

        elif "J" in value and "HK" in value:
            sp_data = np.vstack((value["J"], value["HK"]))

        else:
            continue

        dset = database.create_dataset(f"spectra/bonnefoy+2014/{name}",
                                       data=sp_data)

        dset.attrs["name"] = str(name).encode()
        dset.attrs["sptype"] = str(value["sptype"]).encode()

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()

def add_spex(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the SpeX Prism Spectral Library to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """
    parallax_url = "https://home.strw.leidenuniv.nl/~stolker/species/parallax.dat"
    parallax_file = os.path.join(input_path, "parallax.dat")

    if not os.path.isfile(parallax_file):
        urllib.request.urlretrieve(parallax_url, parallax_file)

    parallax_data = pd.read_csv(
        parallax_file,
        usecols=[0, 1, 2],
        names=["object", "parallax", "parallax_error"],
        delimiter=",",
        dtype={"object": str, "parallax": float, "parallax_error": float},
    )

    database.create_group("spectra/spex")

    data_path = os.path.join(input_path, "spex")

    if not os.path.exists(data_path):
        os.makedirs(data_path)

    url_all = "http://svo2.cab.inta-csic.es/vocats/v2/spex/cs.php?" \
              "RA=180.000000&DEC=0.000000&SR=180.000000&VERB=2"

    xml_file_spex = os.path.join(data_path, "spex.xml")

    if not os.path.isfile(xml_file_spex):
        urllib.request.urlretrieve(url_all, xml_file_spex)

    table = parse_single_table(xml_file_spex)

    # name = table.array['name']
    twomass = table.array["name2m"]
    url = table.array["access_url"]

    unique_id = []

    print_message = ""

    for i, item in enumerate(url):
        if twomass[i] not in unique_id:
            if isinstance(twomass[i], str):
                xml_file_1 = os.path.join(data_path, twomass[i] + ".xml")
            else:
                # Use decode for backward compatibility
                xml_file_1 = os.path.join(
                    data_path, twomass[i].decode("utf-8") + ".xml")

            if not os.path.isfile(xml_file_1):
                if isinstance(item, str):
                    urllib.request.urlretrieve(item, xml_file_1)
                else:
                    urllib.request.urlretrieve(item.decode("utf-8"), xml_file_1)

            table = parse_single_table(xml_file_1)

            name = table.array["ID"]
            url = table.array["access_url"]

            if isinstance(name[0], str):
                name = name[0]
            else:
                name = name[0].decode("utf-8")

            empty_message = len(print_message) * " "
            print(f"\r{empty_message}", end="")

            print_message = f"Downloading SpeX Prism Spectral Library... {name}"
            print(f"\r{print_message}", end="")

            xml_file_2 = os.path.join(data_path, f"spex_{name}.xml")

            if not os.path.isfile(xml_file_2):
                if isinstance(url[0], str):
                    urllib.request.urlretrieve(url[0], xml_file_2)
                else:
                    urllib.request.urlretrieve(url[0].decode("utf-8"), xml_file_2)

            unique_id.append(twomass[i])

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Downloading SpeX Prism Spectral Library... [DONE]"
    print(f"\r{print_message}")

    h_twomass = photometry.SyntheticPhotometry("2MASS/2MASS.H")

    # 2MASS H band zero point for 0 mag (Cohen et al. 2003)
    h_zp = 1.133e-9  # (W m-2 um-1)

    for votable in os.listdir(data_path):
        if votable.startswith("spex_") and votable.endswith(".xml"):
            xml_file = os.path.join(data_path, votable)

            table = parse_single_table(xml_file)

            wavelength = table.array["wavelength"]  # (Angstrom)
            flux = table.array["flux"]  # Normalized units

            wavelength = np.array(wavelength * 1e-4)  # (um)
            flux = np.array(flux)  # (a.u.)
            error = np.full(flux.size, np.nan)

            # 2MASS magnitudes
            j_mag = table.get_field_by_id("jmag").value
            h_mag = table.get_field_by_id("hmag").value
            ks_mag = table.get_field_by_id("ksmag").value

            if not isinstance(j_mag, str):
                j_mag = j_mag.decode("utf-8")

            if not isinstance(h_mag, str):
                h_mag = h_mag.decode("utf-8")

            if not isinstance(ks_mag, str):
                ks_mag = ks_mag.decode("utf-8")

            if j_mag == "":
                j_mag = np.nan
            else:
                j_mag = float(j_mag)

            if h_mag == "":
                h_mag = np.nan
            else:
                h_mag = float(h_mag)

            if ks_mag == "":
                ks_mag = np.nan
            else:
                ks_mag = float(ks_mag)

            name = table.get_field_by_id("name").value

            if not isinstance(name, str):
                name = name.decode("utf-8")

            twomass_id = table.get_field_by_id("name2m").value

            if not isinstance(twomass_id, str):
                twomass_id = twomass_id.decode("utf-8")

            # Optical spectral type
            try:
                sptype_opt = table.get_field_by_id("optspty").value

                if not isinstance(sptype_opt, str):
                    sptype_opt = sptype_opt.decode("utf-8")

                sptype_opt = data_util.update_sptype(np.array([sptype_opt]))[0]

            except KeyError:
                sptype_opt = None

            # Near-infrared spectral type
            try:
                sptype_nir = table.get_field_by_id("nirspty").value

                if not isinstance(sptype_nir, str):
                    sptype_nir = sptype_nir.decode("utf-8")

                sptype_nir = data_util.update_sptype(np.array([sptype_nir]))[0]

            except KeyError:
                sptype_nir = None

            h_flux, _ = h_twomass.magnitude_to_flux(h_mag, error=None,
                                                    zp_flux=h_zp)

            phot = h_twomass.spectrum_to_flux(wavelength, flux)  # Normalized units

            flux *= h_flux / phot[0]  # (W m-2 um-1)

            spdata = np.column_stack([wavelength, flux, error])

            simbad_id = query_util.get_simbad(f"2MASS {twomass_id}")

            if simbad_id is not None:
                if not isinstance(simbad_id, str):
                    simbad_id = simbad_id.decode("utf-8")

                par_select = parallax_data[parallax_data["object"] == simbad_id]

                if not par_select.empty:
                    parallax = (
                        par_select["parallax"].values[0],
                        par_select["parallax_error"].values[0],
                    )
                else:
                    parallax = (np.nan, np.nan)

            else:
                parallax = (np.nan, np.nan)

            print_message = f"Adding spectra... {name}"
            print(f"\r{print_message:<72}", end="")

            dset = database.create_dataset(f"spectra/spex/{name}", data=spdata)

            dset.attrs["name"] = str(name).encode()

            if sptype_opt is not None:
                dset.attrs["sptype"] = str(sptype_opt).encode()
            elif sptype_nir is not None:
                dset.attrs["sptype"] = str(sptype_nir).encode()
            else:
                dset.attrs["sptype"] = str("None").encode()

            dset.attrs["simbad"] = str(simbad_id).encode()
            dset.attrs["2MASS/2MASS.J"] = j_mag
            dset.attrs["2MASS/2MASS.H"] = h_mag
            dset.attrs["2MASS/2MASS.Ks"] = ks_mag
            dset.attrs["parallax"] = parallax[0]  # (mas)
            dset.attrs["parallax_error"] = parallax[1]  # (mas)

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message:<72}")

    database.close()

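# Minimal sketch of the flux calibration applied above, with hypothetical
# numbers: a spectrum in normalized units is rescaled so that its band-averaged
# H-band flux matches the flux implied by the catalogued 2MASS H magnitude,
# using F = zp_flux * 10**(-0.4 * mag) with the Cohen et al. (2003) zero point.
# A flat filter response is assumed here; the function above instead uses the
# actual 2MASS H profile via SyntheticPhotometry.

def _example_h_band_calibration() -> np.ndarray:
    h_zp = 1.133e-9  # (W m-2 um-1), 2MASS H zero-point flux for 0 mag
    h_mag = 10.5     # hypothetical 2MASS H magnitude

    h_flux = h_zp * 10.0 ** (-0.4 * h_mag)  # (W m-2 um-1)

    wavelength = np.linspace(1.5, 1.8, 50)  # (um), hypothetical H-band grid
    flux_norm = np.ones(50)                 # normalized spectrum

    # Band-averaged flux of the normalized spectrum (flat response assumed)
    phot_norm = np.trapz(flux_norm, wavelength) / (wavelength[-1] - wavelength[0])

    return flux_norm * h_flux / phot_norm   # (W m-2 um-1)
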
def add_kesseli2017(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the SDSS stellar spectra from Kesseli et al. (2017)
    to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """
    data_url = "https://cdsarc.unistra.fr/viz-bin/nph-Cat/tar.gz?J/ApJS/230/16"
    data_file = os.path.join(input_path, "J_ApJS_230_16.tar.gz")
    data_folder = os.path.join(input_path, "kesseli+2017/")

    if not os.path.isfile(data_file):
        print("Downloading SDSS spectra from Kesseli et al. 2017 (145 MB)...",
              end="", flush=True)
        urllib.request.urlretrieve(data_url, data_file)
        print(" [DONE]")

    if os.path.exists(data_folder):
        shutil.rmtree(data_folder)

    print("Unpacking SDSS spectra from Kesseli et al. 2017 (145 MB)...",
          end="", flush=True)
    tar = tarfile.open(data_file)
    tar.extractall(data_folder)
    tar.close()
    print(" [DONE]")

    database.create_group("spectra/kesseli+2017")

    fits_folder = os.path.join(data_folder, "fits")

    print_message = ""

    for _, _, files in os.walk(fits_folder):
        for _, filename in enumerate(files):
            with fits.open(os.path.join(fits_folder, filename)) as hdu_list:
                data = hdu_list[1].data

                wavelength = 1e-4 * 10.0**data["LogLam"]  # (um)
                flux = data["Flux"]  # Normalized units
                error = data["PropErr"]  # Normalized units

            name = filename[:-5].replace("_", " ")

            file_split = filename.split("_")
            file_split = file_split[0].split(".")
            sptype = file_split[0]

            spdata = np.column_stack([wavelength, flux, error])

            empty_message = len(print_message) * " "
            print(f"\r{empty_message}", end="")

            print_message = f"Adding spectra... {name}"
            print(f"\r{print_message}", end="")

            dset = database.create_dataset(f"spectra/kesseli+2017/{name}",
                                           data=spdata)

            dset.attrs["name"] = str(name).encode()
            dset.attrs["sptype"] = str(sptype).encode()

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()

def add_optical_constants(input_path: str,
                          database: h5py._hl.files.File) -> None:
    """
    Function for adding the optical constants of crystalline and amorphous
    MgSiO3 and Fe to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """
    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = "https://home.strw.leidenuniv.nl/~stolker/species/optical_constants.zip"

    data_file = os.path.join(input_path, "optical_constants.zip")

    if not os.path.isfile(data_file):
        print("Downloading optical constants (87 kB)...", end="", flush=True)
        urllib.request.urlretrieve(url, data_file)
        print(" [DONE]")

    print("Unpacking optical constants...", end="", flush=True)

    with zipfile.ZipFile(data_file, "r") as zip_ref:
        zip_ref.extractall(input_path)

    print(" [DONE]")

    print("Adding optical constants of MgSiO3...", end="")

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/crystalline/"
        "mgsio3_jaeger_98_scott_96_axis1.dat",
    )

    data = np.loadtxt(nk_file)
    database.create_dataset("dust/mgsio3/crystalline/axis_1", data=data)

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/crystalline/"
        "mgsio3_jaeger_98_scott_96_axis2.dat",
    )

    data = np.loadtxt(nk_file)
    database.create_dataset("dust/mgsio3/crystalline/axis_2", data=data)

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/crystalline/"
        "mgsio3_jaeger_98_scott_96_axis3.dat",
    )

    data = np.loadtxt(nk_file)
    database.create_dataset("dust/mgsio3/crystalline/axis_3", data=data)

    nk_file = os.path.join(
        input_path,
        "optical_constants/mgsio3/amorphous/"
        "mgsio3_jaeger_2003_reformat.dat",
    )

    data = np.loadtxt(nk_file)
    database.create_dataset("dust/mgsio3/amorphous", data=data)

    print(" [DONE]")

    print("Adding optical constants of Fe...", end="")

    nk_file = os.path.join(
        input_path, "optical_constants/fe/crystalline/fe_henning_1996.dat")

    data = np.loadtxt(nk_file)
    database.create_dataset("dust/fe/crystalline", data=data)

    nk_file = os.path.join(
        input_path, "optical_constants/fe/amorphous/fe_pollack_1994.dat")

    data = np.loadtxt(nk_file)
    database.create_dataset("dust/fe/amorphous", data=data)

    print(" [DONE]")

def add_cross_sections(input_path: str,
                       database: h5py._hl.files.File) -> None:
    """
    Function for adding the extinction cross section of crystalline MgSiO3
    for a log-normal and power-law size distribution to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """
    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = "https://home.strw.leidenuniv.nl/~stolker/species/lognorm_mgsio3_c_ext.fits"

    data_file = os.path.join(input_path, "lognorm_mgsio3_c_ext.fits")

    print("Downloading log-normal dust cross sections (231 kB)...",
          end="", flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(" [DONE]")

    print("Adding log-normal dust cross sections:")

    with fits.open(os.path.join(input_path, "lognorm_mgsio3_c_ext.fits")) as hdu_list:
        database.create_dataset(
            "dust/lognorm/mgsio3/crystalline/cross_section",
            data=hdu_list[0].data)

        print(f" - Data shape (n_wavelength, n_radius, n_sigma): "
              f"{hdu_list[0].data.shape}")

        database.create_dataset("dust/lognorm/mgsio3/crystalline/wavelength",
                                data=hdu_list[1].data)

        data_range = f"{np.amin(hdu_list[1].data)} - {np.amax(hdu_list[1].data)}"
        print(f" - Wavelength range: {data_range} um")

        database.create_dataset("dust/lognorm/mgsio3/crystalline/radius_g",
                                data=hdu_list[2].data)

        data_range = f"{np.amin(hdu_list[2].data)} - {np.amax(hdu_list[2].data)}"
        print(f" - Mean geometric radius range: {data_range} um")

        database.create_dataset("dust/lognorm/mgsio3/crystalline/sigma_g",
                                data=hdu_list[3].data)

        data_range = f"{np.amin(hdu_list[3].data)} - {np.amax(hdu_list[3].data)}"
        print(f" - Geometric standard deviation range: {data_range}")

    url = "https://home.strw.leidenuniv.nl/~stolker/species/powerlaw_mgsio3_c_ext.fits"

    data_file = os.path.join(input_path, "powerlaw_mgsio3_c_ext.fits")

    print("Downloading power-law dust cross sections (231 kB)...",
          end="", flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(" [DONE]")

    print("Adding power-law dust cross sections:")

    with fits.open(os.path.join(input_path, "powerlaw_mgsio3_c_ext.fits")) as hdu_list:
        database.create_dataset(
            "dust/powerlaw/mgsio3/crystalline/cross_section",
            data=hdu_list[0].data)

        print(f" - Data shape (n_wavelength, n_radius, n_exponent): "
              f"{hdu_list[0].data.shape}")

        database.create_dataset("dust/powerlaw/mgsio3/crystalline/wavelength",
                                data=hdu_list[1].data)

        data_range = f"{np.amin(hdu_list[1].data)} - {np.amax(hdu_list[1].data)}"
        print(f" - Wavelength range: {data_range} um")

        database.create_dataset("dust/powerlaw/mgsio3/crystalline/radius_max",
                                data=hdu_list[2].data)

        data_range = f"{np.amin(hdu_list[2].data)} - {np.amax(hdu_list[2].data)}"
        print(f" - Maximum grain radius range: {data_range} um")

        database.create_dataset("dust/powerlaw/mgsio3/crystalline/exponent",
                                data=hdu_list[3].data)

        data_range = f"{np.amin(hdu_list[3].data)} - {np.amax(hdu_list[3].data)}"
        print(f" - Power-law exponent range: {data_range}")

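# Minimal sketch (the database file name is hypothetical) of how the
# log-normal cross sections stored above could be read back: the 3-D array is
# indexed as (n_wavelength, n_radius, n_sigma), with the corresponding axes
# stored as separate datasets next to it.

def _example_read_lognorm_cross_section(database_path: str = "species_database.hdf5"):
    import h5py

    with h5py.File(database_path, "r") as hdf_file:
        cross_section = np.asarray(
            hdf_file["dust/lognorm/mgsio3/crystalline/cross_section"])
        wavelength = np.asarray(
            hdf_file["dust/lognorm/mgsio3/crystalline/wavelength"])
        radius_g = np.asarray(
            hdf_file["dust/lognorm/mgsio3/crystalline/radius_g"])
        sigma_g = np.asarray(
            hdf_file["dust/lognorm/mgsio3/crystalline/sigma_g"])

    # Cross section at the first wavelength, smallest radius, smallest sigma
    return wavelength[0], radius_g[0], sigma_g[0], cross_section[0, 0, 0]
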
def add_irtf(input_path: str,
             database: h5py._hl.files.File,
             sptypes: Optional[List[str]] = None) -> None:
    """
    Function for adding the IRTF Spectral Library to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        Database.
    sptypes : list(str), None
        List with the spectral types ('F', 'G', 'K', 'M', 'L', 'T').
        All spectral types are included if set to ``None``.

    Returns
    -------
    NoneType
        None
    """
    if sptypes is None:
        sptypes = ['F', 'G', 'K', 'M', 'L', 'T']

    distance_url = 'https://people.phys.ethz.ch/~stolkert/species/distance.dat'
    distance_file = os.path.join(input_path, 'distance.dat')

    if not os.path.isfile(distance_file):
        urllib.request.urlretrieve(distance_url, distance_file)

    distance_data = pd.read_csv(distance_file,
                                usecols=[0, 3, 4],
                                names=['object', 'distance', 'distance_error'],
                                delimiter=',',
                                dtype={'object': str,
                                       'distance': float,
                                       'distance_error': float})

    datadir = os.path.join(input_path, 'irtf')

    if not os.path.exists(datadir):
        os.makedirs(datadir)

    data_file = {'F': os.path.join(input_path, 'irtf/F_fits_091201.tar'),
                 'G': os.path.join(input_path, 'irtf/G_fits_091201.tar'),
                 'K': os.path.join(input_path, 'irtf/K_fits_091201.tar'),
                 'M': os.path.join(input_path, 'irtf/M_fits_091201.tar'),
                 'L': os.path.join(input_path, 'irtf/L_fits_091201.tar'),
                 'T': os.path.join(input_path, 'irtf/T_fits_091201.tar')}

    data_folder = {'F': os.path.join(input_path, 'irtf/F_fits_091201'),
                   'G': os.path.join(input_path, 'irtf/G_fits_091201'),
                   'K': os.path.join(input_path, 'irtf/K_fits_091201'),
                   'M': os.path.join(input_path, 'irtf/M_fits_091201'),
                   'L': os.path.join(input_path, 'irtf/L_fits_091201'),
                   'T': os.path.join(input_path, 'irtf/T_fits_091201')}

    data_type = {'F': 'F stars (4.4 MB)',
                 'G': 'G stars (5.6 MB)',
                 'K': 'K stars (5.5 MB)',
                 'M': 'M stars (7.5 MB)',
                 'L': 'L dwarfs (850 kB)',
                 'T': 'T dwarfs (100 kB)'}

    url_root = 'http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/'

    url = {'F': url_root+'F_fits_091201.tar',
           'G': url_root+'G_fits_091201.tar',
           'K': url_root+'K_fits_091201.tar',
           'M': url_root+'M_fits_091201.tar',
           'L': url_root+'L_fits_091201.tar',
           'T': url_root+'T_fits_091201.tar'}

    for item in sptypes:
        if not os.path.isfile(data_file[item]):
            print(f'Downloading IRTF Spectral Library - {data_type[item]}...',
                  end='', flush=True)
            urllib.request.urlretrieve(url[item], data_file[item])
            print(' [DONE]')

    print('Unpacking IRTF Spectral Library...', end='', flush=True)

    for item in sptypes:
        tar = tarfile.open(data_file[item])
        tar.extractall(path=datadir)
        tar.close()

    print(' [DONE]')

    database.create_group('spectra/irtf')

    for item in sptypes:
        for root, _, files in os.walk(data_folder[item]):
            for _, filename in enumerate(files):
                if filename[-9:] != '_ext.fits':
                    fitsfile = os.path.join(root, filename)

                    spdata, header = fits.getdata(fitsfile, header=True)

                    name = header['OBJECT']
                    sptype = header['SPTYPE']

                    if name[-2:] == 'AB':
                        name = name[:-2]
                    elif name[-3:] == 'ABC':
                        name = name[:-3]

                    spt_split = sptype.split()

                    if item in ['L', 'T'] or spt_split[1][0] == 'V':
                        print_message = f'Adding IRTF Spectral Library... {name}'
                        print(f'\r{print_message:<70}', end='')

                        simbad_id = query_util.get_simbad(name)

                        if simbad_id is not None:
                            simbad_id = simbad_id.decode('utf-8')

                            dist_select = distance_data.loc[
                                distance_data['object'] == simbad_id]

                            if not dist_select.empty:
                                distance = (dist_select['distance'],
                                            dist_select['distance_error'])
                            else:
                                simbad_id, distance = query_util.get_distance(name)

                        else:
                            distance = (np.nan, np.nan)

                        sptype = data_util.update_sptype(np.array([sptype]))[0]

                        dset = database.create_dataset(f'spectra/irtf/{name}',
                                                       data=spdata)

                        dset.attrs['name'] = str(name).encode()
                        dset.attrs['sptype'] = str(sptype).encode()
                        dset.attrs['simbad'] = str(simbad_id).encode()
                        dset.attrs['distance'] = distance[0]
                        dset.attrs['distance_error'] = distance[1]

    print_message = 'Adding IRTF Spectral Library... [DONE]'
    print(f'\r{print_message:<70}')

    database.close()

def add_allers2013(input_path: str, database: h5py._hl.files.File) -> None:
    """
    Function for adding the spectra of young, M- and L-type dwarfs from
    `Allers & Liu (2013) <https://ui.adsabs.harvard.edu/abs/2013ApJ...772...79A/abstract>`_
    to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        The HDF5 database.

    Returns
    -------
    NoneType
        None
    """
    print_text = "spectra of young M/L type objects from Allers & Liu 2013"

    data_url = "https://home.strw.leidenuniv.nl/~stolker/species/allers_liu_2013.tgz"
    data_file = os.path.join(input_path, "allers_liu_2013.tgz")
    data_folder = os.path.join(input_path, "allers+2013/")

    if not os.path.isfile(data_file):
        print(f"Downloading {print_text} (173 kB)...", end="", flush=True)
        urllib.request.urlretrieve(data_url, data_file)
        print(" [DONE]")

    if os.path.exists(data_folder):
        shutil.rmtree(data_folder)

    print(f"Unpacking {print_text} (173 kB)...", end="", flush=True)
    tar = tarfile.open(data_file)
    tar.extractall(data_folder)
    tar.close()
    print(" [DONE]")

    sources = np.genfromtxt(
        os.path.join(data_folder, "sources.csv"),
        delimiter=",",
        dtype=None,
        encoding="ASCII",
    )

    source_names = sources[:, 0]
    source_sptype = sources[:, 7]

    database.create_group("spectra/allers+2013")

    print_message = ""

    for _, _, files in os.walk(data_folder):
        for _, filename in enumerate(files):
            if filename.endswith(".fits"):
                sp_data, header = fits.getdata(
                    os.path.join(data_folder, filename), header=True)
            else:
                continue

            sp_data = np.transpose(sp_data)

            # (erg s-1 cm-2 A-1) -> (W m-2 um-1)
            sp_data[:, 1:] *= 10.0

            name = header["OBJECT"]

            index = np.argwhere(source_names == name)

            if len(index) == 0:
                sptype = None
            else:
                sptype = source_sptype[index][0][0][:2]

            empty_message = len(print_message) * " "
            print(f"\r{empty_message}", end="")

            print_message = f"Adding spectra... {name}"
            print(f"\r{print_message}", end="")

            dset = database.create_dataset(f"spectra/allers+2013/{name}",
                                           data=sp_data)

            dset.attrs["name"] = str(name).encode()
            dset.attrs["sptype"] = str(sptype).encode()

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()

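# Quick check of the unit conversion applied above, with a hypothetical input
# value: 1 erg s-1 cm-2 A-1 equals 1e-7 J s-1 per 1e-4 m2 per 1e-4 um, i.e. a
# factor of 10 in W m-2 um-1, which is why the flux and error columns are
# multiplied by 10.

def _example_flux_unit_conversion(flux_cgs: float = 1.0) -> float:
    erg_to_joule = 1e-7           # J per erg
    per_cm2_to_per_m2 = 1e4       # cm-2 -> m-2
    per_angstrom_to_per_um = 1e4  # A-1 -> um-1

    return flux_cgs * erg_to_joule * per_cm2_to_per_m2 * per_angstrom_to_per_um
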
def add_optical_constants(input_path: str,
                          database: h5py._hl.files.File) -> None:
    """
    Function for adding the optical constants of crystalline and amorphous
    MgSiO3 and Fe to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """
    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = 'https://people.phys.ethz.ch/~stolkert/species/optical_constants.zip'

    data_file = os.path.join(input_path, 'optical_constants.zip')

    if not os.path.isfile(data_file):
        print('Downloading optical constants (87 kB)...', end='', flush=True)
        urllib.request.urlretrieve(url, data_file)
        print(' [DONE]')

    print('Unpacking optical constants...', end='', flush=True)

    with zipfile.ZipFile(data_file, 'r') as zip_ref:
        zip_ref.extractall(input_path)

    print(' [DONE]')

    print('Adding optical constants of MgSiO3...', end='')

    nk_file = os.path.join(input_path,
                           'optical_constants/mgsio3/crystalline/'
                           'mgsio3_jaeger_98_scott_96_axis1.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/crystalline/axis_1/', data=data)

    nk_file = os.path.join(input_path,
                           'optical_constants/mgsio3/crystalline/'
                           'mgsio3_jaeger_98_scott_96_axis2.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/crystalline/axis_2/', data=data)

    nk_file = os.path.join(input_path,
                           'optical_constants/mgsio3/crystalline/'
                           'mgsio3_jaeger_98_scott_96_axis3.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/crystalline/axis_3/', data=data)

    nk_file = os.path.join(input_path,
                           'optical_constants/mgsio3/amorphous/'
                           'mgsio3_jaeger_2003_reformat.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/mgsio3/amorphous', data=data)

    print(' [DONE]')

    print('Adding optical constants of Fe...', end='')

    nk_file = os.path.join(input_path,
                           'optical_constants/fe/crystalline/fe_henning_1996.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/fe/crystalline', data=data)

    nk_file = os.path.join(input_path,
                           'optical_constants/fe/amorphous/fe_pollack_1994.dat')

    data = np.loadtxt(nk_file)
    database.create_dataset('dust/fe/amorphous', data=data)

    print(' [DONE]')

def add_cross_sections(input_path: str,
                       database: h5py._hl.files.File) -> None:
    """
    Function for adding the extinction cross section of crystalline MgSiO3
    for a log-normal and power-law size distribution to the database.

    Parameters
    ----------
    input_path : str
        Folder where the data is located.
    database : h5py._hl.files.File
        Database.

    Returns
    -------
    NoneType
        None
    """
    if not os.path.exists(input_path):
        os.makedirs(input_path)

    url = 'https://people.phys.ethz.ch/~stolkert/species/lognorm_mgsio3_c_ext.fits'

    data_file = os.path.join(input_path, 'lognorm_mgsio3_c_ext.fits')

    print('Downloading log-normal dust cross sections (231 kB)...',
          end='', flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(' [DONE]')

    print('Adding log-normal dust cross sections...', end='')

    with fits.open(os.path.join(input_path, 'lognorm_mgsio3_c_ext.fits')) as hdu_list:
        database.create_dataset('dust/lognorm/mgsio3/crystalline/cross_section/',
                                data=hdu_list[0].data)

        database.create_dataset('dust/lognorm/mgsio3/crystalline/wavelength/',
                                data=hdu_list[1].data)

        database.create_dataset('dust/lognorm/mgsio3/crystalline/radius_g/',
                                data=hdu_list[2].data)

        database.create_dataset('dust/lognorm/mgsio3/crystalline/sigma_g/',
                                data=hdu_list[3].data)

    print(' [DONE]')

    url = 'https://people.phys.ethz.ch/~stolkert/species/powerlaw_mgsio3_c_ext.fits'

    data_file = os.path.join(input_path, 'powerlaw_mgsio3_c_ext.fits')

    print('Downloading power-law dust cross sections (231 kB)...',
          end='', flush=True)
    urllib.request.urlretrieve(url, data_file)
    print(' [DONE]')

    print('Adding power-law dust cross sections...', end='')

    with fits.open(os.path.join(input_path, 'powerlaw_mgsio3_c_ext.fits')) as hdu_list:
        database.create_dataset('dust/powerlaw/mgsio3/crystalline/cross_section/',
                                data=hdu_list[0].data)

        database.create_dataset('dust/powerlaw/mgsio3/crystalline/wavelength/',
                                data=hdu_list[1].data)

        database.create_dataset('dust/powerlaw/mgsio3/crystalline/radius_max/',
                                data=hdu_list[2].data)

        database.create_dataset('dust/powerlaw/mgsio3/crystalline/exponent/',
                                data=hdu_list[3].data)

    print(' [DONE]')

def add_irtf(input_path: str,
             database: h5py._hl.files.File,
             sptypes: Optional[List[str]] = None) -> None:
    """
    Function for adding the IRTF Spectral Library to the database.

    Parameters
    ----------
    input_path : str
        Path of the data folder.
    database : h5py._hl.files.File
        Database.
    sptypes : list(str), None
        List with the spectral types ('F', 'G', 'K', 'M', 'L', 'T').
        All spectral types are included if set to ``None``.

    Returns
    -------
    NoneType
        None
    """
    if sptypes is None:
        sptypes = ["F", "G", "K", "M", "L", "T"]

    parallax_url = "https://home.strw.leidenuniv.nl/~stolker/species/parallax.dat"
    parallax_file = os.path.join(input_path, "parallax.dat")

    if not os.path.isfile(parallax_file):
        urllib.request.urlretrieve(parallax_url, parallax_file)

    parallax_data = pd.read_csv(
        parallax_file,
        usecols=[0, 1, 2],
        names=["object", "parallax", "parallax_error"],
        delimiter=",",
        dtype={"object": str, "parallax": float, "parallax_error": float},
    )

    datadir = os.path.join(input_path, "irtf")

    if not os.path.exists(datadir):
        os.makedirs(datadir)

    data_file = {
        "F": os.path.join(input_path, "irtf/F_fits_091201.tar"),
        "G": os.path.join(input_path, "irtf/G_fits_091201.tar"),
        "K": os.path.join(input_path, "irtf/K_fits_091201.tar"),
        "M": os.path.join(input_path, "irtf/M_fits_091201.tar"),
        "L": os.path.join(input_path, "irtf/L_fits_091201.tar"),
        "T": os.path.join(input_path, "irtf/T_fits_091201.tar"),
    }

    data_folder = {
        "F": os.path.join(input_path, "irtf/F_fits_091201"),
        "G": os.path.join(input_path, "irtf/G_fits_091201"),
        "K": os.path.join(input_path, "irtf/K_fits_091201"),
        "M": os.path.join(input_path, "irtf/M_fits_091201"),
        "L": os.path.join(input_path, "irtf/L_fits_091201"),
        "T": os.path.join(input_path, "irtf/T_fits_091201"),
    }

    data_type = {
        "F": "F stars (4.4 MB)",
        "G": "G stars (5.6 MB)",
        "K": "K stars (5.5 MB)",
        "M": "M stars (7.5 MB)",
        "L": "L dwarfs (850 kB)",
        "T": "T dwarfs (100 kB)",
    }

    url_root = "http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/"

    url = {
        "F": url_root + "F_fits_091201.tar",
        "G": url_root + "G_fits_091201.tar",
        "K": url_root + "K_fits_091201.tar",
        "M": url_root + "M_fits_091201.tar",
        "L": url_root + "L_fits_091201.tar",
        "T": url_root + "T_fits_091201.tar",
    }

    for item in sptypes:
        if not os.path.isfile(data_file[item]):
            print(f"Downloading IRTF Spectral Library - {data_type[item]}...",
                  end="", flush=True)
            urllib.request.urlretrieve(url[item], data_file[item])
            print(" [DONE]")

    print("Unpacking IRTF Spectral Library...", end="", flush=True)

    for item in sptypes:
        tar = tarfile.open(data_file[item])
        tar.extractall(path=datadir)
        tar.close()

    print(" [DONE]")

    database.create_group("spectra/irtf")

    print_message = ""

    for item in sptypes:
        for root, _, files in os.walk(data_folder[item]):
            for _, filename in enumerate(files):
                if filename[-9:] != "_ext.fits":
                    fitsfile = os.path.join(root, filename)

                    spdata, header = fits.getdata(fitsfile, header=True)
                    spdata = np.transpose(spdata)

                    name = header["OBJECT"]
                    sptype = header["SPTYPE"]

                    if name[-2:] == "AB":
                        name = name[:-2]
                    elif name[-3:] == "ABC":
                        name = name[:-3]

                    spt_split = sptype.split()

                    if item in ["L", "T"] or spt_split[1][0] == "V":
                        empty_message = len(print_message) * " "
                        print(f"\r{empty_message}", end="")

                        print_message = f"Adding spectra... {name}"
                        print(f"\r{print_message}", end="")

                        simbad_id = query_util.get_simbad(name)

                        if simbad_id is not None:
                            # For backward compatibility
                            if not isinstance(simbad_id, str):
                                simbad_id = simbad_id.decode("utf-8")

                            par_select = parallax_data[
                                parallax_data["object"] == simbad_id]

                            if not par_select.empty:
                                parallax = (
                                    par_select["parallax"],
                                    par_select["parallax_error"],
                                )
                            else:
                                simbad_id, parallax = query_util.get_parallax(name)

                        else:
                            parallax = (np.nan, np.nan)

                        sptype = data_util.update_sptype(np.array([sptype]))[0]

                        dset = database.create_dataset(f"spectra/irtf/{name}",
                                                       data=spdata)

                        dset.attrs["name"] = str(name).encode()
                        dset.attrs["sptype"] = str(sptype).encode()
                        dset.attrs["simbad"] = str(simbad_id).encode()
                        dset.attrs["parallax"] = parallax[0]
                        dset.attrs["parallax_error"] = parallax[1]

    empty_message = len(print_message) * " "
    print(f"\r{empty_message}", end="")

    print_message = "Adding spectra... [DONE]"
    print(f"\r{print_message}")

    database.close()
