def hit2df(
    fname,
    cache=True,
    verbose=True,
    drop_non_numeric=True,
    load_wavenum_min=None,
    load_wavenum_max=None,
):
    """Read a HITRAN/HITEMP [1]_ line file into a Pandas dataframe.

    Parameters
    ----------
    fname: str
        HITRAN-HITEMP file name
    cache: boolean, or ``'regen'`` or ``'force'``
        if ``True``, a pandas-readable HDF5 file is generated on first access,
        and used on later calls. This saves the datatype cast and conversion
        and improves performances a lot (but changes in the database are not
        taken into account). If ``False``, no cache file is used. If
        ``'regen'``, temp files are reconstructed. The value is forwarded
        as-is to :py:func:`~radis.io.cache_files.load_h5_cache_file`.
        Default ``True``.

    Other Parameters
    ----------------
    verbose: boolean or int
        if truthy, print a message when the cache file is generated; if
        ``>= 2``, also print the file name and its last modification time.
        Default ``True``.
    drop_non_numeric: boolean
        if ``True``, non numeric columns are dropped. This improves
        performances, but make sure all the columns you need are converted to
        numeric formats beforehand. Default ``True``. Note that if a cache
        file is loaded it is returned untouched.
    load_wavenum_min, load_wavenum_max: float
        if not ``None``, only load the cached file if it contains data for
        wavenumbers above/below the specified value. See
        :py:func:`~radis.io.cache_files.load_h5_cache_file`.
        Default ``None``.

    Returns
    -------
    df: pandas Dataframe
        dataframe containing all lines and parameters

    Raises
    ------
    ValueError
        if the file cannot be decoded as text, if the parsed databank is
        empty, or if it contains more than one molecule.
    AssertionError
        if both wavenumber bounds are given and
        ``load_wavenum_min >= load_wavenum_max``.

    References
    ----------
    .. [1] `HITRAN 1996, Rothman et al., 1998 <https://www.sciencedirect.com/science/article/pii/S0022407398000788>`__

    Notes
    -----
    Performances: see CDSD-HITEMP parser

    See Also
    --------
    :func:`~radis.io.cdsd.cdsd2df`
    """
    # Last modification time of the original file; handed to
    # ``load_h5_cache_file`` as ``valid_if_metadata_is``
    metadata = {"last_modification": time.ctime(getmtime(fname))}
    if verbose >= 2:
        print("Opening file {0} (cache={1})".format(fname, cache))
        print("Last modification time: {0}".format(metadata["last_modification"]))
    if load_wavenum_min and load_wavenum_max:
        assert load_wavenum_min < load_wavenum_max

    columns = columns_2004

    # Use cache file if possible
    fcache = cache_file_name(fname)
    if cache and exists(fcache):
        # Cache not relevant if wavenum_max of file is < wavenum min required
        required_above = {}
        if load_wavenum_min:
            required_above["wavenum_max"] = load_wavenum_min
        # Cache not relevant if wavenum_min of file is > wavenum max required
        required_below = {}
        if load_wavenum_max:
            required_below["wavenum_min"] = load_wavenum_max

        cached_df = load_h5_cache_file(
            fcache,
            cache,
            valid_if_metadata_is=metadata,
            relevant_if_metadata_above=required_above,
            relevant_if_metadata_below=required_below,
            current_version=radis.__version__,
            last_compatible_version=OLDEST_COMPATIBLE_VERSION,
            verbose=verbose,
        )
        if cached_df is not None:
            return cached_df

    # Detect the molecule from the first two characters of the file
    try:
        with open(fname) as stream:
            molecule_id = int(stream.read(2))
            mol = get_molecule(molecule_id)
    except UnicodeDecodeError as err:
        raise ValueError(
            "You're trying to read a binary file {0} ".format(fname)
            + "instead of an HITRAN file"
        ) from err

    # %% Start reading the full file
    df = parse_hitran_file(fname, columns)

    # %% Post processing
    # Exactly one molecule is expected per database, otherwise the
    # molecule-specific quanta parsing below would not make sense
    nmol = len(set(df["id"]))
    if nmol == 0:
        raise ValueError("Databank looks empty")
    if nmol != 1:
        # Crash with an explicit message showing the first two parsed rows
        try:
            secondline = df.iloc[1]
        except IndexError:
            secondline = ""
        raise ValueError(
            "Multiple molecules in database ({0}). Current ".format(nmol)
            + "spectral code only computes 1 species at the time. Use MergeSlabs. "
            + "Verify the parsing was correct by looking at the first row below: "
            + "\n{0}".format(df.iloc[0])
            + "\n----------------\nand the second row "
            + "below: \n{0}".format(secondline)
        )

    # Local quanta attributes (based on the HITRAN group), then global quanta
    # attributes (based on the HITRAN class)
    df = parse_local_quanta(df, mol)
    df = parse_global_quanta(df, mol)

    # Remove non numerical attributes
    if drop_non_numeric:
        if "branch" in df:
            replace_PQR_with_m101(df)
        df = drop_object_format_columns(df, verbose=verbose)

    # TODO : get only wavenum above/below 'load_wavenum_min', 'load_wavenum_max'
    # by parsing df.wav. Completely irrelevant files are discarded in
    # 'load_h5_cache_file' but files that have partly relevant lines are
    # fully loaded.
    if not cache:
        return df

    # Cached file mode, but no usable cache file was returned above:
    # generate it now. Note : the cache file holds the full line list.
    new_metadata = {
        # Last modification time of the original file :
        "last_modification": time.ctime(getmtime(fname)),
        "wavenum_min": df.wav.min(),
        "wavenum_max": df.wav.max(),
    }
    if verbose:
        print(
            "Generating cache file {0} with metadata :\n{1}".format(
                fcache, new_metadata
            )
        )
    try:
        save_to_hdf(
            df,
            fcache,
            metadata=new_metadata,
            version=radis.__version__,
            key="df",
            overwrite=True,
            verbose=verbose,
        )
    except PermissionError:
        # Best effort only: a non-writable cache location must not prevent
        # returning the parsed dataframe
        if verbose:
            print(sys.exc_info())
            print("An error occured in cache file generation. Lookup access rights")

    return df
def get_label_hitran(row, details):
    """Build a text label describing one HITRAN line.

    Parameters
    ----------
    row: mapping-like line record
        must expose ``row.id`` and item access (``row[key]``) for ``"iso"``,
        ``"jl"``, ``"branch"``, the quantum numbers of the molecule's HITRAN
        class, and every key of ``details``.
    details: dict
        maps a key of ``row`` to a 3-item sequence ``(name, _, unit)``. One
        extra ``<br>``-separated entry is appended to the label per key.

    Returns
    -------
    label: str

    Raises
    ------
    NotImplementedError
        if the molecule is not in ``HITRAN_CLASS1``, ``HITRAN_CLASS4`` or
        ``HITRAN_CLASS5``.

    Todo
    -------

    replace with simple astype(str) statements and str operations

    ex:
    > '['+df[locl].astype(str)+']('+df[globl].astype(str)+'->'+
    >     df[globu].astype(str)'+)'

    will be much faster!
    """
    molecule = get_molecule(row.id)

    # Pick the label template and the quantum numbers it needs according to
    # the HITRAN class of the molecule
    if molecule in HITRAN_CLASS1:
        template = "{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]({vl:.0f})->({vu:.0f})"
        quanta = ("vu", "vl", "jl", "iso")
    elif molecule in HITRAN_CLASS4:
        template = "{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]({v1l:.0f}{v2l:.0f}`{l2l:.0f}`{v3l:.0f})->({v1u:.0f}{v2u:.0f}`{l2u:.0f}`{v3u:.0f})"
        quanta = (
            "v1u",
            "v2u",
            "l2u",
            "v3u",
            "v1l",
            "v2l",
            "l2l",
            "v3l",
            "jl",
            "iso",
        )
    elif molecule in HITRAN_CLASS5:
        template = "{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]({v1l:.0f}{v2l:.0f}`{l2l:.0f}`{v3l:.0f} {rl:.0f})->({v1u:.0f}{v2u:.0f}`{l2u:.0f}`{v3u:.0f} {ru:.0f})"
        quanta = (
            "v1u",
            "v2u",
            "l2u",
            "v3u",
            "v1l",
            "v2l",
            "l2l",
            "v3l",
            "rl",
            "ru",
            "jl",
            "iso",
        )
    else:
        raise NotImplementedError("No label for {0}. Please add it!".format(molecule))

    # Global label
    fields = {key: row[key] for key in quanta}
    fields["molec"] = molecule
    fields["branch"] = _fix_branch_format[row["branch"]]
    pieces = [template.format(**fields)]

    # Add details about some line properties; floats get 3 significant digits
    for key in details:
        name, _, unit = details[key]
        if is_float(row[key]):
            entry = "<br>{0} {1}: {2:.3g} {3}".format(key, name, row[key], unit)
        else:
            entry = "<br>{0} {1}: {2} {3}".format(key, name, row[key], unit)
        pieces.append(entry)

    return "".join(pieces)
def fetch_astroquery(
    molecule, isotope, wmin, wmax, verbose=True, cache=True, expected_metadata=None
):
    """Download a HITRAN line database to a Pandas DataFrame.

    Wrapper to Astroquery [1]_ fetch function

    Parameters
    ----------
    molecule: str, or int
        molecule name or identifier
    isotope: int
        isotope number
    wmin, wmax: float  (cm-1)
        wavenumber min and max

    Other Parameters
    ----------------
    verbose: boolean
        Default ``True``
    cache: boolean or ``'regen'``
        if ``True``, tries to find a ``.h5`` cache file in the Astroquery
        :py:attr:`~astroquery.query.BaseQuery.cache_location`, that would match
        the requirements. If not found, downloads it and saves the line dataframe
        as a ``.h5`` file in the Astroquery cache.
        If ``'regen'``, delete existing cache file to regenerate it.
    expected_metadata: dict, or ``None``
        if ``cache=True``, check that the metadata in the cache file correspond
        to these attributes. Arguments ``molecule``, ``isotope``, ``wmin``,
        ``wmax`` are already added by default. The dictionary given by the
        caller is copied, never modified. Default ``None`` (no extra
        attributes).

    Returns
    -------
    df: pandas DataFrame
        line database, with columns renamed from Astroquery to RADIS format.
        Empty if the server answered ``404 Not Found`` for this range.

    Raises
    ------
    KeyError
        if Astroquery raises a ``KeyError`` during the query.
    ValueError
        if the server answers with a 500 status code, or with a 404 whose
        reason is not ``"Not Found"``.
    AssertionError
        if ``isotope`` is not a number.

    Notes
    -----
    The HITRAN module in Astroquery [1]_ is itself based on [HAPI]_

    References
    ----------
    .. [1] `Astroquery <https://astroquery.readthedocs.io>`_

    See Also
    --------
    :py:func:`astroquery.hitran.reader.download_hitran`,
    :py:func:`astroquery.hitran.reader.read_hitran_file`,
    :py:attr:`~astroquery.query.BaseQuery.cache_location`
    """
    # Check input : accept either a molecule name or a molecule identifier
    if not is_float(molecule):
        mol_id = get_molecule_identifier(molecule)
    else:
        mol_id = molecule
        molecule = get_molecule(mol_id)
    assert is_float(isotope)

    empty_range = False

    # Parameters identifying this query : used in the cache file name and
    # stored in / checked against the cache file metadata
    query_metadata = {
        "molecule": molecule,
        "isotope": isotope,
        "wmin": wmin,
        "wmax": wmax,
    }

    if cache:
        # Cache file location in Astroquery cache
        # TODO: move full HITRAN databases in ~/radisdb cache like io/hitemp/fetch_hitemp ?
        fcache = join(
            Hitran.cache_location,
            CACHE_FILE_NAME.format(**query_metadata),
        )
        # ... Update metadata with physical properties from the database.
        # Work on a private copy : updating the argument in place would
        # modify the caller's dictionary (or a shared default value)
        expected_metadata = dict(expected_metadata) if expected_metadata else {}
        expected_metadata.update(query_metadata)
        if cache == "regen":
            if exists(fcache):
                if verbose:
                    print(f"Cache file {fcache} deleted to be regenerated")
                os.remove(fcache)
        else:
            # Load cache file if valid
            check_cache_file(
                fcache=fcache,
                use_cached=cache,
                expected_metadata=expected_metadata,
                verbose=verbose,
            )
            if exists(fcache):
                try:
                    return get_cache_file(fcache, verbose=verbose)
                except Exception as err:
                    # Unreadable cache file : delete it and download again
                    if verbose:
                        printr(
                            "Problem reading cache file {0}:\n{1}\nDeleting it!".format(
                                fcache, str(err)
                            )
                        )
                    os.remove(fcache)

    # Download using the astroquery library
    try:
        response = Hitran.query_lines_async(
            molecule_number=mol_id,
            isotopologue_number=isotope,
            min_frequency=wmin / u.cm,
            max_frequency=wmax / u.cm,
        )
    except KeyError as err:
        raise KeyError(
            str(err)
            + " <<w this error occured in Astroquery. Maybe these molecule "
            + "({0}) and isotope ({1}) are not supported".format(molecule, isotope)
        ) from err

    # Deal with usual errors
    # NOTE(review): status codes other than 404 and 500 are not checked here
    # and fall through to the parsing step below.
    if response.status_code == 404:
        # Maybe there are just no lines for this species in this range
        # In that case we usually end up with errors like:
        # (<class 'Exception'>, Exception('Query failed: 404 Client Error:
        # Not Found for url: http://hitran.org/lbl/api?numax=25000&numin=19000&iso_ids_list=69\n',),
        # <traceback object at 0x7f0967c91708>)
        if response.reason == "Not Found":
            # Let's bet it's just that there are no lines in this range
            empty_range = True
            if verbose:
                print(
                    (
                        "No lines for {0} (id={1}), iso={2} in range {3:.2f}-{4:.2f}cm-1. ".format(
                            molecule, mol_id, isotope, wmin, wmax
                        )
                    )
                )
        else:
            raise ValueError(
                "An error occured during the download of HITRAN files "
                + "for {0} (id={1}), iso={2} between {3:.2f}-{4:.2f}cm-1. ".format(
                    molecule, mol_id, isotope, wmin, wmax
                )
                + "Are you online?\n"
                + "See details of the error below:\n\n {0}".format(response.reason)
            )
    elif response.status_code == 500:
        raise ValueError(
            "{0} while querying the HITRAN server: ".format(response.status_code)
            + "\n\n{0}".format(response.text)
        )

    # Process response

    # Rename columns from Astroquery to RADIS format
    rename_columns = {
        "molec_id": "id",
        "local_iso_id": "iso",
        "nu": "wav",
        "sw": "int",
        "a": "A",
        "gamma_air": "airbrd",
        "gamma_self": "selbrd",
        "elower": "El",
        "n_air": "Tdpair",
        "delta_air": "Pshft",
        "global_upper_quanta": "globu",
        "global_lower_quanta": "globl",
        "local_upper_quanta": "locu",
        "local_lower_quanta": "locl",
        "line_mixing_flag": "lmix",
        "gp": "gp",
        "gpp": "gpp",
    }

    if not empty_range:
        tbl = Hitran._parse_result(response)
        df = tbl.to_pandas()
        df = df.rename(columns=rename_columns)
    else:
        # No lines in range : return an empty dataframe with RADIS columns
        df = pd.DataFrame(columns=list(rename_columns.values()))

    # Cast type to float64
    cast_type = {
        "wav": np.float64,
        "int": np.float64,
        "A": np.float64,
        "airbrd": np.float64,
        "selbrd": np.float64,
        "El": np.float64,
        "Tdpair": np.float64,
        "Pshft": np.float64,
    }
    for c, typ in cast_type.items():
        df[c] = df[c].astype(typ)

    # cached file mode but cached file doesn't exist yet (else we had returned)
    if cache:
        new_metadata = dict(query_metadata)
        if verbose:
            print(
                "Generating cache file {0} with metadata :\n{1}".format(
                    fcache, new_metadata
                )
            )
        try:
            save_to_hdf(
                df,
                fcache,
                metadata=new_metadata,
                version=radis.__version__,
                key="df",
                overwrite=True,
                verbose=verbose,
            )
        except PermissionError:
            # Best effort only: a non-writable cache location must not
            # prevent returning the downloaded dataframe
            if verbose:
                print(sys.exc_info())
                print("An error occured in cache file generation. Lookup access rights")

    return df