Example #1
0
def _get_discharge(config: Config, basin: str) -> pd.Series:
    """Load the discharge of one basin for the configured dataset.

    Only ``config.dataset == 'camels_us'`` is handled. For that dataset the
    daymet forcings are loaded solely to obtain the basin area, which is then
    passed on to the CAMELS US discharge loader.

    Parameters
    ----------
    config : Config
        Run configuration; ``config.dataset`` and ``config.data_dir`` are read.
    basin : str
        Identifier of the basin to load.

    Returns
    -------
    pd.Series
        Whatever ``camelsus.load_camels_us_discharge`` returns for the basin.

    Raises
    ------
    NotImplementedError
        If ``config.dataset`` is anything other than ``'camels_us'``.
    """
    if config.dataset != 'camels_us':
        # Name the offending dataset so the failure is actionable.
        raise NotImplementedError(
            f"Loading discharge is not implemented for dataset "
            f"'{config.dataset}'.")

    # The forcings frame itself is discarded; only the area is needed here.
    _, area = camelsus.load_camels_us_forcings(config.data_dir, basin,
                                               'daymet')
    return camelsus.load_camels_us_discharge(config.data_dir, basin, area)
    def _load_basin_data(self, basin: str) -> pd.DataFrame:
        """Load input and output data from text files.

        Every forcing product in ``self.cfg.forcings`` is loaded and the
        results are concatenated column-wise. Products whose name ends in
        ``'_hourly'`` are read through ``self.load_hourly_data``; all other
        products are loaded as CAMELS US forcings and forward-filled onto a
        ``'1H'`` grid. Depending on the configured features, the frame is
        extended by a ``'QObs(mm/d)'`` column, a ``'gauge_height_m'`` column
        and a ``'synthetic_qobs_stage_meters'`` column.

        As a side effect, ``self._warn_slow_loading`` is set to ``False``.

        Parameters
        ----------
        basin : str
            Identifier of the basin to load.

        Returns
        -------
        pd.DataFrame
            Combined forcings and target columns. Negative values in every
            column whose name contains ``'qobs'`` (case-insensitive) and in
            ``'gauge_height_m'`` are replaced by NaN.

        Raises
        ------
        ValueError
            If none of the configured forcings ends in ``'_hourly'``.
        """
        forcings = self.cfg.forcings
        if not any(f.endswith('_hourly') for f in forcings):
            raise ValueError('Forcings include no hourly forcings set.')

        # get forcings
        dfs = []
        for forcing in forcings:
            if forcing.endswith('_hourly'):
                df = self.load_hourly_data(basin, forcing)
            else:
                # load daily CAMELS forcings and upsample to hourly
                df, _ = camelsus.load_camels_us_forcings(
                    self.cfg.data_dir, basin, forcing)
                df = df.resample('1H').ffill()
            if len(forcings) > 1:
                # With several products, suffix each column with its product
                # name. Discharge columns keep their original name.
                df = df.rename(
                    columns={
                        col: f"{col}_{forcing}"
                        for col in df.columns if 'qobs' not in col.lower()
                    })
            dfs.append(df)
        df = pd.concat(dfs, axis=1)

        # collapse all input features to a single list, to check for
        # 'QObs(mm/d)'. Work on a copy so the config's list is never mutated.
        all_features = list(self.cfg.target_variables)
        if isinstance(self.cfg.dynamic_inputs, dict):
            for val in self.cfg.dynamic_inputs.values():
                all_features.extend(val)
        elif isinstance(self.cfg.dynamic_inputs, list):
            all_features.extend(self.cfg.dynamic_inputs)

        # catch also QObs(mm/d)_shiftX or _copyX features
        if any(x.startswith("QObs(mm/d)") for x in all_features):
            # add daily discharge from CAMELS, using daymet to get basin area
            _, area = camelsus.load_camels_us_forcings(self.cfg.data_dir,
                                                       basin, "daymet")
            discharge = camelsus.load_camels_us_discharge(
                self.cfg.data_dir, basin, area)
            discharge = discharge.resample('1H').ffill()
            df["QObs(mm/d)"] = discharge

        # only warn for missing netcdf files once for each forcing product
        self._warn_slow_loading = False

        # replace invalid discharge values by NaNs
        qobs_cols = [col for col in df.columns if 'qobs' in col.lower()]
        for col in qobs_cols:
            df.loc[df[col] < 0, col] = np.nan

        # add stage, if requested
        if 'gauge_height_m' in self.cfg.target_variables:
            df = df.join(load_hourly_us_stage(self.cfg.data_dir, basin))
            df.loc[df['gauge_height_m'] < 0, 'gauge_height_m'] = np.nan

        # convert discharge to 'synthetic' stage, if requested
        if 'synthetic_qobs_stage_meters' in self.cfg.target_variables:
            attributes = camelsus.load_camels_us_attributes(
                data_dir=self.cfg.data_dir, basins=[basin])
            # NOTE(security): unpickling can execute arbitrary code, so
            # cfg.rating_curve_file must come from a trusted source.
            with open(self.cfg.rating_curve_file, 'rb') as f:
                rating_curves = pickle.load(f)
            # Basins without a rating curve keep an all-NaN column.
            df['synthetic_qobs_stage_meters'] = np.nan
            if basin in rating_curves:
                # mm/h -> m/h (/1000), times area, per second (/60**2).
                # NOTE(review): the 1e6 factor presumably converts
                # area_gages2 from km^2 to m^2 - confirm against the
                # attribute file.
                discharge_m3s = df[
                    'qobs_mm_per_hour'].values / 1000 * attributes.area_gages2[
                        basin] * 1e6 / 60**2
                df['synthetic_qobs_stage_meters'] = rating_curves[
                    basin].discharge_to_stage(discharge_m3s)

        return df