Example 1
def test_convert_time_units():
    assert np.allclose(convert_time_units('s', 'day'), 1 / 86400)
    assert np.allclose(convert_time_units(4, 1), 86400)
    assert np.allclose(convert_time_units('days', 'seconds'), 86400)
    assert np.allclose(convert_time_units('d', 's'), 86400)
    assert np.allclose(convert_time_units(1, 4), 1 / 86400)
    assert np.allclose(convert_time_units(None, 'd'), 1.)
    assert np.allclose(convert_time_units(5, 4), 1 / 365.25)
    assert np.allclose(convert_time_units(4, 5), 365.25)
    assert np.allclose(convert_time_units('years', 'days'), 1 / 365.25)
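
The integer arguments above are MODFLOW ITMUNI codes (1 = seconds, 4 = days, 5 = years), and None falls back to a multiplier of 1. Below is a minimal usage sketch, assuming the function is importable from mfsetup.units (the import path is an assumption, not shown in the example):

# Minimal sketch: the return value is a scalar multiplier
from mfsetup.units import convert_time_units  # assumed import path

elapsed_days = 2.5
elapsed_seconds = elapsed_days * convert_time_units('d', 's')  # 2.5 * 86400, per the test above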
Example 2
def test_rch_setup(pfl_nwt_with_dis, project_root_path, simulate_high_k_lakes):

    m = pfl_nwt_with_dis  #deepcopy(pfl_nwt_with_dis)
    m.cfg['high_k_lakes']['simulate_high_k_lakes'] = simulate_high_k_lakes
    # test intermediate array creation from rech specified as scalars
    m.cfg['rch']['rech'] = [0.001, 0.002]
    m.cfg['rch']['rech_length_units'] = 'meters'
    m.cfg['rch']['rech_time_units'] = 'days'
    rch = m.setup_rch()
    arrayfiles = m.cfg['intermediate_data']['rech']
    assert len(arrayfiles) == len(m.cfg['rch']['rech'])
    for f in arrayfiles:
        assert os.path.exists(f)

    # test intermediate array creation from source_data
    # (rasters of different shapes)
    inf_array = 'mfsetup/tests/data/plainfieldlakes/source_data/' \
                'net_infiltration__2012-01-01_to_2017-12-31__1066_by_1145__SUM__INCHES_PER_YEAR.tif'
    inf_array = os.path.join(project_root_path, inf_array)
    with rasterio.open(inf_array) as src:
        inf_values = src.read(1)

    m.cfg['rch']['source_data']['rech']['filename'] = inf_array
    m.cfg['rch']['rech'] = None
    m.cfg['rch']['source_data']['rech']['length_units'] = 'inches'
    m.cfg['rch']['source_data']['rech']['time_units'] = 'years'
    rch = m.setup_rch()

    # spatial mean recharge in model should approx. match the GeoTiff (which covers a larger area)
    avg_in_yr = rch.rech.array[0, 0, :, :].mean() * convert_length_units('meters', 'inches') * \
        convert_time_units('days', 'years')
    assert np.allclose(avg_in_yr, inf_values.mean() * m.cfg['rch']['source_data']['rech']['mult'], rtol=0.25)
    arrayfiles = m.cfg['intermediate_data']['rech']
    for f in arrayfiles:
        assert os.path.exists(f)

    # check that high-K lake recharge was assigned correctly
    if simulate_high_k_lakes:
        highklake_recharge = m.rch.rech.array[:, 0, m.isbc[0] == 2].mean(axis=1)
        print(highklake_recharge)
        print(m.high_k_lake_recharge)
        assert np.allclose(highklake_recharge, m.high_k_lake_recharge)
    else:
        assert not np.any(m._isbc2d == 2)

    # test writing of MODFLOW arrays
    rch.write_file()
    assert m.cfg['rch']['rech'] is not None
    for f in m.cfg['rch']['rech']:
        assert os.path.exists(f)
    assert os.path.exists(rch.fn_path)

    # test intermediate array creation from rech specified as arrays
    # (of same shape; use MODFLOW arrays written above)
    rch = m.setup_rch()
    arrayfiles = m.cfg['intermediate_data']['rech']
    for f in arrayfiles:
        assert os.path.exists(f)
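
For reference, the rch configuration keys that the test manipulates could be collected as below. This is a sketch of the dictionary structure only, with a hypothetical raster path; it is not a complete modflow-setup configuration:

# Sketch of the cfg['rch'] block exercised by the test (raster path is hypothetical)
cfg_rch = {
    'rech': [0.001, 0.002],              # one scalar per stress period
    'rech_length_units': 'meters',
    'rech_time_units': 'days',
    'source_data': {
        'rech': {
            'filename': 'source_data/net_infiltration.tif',  # hypothetical path
            'length_units': 'inches',
            'time_units': 'years',
            'mult': 1.0,
        },
    },
}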
Example 3
def preprocess_flows(
    data,
    metadata=None,
    flow_data_columns=['flow'],
    start_date=None,
    active_area=None,
    active_area_id_column=None,
    active_area_feature_id=None,
    source_crs=4269,
    dest_crs=5070,
    datetime_col='datetime',
    site_no_col='site_no',
    line_id_col='line_id',
    x_coord_col='x',
    y_coord_col='y',
    name_col='name',
    flow_qualifier_column=None,
    default_qualifier='measured',
    include_sites=None,
    include_line_ids=None,
    source_volume_units='ft3',
    source_time_units='s',
    dest_volume_units='m3',
    dest_time_units='d',
    geographic_groups=None,
    geographic_groups_col=None,
    max_obsname_len=None,
    add_leading_zeros_to_sw_site_nos=False,
    column_renames=None,
    outfile=None,
):
    """Preprocess stream flow observation data, for example, from NWIS or another data source that
    outputs time series in CSV format with site locations and identifiers.

    * Data are reprojected from a `source_crs` (Coordinate reference system; assumed to be in geographic coordinates)
      to the CRS of the model (`dest_crs`)
    * Data are culled to a `start_date` and, optionally, to a polygon or set of polygons defining the model area
    * Length and time units are converted to those of the groundwater model.
    * Prefixes for observation names (with an optional length limit) that identify the location are generated
    * Preliminary observation groups can also be assigned, based on geographic areas defined by polygons
      (`geographic_groups` parameter)

    Parameters
    ----------
    data : csv file or DataFrame
        Time series of stream flow observations.
        Columns:

        ===================== ======================================
        site_no               site identifier
        datetime              measurement dates/times
        x                     x-coordinate of site
        y                     y-coordinate of site
        flow_data_columns     Columns of observed streamflow values
        flow_qualifier_column Optional column with qualifiers for flow values
        ===================== ======================================

        Notes:

        * x and y columns can alternatively be in the metadata table
        * Columns with flow values are specified in `flow_data_columns`; multiple
          columns can be included to process base flow and total flow, or
          other statistics, in tandem
        * For example, `flow_qualifier_column` may have "estimated" or "measured"
          flags denoting whether streamflows were derived from measured values
          or statistical estimates.

    metadata : csv file or DataFrame
        Stream flow observation site information.

        May include columns:

        ================= ================================================================================
        site_no           site identifier
        x                 x-coordinate of site
        y                 y-coordinate of site
        name              name of site
        line_id_col       Identifier for a line in a hydrography dataset that the site is associated with.
        ================= ================================================================================

        Notes:

        * other columns in metadata will be passed through to the metadata output

    flow_data_columns : list of strings
        Columns in data with flow values or their statistics.
        By default, ['flow']
    start_date : str (YYYY-mm-dd)
        Simulation start date (cull observations before this date)
    active_area : str
        Shapefile with polygon to cull observations to. Automatically reprojected
        to dest_crs if the shapefile includes a .prj file.
        by default, None.
    active_area_id_column : str, optional
        Column in active_area with feature ids.
        By default, None, in which case all features are used.
    active_area_feature_id : str, optional
        ID of feature to use for active area
        By default, None, in which case all features are used.
    source_crs : obj
        Coordinate reference system of the head observation locations.
        A Python int, dict, str, or :class:`pyproj.crs.CRS` instance
        passed to :meth:`pyproj.crs.CRS.from_user_input`

        Can be any of:
          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class

        By default, epsg:4269
    dest_crs : obj
        Coordinate reference system of the model. Same input types
        as ``source_crs``.
        By default, epsg:5070
    datetime_col : str, optional
        Column name in data with observation date/times,
        by default 'datetime'
    site_no_col : str, optional
        Column name in data and metadata with site identifiers,
        by default 'site_no'
    line_id_col : str, optional
        Column name in data or metadata with identifiers for
        hydrography lines associated with observation sites.
        by default 'line_id'
    x_coord_col : str, optional
        Column name in data or metadata with x-coordinates,
        by default 'x'
    y_coord_col : str, optional
        Column name in data or metadata with y-coordinates,
        by default 'y'
    name_col : str, optional
        Column name in data or metadata with observation site names,
        by default 'name'
    flow_qualifier_column : str, optional
        Column name in data with flow observation qualifiers, such
        as "measured" or "estimated".
        by default, None
    default_qualifier : str, optional
        Default qualifier to populate flow_qualifier_column if it
        is None. By default, "measured"
    include_sites : list-like, optional
        Limit output to these sites.
        by default, None (include all sites)
    include_line_ids : list-like, optional
        Limit output to these sites, as represented by line identifiers.
        by default, None (include all sites)
    source_volume_units : str, 'm3', 'cubic meters', 'ft3', etc.
        Volume units of the source data. By default, 'ft3'
    source_time_units : str, 's', 'seconds', 'days', etc.
        Time units of the source data. By default, 's'
    dest_volume_units : str, 'm3', 'cubic meters', 'ft3', etc.
        Volume units of the output (model). By default, 'm3'
    dest_time_units : str, 's', 'seconds', 'days', etc.
        Time units of the output (model). By default, 'd'
    geographic_groups : file, dict or list-like
        Option to group observations by area(s) of interest. Can
        be a shapefile, list of shapefiles, or dictionary of shapely polygons.
        A 'group' column will be created in the metadata, and observation
        sites within each polygon will be assigned the group name
        associated with that polygon.

        For example::

            geographic_groups='../source_data/extents/CompositeHydrographArea.shp'
            geographic_groups=['../source_data/extents/CompositeHydrographArea.shp']
            geographic_groups={'cha': <shapely Polygon>}

        Where 'cha' is an observation group name for observations located within
        the area defined by CompositeHydrographArea.shp. For shapefiles,
        group names are provided in a `geographic_groups_col`.

    geographic_groups_col : str
        Field name in the `geographic_groups` shapefile(s) containing the
        observation group names associated with each polygon.
    max_obsname_len : int or None
        Maximum length for observation name prefix. Default of 13
        allows for a PEST obsnme of 20 characters or less with
        <prefix>_yyyymm or <prefix>_<per>d<per>
        (e.g. <prefix>_2d1 for a difference between stress periods 2 and 1)
        If None, observation names will not be truncated. PEST++ does not have
        a limit on observation name length.
    add_leading_zeros_to_sw_site_nos : bool
        Whether or not to pad site numbers using the
        :func:`~mapgwm.swflows.format_usgs_sw_site_id` function.
        By default, False.
    column_renames : dict, optional
        Option to rename columns in the data or metadata that are different than those listed above.
        For example, if the data file has a 'SITE_NO' column instead of 'site_no'::

            column_renames={'SITE_NO': 'site_no'}

        by default None, in which case the renames listed above will be used.
        Note that the renames must be the same as those listed above for
        :func:`mapgwm.swflows.preprocess_flows` to work.
    outfile : str
        Where output file will be written. Metadata are written to a file
        with the same name, with an additional "_info" suffix prior to
        the file extension.

    Returns
    -------
    data : DataFrame
        Preprocessed time series
    metadata : DataFrame
        Preprocessed metadata

    References
    ----------
    `The PEST++ Manual <https://github.com/usgs/pestpp/tree/master/documentation>`_

    Notes
    -----

    """
    # outputs
    if outfile is not None:
        outpath, filename = os.path.split(outfile)
        makedirs(outpath)
        outname, ext = os.path.splitext(outfile)
        out_info_csvfile = outname + '_info.csv'
        out_data_csvfile = outfile
        out_shapefile = outname + '_info.shp'

    # read the source data
    if not isinstance(data, pd.DataFrame):
        df = pd.read_csv(data, dtype={site_no_col: object})
    else:
        df = data.copy()
    # check the columns
    for col in [datetime_col] + flow_data_columns:
        assert col in df.columns, "Column {} not found in {}".format(col, data)
    assert any({site_no_col, line_id_col}.intersection(df.columns)), \
        "Neither {} nor {} found in {}. Need to specify a site_no_col or line_id_col".format(
            site_no_col, line_id_col, data)
    # rename input columns to these names,
    # for consistent output
    dest_columns = {
        datetime_col: 'datetime',
        site_no_col: 'site_no',
        line_id_col: 'line_id',
        x_coord_col: 'x',
        y_coord_col: 'y',
        name_col: 'name',
        flow_qualifier_column: 'category'
    }
    # update the default column renames
    # with any supplied via column_renames parameter
    if isinstance(column_renames, collections.abc.Mapping):
        dest_columns.update(column_renames)
    df.rename(columns=dest_columns, inplace=True)
    flow_data_columns = [
        c if c not in dest_columns else dest_columns[c]
        for c in flow_data_columns
    ]
    # convert site numbers to strings;
    # add leading 0s to any USGS sites that should have them
    if 'site_no' in df.columns:
        df['site_no'] = format_site_ids(df['site_no'],
                                        add_leading_zeros_to_sw_site_nos)
    else:
        df['site_no'] = df[line_id_col]

    # read the site metadata
    if metadata is not None:
        if not isinstance(metadata, pd.DataFrame):
            md = pd.read_csv(metadata, dtype={site_no_col: object})
        else:
            md = metadata.copy()
        if site_no_col not in md.columns or 'site_no' not in df.columns:
            raise IndexError(
                'If metadata are supplied, both data and metadata must '
                'have a site_no column.')
        md.rename(columns=dest_columns, inplace=True)
        md['site_no'] = format_site_ids(md['site_no'],
                                        add_leading_zeros_to_sw_site_nos)
        md.index = md['site_no']
        by_site = df.groupby('site_no')
        md['start_dt'] = pd.DataFrame(by_site['datetime'].first())
    else:
        by_site = df.groupby('site_no')
        md = pd.DataFrame(by_site['datetime'].first())
        md.columns = ['start_dt']
        md['site_no'] = md.index

    md['end_dt'] = pd.DataFrame(by_site['datetime'].last())
    md['n'] = pd.DataFrame(by_site['datetime'].count())
    md.reset_index(inplace=True, drop=True)

    # assign metadata if supplied
    for col in 'x', 'y', 'line_id', 'name':
        if col in df.columns and col not in md.columns:
            by_site_no = dict(zip(df['site_no'], df[col]))
            md[col] = [by_site_no[sn] for sn in md['site_no']]
            if col != 'line_id':
                df.drop(col, axis=1, inplace=True)

    # index the dataframe to times;
    # truncate data before start date
    df.index = pd.to_datetime(df['datetime'])
    df.index.name = 'datetime'
    df = df.loc[start_date:].copy()

    # project x, y to model crs
    x_pr, y_pr = project((md.x.values, md.y.values), source_crs, dest_crs)
    md['x'], md['y'] = x_pr, y_pr
    md['geometry'] = [Point(x, y) for x, y in zip(x_pr, y_pr)]

    # cull data to that within the model area
    if active_area is not None:
        df, md = cull_data_to_active_area(df,
                                          active_area,
                                          active_area_id_column,
                                          active_area_feature_id,
                                          data_crs=dest_crs,
                                          metadata=md)

    # get the hydrography IDs corresponding to each site
    # using the included lookup table
    #if 'line_id' not in df.columns:
    #    assert line_id_lookup is not None, \
    #    "need to include line_ids in a column, or line_id_lookup dictionary mapping line_ids to site numbers"
    #    df = df.loc[df['site_no'].isin(line_id_lookup)].copy()
    #    df['line_id'] = [line_id_lookup[sn] for sn in df['site_no']]

    if include_sites is not None:
        md = md.loc[md.site_no.isin(include_sites)]
        df = df.loc[df.site_no.isin(include_sites)]
    if include_line_ids is not None:
        md = md.loc[md.line_id.isin(include_line_ids)]
        df = df.loc[df.line_id.isin(include_line_ids)]

    # convert units
    # ensure that flow values are numeric (may be objects if taken directly from NWIS)
    unit_conversion = (
        convert_volume_units(source_volume_units, dest_volume_units) /
        convert_time_units(source_time_units, dest_time_units))
    for flow_col in flow_data_columns:
        df[flow_col] = pd.to_numeric(df[flow_col],
                                     errors='coerce') * unit_conversion
    df.dropna(subset=flow_data_columns, axis=0, inplace=True)

    # reformat qualifiers for consistent output
    # (lump to dest category columns of either estimated or measured)
    # with measured including values derived from baseflow separation or actual measurements)
    # output column name for flow qualifier column:
    dest_flow_qualifier_column = 'category'
    if flow_qualifier_column is not None:
        flow_qualifiers = {
            'calculated': 'measured',  # 'measured',
            'base flow separated from measured values':
            'measured',  # 'measured',
            'measured total flow': 'measured',
            'estimated gaged': 'estimated',
            'estimated ungaged': 'estimated'
        }
        df[dest_flow_qualifier_column] = df[flow_qualifier_column].replace(
            flow_qualifiers)
    else:
        df['category'] = default_qualifier

    # make unique n-character prefixes (site identifiers) for each observation location
    # 13 character length allows for prefix_yyyymm in 20 character observation names
    # (BeoPEST limit)
    unique_obsnames = set()
    obsnames = []
    for sn in md['site_no'].tolist():
        if max_obsname_len is not None:
            name = make_obsname(sn,
                                unique_names=unique_obsnames,
                                maxlen=max_obsname_len)
            assert name not in unique_obsnames
        else:
            name = sn
        unique_obsnames.add(name)
        obsnames.append(name)
    md['obsprefix'] = obsnames

    # add area of interest information
    md['group'] = 'fluxes'
    md = assign_geographic_obsgroups(md,
                                     geographic_groups,
                                     geographic_groups_col,
                                     metadata_crs=dest_crs)

    # data columns
    data_cols = ['site_no', 'line_id', 'datetime'] + flow_data_columns + ['category']
    #if 'line_id' in md.columns and 'line_id' not in df.columns:
    #    # only map line_ids to data if there are more site numbers
    #    # implying that no site number maps to more than one line_id
    #    if len(set(df.site_no)) >= len(set(df.line_id)):
    #        ids = dict(zip(md['site_no'], md['line_id']))
    #    df['line_id'] = [ids[sn] for sn in df['site_no']]
    data_cols = [c for c in data_cols if c in df.columns]
    df = df[data_cols]

    md.index = md['site_no']
    # save out the results
    if outfile is not None:
        df2shp(md.drop(['x', 'y'], axis=1), out_shapefile, crs=dest_crs)
        print('writing {}'.format(out_info_csvfile))
        md.drop('geometry', axis=1).to_csv(out_info_csvfile,
                                           index=False,
                                           float_format='%g')
        print('writing {}'.format(out_data_csvfile))
        df.to_csv(out_data_csvfile, index=False, float_format='%g')
    return df, md
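
A hedged usage sketch follows. It assumes an NWIS-style CSV containing 'site_no', 'datetime', 'x', 'y', and a 'flow' column; the file paths are hypothetical, and the import path follows the cross-reference in the docstring:

# Hypothetical input/output paths; column names follow the docstring above
from mapgwm.swflows import preprocess_flows

data, metadata = preprocess_flows(
    'source_data/streamflow_obs.csv',        # hypothetical input file
    flow_data_columns=['flow'],
    start_date='2012-01-01',
    source_volume_units='ft3', source_time_units='s',
    dest_volume_units='m3', dest_time_units='d',
    outfile='processed/streamflow_obs.csv',  # also writes *_info.csv and *_info.shp
)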
Example 4
def plot_wateruse(wel_files, perioddata, add_data=None,
                  wel_flux_col='q',
                  model_volume_units='$m^3$', model_time_units='day',
                  plot_volume_units='mgal', plot_time_units='day',
                  outfile=None):
    """

    Parameters
    ----------
    wel_files :
        A head line with column names is assumed. For example:
        #k,i,j,q,boundname

    perioddata :
    add_data :
    model_volume_units :
    model_time_units :
    plot_volume_units :
    plot_time_units :

    Returns
    -------

    """

    # read the stress period information
    if not isinstance(perioddata, pd.DataFrame):
        perioddata = pd.read_csv(perioddata)
    else:
        perioddata = perioddata.copy()
    perioddata.index = perioddata['per']

    dfs = []
    for i, f in wel_files.items():
        df = pd.read_csv(f, delim_whitespace=True)
        df.columns = [c.strip('#') for c in df.columns]
        df['per'] = i
        df['start_datetime'] = perioddata.loc[i, 'start_datetime']
        df['end_datetime'] = perioddata.loc[i, 'end_datetime']
        dfs.append(df)
    df = pd.concat(dfs)

    # sum the model pumping by stress period
    period_sums = df.groupby('per').first()
    period_sums[wel_flux_col] = df.groupby('per')[wel_flux_col].sum()
    # fill nan values (from any periods without wel files) with 0s
    period_sums = period_sums.reindex(range(period_sums.index.max() + 1))
    period_sums['start_datetime'] = perioddata['start_datetime']
    period_sums['end_datetime'] = perioddata['end_datetime']
    period_sums[wel_flux_col].fillna(0, inplace=True)
    period_sums.index = pd.to_datetime(period_sums['start_datetime'])
    period_sums['WEL package input'] = period_sums[wel_flux_col]
    period_sums = period_sums[['WEL package input', 'start_datetime', 'end_datetime']]

    # convert units
    model_vol_conv = convert_volume_units(model_volume_units, plot_volume_units)
    model_time_conv = convert_time_units(model_time_units, plot_time_units)
    model_conv = model_vol_conv * model_time_conv

    # plot any additional comparison data
    if add_data is not None:
        for label, items in add_data.items():
            # read the stress period information
            if not isinstance(items['data'], pd.DataFrame):
                items['data'] = pd.read_csv(items['data'])
            req_cols = {'q', 'start_datetime'}
            assert not req_cols.difference(items['data'].columns), \
                f"add_data: {label} data must have columns: {req_cols}"

            items['data']['start_datetime'] = pd.to_datetime(items['data']['start_datetime'])
            aux_period_sums = items['data'].groupby('start_datetime').first()
            aux_period_sums[label] = items['data'].groupby('start_datetime')['q'].sum()
            # fill nan values (from any periods without wel files) with 0s
            #aux_period_sums[label].fillna(0, inplace=True)
            aux_period_sums['start_datetime'] = aux_period_sums.index

            period_sums = period_sums.join(aux_period_sums[[label]], how='outer')

    # forward fill nan WEL values
    # (where other times may have been inserted)
    period_sums['WEL package input'] = period_sums['WEL package input'].ffill()
    #period_sums = period_sums.resample('M').mean() #.ffill()

    # make a plot
    fig, ax = plt.subplots(figsize=(11, 8.5))
    ax = period_sums.plot(ax=ax)
    units_text = f'{model_volume_units}/{model_time_units}'
    ax.set_ylabel(f'Pumpage, in {units_text}')
    ax.set_xlabel('')

    # second axis with another volume unit
    def second_axis_conversion(x):
        return x * model_conv

    def second_axis_conversion_r(x):
        return x * 1 / model_conv
    ax2 = ax.secondary_yaxis('right', functions=(second_axis_conversion,
                                                 second_axis_conversion_r))
    ax2.set_ylabel(f'Pumpage, in {plot_volume_units}/{plot_time_units}')
    #format_xtick_labels(period_sums, ax, maxlabels=30, date_format='%Y-%m-%d')
    h, l = ax.get_legend_handles_labels()
    means = (period_sums.mean(axis=0) * model_conv).to_dict()
    plot_units_text = f'{plot_volume_units}/{plot_time_units}'
    labels_with_means = []
    for label in l:
        new_label = label
        if label in means:
            new_label += f' (mean: {means[label]:g} {plot_units_text})'
        labels_with_means.append(new_label)
    ax.legend(h, labels_with_means)

    if outfile is not None:
        Path(outfile).parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(outfile)
        plt.close()
        print(f'wrote {outfile}')
    else:
        return ax
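
A hedged usage sketch: the WEL file paths and stress period table below are hypothetical, but the dictionary keys and required columns follow the code above:

# Hypothetical paths; wel_files maps stress period -> WEL external file
wel_files = {1: 'external/wel_000.dat',
             2: 'external/wel_001.dat'}
# the CSV needs 'per', 'start_datetime' and 'end_datetime' columns
ax = plot_wateruse(wel_files, 'tables/stress_period_data.csv',
                   wel_flux_col='q',
                   model_volume_units='$m^3$', model_time_units='day',
                   plot_volume_units='mgal', plot_time_units='day')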
Example 5
    def setup_lak(self):
        """
        Sets up the Lake package.

        Parameters
        ----------

        Notes
        -----

        """
        package = 'lak'
        print('\nSetting up {} package...'.format(package.upper()))
        t0 = time.time()
        if self.lakarr.sum() == 0:
            print("lakes_shapefile not specified, or no lakes in model area")
            return

        # option to write connectiondata to external file
        external_files = self.cfg['lak']['external_files']

        # source data
        source_data = self.cfg['lak']['source_data']

        # munge lake package input
        # returns dataframe with information for each lake
        self.lake_info = setup_lake_info(self)

        # returns dataframe with connection information
        connectiondata = setup_lake_connectiondata(self, for_external_file=external_files)
        # lakeno column will have # in front if for_external_file=True
        lakeno_col = [c for c in connectiondata.columns if 'lakeno' in c][0]
        nlakeconn = connectiondata.groupby(lakeno_col).count().iconn.to_dict()
        offset = 0 if external_files else 1
        self.lake_info['nlakeconn'] = [nlakeconn[id - offset] for id in self.lake_info['lak_id']]

        # set up the tab files
        # (start with an empty list so the ntables/tables arguments below
        #  remain valid when no stage/area/volume tables are configured)
        tab_files = []
        if 'stage_area_volume_file' in source_data:
            tab_files = setup_lake_tablefiles(self, source_data['stage_area_volume_file'])

            # tabfiles aren't rewritten by flopy on package write
            self.cfg['lak']['tab_files'] = tab_files
            # kludge to deal with ugliness of lake package external file handling
            # (need to give path relative to model_ws, not folder that flopy is working in)
            tab_files_argument = [os.path.relpath(f) for f in tab_files]

        # todo: implement lake outlets with SFR

        # perioddata
        self.lake_fluxes = setup_lake_fluxes(self)
        lakeperioddata = get_lakeperioddata(self.lake_fluxes)

        # set up external files
        connectiondata_cols = [lakeno_col, 'iconn', 'k', 'i', 'j', 'claktype', 'bedleak',
                               'belev', 'telev', 'connlen', 'connwidth']
        if external_files:
            # get the file path (allowing for different external file locations, specified name format, etc.)
            filepath = self.setup_external_filepaths(package, 'connectiondata',
                                                     self.cfg[package]['connectiondata_filename_fmt'])
            connectiondata[connectiondata_cols].to_csv(filepath[0]['filename'], index=False, sep=' ')
            # make a copy for the intermediate data folder, for consistency with mf-2005
            shutil.copy(filepath[0]['filename'], self.cfg['intermediate_data']['output_folder'])
        else:
            connectiondata_cols = connectiondata_cols[:2] + ['cellid'] + connectiondata_cols[5:]
            self.cfg[package]['connectiondata'] = connectiondata[connectiondata_cols].values.tolist()

        # set up input arguments
        kwargs = self.cfg[package].copy()
        options = self.cfg[package]['options'].copy()
        renames = {'budget_fileout': 'budget_filerecord',
                   'stage_fileout': 'stage_filerecord'}
        for k, v in renames.items():
            if k in options:
                options[v] = options.pop(k)
        kwargs.update(options)
        kwargs['time_conversion'] = convert_time_units(self.time_units, 'seconds')
        kwargs['length_conversion'] = convert_length_units(self.length_units, 'meters')
        kwargs['nlakes'] = len(self.lake_info)
        kwargs['noutlets'] = 0  # not implemented
        # [lakeno, strt, nlakeconn, aux, boundname]
        packagedata_cols = ['lak_id', 'strt', 'nlakeconn']
        if kwargs.get('boundnames'):
            packagedata_cols.append('name')
        packagedata = self.lake_info[packagedata_cols].copy()
        packagedata['lak_id'] -= 1  # convert to zero-based
        kwargs['packagedata'] = packagedata.values.tolist()
        kwargs['ntables'] = len(tab_files)
        kwargs['tables'] = [(i, f, 'junk', 'junk') for i, f in enumerate(tab_files)]
        kwargs['outlets'] = None  # not implemented
        #kwargs['outletperioddata'] = None  # not implemented
        kwargs['perioddata'] = lakeperioddata

        # observations
        kwargs['observations'] = setup_mf6_lake_obs(kwargs)

        kwargs = get_input_arguments(kwargs, mf6.ModflowGwflak)
        lak = mf6.ModflowGwflak(self, **kwargs)
        print("finished in {:.2f}s\n".format(time.time() - t0))
        return lak
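
The time_conversion and length_conversion arguments reduce to scalar factors. The sketch below shows what they evaluate to for a model with time units of days and length units of meters (the helper import path is an assumption):

# Assumed import path for the unit-conversion helpers
from mfsetup.units import convert_length_units, convert_time_units

time_conversion = convert_time_units('days', 'seconds')       # 86400.0, per the tests above
length_conversion = convert_length_units('meters', 'meters')  # 1.0 (no conversion needed)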
Example 6
def test_parse_source_data(source_data_cases,
                           source_data_from_model_cases,
                           pfl_nwt_with_grid, project_root_path):
    model = pfl_nwt_with_grid
    cases = source_data_cases + source_data_from_model_cases
    results = []

    sd = TabularSourceData.from_config(cases[0], type='tabular')
    assert isinstance(sd.filenames, dict)
    assert sd.length_unit_conversion == 1.
    assert sd.time_unit_conversion == 1.
    assert sd.unit_conversion == 1.

    sd = TabularSourceData.from_config(cases[1], type='tabular')
    assert isinstance(sd.filenames, dict)

    sd = TabularSourceData.from_config(cases[2], type='tabular')
    assert isinstance(sd.filenames, dict)

    sd = TabularSourceData.from_config(cases[3]['features_shapefile'])
    assert isinstance(sd.filenames, dict)

    var = 'rech'
    sd = ArraySourceData.from_config(cases[4]['infiltration_arrays'],
                                     variable=var,
                                     type='array')
    assert isinstance(sd.filenames, dict)
    assert sd.unit_conversion == 1. # no dest model

    sd = TabularSourceData.from_config(cases[9]['flowlines']['nhdplus_paths'])
    assert isinstance(sd.filenames, dict)

    # test conversion to model units
    for i, f in cases[4]['infiltration_arrays']['filenames'].items():
        cases[4]['infiltration_arrays']['filenames'][i] = os.path.join(project_root_path, f)
    sd = ArraySourceData.from_config(cases[4]['infiltration_arrays'],
                                     variable=var,
                                     dest_model=model)
    assert isinstance(sd.filenames, dict)
    assert sd.unit_conversion == convert_length_units('inches', 'meters') *\
        convert_time_units('years', 'days')
    data = sd.get_data()
    assert isinstance(data, dict)
    assert len(data) == len(cases[4]['infiltration_arrays']['filenames'])
    assert data[0].shape == model.modelgrid.shape[1:]
    assert sd.unit_conversion == 1/12 * .3048 * 1/365.25

    # test averaging of layer between two files
    sd = ArraySourceData.from_config(cases[6]['hk'],
                                     variable='hk',
                                     dest_model=model)
    data = sd.get_data()
    assert isinstance(sd.filenames, dict)
    assert np.allclose(data[1].mean(axis=(0, 1)), cases[6]['hk'][1])

    # test averaging of layers provided in source array
    sd = ArraySourceData.from_config(source_data_from_model_cases[0],
                                     variable='botm',
                                     dest_model=model)
    data = sd.get_data()
    mask = sd._source_grid_mask
    arr0 = sd.regrid_from_source_model(sd.source_array[0],
                                        mask=mask,
                                        method='linear')
    arr1 = sd.regrid_from_source_model(sd.source_array[1],
                                        mask=mask,
                                        method='linear')
    assert np.allclose(np.mean([arr0, arr1], axis=(0)), data[0])

    # TODO: write test for multiplier intermediate layers

    # test mapping of layers from binary file;
    # based on layer bottom mapping
    filename = source_data_from_model_cases[2]['from_parent']['binaryfile']
    source_model = pfl_nwt_with_grid.parent
    modelname = 'parent'
    pfl_nwt_with_grid._parent_layers = {0: -0.5, 1: 0, 2: 1, 3: 2, 4: 3}
    sd = MFBinaryArraySourceData(variable='strt', filename=filename,
                                 dest_model=model,
                                 source_modelgrid=source_model.modelgrid,
                                 from_source_model_layers={},
                                 length_units=model.cfg[modelname]['length_units'],
                                 time_units=model.cfg[modelname]['time_units'])
    data = sd.get_data()
    # first two layers in dest model should both be from parent layer 0
    mask = sd._source_grid_mask
    arr0 = sd.regrid_from_source_model(sd.source_array[0],
                                       mask=mask,
                                       method='linear')
    assert np.array_equal(data[0], data[1])
    assert np.array_equal(arr0, data[0])
    pfl_nwt_with_grid._parent_layers = None # reset
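
For reference, the infiltration array case used above implies a source_data block along the following lines (structure inferred from the test; the raster path is hypothetical). With a destination model in meters and days, the combined multiplier is convert_length_units('inches', 'meters') * convert_time_units('years', 'days') = 0.0254 * 1/365.25, matching the assertion above:

# Sketch of an infiltration_arrays source_data block (structure inferred from the test)
infiltration_arrays = {
    'filenames': {0: 'source_data/net_infiltration.tif'},  # hypothetical raster, keyed by integer
    'length_units': 'inches',
    'time_units': 'years',
}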