Example #1
def valid_nsegs(nsegs, outsegs=None, increasing=True):
    """Check that segment numbers are valid.

    Parameters
    ----------
    nsegs : list of segment numbers
    outsegs : list of corresponding routing connections
        Required if increasing=True.
    increasing : bool
        If True, segment numbers must also only increase downstream.
    """
    # cast to arrays if lists or Series
    nsegs = np.atleast_1d(nsegs)
    consecutive_and_onebased = valid_rnos(nsegs)
    if increasing:
        assert outsegs is not None, 'outsegs are required if increasing=True'
        outsegs = np.atleast_1d(outsegs)
        graph = make_graph(nsegs, outsegs, one_to_many=False)
        monotonic = []
        for s in nsegs:
            seg_sequence = find_path(graph.copy(),
                                     s)[:-1]  # last number is 0 for outlet
            monotonic.append(np.all(np.diff(np.array(seg_sequence)) > 0))
        monotonic = np.all(monotonic)
        return consecutive_and_onebased & monotonic
    else:
        return consecutive_and_onebased
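All of these examples lean on make_graph and find_path from sfrmaker's routing module (along with numpy imported as np). A minimal sketch of what those two helpers appear to do, inferred from how they are called here; the actual sfrmaker implementations differ in details such as one-to-many routing and recursion limits:

def make_graph(fromids, toids, one_to_many=False):
    # one-to-one routing: map each id to the single id it flows to
    return dict(zip(fromids, toids))

def find_path(graph, start):
    # Follow the routing graph downstream from start; by convention,
    # 0 marks the outlet. Stop if an id repeats, so that circular
    # routing is detectable instead of looping forever.
    path = [start]
    while path[-1] != 0:
        next_id = graph.get(path[-1], 0)
        path.append(next_id)
        if next_id != 0 and path.count(next_id) > 1:
            break
    return path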
Example #2
def add_to_perioddata(sfrdata,
                      data,
                      flowline_routing=None,
                      variable='inflow',
                      line_id_column_in_data=None,
                      rno_column_in_data=None,
                      period_column_in_data='per',
                      variable_column_in_data='Q_avg'):
    """Add data to the period data table (sfrdata.period_data)
    for a MODFLOW-6 style sfrpackage. Source data

    """
    sfrd = sfrdata

    # cull data to valid periods
    data = data.loc[data[period_column_in_data] >= 0].copy()

    # map NHDPlus COMIDs to reach numbers
    if flowline_routing is not None:
        assert line_id_column_in_data in data.columns, \
            "Data need an id column so {} locations can be mapped to reach numbers".format(variable)
        rno_column_in_data = 'rno'
        r1 = sfrd.reach_data.loc[sfrd.reach_data.ireach == 1]
        line_id_rno_mapping = dict(zip(r1['line_id'], r1['rno']))
        line_ids = get_next_id_in_subset(r1.line_id, flowline_routing,
                                         data[line_id_column_in_data])
        data[rno_column_in_data] = [
            line_id_rno_mapping[lid] for lid in line_ids
        ]
    else:
        assert rno_column_in_data in data.columns, \
            "Data to add need a reach number column, or flowline routing information must be supplied."

    # check for duplicate inflows in same path
    # (only possible to check when routing information is supplied)
    if variable == 'inflow' and flowline_routing is not None:
        line_ids = set(data[line_id_column_in_data])
        drop = set()
        dropped_line_info_file = 'dropped_inflows_locations.csv'
        for lid in line_ids:
            path = find_path(flowline_routing, start=lid)
            duplicated = set(path[1:]).intersection(line_ids)
            if len(duplicated) > 0:
                drop.add(lid)
                txt = ('warning: {}: {} is upstream '
                       'of the following line_ids:\n{}\n'
                       'see {} for details.').format(line_id_column_in_data,
                                                     lid, duplicated,
                                                     dropped_line_info_file)
                print(txt)
        if len(drop) > 0:
            data.loc[data[line_id_column_in_data].isin(drop)].to_csv(
                dropped_line_info_file, index=False)
            data = data.loc[~data[line_id_column_in_data].isin(drop)]

    # add inflows to period_data
    period_data = sfrd.period_data
    period_data['rno'] = data[rno_column_in_data]
    period_data['per'] = data[period_column_in_data]
    period_data[variable] = data[variable_column_in_data]
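A hypothetical input table for this function, using the default period and flow column names (the COMID and flow values are made up), with the call itself commented out because it needs an SFRData instance:

import pandas as pd

inflows = pd.DataFrame({
    'per': [0, 0, 1],                      # stress periods
    'comid': [1000001, 1000002, 1000001],  # source hydrography line ids
    'Q_avg': [0.5, 1.2, 0.7],              # flows, in model units
})
# add_to_perioddata(sfrd, inflows, flowline_routing=routing,
#                   line_id_column_in_data='comid')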
Example #3
def routing_is_circular(fromid, toid):
    """Verify that segments or reaches never route to themselves.

    Parameters
    ----------
    fromid : list or 1D array
        e.g. COMIDS, segments, or rnos
    toid : list or 1D array
        routing connections
    """
    fromid = np.atleast_1d(fromid)
    toid = np.atleast_1d(toid)

    graph = make_graph(fromid, toid, one_to_many=False)
    paths = {fid: find_path(graph, fid) for fid in graph.keys()}
    # a fromid should not appear more than once in its sequence
    for k, v in paths.items():
        if v.count(k) > 1:
            return True
    return False
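A quick check using the sketch helpers above (with numpy imported as np):

print(routing_is_circular([1, 2, 3], [2, 3, 1]))  # True: 1 -> 2 -> 3 -> 1
print(routing_is_circular([1, 2], [2, 0]))        # False: terminates at outlet 0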
Example #4
def _set_paths(self):
    routing = self.routing
    self._paths = {seg: find_path(routing, seg) for seg in routing.keys()}
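Precomputing every downstream path once makes later lookups cheap; with the find_path sketch above:

routing = {1: 2, 2: 3, 3: 0}
paths = {seg: find_path(routing, seg) for seg in routing}
print(paths[1])  # [1, 2, 3, 0]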
Example #5
def add_to_segment_data(sfrdata,
                        data,
                        flowline_routing=None,
                        variable='flow',
                        line_id_column=None,
                        segment_column=None,
                        period_column='per',
                        data_column='Q_avg'):
    """Like add_to_perioddata, but for MODFLOW-2005.
    """
    sfrd = sfrdata

    # cull data to valid periods
    data = data.loc[data[period_column] >= 0].copy()

    # map NHDPlus COMIDs to segment numbers
    if flowline_routing is not None:
        assert line_id_column in data.columns, \
            "Data need an id column so {} locations can be mapped to reach numbers".format(variable)
        # replace ids that are not keys (those outside the network) with zeros
        # (0 is the exit condition for finding paths in get_next_id_in_subset)
        flowline_routing = {
            k: v if v in flowline_routing.keys() else 0
            for k, v in flowline_routing.items()
        }
        segment_column = 'segment'
        r1 = sfrd.reach_data.loc[sfrd.reach_data.ireach == 1]
        line_id_iseg_mapping = dict(zip(r1['line_id'], r1['iseg']))
        line_ids = get_next_id_in_subset(r1.line_id, flowline_routing,
                                         data[line_id_column])
        data[segment_column] = [line_id_iseg_mapping[lid] for lid in line_ids]
    else:
        assert segment_column in data.columns, \
            "Data to add need a segment number column, or flowline routing information must be supplied."

    # check for duplicate inflows in same path
    # (only possible to check when routing information is supplied)
    if variable == 'flow' and flowline_routing is not None:
        line_ids = set(data[line_id_column])
        drop = set()
        dropped_line_info_file = 'dropped_inflows_locations.csv'
        for lid in line_ids:
            path = find_path(flowline_routing, start=lid)
            duplicated = set(path[1:]).intersection(line_ids)
            if len(duplicated) > 0:
                drop.add(lid)
                txt = ('warning: {}: {} is upstream '
                       'of the following line_ids:\n{}\n'
                       'see {} for details.').format(line_id_column, lid,
                                                     duplicated,
                                                     dropped_line_info_file)
                print(txt)
        if len(drop) > 0:
            data.loc[data[line_id_column].isin(drop)].to_csv(
                dropped_line_info_file, index=False)
            data = data.loc[~data[line_id_column].isin(drop)]

    # rename columns in data to be added to same names as segment_data
    data.rename(columns={
        period_column: 'per',
        segment_column: 'nseg',
        data_column: variable
    },
                inplace=True)
    # update existing segment data
    sfrd.segment_data.index = pd.MultiIndex.from_tuples(zip(
        sfrd.segment_data.per, sfrd.segment_data.nseg),
                                                        names=['per', 'nseg'])
    loc = list(zip(data.per, data.nseg))
    data.index = pd.MultiIndex.from_tuples(loc, names=['per', 'nseg'])
    replace = sorted(
        list(set(data.index).intersection(sfrd.segment_data.index)))
    add = sorted(list(set(data.index).difference(sfrd.segment_data.index)))
    sfrd.segment_data.loc[replace, variable] = data.loc[replace, variable]

    # concat on the added data (create additional rows in segment_data table)
    to_concat = [sfrd.segment_data]
    period_groups = data.loc[add, ['per', 'nseg', variable]].reset_index(
        drop=True).groupby('per')
    for per, group in period_groups:
        # start with existing data (row) for that segment
        df = sfrd.segment_data.loc[(slice(None, None), group.nseg), :].copy()
        df['per'] = per
        df.index = pd.MultiIndex.from_tuples(zip(df.per, df.nseg),
                                             names=['per', 'nseg'])
        df[variable] = group[variable].values
        to_concat.append(df)
    sfrd.segment_data = pd.concat(to_concat).reset_index(drop=True)
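The replace/add bookkeeping above boils down to set operations on a (per, nseg) MultiIndex; a self-contained sketch of that pattern (values are made up):

import pandas as pd

existing = pd.DataFrame(
    {'flow': [0.0, 0.0]},
    index=pd.MultiIndex.from_tuples([(0, 1), (0, 2)], names=['per', 'nseg']))
incoming = pd.DataFrame(
    {'flow': [5.0, 7.0]},
    index=pd.MultiIndex.from_tuples([(0, 2), (1, 1)], names=['per', 'nseg']))

replace = sorted(set(incoming.index) & set(existing.index))  # [(0, 2)]
add = sorted(set(incoming.index) - set(existing.index))      # [(1, 1)]
existing.loc[replace, 'flow'] = incoming.loc[replace, 'flow']
# rows keyed by 'add' are then built from existing segment data and concatenated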
Example #6
def add_to_perioddata(sfrdata,
                      data,
                      flowline_routing=None,
                      variable='inflow',
                      line_id_column=None,
                      rno_column=None,
                      period_column='per',
                      data_column='Q_avg',
                      one_inflow_per_path=False,
                      distribute_flows_to_reaches=False):
    """Add data to the period data table (sfrdata.period_data)
    for a MODFLOW-6 style SFR package.

    Parameters
    ----------
    sfrdata : sfrmaker.SFRData instance
        SFRData instance with a reach_data table attribute. Reach numbers
        are assumed to be in an 'rno' column.
    data : DataFrame, path to csv file, or list of DataFrames or file paths
        Table of values to add. Must have either reach numbers (rno_column)
        or line_ids (line_id_column).
    flowline_routing : dict
        Optional dictionary of routing for source hydrography. Only needed
        if locating by line_id, and SFR network is a subset of the full source
        hydrography (i.e. some lines were dropped in the creation of the SFR package,
        or if the sites are inflow points corresponding to lines outside of the model perimeter).
        In this case, observation points referenced to line_ids that are missing from the SFR
        network are placed at the first reach corresponding to the next downstream line_id
        that is represented in the SFR network. By default, None.
    variable : str, optional
        MODFLOW-6 period variable (see the MODFLOW-6 Description of Input
        and Output), by default 'inflow'.
    line_id_column : str
        Column in data matching observation sites to line_ids in the source hydrography data.
        Either line_id_column or rno_column must be specified. By default, None.
    rno_column : str
        Column in data matching observation sites to reach numbers in the SFR network. By default, None.
    period_column : str, optional
        Column with the MODFLOW stress period for each value, by default 'per'.
    data_column : str, optional
        Column with flow values, by default 'Q_avg'
    one_inflow_per_path : bool, optional
        Limit inflows to one per (headwater to outlet) routing path, choosing the inflow location 
        that is furthest downstream. By default, False.
    distribute_flows_to_reaches : bool, optional
        Option to distribute any flows specified by line_id evenly across the reaches
        associated with that line. Otherwise, all flow is applied to the first reach.
        By default, False.

    Returns
    -------
    Updates the sfrdata.period_data DataFrame.
    """
    print(f'adding {variable} to the SFR package stress period data...')
    sfrd = sfrdata

    # allow input via a list of tables or single table
    data = read_tables(data)

    # cull data to valid periods
    data = data.loc[data[period_column] >= 0].copy()

    # map NHDPlus COMIDs to reach numbers
    if flowline_routing is not None:
        assert line_id_column in data.columns, \
            "Data need an id column so {} locations can be mapped to reach numbers".format(variable)
        # replace ids that are not keys (those outside the network) with zeros
        # (0 is the exit condition for finding paths in get_next_id_in_subset)
        flowline_routing = {
            k: v if v in flowline_routing.keys() else 0
            for k, v in flowline_routing.items()
        }
        rno_column = 'rno'
        r1 = sfrd.reach_data.loc[sfrd.reach_data.ireach == 1]
        line_id_rno_mapping = dict(zip(r1['line_id'], r1['rno']))

        # check to make sure that all of the IDs in data
        # are in the routing information
        # exclude the outlet ID (0)
        valid_ids = set(r1.line_id).union(flowline_routing.keys()) \
                                   .union(flowline_routing.values()) \
                                   .difference({0})
        in_routing = np.array(
            [line_id in valid_ids for line_id in data[line_id_column]])
        if np.any(~in_routing):
            msg = ("sfrmaker.flows.add_to_perioddata: The following "
                   f"{np.sum(~in_routing)} line_ids in data\nare not "
                   "associated with SFR reaches or in the supplied "
                   "flowline_routing information:\n"
                   f"{data[~in_routing]}\n")
            # don't allow any inflow values that are un-routed
            # (probably a mistake)
            if variable == 'inflow':
                raise ValueError(msg)
            # for runoff or other variables that are widely distributed
            # (not discrete point locations), the input data may often
            # contain comids that don't route to anywhere in the model
            else:
                msg += f"{variable} for these line_ids will not be applied"
                warnings.warn(msg)
        data = data.loc[in_routing]
        line_ids = get_next_id_in_subset(r1.line_id, flowline_routing,
                                         data[line_id_column])
        data['line_id_in_model'] = line_ids

        # sum data by line_id_in_model
        # (might have multiple original line_ids contributing
        #  to each line_id_in_model)
        if len(set(data['line_id_in_model'])) < len(set(data[line_id_column])):
            by_line_id_in_model = data.groupby(['per', 'line_id_in_model'])
            sums = by_line_id_in_model.sum()[[data_column]].reset_index()
            # preserve other columns that were dropped in sum()
            # (e.g. datetime)
            other_columns = data.columns.difference(sums.columns).difference(
                {line_id_column})
            for c in other_columns:
                sums[c] = by_line_id_in_model.first().reset_index()[c]
            # reassign to 'data' variable
            data = sums
            del sums

        # the 'data' DataFrame currently has 1 row per line id
        # optionally expand 'data' to have 1 row per reach
        # (multiple rows per line id), with the applied value
        # evenly split among the reaches for each line id
        if distribute_flows_to_reaches:

            # get a subset of reach_data that only includes
            # the lines in the 'data' DataFrame
            lines_in_data = sfrd.reach_data['line_id'].isin(
                data['line_id_in_model'])
            reach_data = sfrd.reach_data.loc[lines_in_data,
                                             ['rno', 'line_id']].copy()
            # rename line_id column for consistency with
            # distribute_flows_to_reaches=False
            reach_data.rename(columns={'line_id': 'line_id_in_model'},
                              inplace=True)

            # get the number of reaches associated with each line
            reach_counts = reach_data.groupby(
                'line_id_in_model').count()['rno'].to_dict()
            by_period = data.groupby('per')

            dfs = []
            for per, group in by_period:

                # line_ids should be unique at this point
                assert len(group['line_id_in_model'].unique()) == len(group)

                values_by_line = dict(
                    zip(group['line_id_in_model'], group[data_column]))
                perioddata = reach_data.copy()
                perioddata['per'] = per
                # transfer other data from group to expanded perioddata dataframe
                other_columns = [
                    c for c in group.columns if c not in {
                        rno_column, period_column, data_column, line_id_column,
                        'line_id_in_model'
                    }
                ]
                for c in other_columns:
                    other_column_values_by_line = dict(
                        zip(group['line_id_in_model'], group[c]))
                    perioddata[c] = [
                        other_column_values_by_line.get(line_id)
                        for line_id in perioddata['line_id_in_model']
                    ]

                # add the flows apportioned to each reach
                values_by_reach = [
                    values_by_line.get(line_id, 0) / reach_counts[line_id]
                    for line_id in perioddata['line_id_in_model']
                ]
                perioddata[data_column] = values_by_reach

                # check the sum
                group_lines_in_model = group['line_id_in_model'].isin(
                    perioddata['line_id_in_model'])
                expected_sum = group.loc[group_lines_in_model,
                                         data_column].sum()
                assert np.allclose(perioddata[data_column].sum(), expected_sum)
                dfs.append(perioddata)
            data = pd.concat(dfs)
        # otherwise, assign value to first reach associated with line
        else:
            data[rno_column] = [
                line_id_rno_mapping.get(lid, 0)
                for lid in data['line_id_in_model']
            ]
    else:
        assert rno_column in data.columns, \
            "Data to add need a reach number column, or flowline routing information must be supplied."

    # check for duplicate inflows in same path
    if variable == 'inflow' and one_inflow_per_path:
        line_ids = set(data['line_id_in_model'])
        drop = set()
        dropped_line_info_file = 'dropped_inflows_locations.csv'
        for lid in line_ids:
            path = find_path(flowline_routing, start=lid)
            duplicated = set(path[1:]).intersection(line_ids)
            if len(duplicated) > 0:
                drop.add(lid)
                txt = ('warning: {}: {} is upstream '
                       'of the following line_ids:\n{}\n'
                       'see {} for details.').format(line_id_column, lid,
                                                     duplicated,
                                                     dropped_line_info_file)
                print(txt)
        if len(drop) > 0:
            # drop is populated with line_id_in_model values,
            # so filter on that column
            data.loc[data['line_id_in_model'].isin(drop)].to_csv(
                dropped_line_info_file, index=False)
            data = data.loc[~data['line_id_in_model'].isin(drop)]

    # add inflows to period_data
    period_data = sfrd.period_data
    # set multiindex on data
    # to allow direct assignment of values at period, rnos to perioddata
    data.set_index(['per', 'rno'], inplace=True)
    # summarize values that route to line_id 0 (locations not in the model)
    data_period_mean_to_zero = data.loc[data['line_id_in_model'] == 0] \
        .groupby('per')[data_column].sum().mean()
    if np.isnan(data_period_mean_to_zero):
        data_period_mean_to_zero = 0.
    data_period_mean_in_model = data.loc[data['line_id_in_model'] != 0] \
        .groupby('per')[data_column].sum().mean()
    data_period_mean_tot = data_period_mean_in_model + data_period_mean_to_zero
    pct_routed = data_period_mean_in_model / data_period_mean_tot

    # join data to perioddata
    # so that index includes existing periods, rnos
    # and those in data
    # (df.update doesn't yet support outer joins)
    original_cols = period_data.columns
    period_data = period_data.join(data[[data_column]],
                                   how='outer',
                                   rsuffix='data_')
    period_data = period_data[original_cols]

    # add the new values
    # add any aux columns to period_data if they aren't there
    # if aux columns already exist, just update the values
    # (preserving any values already in period_data but not in data)
    data[variable] = data[data_column]
    if line_id_column in data:
        data['specified_line_id'] = data[line_id_column]
        if 'specified_line_id' not in period_data:
            period_data['specified_line_id'] = data[line_id_column]
    update_columns = [
        c for c in data.columns
        if c not in {rno_column, period_column, data_column, line_id_column}
    ]
    # add any aux columns to period_data if they aren't there
    for c in update_columns:
        if c not in period_data.columns:
            period_data[c] = data[c]
    # do the update
    period_data.update(data[update_columns])
    # explicitly cast the variable dtype to float
    period_data[variable] = period_data[variable].astype(float)

    # report mean value by period in input data vs SFR period data
    period_data_period_mean = period_data.groupby('per')[variable].sum().mean()
    pct_in_model = period_data_period_mean / data_period_mean_tot
    text = (f"Stress period mean {variable} in input data routing to a"
            f" model SFR segment: {data_period_mean_in_model:,g}"
            f" ({pct_routed:.1%})\n")
    text += (f"Stress period mean {variable} in input data not routing to a"
             f" model SFR segment: {data_period_mean_to_zero:,g}\n")
    text += (f"Stress period mean {variable} in SFR package Period Data:"
             f" {period_data_period_mean:,g}"
             f" ({pct_in_model:.1%})\n")
    print(text)

    sfrd._period_data = period_data
    print('done')
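The join-then-update step near the end works around DataFrame.update not adding rows: an outer join first expands the index to the union of (per, rno) keys, then update overwrites values in place. A self-contained sketch (values are made up):

import pandas as pd

def midx(tuples):
    return pd.MultiIndex.from_tuples(tuples, names=['per', 'rno'])

period_data = pd.DataFrame({'inflow': [1.0]}, index=midx([(0, 1)]))
new = pd.DataFrame({'inflow': [2.0, 3.0]}, index=midx([(0, 1), (0, 2)]))

original_cols = period_data.columns
# outer join expands the index to include the new (per, rno) keys
period_data = period_data.join(new, how='outer', rsuffix='_new')[original_cols]
# update overwrites in place wherever 'new' has non-NaN values
period_data.update(new)
print(period_data)  # inflow 2.0 at (0, 1); 3.0 at (0, 2)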
Example #7
def get_inflow_locations_from_parent_model(parent_reach_data,
                                           inset_reach_data,
                                           inset_grid,
                                           active_area=None):
    """Get places in an inset model SFR network where the parent SFR network crosses
    the inset model boundary, using common line ID numbers from parent and inset reach datasets.
    MF2005 or MF6 supported; if either dataset contains only reach numbers (is MODFLOW-6),
    the reach numbers are used as segment numbers, with each segment only having one reach.

    Parameters
    ----------
    parent_reach_data : str (filepath) or DataFrame
        SFR reach data for parent model. Must include columns:
        line_id : int; unique identifier for hydrography line that each reach is based on
        rno : int; unique identifier for each reach. Optional if iseg and ireach columns are included.
        iseg : int; unique identifier for each segment. Optional if rno is included.
        ireach : int; reach number within each segment. Optional if rno is included.
        geometry : shapely.geometry object representing location of each reach
    inset_reach_data : str (filepath) or DataFrame
        SFR reach data for inset model. Same columns as parent_reach_data,
        except a geometry column isn't needed. line_id values must correspond to
        same source hydrography as those in parent_reach_data.
    inset_grid : flopy.discretization.StructuredGrid instance describing model grid
        Must be in same coordinate system as geometries in parent_reach_data.
        Required only if active_area is None.
    active_area : shapely.geometry.Polygon object
        Describes the area of the inset model where SFR is applied. Used to find
        inset reaches from parent model. Must be in same coordinate system as
        geometries in parent_reach_data. Required only if inset_grid is None.

    Returns
    -------
    locations : DataFrame
        Columns:
        parent_segment : parent model segment
        parent_reach : parent model reach
        parent_rno : parent model reach number
        line_id : unique identifier for hydrography line that each reach is based on
    """

    # spatial reference instances defining parent and inset grids
    if isinstance(inset_grid, str):
        grid = load_modelgrid(inset_grid)
    elif isinstance(inset_grid, flopy.discretization.grid.Grid):
        grid = inset_grid
    else:
        raise ValueError('Unrecognized input for inset_grid')

    if active_area is None:
        l, r, b, t = grid.extent
        active_area = box(l, b, r, t)

    # parent and inset reach data
    if isinstance(parent_reach_data, str):
        prd = shp2df(parent_reach_data)
    elif isinstance(parent_reach_data, pd.DataFrame):
        prd = parent_reach_data.copy()
    else:
        raise ValueError('Unrecognized input for parent_reach_data')
    if 'rno' in prd.columns and 'iseg' not in prd.columns:
        prd['iseg'] = prd['rno']
        prd['ireach'] = 1
    mustinclude_cols = {'line_id', 'rno', 'iseg', 'ireach', 'geometry'}
    assert len(mustinclude_cols.intersection(
        prd.columns)) == len(mustinclude_cols)

    if isinstance(inset_reach_data, str):
        if inset_reach_data.endswith('.shp'):
            ird = shp2df(inset_reach_data)
        else:
            ird = pd.read_csv(inset_reach_data)
    elif isinstance(inset_reach_data, pd.DataFrame):
        ird = inset_reach_data.copy()
    else:
        raise ValueError('Unrecognized input for inset_reach_data')
    if 'rno' in ird.columns and 'iseg' not in ird.columns:
        ird['iseg'] = ird['rno']
        ird['ireach'] = 1
    mustinclude_cols = {'line_id', 'rno', 'iseg', 'ireach'}
    assert len(mustinclude_cols.intersection(
        ird.columns)) == len(mustinclude_cols)

    graph = make_graph(ird.rno.values, ird.outreach.values, one_to_many=False)

    # cull parent reach data to only lines that cross or are just upstream of inset boundary
    buffered = active_area.buffer(5000, cap_style=2)
    close = [g.intersects(buffered) for g in prd.geometry]
    prd = prd.loc[close]
    prd.index = prd.rno
    boundary = active_area.exterior
    inset_line_id_connections = {}  # parent rno: inset line_id
    for i, r in prd.iterrows():
        if r.outreach not in prd.index:
            continue
        downstream_line = prd.loc[r.outreach, 'geometry']
        intersects = r.geometry.intersects(boundary)
        intersects_downstream = downstream_line.within(active_area)
        in_inset_model = r.geometry.within(active_area)
        if intersects_downstream:
            if intersects:
                # lines that cross the boundary route to their
                # counterpart in the inset model
                inset_line_id_connections[r.rno] = r.line_id
            elif not in_inset_model:
                # lines that route to a line within the inset model
                # route to that line's inset counterpart
                inset_line_id_connections[r.rno] = prd.loc[r.outreach,
                                                           'line_id']

    prd = prd.loc[prd.rno.isin(inset_line_id_connections.keys())]

    # parent rno lookup
    parent_rno_lookup = {v: k for k, v in inset_line_id_connections.items()}

    # inlet reaches in inset model
    ird = ird.loc[ird.ireach == 1]
    ird = ird.loc[ird.line_id.isin(inset_line_id_connections.values())]

    # for each reach in ird (potential inset inlets)
    # check that there isn't another inlet downstream
    drop_reaches = []
    for i, r in ird.iterrows():
        path = find_path(graph, r.rno)
        another_inlet_downstream = len(
            set(path[1:]).intersection(set(ird.rno))) > 0
        if another_inlet_downstream:
            drop_reaches.append(r.rno)

    ird = ird.loc[~ird.rno.isin(drop_reaches)]
    # look up the parent (iseg, ireach) for each parent reach number
    iseg_ireach = zip(prd.iseg, prd.ireach)
    parent_outlet_iseg_ireach = dict(zip(prd.rno, iseg_ireach))

    df = ird[['line_id', 'name', 'rno', 'iseg', 'ireach']].copy()
    df['parent_rno'] = [parent_rno_lookup[lid] for lid in df['line_id']]
    df['parent_iseg'] = [
        parent_outlet_iseg_ireach[rno][0] for rno in df['parent_rno']
    ]
    df['parent_ireach'] = [
        parent_outlet_iseg_ireach[rno][1] for rno in df['parent_rno']
    ]
    return df.reset_index(drop=True)
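The culling step above keeps parent reaches that fall within a flat-ended buffer of the active area; a small shapely illustration (coordinates are made up, and the 5000 buffer distance assumes projected length units):

from shapely.geometry import LineString, box

active_area = box(0, 0, 100000, 100000)
buffered = active_area.buffer(5000, cap_style=2)
reach = LineString([(-2000, 50000), (1000, 50000)])  # crosses the boundary
print(reach.intersects(buffered))              # True: keep this reach
print(reach.intersects(active_area.exterior))  # True: it crosses the edge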
Example #8
def add_to_perioddata(sfrdata,
                      data,
                      flowline_routing=None,
                      variable='inflow',
                      line_id_column=None,
                      rno_column=None,
                      period_column='per',
                      data_column='Q_avg'):
    """Add data to the period data table (sfrdata.period_data)
    for a MODFLOW-6 style SFR package.

    Parameters
    ----------
    sfrdata : sfrmaker.SFRData instance
        SFRData instance with a reach_data table attribute. Reach numbers
        are assumed to be in an 'rno' column.
    data : DataFrame, path to csv file, or list of DataFrames or file paths
        Table of values to add. Must have either reach numbers (rno_column)
        or line_ids (line_id_column).
    flowline_routing : dict
        Optional dictionary of routing for source hydrography. Only needed
        if locating by line_id, and SFR network is a subset of the full source
        hydrography (i.e. some lines were dropped in the creation of the SFR package,
        or if the sites are inflow points corresponding to lines outside of the model perimeter).
        In this case, observation points referenced to line_ids that are missing from the SFR
        network are placed at the first reach corresponding to the next downstream line_id
        that is represented in the SFR network. By default, None.
    variable : str, optional
        MODFLOW-6 period variable (see the MODFLOW-6 Description of Input
        and Output), by default 'inflow'.
    line_id_column : str
        Column in data matching observation sites to line_ids in the source hydrography data.
        Either line_id_column or rno_column must be specified. By default, None.
    rno_column : str
        Column in data matching observation sites to reach numbers in the SFR network. By default, None.
    period_column : str, optional
        Column with the MODFLOW stress period for each value, by default 'per'.
    data_column : str, optional
        Column with flow values, by default 'Q_avg'

    Returns
    -------
    Updates the sfrdata.period_data DataFrame.
    """
    sfrd = sfrdata

    # allow input via a list of tables or single table
    data = read_tables(data)

    # cull data to valid periods
    data = data.loc[data[period_column] >= 0].copy()

    # map NHDPlus COMIDs to reach numbers
    if flowline_routing is not None:
        assert line_id_column in data.columns, \
            "Data need an id column so {} locations can be mapped to reach numbers".format(variable)
        # replace ids that are not keys (those outside the network) with zeros
        # (0 is the exit condition for finding paths in get_next_id_in_subset)
        flowline_routing = {
            k: v if v in flowline_routing.keys() else 0
            for k, v in flowline_routing.items()
        }
        rno_column = 'rno'
        r1 = sfrd.reach_data.loc[sfrd.reach_data.ireach == 1]
        line_id_rno_mapping = dict(zip(r1['line_id'], r1['rno']))
        line_ids = get_next_id_in_subset(r1.line_id, flowline_routing,
                                         data[line_id_column])
        data['line_id_in_model'] = line_ids
        data[rno_column] = [line_id_rno_mapping[lid] for lid in line_ids]
    else:
        assert rno_column in data.columns, \
            "Data to add need a reach number column, or flowline routing information must be supplied."

    # check for duplicate inflows in same path
    # (only possible to check when routing information is supplied)
    if variable == 'inflow' and flowline_routing is not None:
        line_ids = set(data[line_id_column])
        drop = set()
        dropped_line_info_file = 'dropped_inflows_locations.csv'
        for lid in line_ids:
            path = find_path(flowline_routing, start=lid)
            duplicated = set(path[1:]).intersection(line_ids)
            if len(duplicated) > 0:
                drop.add(lid)
                txt = ('warning: {}: {} is upstream '
                       'of the following line_ids:\n{}\n'
                       'see {} for details.').format(line_id_column, lid,
                                                     duplicated,
                                                     dropped_line_info_file)
                print(txt)
        if len(drop) > 0:
            data.loc[data[line_id_column].isin(drop)].to_csv(
                dropped_line_info_file, index=False)
            data = data.loc[~data[line_id_column].isin(drop)]

    # add inflows to period_data
    period_data = sfrd.period_data
    period_data['rno'] = data[rno_column]
    period_data['per'] = data[period_column]
    period_data[variable] = data[data_column]
    if line_id_column in data.columns:
        period_data['specified_line_id'] = data[line_id_column]
    other_columns = [
        c for c in data.columns
        if c not in {rno_column, period_column, data_column, line_id_column}
    ]
    for c in other_columns:
        period_data[c] = data[c]