def test_original_routing_attribute(nhdplus_dataframe, one_to_many, datapath):
    """Check that ``Lines._original_routing`` passes ``is_to_one``.

    When ``one_to_many`` is truthy, the first three flowlines are given a
    second (fake) downstream connection before the ``Lines`` instance is
    created from the dataframe.
    """
    df = nhdplus_dataframe

    if one_to_many:
        # give the first three flowlines a fake distributary that
        # routes to the first COMID in the table
        fake_toid = df.COMID.values[0]
        first_three = df.index.values[:3]
        diverted = [[toids[0], fake_toid]
                    for toids in df.loc[first_three, 'tocomid']]
        untouched = df.loc[df.index.values[3]:, 'tocomid'].tolist()
        df['tocomid'] = diverted + untouched
        assert not is_to_one(df.tocomid)

    shapefile = '{}/tylerforks/NHDPlus/NHDSnapshot/Hydrography/NHDFlowline.shp'.format(
        datapath)
    prjfile = get_prj_file(NHDFlowlines=[shapefile])

    # arbolate sums: km -> m
    df['asum2'] = df.ArbolateSu * 1000

    # comid end elevations: cm -> m (only for the columns that are present)
    for source_col, dest_col in (('MAXELEVSMO', 'elevup'),
                                 ('MINELEVSMO', 'elevdn')):
        if source_col in df.columns:
            df[dest_col] = df[source_col] / 100.

    lines = sfrmaker.Lines.from_dataframe(
        df,
        id_column='COMID',
        routing_column='tocomid',
        name_column='GNIS_NAME',
        attr_length_units='meters',
        attr_height_units='meters',
        prjfile=prjfile,
    )

    # the to-ids stored in the original routing dictionary must be scalars
    assert is_to_one(lines._original_routing)
def routing(self):
    """Return the routing connections as a dictionary.

    Keys are line ids and values are the ids they route to. The result is
    cached on ``self._routing`` and is rebuilt only when the cache is empty
    or ``self._routing_changed()`` returns a truthy value.
    """
    # cached result is still valid: hand it back unchanged
    if self._routing is not None and not self._routing_changed():
        return self._routing

    toid = self.df.toid.values
    if len(toid) > 1:
        # is_to_one is True when all toids are scalar or length 1 lists,
        # i.e. routing is many-to-one or one-to-one (no diversions)
        to_one = is_to_one(toid)
        if not to_one:
            # wrap any scalars in lists and check again
            toid = [[t] if np.isscalar(t) else t for t in toid]
            to_one = is_to_one(toid)
        # squeeze it down
        toid = np.squeeze(list(toid))
        graph = make_graph(self.df.id.values, toid,
                           one_to_many=not to_one)
        if not to_one:
            # one-to-many graph: pass it to pick_toids with the
            # upstream elevations
            graph = pick_toids(graph, self.elevup)
    else:
        # at most one line: its only id is routed to 0
        graph = {self.df.id.values[0]: 0}

    self._routing = graph
    return self._routing
def test_load_nhdplus_hr(neversink_lines_from_nhdplus_hr):
    """The NHDPlus HR fixture is a ``Lines`` instance whose original routing
    passes ``is_to_one``.
    """
    loaded = neversink_lines_from_nhdplus_hr
    assert isinstance(loaded, sfrmaker.lines.Lines)
    assert is_to_one(loaded._original_routing)
def to_sfr(self, grid=None,
           active_area=None, isfr=None,
           model=None,
           model_length_units='undefined',
           model_time_units='days',
           minimum_reach_length=None,
           width_from_asum_a_param=0.1193,
           width_from_asum_b_param=0.5032,
           minimum_reach_width=1.,
           consolidate_conductance=False, one_reach_per_cell=False,
           add_outlets=None,
           package_name=None,
           **kwargs):
    """Create a streamflow routing dataset from the information
    in sfrmaker.lines class instance and a supplied sfrmaker.grid class instance.

    Parameters
    ----------
    grid : sfrmaker.grid or flopy.discretization.StructuredGrid
        Numerical model grid instance. Required unless an attached model
        has a valid modelgrid attribute.
    active_area : shapely Polygon, list of shapely Polygons, or shapefile path; optional
        Shapely Polygons must be in same CRS as input flowlines; shapefile
        features will be reprojected if their crs is different.
    isfr : ndarray, optional
        Numpy integer array of the same size as the model grid, designating area that will
        be populated with SFR reaches (0=no SFR; 1=SFR). An isfr array of shape
        nrow x ncol will be broadcast to all layers. Only required if a model is not
        supplied, or if SFR is only desired in a subset of active model cells.
        By default, None, in which case the model ibound or idomain array will be used.
    model : flopy.modflow.Modflow or flopy.mf6.ModflowGwf, optional
        Flopy model instance
    model_length_units : str; e.g. {'ft', 'feet', 'meters', etc.}, optional
        Length units of the model. While SFRmaker will try to read these
        from a supplied grid (first) and then a supplied model (second),
        it is good practice to specify them explicitly here.
    model_time_units : str; e.g. {'d', 'days'}, optional
        Time units for model. By default, days.
    minimum_reach_length : float, optional
        Minimum reach length to retain. Default is to compute
        an effective mean model cell length by taking the square root
        of the average cell area, and then set minimum_reach_length
        to 5% of effective mean cell length.
    width_from_asum_a_param : float, optional
        :math:`a` parameter used for estimating channel width from arbolate sum.
        Only needed if input flowlines are lacking width information.
        See :func:`~sfrmaker.utils.width_from_arbolate`. By default, 0.1193.
    width_from_asum_b_param : float, optional
        :math:`b` parameter used for estimating channel width from arbolate sum.
        Only needed if input flowlines are lacking width information.
        See :func:`~sfrmaker.utils.width_from_arbolate`. By default, 0.5032.
    minimum_reach_width : float, optional
        Minimum reach width to specify (in model units), if computing widths from
        arbolate sum values. (default = 1)
    consolidate_conductance : bool
        If True, total reach conductance in each cell is computed, and
        assigned to the most downstream reach via the hydraulic conductivity
        parameter.
    one_reach_per_cell : bool
        If True, streambed conductance in each reach is consolidated
        (consolidate_conductance = True), and additional reaches besides
        the most downstream reach are dropped.
    add_outlets : sequence of ints
        Option to add breaks in routing at specified line ids. For example
        if controlled flows out of a reservoir are specified as inflows
        to the SFR network, an outlet can be added above the dam to prevent
        double-counting of flow. A single int or str id is also accepted.
        By default, None
    package_name : str
        Base name for writing sfr output. By default, None, in which case
        ``model.name`` is used if a model was supplied, otherwise 'model'.
    kwargs : keyword arguments to :class:`SFRData`

    Returns
    -------
    sfrdata : sfrmaker.SFRData instance

    Raises
    ------
    TypeError
        If no usable grid can be determined from ``grid`` or ``model``.
    NotImplementedError
        If widths have to be estimated but ``self.df`` has no 'asum2' column,
        or if no width information is available for the segment data.
    AssertionError
        If routing is one-to-many, if a line is associated with more than one
        segment when interpolating supplied widths, or if the renumbered
        segments route circularly.

    Notes
    -----
    This method modifies the instance: the flowlines may be reprojected and
    culled in place, and columns of ``self.df`` (toid, asum1, width1, width2)
    are overwritten or added.
    """
    print("\nSFRmaker version {}".format(sfrmaker.__version__))
    print("\nCreating sfr dataset...")
    totim = time.time()

    # default isfr: cells that are active (== 1) in any layer of the model
    if flopy and active_area is None and isfr is None and model is not None:
        if model.version == 'mf6':
            isfr = np.sum(model.dis.idomain.array == 1, axis=0) > 0
        else:
            isfr = np.sum(model.bas6.ibound.array == 1, axis=0) > 0

    if flopy and isinstance(grid, flopy.discretization.StructuredGrid):
        print('\nCreating grid class instance from flopy Grid instance...')
        ta = time.time()
        grid = StructuredGrid.from_modelgrid(grid, active_area=active_area, isfr=isfr)
        print("grid class created in {:.2f}s\n".format(time.time() - ta))
    elif flopy and model is not None:
        grid = StructuredGrid.from_modelgrid(model.modelgrid, active_area=active_area, isfr=isfr)
    elif not isinstance(grid, sfrmaker.grid.Grid):
        raise TypeError('Unrecognized input for grid: {}'.format(grid))

    # print grid information to screen
    print(grid)

    # print model information to screen
    print(model)

    model_length_units = get_length_units(model_length_units, grid, model)
    mult = convert_length_units(self.attr_length_units, model_length_units)
    mult_h = convert_length_units(self.attr_height_units, model_length_units)
    gis_mult = convert_length_units(self.geometry_length_units, model_length_units)

    # to_crs the flowlines if they aren't in same CRS as grid
    if self.crs != grid.crs:
        self.to_crs(grid.crs)
    # cull the flowlines to the active part of the model grid
    if grid.active_area is not None:
        self.cull(grid.active_area, inplace=True, simplify=True, tol=2000)
    elif grid._bounds is not None:  # cull to grid bounding box if already computed
        self.cull(box(*grid._bounds), inplace=True)
    if package_name is None:
        if model is not None:
            package_name = model.name
        else:
            package_name = 'model'

    # routing connections (toid column) must be ints
    # (one-to-one or many-to-one), not lists (one-to-many)
    routing = self.routing.copy()

    # one to many routing is not supported
    to_one = is_to_one(routing.values())
    assert to_one, "routing is still one-to-many"
    valid_ids = routing.keys()
    # df.toid column is basis for routing attributes
    # all paths terminating in invalid toids (outside of the model)
    # will be none; set invalid toids = 0
    # TODO: write a test for pick_toids if some IDs route to more than one connection
    # Check the to-ids (values); iterating .items() yields tuples, so the
    # isinstance(..., list) test could never be True. List values would
    # fail in the membership test below anyway (unhashable).
    assert not any(isinstance(r, list) for r in routing.values()), \
        "one to many routing not supported"
    self.df.toid = [routing[i] if routing[i] in valid_ids else 0
                    for i in self.df.id.tolist()]

    # intersect lines with model grid to get preliminary reaches
    rd = self.intersect(grid)

    # length of intersected line fragments (in model units)
    rd['rchlen'] = np.array([g.length for g in rd.geometry]) * gis_mult

    # estimate widths if they aren't supplied
    if self.df.width1.sum() == 0:
        print("Computing widths...")

        # Arbolate sums must be supplied in an 'asum2' column; computing them
        # from the LineString lengths (via arbolate_sum) is disabled until
        # the length unit conversions for that option have been checked.
        if 'asum2' not in self.df.columns:
            raise NotImplementedError('Check length unit conversions before using this option.')
        # arbolate sum at the downstream end of each line, in meters
        asums = dict(zip(self.df.id,
                         self.df.asum2 * convert_length_units(self.attr_length_units, 'meters')))

        # populate starting asums (asum1): the asum of a line that routes
        # to this one (0 if there is none)
        routing_r = {v: k for k, v in self.routing.items() if v != 0}
        self.df['asum1'] = [asums.get(routing_r.get(lid, 0), 0)
                            for lid in self.df.id.values]
        asum1s = dict(zip(self.df.id, self.df.asum1))

        # compute arbolate sum at reach midpoints (in meters)
        lengths = rd[['line_id', 'ireach', 'geometry']].copy()
        lengths['rchlen'] = np.array([g.length for g in lengths.geometry]) * \
            convert_length_units(self.geometry_length_units, 'meters')
        groups = lengths.groupby('line_id')  # fragments grouped by parent line

        reach_cumsums = []
        # first row of each consecutive run of line_id values
        ordered_ids = rd.line_id.loc[rd.line_id.diff() != 0].values
        for lid in ordered_ids:
            grp = groups.get_group(lid).sort_values(by='ireach')
            # distance from the start of the line to each reach midpoint
            dist = np.cumsum(grp.rchlen.values) - 0.5 * grp.rchlen.values
            reach_cumsums.append(dist)
        reach_cumsums = np.concatenate(reach_cumsums)
        segment_asums = [asum1s[lid] for lid in lengths.line_id]
        # list + ndarray: element-wise addition
        reach_asums = segment_asums + reach_cumsums
        rd['asum'] = reach_asums
        width = width_from_arbolate_sum(reach_asums,
                                        a=width_from_asum_a_param,
                                        b=width_from_asum_b_param,
                                        minimum_width=minimum_reach_width,
                                        input_units='meters',
                                        output_units=model_length_units)
        rd['width'] = width
        rd.loc[rd.width < minimum_reach_width, 'width'] = minimum_reach_width

        # assign width1 and width2 back to segment data
        # NOTE(review): asum1 was populated above from values converted to
        # meters, but is passed here with input_units=self.attr_length_units;
        # confirm this is intended when attr_length_units is not meters.
        self.df['width1'] = width_from_arbolate_sum(self.df.asum1.values,
                                                    a=width_from_asum_a_param,
                                                    b=width_from_asum_b_param,
                                                    minimum_width=minimum_reach_width,
                                                    input_units=self.attr_length_units,
                                                    output_units=model_length_units)
        self.df['width2'] = width_from_arbolate_sum(self.df.asum2.values,
                                                    a=width_from_asum_a_param,
                                                    b=width_from_asum_b_param,
                                                    minimum_width=minimum_reach_width,
                                                    input_units=self.attr_length_units,
                                                    output_units=model_length_units)
    else:
        # interpolate linestring end widths to intersected reaches
        # verify that each linestring has only 1 segment associated with it
        # (interpolation might be wrong for multiple segments otherwise)
        assert rd.groupby('line_id').iseg.nunique().max() == 1
        # sort the linestring and reach data so that they are aligned
        self.df.sort_values(by='id', inplace=True)
        rd.sort_values(by=['line_id', 'ireach'], inplace=True)
        rd['width'] = interpolate_to_reaches(reach_data=rd,
                                             segment_data=self.df,
                                             segvar1='width1',
                                             segvar2='width2',
                                             reach_data_group_col='line_id',
                                             segment_data_group_col='id') * mult

    # discard very small reaches; redo numbering
    # set minimum reach length based on cell size
    thresh = 0.05  # fraction of cell length (based on square root of area)
    if minimum_reach_length is None:
        cellgeoms = grid.df.loc[rd.node.values, 'geometry']
        mean_area = np.mean([g.area for g in cellgeoms])
        minimum_reach_length = np.sqrt(mean_area) * thresh * gis_mult

    inds = rd.rchlen > minimum_reach_length
    print('\nDropping {} reaches with length < {:.2f} {}...'.format(np.sum(~inds),
                                                                    minimum_reach_length,
                                                                    model_length_units))
    rd = rd.loc[inds].copy()
    rd['strhc1'] = 1.  # default value of streambed Kv for now
    # handle co-located reaches
    if consolidate_conductance or one_reach_per_cell:
        rd = consolidate_reach_conductances(rd, keep_only_dominant=one_reach_per_cell)

    # patch the routing
    # 1) reduce one to many routing to one-to-one routing (done by the routing property)
    # 2) create new graph with just one-to-one segments
    # 3) list new paths; 2) and 3) should be automatic following 1)
    # 4) code below will update new graph to only include remaining segments
    # 5) create sfrdata instance; numbering will be messed up
    # 6) run methods on sfrdata instance to fix numbering and route reaches with unique numbers
    print('\nRepairing routing connections...')
    remaining_ids = rd.line_id.unique()
    # set for constant-time membership tests in the nested loop below
    remaining = set(remaining_ids)
    # routing and paths properties should update automatically
    # when id and toid columns are changed in self.df
    # but only rd (reach_data) has been changed
    new_routing = {}
    paths = self.paths.copy()
    # for each segment
    for k in remaining_ids:
        # iterate through successive downstream segments
        for s in paths[k][1:]:
            # assign the first segment that still exists as the outseg
            if s in remaining:
                new_routing[k] = s
                break
        # if no segments are left downstream, assign outlet
        if k not in new_routing:
            new_routing[k] = 0

    # add any outlets to the stream network
    # for now handle int or str ids
    if add_outlets is not None:
        # allow a single id to be passed instead of a sequence
        if isinstance(add_outlets, (str, int, np.integer)):
            add_outlets = [add_outlets]
        for outlet_id in add_outlets:
            # match the type of the line ids; compare against the builtin
            # `object` (the `np.object` alias was removed in NumPy 1.24)
            if rd.line_id.dtype == object:
                outlet_id = str(outlet_id)
                outlet_toid = '0'
            else:
                outlet_id = int(outlet_id)
                outlet_toid = 0
            valid_outlet_ids = get_previous_ids_in_subset(rd.line_id, self.routing, outlet_id)
            loc = rd.line_id.isin(valid_outlet_ids)
            rd.loc[loc, 'toid'] = outlet_toid
            for valid_outlet_id in valid_outlet_ids:
                new_routing[valid_outlet_id] = outlet_toid

    # map remaining_ids to segment numbers
    segment = dict(zip(rd.line_id, rd.iseg))
    line_id = {s: lid for lid, s in segment.items()}

    # get the segment associated with each line id
    nseg = [segment[rid] for rid in remaining_ids]
    # get the segment associated with new connection for each line id
    outseg = [segment.get(new_routing[line_id[s]], 0) for s in nseg]

    # renumber the segments to be consecutive,
    # starting at 1 and only increasing downstream
    r = renumber_segments(nseg, outseg)
    # map new segment numbers to line_ids
    line_id = {r[s]: lid for s, lid in line_id.items()}

    # update reach_data
    rd['iseg'] = [r[s] for s in rd.iseg]

    print('\nSetting up segment data...')
    sd = pd.DataFrame()
    sd['nseg'] = [r[s] for s in nseg]
    sd['outseg'] = [r[s] for s in outseg]
    sd.sort_values(by='nseg', inplace=True)

    # verify that no segments route to themselves
    assert not routing_is_circular(sd.nseg, sd.outseg)

    # end elevations for each segment, looked up by line id
    elevup = self.elevup
    elevdn = dict(zip(self.df.id, self.df.elevdn))
    sd['elevup'] = [elevup[line_id[s]] for s in sd.nseg]
    sd['elevdn'] = [elevdn[line_id[s]] for s in sd.nseg]
    # convert elevation units
    sd['elevup'] *= mult_h
    sd['elevdn'] *= mult_h

    # apply widths if they were included
    if self.df[['width1', 'width2']].sum().sum() > 0:
        width1 = dict(zip(self.df.id, self.df.width1))
        width2 = dict(zip(self.df.id, self.df.width2))
        sd['width1'] = [width1[line_id[s]] for s in sd.nseg]
        sd['width2'] = [width2[line_id[s]] for s in sd.nseg]
        # convert length units from source data to model
        sd['width1'] *= mult
        sd['width2'] *= mult
    elif self.df.width2.sum() == 0:
        raise NotImplementedError('Need to supply width1 and width2 or use arbolate sum.')

    # create sfrdata instance
    # this class has methods for fix segment and reach numbering,
    # assigning elevations and other properties by reach,
    # smoothing elevations, writing sfr package files
    # and other output
    rd = rd[[c for c in SFRData.rdcols if c in rd.columns]].copy()
    sfrd = SFRData(reach_data=rd, segment_data=sd, grid=grid,
                   model=model, model_length_units=model_length_units,
                   model_time_units=model_time_units,
                   package_name=package_name, **kwargs)
    print("\nTime to create sfr dataset: {:.2f}s\n".format(time.time() - totim))
    return sfrd