def make_lakarr2d(grid, lakesdata, include_ids, id_column='hydroid'):
    """Make a 2D array of lake package extents, one integer id per lake.

    Parameters
    ----------
    grid : object
        Model grid passed through to ``rasterize``; its ``crs`` attribute
        is used as the destination CRS when a shapefile path is given.
    lakesdata : str or DataFrame
        Shapefile path or DataFrame of lake polygons. Must have a
        'geometry' column and the column named by ``id_column``
        (column names are matched case-insensitively).
    include_ids : sequence
        Feature ids (values of ``id_column``) to include, in the order in
        which lake numbers 1..n are assigned.
    id_column : str
        Column with the feature ids, by default 'hydroid'.

    Returns
    -------
    arr : result of ``rasterize`` called with ``id_column='lakid'``
        (presumably an nrow x ncol integer array -- confirm against
        ``rasterize``).

    Raises
    ------
    ValueError
        If ``lakesdata`` is neither a str nor a DataFrame.
    """
    if isinstance(lakesdata, str):
        # implement automatic reprojection in gis-utils
        # maintaining backwards compatibility
        # (get_input_arguments presumably drops kwargs that the installed
        # shp2df does not accept -- confirm)
        kwargs = {'dest_crs': grid.crs}
        kwargs = get_input_arguments(kwargs, shp2df)
        lakes = shp2df(lakesdata, **kwargs)
    elif isinstance(lakesdata, pd.DataFrame):
        lakes = lakesdata.copy()
    else:
        raise ValueError(
            'unrecognized input for "lakesdata": {}'.format(lakesdata))

    id_column = id_column.lower()
    lakes.columns = [c.lower() for c in lakes.columns]
    lakes.index = lakes[id_column]
    # explicit copy so the column assignments below act on an independent
    # frame rather than on a selection of the indexed table
    lakes = lakes.loc[include_ids].copy()
    lakes['lakid'] = np.arange(1, len(lakes) + 1)
    # rasterize the outer shell only, so islands count as lake cells
    lakes['geometry'] = [Polygon(g.exterior) for g in lakes.geometry]
    arr = rasterize(lakes, grid=grid, id_column='lakid')

    # ensure that order of hydroids is unchanged
    # (used to match features to lake IDs in lake package)
    # list() so tuples/arrays of ids compare element-wise, consistent with
    # make_bdlknc_zones
    assert lakes[id_column].tolist() == list(include_ids)
    return arr
def test_add_observations_from_line_ids(shellmound_sfrdata, flux_observation_data, outdir):
    """Observations located by line id land on the last reach of each
    segment, survive a shapefile round trip, and can alternatively be
    located from a custom reach-number column."""
    sfrdata = shellmound_sfrdata
    obs = sfrdata.add_observations(flux_observation_data,
                                   obstype='downstream-flow',
                                   line_id_column='line_id',
                                   obsname_column='site_no')
    assert np.all(obs == sfrdata._observations)
    expected_columns = {'obsname', 'obstype', 'rno', 'iseg', 'ireach'}
    assert set(obs.columns) == expected_columns

    # last reach of every segment, keyed by source line id
    sorted_reaches = sfrdata.reach_data.sort_values(by=['iseg', 'ireach'], axis=0)
    last_reaches = sorted_reaches.groupby('iseg').last()
    rno_by_line_id = dict(zip(last_reaches.line_id, last_reaches.rno))
    expected_rnos = set([rno_by_line_id[line_id]
                         for line_id in flux_observation_data.line_id])
    assert set(obs.rno) == expected_rnos

    # segment/reach numbers must agree with the reach_data table
    reach_data = sfrdata.reach_data
    seg_reach_by_rno = dict(list(zip(reach_data.rno,
                                     zip(reach_data.iseg, reach_data.ireach))))
    for _, row in obs.iterrows():
        assert (row.iseg, row.ireach) == seg_reach_by_rno[row.rno]

    # test shapefile export
    out_shapefile = os.path.join(outdir, 'obs.shp')
    sfrdata.export_observations(filename=out_shapefile)
    exported = shp2df(out_shapefile)
    pd.testing.assert_frame_equal(exported.drop('geometry', axis=1),
                                  sfrdata.observations,
                                  check_dtype=False)

    # test assigning obs from custom reach number column?
    obs = sfrdata.add_observations(flux_observation_data,
                                   obstype='downstream-flow',
                                   rno_column='junk',
                                   obsname_column='site_no')
    assert set(obs.rno) == set(flux_observation_data.junk)
def cull_data_to_active_area(data, active_area,
                             active_area_id_column=None,
                             active_area_feature_id=None,
                             data_crs=None, metadata=None):
    """Drop records for sites that are not within the active area.

    Parameters
    ----------
    data : DataFrame
        Table to cull. Needs a 'geometry' column when ``metadata`` is None,
        otherwise a 'site_no' column.
    active_area : str, Path or list of these
        Shapefile(s) with the active area polygon(s).
    active_area_id_column : str, optional
        Column in the active area shapefile identifying features.
    active_area_feature_id : optional
        Value in ``active_area_id_column`` selecting the feature(s) to use.
        Only applied if ``active_area_id_column`` is also given; otherwise
        all features are combined.
    data_crs : optional
        Passed to ``shp2df`` as ``dest_crs``.
    metadata : DataFrame, optional
        Site table with 'geometry' and 'site_no' columns. If given, the
        within-test is done on the metadata geometries, and ``data`` is
        culled to the 'site_no' values that remain.

    Returns
    -------
    df : DataFrame
        Culled copy of ``data``.
    md : DataFrame
        Culled copy of ``metadata``; only returned if ``metadata`` was given.
    """
    df = data.copy()
    if metadata is not None:
        md = metadata.copy()
    if isinstance(active_area, (Path, str)):
        active_area = [active_area]
    active_area = [str(filepath) for filepath in active_area]
    active_area_df = shp2df(active_area, dest_crs=data_crs)
    if active_area_id_column is not None and active_area_feature_id is not None:
        loc = active_area_df[active_area_id_column] == active_area_feature_id
        assert any(loc), "feature {} not found!".format(active_area_feature_id)
        # .loc returns a Series; pull out the geometry object(s) so that
        # g.within() below is given a geometry rather than a Series
        selected = active_area_df.loc[loc, 'geometry'].tolist()
        if len(selected) == 1:
            active_area_polygon = selected[0]
        else:
            active_area_polygon = MultiPolygon(selected)
    else:
        active_area_polygon = MultiPolygon(active_area_df.geometry.tolist())

    if metadata is not None:
        within = np.array([g.within(active_area_polygon) for g in md.geometry])
        md = md.loc[within]
        df_within = df.site_no.isin(md['site_no'])
    else:
        within = np.array([g.within(active_area_polygon) for g in df.geometry])
        df_within = within
    if not np.all(within):
        print(
            'Culling {} sites outside of the model area defined by {}.'.format(
                np.sum(~within), active_area))
    df = df.loc[df_within]
    if metadata is not None:
        return df, md
    return df
def from_shapefile(cls, shapefile=None,
                   node_col='node', kcol='k', icol='i', jcol='j',
                   isfr_col='isfr',
                   active_area=None,
                   crs=None, epsg=None, proj_str=None, prjfile=None):
    """Create an instance from a shapefile of grid cells.

    The CRS is resolved by ``get_crs`` from ``prjfile``, ``epsg``,
    ``proj_str`` and ``crs``; if ``crs`` is not supplied, the CRS read from
    the shapefile by ``get_shapefile_crs`` is used in its place. The bounds
    of the shapefile and its attribute table are then handed to
    ``cls.from_dataframe``.
    """
    shapefile_crs = crs
    if shapefile_crs is None:
        shapefile_crs = get_shapefile_crs(shapefile)
    resolved_crs = get_crs(prjfile=prjfile, epsg=epsg, proj_str=proj_str,
                           crs=shapefile_crs)

    # total extent of the features
    with fiona.open(shapefile) as src:
        bounds = src.bounds

    df = shp2df(shapefile)
    assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
        shapefile)

    column_names = {'node_col': node_col,
                    'kcol': kcol,
                    'icol': icol,
                    'jcol': jcol,
                    'isfr_col': isfr_col}
    return cls.from_dataframe(df, bounds=bounds, active_area=active_area,
                              crs=resolved_crs, **column_names)
def from_shapefile(cls, shapefile=None,
                   node_col='node', kcol='k', icol='i', jcol='j',
                   isfr_col='isfr',
                   active_area=None,
                   epsg=None, proj_str=None, prjfile=None):
    """Create an instance from a shapefile of grid cells.

    Parameters
    ----------
    shapefile : str
        Path to the shapefile.
    node_col, kcol, icol, jcol, isfr_col : str
        Column names passed through to ``cls.from_dataframe``.
    active_area : optional
        Passed through to ``cls.from_dataframe``.
    epsg, proj_str : optional
        Passed through to ``cls.from_dataframe``.
    prjfile : str, optional
        Projection file. By default, the .prj file alongside the shapefile
        is used if it exists, otherwise None.
    """
    if prjfile is None:
        # swap only the file extension; str.replace('.shp', '.prj') left
        # 'x.SHP' untouched (so the shapefile itself became the prjfile)
        # and also rewrote '.shp' inside directory names
        prjfile = os.path.splitext(shapefile)[0] + '.prj'
        prjfile = prjfile if os.path.exists(prjfile) else None
    with fiona.open(shapefile) as src:
        bounds = src.bounds
    df = shp2df(shapefile)
    assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
        shapefile)
    return cls.from_dataframe(df,
                              node_col=node_col, kcol=kcol, icol=icol, jcol=jcol,
                              isfr_col=isfr_col,
                              bounds=bounds, active_area=active_area,
                              epsg=epsg, proj_str=proj_str, prjfile=prjfile)
def make_bdlknc_zones(grid, lakesshp, include_ids,
                      feat_id_column='feat_id',
                      lake_package_id_column='lak_id'):
    """Make zones for populating with lakebed leakance values.

    Each lake outline is rasterized twice: once buffered outward by 30 m,
    so that cells with horizontal lake connections receive the lake id, and
    once buffered inward by 20 m. Cells within the inward-buffered outline
    are overwritten with the lake id multiplied by 100, which separates the
    lake interior from the near-shore zone.
    """
    print('setting up lakebed leakance zones...')
    start_time = time.time()

    if isinstance(lakesshp, pd.DataFrame):
        lakes = lakesshp.copy()
    elif isinstance(lakesshp, str):
        # request reprojection to the grid CRS where shp2df supports it
        reader_kwargs = get_input_arguments({'dest_crs': grid.crs}, shp2df)
        lakes = shp2df(lakesshp, **reader_kwargs)
    else:
        raise ValueError(
            'unrecognized input for "lakesshp": {}'.format(lakesshp))

    # index the lakes by feature id and select in the requested order
    id_column = feat_id_column.lower()
    lakes.columns = [name.lower() for name in lakes.columns]
    lakes.index = lakes[id_column]
    lakes = lakes.loc[include_ids]
    if lake_package_id_column not in lakes.columns:
        lakes[lake_package_id_column] = np.arange(1, len(lakes) + 1)

    # take the (simplified) exteriors once and reuse them for both
    # buffers; lake outlines derived from lidar can be very detailed
    outlines = [Polygon(geom.exterior).simplify(5) for geom in lakes.geometry]

    # near-shore zone: lake footprint plus a 30 m ring
    exterior_buffer = 30  # m
    lakes['geometry'] = [outline.buffer(exterior_buffer) for outline in outlines]
    zones = rasterize(lakes, grid=grid, id_column=lake_package_id_column)

    # interior zone: lake footprint shrunk by 20 m, ids multiplied by 100
    interior_buffer = -20  # m
    lakes['geometry'] = [outline.buffer(interior_buffer) for outline in outlines]
    interior_zones = rasterize(lakes, grid=grid,
                               id_column=lake_package_id_column) * 100
    is_interior = interior_zones > 0
    zones[is_interior] = interior_zones[is_interior]

    # the feature order must still match include_ids
    # (used to match features to lake IDs in lake package)
    assert lakes[id_column].tolist() == list(include_ids)
    print('finished in {:.2f}s'.format(time.time() - start_time))
    return zones
def test_export_period_data(shellmound_sfrdata_with_period_data, outdir):
    """Period data exported to a shapefile has one '<per>inflow' column per
    stress period, and multiple entries for a reach are summed."""
    # local import: Series.append/DataFrame.append were removed in
    # pandas 2.0, so pd.concat is used instead
    import pandas as pd

    sfrd = shellmound_sfrdata_with_period_data
    outfile = '{}/test_mf6_sfr_period_data_inflow.shp'.format(outdir)
    sfrd.export_period_data(outfile)
    df = shp2df(outfile)
    nodes = dict(zip(sfrd.reach_data.rno, sfrd.reach_data.node))
    # columns are named '<per>inflow'; stripping the letters leaves the period
    pers = [int(c.strip('inflow')) for c in df.columns if 'inflow' in c]
    assert set(pers) == set(sfrd.period_data.per)
    assert set(df['rno']) == set(sfrd.period_data.rno)
    assert np.allclose(pd.concat([df['0inflow'], df['1inflow']]).values,
                       sfrd.period_data['inflow'].values)
    assert np.array_equal(df.node.values,
                          np.array([nodes[rno] for rno in df.rno], dtype=int))

    # check export still works if there are multiple items in a reach
    sfrd._period_data = pd.concat([sfrd.period_data, sfrd.period_data])
    sfrd.export_period_data(outfile)
    df = shp2df(outfile)
    assert np.allclose(
        sorted(pd.concat([df['0inflow'], df['1inflow']]).values),
        sorted(sfrd.period_data.groupby(['rno', 'per']).sum().inflow.values))
def test_grid_write_shapefile(modelgrid, tmpdir):
    """The grid shapefile carries the expected CRS and bounds, and lists
    the cells in row-major order."""
    shapefile = os.path.join(tmpdir, 'grid.shp')
    modelgrid.write_shapefile(shapefile)

    with fiona.open(shapefile) as src:
        assert src.crs['init'] == 'epsg:3070'
        assert np.allclose(src.bounds, modelgrid.bounds)

    cells = shp2df(shapefile)
    grid_shape = (modelgrid.nrow, modelgrid.ncol)
    rows, columns = np.indices(grid_shape)
    expected_nodes = np.arange(len(cells), dtype=int)
    assert np.array_equal(expected_nodes, cells.node.values)
    assert np.array_equal(rows.ravel(), cells.i.values)
    assert np.array_equal(columns.ravel(), cells.j.values)
def test_points_to_raster(point_data, test_output_path):
    """Values sampled from the output raster at the source point locations
    match the values of the source points."""
    raster_file = test_output_path / 'test_points_raster.tif'
    point_shapefiles = [test_output_path / 'test_points.shp']
    points_to_raster(point_shapefiles,
                     data_col='values',
                     output_resolution=0.1,
                     outfile=raster_file)

    points = shp2df(str(point_shapefiles[0]))
    coordinates = [(point.x, point.y) for point in points.geometry]
    x = [xy[0] for xy in coordinates]
    y = [xy[1] for xy in coordinates]
    sampled = get_values_at_points(raster_file, x, y)
    assert np.allclose(sampled, points['values'].values)
def from_shapefile(cls, shapefile,
                   id_column='id',
                   routing_column='toid',
                   arbolate_sum_column2='asum2',
                   width1_column='width1',
                   width2_column='width2',
                   up_elevation_column='elevup',
                   dn_elevation_column='elevdn',
                   name_column='name',
                   attr_length_units='meters',
                   attr_height_units='meters',
                   filter=None,
                   epsg=None, proj_str=None, prjfile=None):
    """Create an instance from a shapefile of lines.

    Parameters
    ----------
    shapefile : str
        Input shapefile.
    id_column, routing_column, arbolate_sum_column2, width1_column,
    width2_column, up_elevation_column, dn_elevation_column,
    name_column : str
        Attribute field names, passed through to ``cls.from_dataframe``.
    attr_length_units, attr_height_units : str
        Passed through to ``cls.from_dataframe``.
    filter : tuple or str
        Bounding box (tuple) or shapefile of model stream network area.
        Anything that is not a tuple is converted to a bounding box in the
        CRS of the shapefile with ``get_bbox``.
    epsg, proj_str : optional
        CRS information for the shapefile.
    prjfile : str, optional
        Projection file. By default, the .prj file alongside the shapefile
        is used if it exists, otherwise None.
    """
    if prjfile is None:
        # swap only the file extension; str.replace('.shp', '.prj') left
        # 'x.SHP' untouched (so the shapefile itself became the prjfile)
        # and also rewrote '.shp' inside directory names
        prjfile = os.path.splitext(shapefile)[0] + '.prj'
        prjfile = prjfile if os.path.exists(prjfile) else None

    shpfile_crs = crs(epsg=epsg, proj_str=proj_str, prjfile=prjfile)

    # ensure that filter bbox is in same crs as flowlines
    if filter is not None and not isinstance(filter, tuple):
        filter = get_bbox(filter, shpfile_crs)

    df = shp2df(shapefile, filter=filter)
    assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
        shapefile)

    return cls.from_dataframe(df,
                              id_column=id_column,
                              routing_column=routing_column,
                              arbolate_sum_column2=arbolate_sum_column2,
                              width1_column=width1_column,
                              width2_column=width2_column,
                              up_elevation_column=up_elevation_column,
                              dn_elevation_column=dn_elevation_column,
                              name_column=name_column,
                              attr_length_units=attr_length_units,
                              attr_height_units=attr_height_units,
                              epsg=epsg, proj_str=proj_str, prjfile=prjfile)
def read_nhdplus(shpfiles, bbox_filter=None, index_col='comid'):
    """Read NHDPlus shapefile(s) into a DataFrame indexed by ``index_col``.

    Parameters
    ----------
    shpfiles : str or list of str
        Shapefile(s) to read with ``shp2df``.
    bbox_filter : optional
        Passed to ``shp2df`` as ``filter``.
    index_col : str
        Name of the column to use as the index, matched case-insensitively
        (default 'comid').

    Returns
    -------
    df : DataFrame
        Indexed by the first column matching ``index_col``. An empty result
        is returned as-is, without setting the index.

    Raises
    ------
    IndexError
        If records were read but no column matches ``index_col``.
    """
    # read shapefile into dataframe and find the index column
    df = shp2df(shpfiles, filter=bbox_filter)
    if len(df) > 0:
        # keep the requested name in its own variable so that the error
        # message below can report it (it was previously overwritten by the
        # empty list of matches)
        target = index_col.lower()
        matches = [c for c in df.columns if c.lower() == target]
        if not matches:
            if isinstance(shpfiles, list):
                shpfiles = '\n'.join(shpfiles)
            raise IndexError('No {} column found in: \n{}'.format(
                index_col, shpfiles))
        df.index = df[matches[0]]
    return df
def test_transient_list_export(model):
    """Exported WEL stress period data matches the values in the package."""
    m, grid, output_path = model
    outfiles = export(m, grid, 'wel', output_path=output_path)

    # the expected variable name differs between MODFLOW-6 and other versions
    if m.version != 'mf6':
        variables = ['wel_stress_period_data']
    else:
        variables = ['wel0_stress_period_data']
    check_files(outfiles, variables=variables)

    expected = mftransientlist_to_dataframe(m.wel.stress_period_data,
                                            squeeze=True)
    expected.index = range(len(expected))
    if 'cellid' in expected.columns:
        expected['cellid'] = expected['cellid'].astype(str)

    exported = shp2df(outfiles[0]).drop('geometry', axis=1)
    # cellid is not numeric, so leave it out of the comparison
    expected_values = expected.drop('cellid', axis=1)
    exported_values = exported.drop('cellid', axis=1)
    assert np.allclose(expected_values, exported_values)
def test_get_upstream_area():
    """Run get_upstream_area for a set of sites in the Nicolet nearfield.

    NOTE(review): the inputs are absolute paths on a developer machine, so
    this test can only run where those files exist.
    """
    catchments = ['/Users/aleaf/Documents/NHDPlus/NHDPlusGL/NHDPlus04/NHDPlusCatchment/Catchment.shp',
                  '/Users/aleaf/Documents/NHDPlus/NHDPlusMS/NHDPlus07/NHDPlusCatchment/Catchment.shp']
    plusflow = ['/Users/aleaf/Documents/NHDPlus/NHDPlusGL/NHDPlus04/NHDPlusAttributes/PlusFlow.dbf',
                '/Users/aleaf/Documents/NHDPlus/NHDPlusMS/NHDPlus07/NHDPlusAttributes/PlusFlow.dbf']
    nodasites = '/Users/aleaf/Documents/USFS/Nicolet/targets/north/flux_field_no_da.shp'
    flowlines = ['/Users/aleaf/Documents/NHDPlus/NHDPlusGL/NHDPlus04/NHDSnapshot/Hydrography/NHDFlowline.shp',
                 '/Users/aleaf/Documents/NHDPlus/NHDPlusMS/NHDPlus07/NHDSnapshot/Hydrography/NHDFlowline.shp']
    nearfield = '/Users/aleaf/Documents/USFS/Nicolet/shps/Nicolet_north_NF.shp'

    # read the first feature inside a context manager so the file is
    # closed again; next(iter(...)) replaces the Python 2 style .next()
    with fiona.open(nearfield) as src:
        nf = shape(next(iter(src))['geometry'])
    nf = project(nf, '+init=epsg:26716', '+init=epsg:4269')

    noda = shp2df(nodasites)
    get_upstream_area(noda.geometry.tolist(), plusflow, flowlines, catchments, nf)
def read_polygon_feature(feature, dest_crs, feature_crs=None):
    """Read a geometric feature from a shapefile, shapely geometry object,
    GeoJSON-style dictionary, or collection of these. Reproject to dest_crs
    if the feature is in a different CRS.

    Parameters
    ----------
    feature : shapely Polygon, dict, iterable of Polygons or dicts,
        or shapefile path
        Polygons must be in same CRS as linework; shapefile features
        will be reprojected if their crs is different. Multiple features
        are merged with ``unary_union``.
    dest_crs : instance of sfrmaker.crs
        Output CRS for the feature.
    feature_crs : instance of sfrmaker.crs, optional
        CRS of the feature. For a shapefile path, this is replaced by the
        CRS read from the file.

    Returns
    -------
    feature : shapely geometry object
        The result of ``buffer(0)`` on the (possibly reprojected) feature.

    Raises
    ------
    TypeError
        If the type of ``feature`` is not recognized.
    """
    # collections.Iterable was removed in Python 3.10; import the ABC
    # explicitly rather than relying on the collections.abc attribute
    from collections.abc import Iterable

    if isinstance(feature, str):
        with fiona.open(feature) as src:
            feature_crs = crs(src.crs)
        geoms = shp2df(feature)['geometry'].values
        feature = unary_union(geoms)
    # dicts are tested before the general Iterable case: a dict is itself
    # Iterable, so this branch was previously unreachable and feature[0]
    # raised a KeyError
    elif isinstance(feature, dict):
        try:
            feature = shape(feature)
        except Exception as ex:
            print(ex)
            print("Supplied dictionary doesn't appear to be valid GeoJSON.")
    elif isinstance(feature, Polygon):
        pass
    elif isinstance(feature, Iterable):
        # materialize so that any iterable can be indexed
        feature = list(feature)
        if feature and isinstance(feature[0], dict):
            try:
                feature = [shape(f) for f in feature]
            except Exception as ex:
                print(ex)
                print(
                    "Supplied dictionary doesn't appear to be valid GeoJSON.")
        feature = unary_union(feature)
    else:
        raise TypeError("Unrecognized feature input.")

    if feature_crs is not None and feature_crs != dest_crs:
        feature = project(feature, feature_crs.proj_str, dest_crs.proj_str)
    return feature.buffer(0)
def test_edit_flowlines(flowlines, preprocessed_flowlines, test_data_path):
    """Flowline edits from the YAML file (drops, reroutes and additions)
    are reflected in the edited flowlines."""
    if flowlines is None:
        flowlines = preprocessed_flowlines
    edits_file = os.path.join(test_data_path, 'flowline_edits.yml')
    edited = edit_flowlines(flowlines, edits_file, logger=None)

    with open(edits_file) as src:
        cfg = yaml.load(src, Loader=yaml.Loader)

    # none of the dropped flowlines may remain
    dropped = set(cfg['drop_flowlines'])
    still_present = dropped.intersection(edited.COMID)
    assert not any(still_present)

    # rerouted flowlines must point to their new downstream COMID
    for comid, tocomid in cfg['reroute_flowlines'].items():
        assert edited.loc[comid, 'tocomid'] == tocomid

    # all of the added flowlines must be in the result
    added = shp2df(os.path.join(test_data_path, 'yazoo.shp'))
    missing = set(added.comid).difference(edited.index)
    assert not any(missing)

    if isinstance(flowlines, str):
        projection_file = flowlines[:-4] + '.prj'
        assert os.path.exists(projection_file)
def parent_model_sfr_flow_results():
    """Make a table of parent model SFR flows for two time steps.

    Reach, segment and reach-in-segment numbers are read from the
    shellmound SFR lines shapefile. 'Qout' is zero except at a handful of
    reaches; the second time step, kstpkper (1, 1), has twice the flows of
    the first, (0, 0).

    Returns
    -------
    rd : DataFrame
        Columns 'rno', 'segment', 'reach', 'Qout' and 'kstpkper'.
    """
    # local import: DataFrame.append was removed in pandas 2.0, so
    # pd.concat is used to stack the two time steps
    import pandas as pd

    parent_model_sfrlines = 'sfrmaker/test/data/shellmound/merasnwt_sfrlines.shp'
    rd = shp2df(parent_model_sfrlines)
    rd['Qout'] = 0.
    rd = rd.rename(columns={'iseg': 'segment', 'ireach': 'reach'}) \
        [['rno', 'segment', 'reach', 'Qout']].copy()
    rd.loc[rd.rno == 13933, 'Qout'] = 353146.667
    rd.loc[rd.rno == 11780, 'Qout'] = 3531.46667
    rd.loc[rd.rno == 11949, 'Qout'] = 3.53146667
    rd.loc[rd.rno == 11483, 'Qout'] = 353.146667
    rd.loc[rd.rno == 13070, 'Qout'] = 7062.93334
    rd.loc[rd.rno == 15682, 'Qout'] = 7.06293334
    rd.loc[rd.rno == 15684, 'Qout'] = 35314.6667
    rd['kstpkper'] = [(0, 0)] * len(rd)
    rd2 = rd.copy()
    rd2['kstpkper'] = [(1, 1)] * len(rd)
    rd2['Qout'] *= 2
    rd = pd.concat([rd, rd2]).copy()
    return rd
def get_nhdplus_v2_routing(PlusFlow_file,
                           from_col='FROMCOMID', to_col='TOCOMID'):
    """Read PlusFlow file and return the routing information
    as a dictionary of from:to COMID numbers.

    Parameters
    ----------
    PlusFlow_file : str
        Path to a PlusFlow table (.shp, .dbf or .csv; the extension is
        matched case-insensitively).
    from_col : str
        Column with the upstream ("from") ids, by default 'FROMCOMID'.
    to_col : str
        Column with the downstream ("to") ids, by default 'TOCOMID'.

    Returns
    -------
    flowline_routing : dict
        Maps each id in ``from_col`` to its id in ``to_col``; if that "to"
        id is not itself among the ``from_col`` ids, it is replaced by 0.
        For an id that is repeated in ``from_col``, the last row wins.

    Raises
    ------
    ValueError
        If the file extension is not recognized.
    """
    fname, ext = os.path.splitext(PlusFlow_file)
    ext = ext.lower()  # accept e.g. 'PlusFlow.DBF' as well
    if ext in ['.shp', '.dbf']:
        df = shp2df(PlusFlow_file)
    elif ext == '.csv':
        df = pd.read_csv(PlusFlow_file)
    else:
        raise ValueError("Unrecognized file-type for PlusFlow table: {}".format(
            PlusFlow_file))
    flowline_routing = dict(zip(df[from_col], df[to_col]))
    comids = set(df[from_col])
    # destinations that are not in the table are set to 0
    flowline_routing = {
        k: v if v in comids else 0
        for k, v in flowline_routing.items()
    }
    return flowline_routing
def assign_geographic_obsgroups(metadata, geographic_groups,
                                geographic_groups_col, metadata_crs):
    """Assign sites to geographic groups in a 'geo_group' column.

    Parameters
    ----------
    metadata : DataFrame
        Site table with a 'geometry' column of objects that have a
        ``within`` method.
    geographic_groups : dict, str, Path, list of str or Path, or None
        A dictionary is taken to map group names to polygons
        (NOTE(review): dictionary input previously left the group table
        undefined; confirm that this mapping is what callers intend).
        Otherwise, shapefile(s) with the group polygons, which are read
        with ``shp2df``. With None, nothing is assigned.
    geographic_groups_col : str
        Column in the shapefile(s) with the group names.
    metadata_crs : optional
        Passed to ``shp2df`` as ``dest_crs``.

    Returns
    -------
    md : DataFrame
        Copy of ``metadata``. Where a site is within more than one polygon,
        the group assigned last wins; groups that share a name across
        shapefiles take their polygon from the shapefile listed first.
    """
    md = metadata.copy()
    if geographic_groups is not None:
        if isinstance(geographic_groups, dict):
            geo_group_dict = geographic_groups
        else:
            geo_group_dict = {}
            if isinstance(geographic_groups, (str, Path)):
                geographic_groups = [geographic_groups]
            # reversed, so that items listed first override later ones
            # in the update below
            for item in reversed(geographic_groups):
                group_info = shp2df(str(item), dest_crs=metadata_crs)
                groups = dict(zip(group_info[geographic_groups_col],
                                  group_info['geometry']))
                geo_group_dict.update(groups)
        for group_name, polygon in geo_group_dict.items():
            within = [g.within(polygon) for g in md.geometry]
            md.loc[within, 'geo_group'] = group_name
    return md
def test_export_sfr(model):
    """Exported SFR package data matches the values in the package
    (MODFLOW-6 only for now)."""
    m, grid, output_path = model
    is_mf6 = m.version == 'mf6'

    # mf2005 style SFR export not implemented yet
    # TODO: implement mf2005 sfr package export
    if not is_mf6:
        return

    outfiles = export(m, grid, 'sfr', output_path=output_path)

    # TODO: finish this test
    variables = ['shellmound.sfr']
    if is_mf6:
        package_data = m.sfr.packagedata.array
        compare_cols = ['rlen', 'rwid', 'rgrd', 'rtp', 'rbth', 'rhk']
    else:
        variables = ['wel_stress_period_data']
        package_data = m.sfr.reach_data.array
        compare_cols = ['strtop']
    expected = pd.DataFrame(package_data)
    check_files(outfiles, variables=variables)

    expected.index = range(len(expected))
    if 'cellid' in expected.columns:
        expected['cellid'] = expected['cellid'].astype(str)

    exported = shp2df(outfiles[0]).drop('geometry', axis=1)
    # rebuild cellid from the zero-based layer, row, column columns
    cellids = list(zip(exported['k'], exported['i'], exported['j']))
    exported['cellid'] = cellids
    exported['cellid'] = exported['cellid'].astype(str)
    assert np.allclose(expected[compare_cols], exported[compare_cols])
def from_shapefile(cls, shapefile,
                   id_column='id',
                   routing_column='toid',
                   arbolate_sum_column2='asum2',
                   width1_column='width1',
                   width2_column='width2',
                   up_elevation_column='elevup',
                   dn_elevation_column='elevdn',
                   name_column='name',
                   attr_length_units='meters',
                   attr_height_units='meters',
                   filter=None,
                   crs=None, epsg=None, proj_str=None, prjfile=None):
    """Create a Lines instance from a shapefile.

    Parameters
    ----------
    shapefile : str
        Input shapefile
    id_column : str, optional
        Attribute field with line identifiers, by default 'id'
    routing_column : str, optional
        Attribute field with downstream routing connections, by default 'toid'
    arbolate_sum_column2 : str, optional
        Attribute field with arbolate sums at downstream ends of lines,
        by default 'asum2'
    width1_column : str, optional
        Attribute field with channel widths at upstream ends of lines,
        by default 'width1'
    width2_column : str, optional
        Attribute field with channel widths at downstream ends of lines,
        by default 'width2'
    up_elevation_column : str, optional
        Attribute field with elevations at upstream ends of lines,
        by default 'elevup'
    dn_elevation_column : str, optional
        Attribute field with elevations at downstream ends of lines,
        by default 'elevdn'
    name_column : str, optional
        Attribute field with feature names, by default 'name'
    attr_length_units : str, optional
        Length units for feature attributes (e.g. width, arbolate sum, etc.)
        By default, meters.
    attr_height_units : str, optional
        Length units for elevation attributes
        By default, 'meters'.
    filter : tuple or str, optional
        (xmin, ymin, xmax, ymax) bounding box to filter which records
        are read from the shapefile. Anything that is not a tuple is
        converted to a bounding box in the CRS of the shapefile with
        ``get_bbox``. By default None.
    crs : optional
        Coordinate Reference System for features in the input shapefile,
        in any form accepted by ``get_crs``.
    epsg: int, optional
        EPSG code identifying Coordinate Reference System (CRS)
        for features in the input shapefile.
    proj_str: str, optional
        proj_str string identifying CRS for features in the input shapefile.
    prjfile: str, optional
        File path to projection (.prj) file identifying CRS
        for features in the input shapefile. By default,
        the projection file included with the input shapefile
        will be used.

    Returns
    -------
    lines : :class:`Lines` instance
    """
    if prjfile is None:
        # swap only the file extension; str.replace('.shp', '.prj') left
        # 'x.SHP' untouched (so the shapefile itself became the prjfile)
        # and also rewrote '.shp' inside directory names
        prjfile = os.path.splitext(shapefile)[0] + '.prj'
        prjfile = prjfile if os.path.exists(prjfile) else None

    shpfile_crs = get_crs(prjfile=prjfile, epsg=epsg, proj_str=proj_str, crs=crs)

    # ensure that filter bbox is in same crs as flowlines
    if filter is not None and not isinstance(filter, tuple):
        filter = get_bbox(filter, shpfile_crs)

    df = shp2df(shapefile, filter=filter)
    assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
        shapefile)

    # NOTE(review): crs is used for the bounding box above but is not
    # forwarded to from_dataframe -- confirm whether that is intended
    return cls.from_dataframe(df,
                              id_column=id_column,
                              routing_column=routing_column,
                              arbolate_sum_column2=arbolate_sum_column2,
                              width1_column=width1_column,
                              width2_column=width2_column,
                              up_elevation_column=up_elevation_column,
                              dn_elevation_column=dn_elevation_column,
                              name_column=name_column,
                              attr_length_units=attr_length_units,
                              attr_height_units=attr_height_units,
                              epsg=epsg, proj_str=proj_str, prjfile=prjfile)
def add_observations(sfrdata, data, flowline_routing=None,
                     obstype=None, sfrlines_shapefile=None,
                     rno_column_in_sfrlines='rno',
                     x_location_column=None,
                     y_location_column=None,
                     line_id_column=None,
                     rno_column=None,
                     obstype_column=None,
                     obsname_column='site_no'):
    """Make a table of SFR observations, in the format of the observations
    DataFrame attribute of an sfrdata instance.

    Observations can be located on the SFR network by specifying reach number
    directly (rno_column), by x, y location (x_location_column and
    y_location_column), or by specifying the source hydrography lines that
    they are located on (line_id_column). If a reach number column is
    present in data, it takes precedence over the other two methods.

    Parameters
    ----------
    sfrdata : sfrmaker.SFRData instance
        SFRData instance with reach_data table attribute. To add observations
        from x, y coordinates, the reach_data table must have a geometry
        column with LineStrings representing each reach, or an
        sfrlines_shapefile is required. Reach numbers are assumed to be in
        an 'rno' column.
    data : DataFrame, path to csv file or list of DataFrames or csv files
        Table(s) with information on the observation sites to be located.
        Must have either reach numbers (rno_column), line_ids
        (line_id_column), or x and y locations (x_location_column and
        y_location_column). Multiple tables are concatenated.
    obstype : str (optional)
        Type of observation to record, for MODFLOW-6
        (default 'downstream-flow'; see MODFLOW-6 IO documentation for more
        details). Alternatively, observation types can be specified by row
        in data, using the obstype_column argument.
    x_location_column : str (optional)
        Column in data with site x-coordinates (in same CRS as SFR network).
    y_location_column : str (optional)
        Column in data with site y-coordinates (in same CRS as SFR network).
    sfrlines_shapefile : str (optional)
        Shapefile version of SFRdata.reach_data. Only needed if
        SFRdata.reach_data doesn't have LineString geometries for the
        reaches.
    rno_column_in_sfrlines : str (optional)
        Column in sfrlines with reach numbers for matching lines with
        reaches in sfrdata, or reach numbers assigned to observation sites.
        (default 'rno')
    line_id_column : str
        Column in data matching observation sites to line_ids in the source
        hydrography data.
    rno_column : str
        Column in data matching observation sites to reach numbers in the
        SFR network.
    flowline_routing : dict
        Optional dictionary of routing for source hydrography. Only needed
        if locating by line_id, and SFR network is a subset of the full
        source hydrography (i.e. some lines were dropped in the creation of
        the SFR package, or if the sites are inflow points corresponding to
        lines outside of the model perimeter). In this case, observation
        points referenced to line_ids that are missing from the SFR network
        are placed at the first reach corresponding to the next downstream
        line_id that is represented in the SFR network. By default, the
        routing is built from the segment routing of sfrdata.
    obstype_column : str (optional)
        Column in data with MODFLOW-6 observation types. For adding
        observations of different types. If obstype and obstype_column are
        None, the default of 'downstream-flow' will be used.
    obsname_column : str
        Column in data with unique identifier (e.g. site number or name)
        for observation sites.

    Returns
    -------
    obsdata : DataFrame
        Table with the columns of sfrdata.observations, with one row per
        unique reach number (for sites that share a reach, the first site
        is retained).

    Raises
    ------
    ValueError
        If data has no reach number, x, y location or line_id columns.

    Notes
    -----
    Sites located by line_id (source hydrography) will be assigned to the
    last reach in the segment corresponding to the line_id. Locating by
    x, y or reach number is more accurate.
    """
    sfrd = sfrdata
    # work on a copy, so that geometries added below
    # don't end up in the reach_data of sfrdata
    reach_data = sfrdata.reach_data.copy()

    # allow input via a list of tables or single table
    input_data = data
    if not isinstance(input_data, list):
        input_data = [input_data]
    dfs = []
    for item in input_data:
        if isinstance(item, str):
            dfs.append(pd.read_csv(item))
        elif isinstance(item, pd.DataFrame):
            dfs.append(item.copy())
        else:
            raise Exception(
                'Unrecognized input type for data:\n{}'.format(item))
    data = pd.concat(dfs).reset_index(drop=True)

    # read reach geometries from a shapefile
    if sfrlines_shapefile is not None:
        sfrlines = shp2df(sfrlines_shapefile)
        geoms = dict(
            zip(sfrlines[rno_column_in_sfrlines], sfrlines['geometry']))
        reach_data['geometry'] = [geoms[rno] for rno in reach_data['rno']]

    # if no reach number is provided
    msg = "Observation sites need reach number, (x,y) coordinates, or source hydrography IDs"
    if rno_column not in data.columns:

        # located reach numbers are written to an 'rno' column in data
        rno_column = 'rno'

        # get reach numbers by x, y location of sites
        if x_location_column in data.columns and y_location_column in data.columns:
            locs = locate_sites(
                data,
                reach_data,
                x_column_in_data=x_location_column,
                y_column_in_data=y_location_column,
                reach_id_col='rno',  # reach number column in reach_data
                site_number_col=obsname_column)
            data[rno_column] = locs['rno']

        # get reach number from site locations in source hydrography (line_ids)
        elif line_id_column in data.columns:
            # map NHDPlus COMIDs to reach numbers
            if flowline_routing is None:
                line_id = dict(zip(reach_data.iseg, reach_data.line_id))
                sfr_routing = sfrdata.segment_routing.copy()

                # routing for source hydrography
                # (segments without a line_id are mapped to 0)
                flowline_routing = {
                    line_id.get(k, 0): line_id.get(v, 0)
                    for k, v in sfr_routing.items()
                }
            # get the last reach in each segment
            r1 = reach_data.sort_values(by=['iseg', 'ireach'],
                                        axis=0).groupby('iseg').last()
            line_id_rno_mapping = dict(zip(r1['line_id'], r1['rno']))
            line_ids = get_next_id_in_subset(r1.line_id, flowline_routing,
                                             data[line_id_column])
            data[rno_column] = [line_id_rno_mapping[lid] for lid in line_ids]

        else:
            raise ValueError(msg)

    # create observations dataframe
    obsdata = pd.DataFrame(columns=sfrd.observations.columns)

    # remove duplicate locations
    data = data.groupby(rno_column).first().reset_index()
    obsdata['rno'] = data[rno_column]

    # segment and reach info
    iseg_ireach = dict(
        list(zip(reach_data.rno, zip(reach_data.iseg, reach_data.ireach))))
    obsdata['iseg'] = [iseg_ireach[rno][0] for rno in obsdata.rno]
    obsdata['ireach'] = [iseg_ireach[rno][1] for rno in obsdata.rno]
    for col in ['rno', 'iseg', 'ireach']:
        obsdata[col] = obsdata[col].astype(int)

    # a single obstype for all sites takes precedence over a column of types
    if obstype is not None:
        obsdata['obstype'] = obstype
    elif obstype_column in data.columns:
        obsdata['obstype'] = data[obstype_column]
    else:
        obsdata['obstype'] = 'downstream-flow'
    obsdata['obsname'] = data[obsname_column].astype(str)
    return obsdata
def export_sfr_results(mf2005_sfr_outputfile=None,
                       mf2005_SfrFile_instance=None,
                       mf6_sfr_stage_file=None,
                       mf6_sfr_budget_file=None,
                       model=None,
                       grid=None,
                       kstpkper=(0, 0),
                       sfrlinesfile=None,
                       pointsize=0.5,
                       output_length_units='feet',
                       output_time_units='seconds',
                       gis=True, pdfs=True,
                       output_path='postproc', suffix='',
                       verbose=False):
    """Export SFR package results to shapefiles and PDFs.

    Parameters
    ----------
    mf2005_sfr_outputfile, mf2005_SfrFile_instance, mf6_sfr_stage_file,
    mf6_sfr_budget_file : optional
        Sources of SFR output, passed to ``read_sfr_output``.
    model : model instance
        Used to read the output, for the length and time units, and for the
        model top (``model.dis.top``).
    grid : optional
        Passed to ``export_shapefile`` as ``modelgrid``.
    kstpkper : tuple or list of tuples
        (time step, stress period) pair(s) to export.
    sfrlinesfile : str, optional
        Shapefile with a line for each reach, and 'iseg' and 'ireach'
        columns. Its geometries and .prj file are used for the shapefile
        output.
    pointsize : float
        Passed to the PDF plotting functions.
    output_length_units, output_time_units : str
        Units for the added 'Qmean_<units>' and 'Qaq_<units>' columns.
    gis : bool
        Whether to write shapefiles.
    pdfs : bool
        Whether to write PDFs of baseflow and stream-aquifer exchange.
    output_path : str
        Folder for the output, passed to ``make_output_folders``.
    suffix : str
        Text appended to the output file names.
    verbose : bool
        Passed to the PDF plotting functions.

    Returns
    -------
    outfiles : list of str
        Paths of the files that were written.
    """
    pdfs_dir, rasters_dir, shps_dir = make_output_folders(output_path)
    m = model

    if not isinstance(kstpkper, list):
        kstpkper = [kstpkper]
    print('Exporting SFR results...')
    for f in [mf2005_sfr_outputfile, mf6_sfr_stage_file, mf6_sfr_budget_file]:
        if f is not None:
            print('file: {}'.format(f))
    df = read_sfr_output(mf2005_sfr_outputfile=mf2005_sfr_outputfile,
                         mf2005_SfrFile_instance=mf2005_SfrFile_instance,
                         mf6_sfr_stage_file=mf6_sfr_stage_file,
                         mf6_sfr_budget_file=mf6_sfr_budget_file,
                         model=model)

    # multipliers from model units to output units
    lmult = convert_length_units(get_length_units(m), output_length_units)
    tmult = convert_time_units(get_time_units(m), output_time_units)
    unit_text = get_unit_text(output_length_units, output_time_units, 3)

    if 'GWF' in df.columns:
        df['Qaquifer'] = -df.GWF  # for consistency with MF2005
    if 'Qmean' not in df.columns:
        df['Qmean'] = df[['Qin', 'Qout']].abs().mean(axis=1)

    # write columns in the output units
    df['Qmean_{}'.format(unit_text)] = df.Qmean * lmult**3 / tmult
    df['Qaq_{}'.format(unit_text)] = df.Qaquifer * lmult**3 / tmult

    # add model top comparison if available
    if m.dis is not None and 'i' in df.columns and 'j' in df.columns:
        df['model_top'] = m.dis.top.array[df.i.values, df.j.values]
        if 'stage' in df.columns:
            df['above'] = df.stage - df.model_top
    groups = df.groupby('kstpkper')

    outfiles = []
    if gis:
        prj_file = None
        # without an sfrlines shapefile, no geometry column is added; the
        # grid is still passed to export_shapefile as ``modelgrid``
        geoms = None
        if sfrlinesfile is not None:
            sfrlines = shp2df(sfrlinesfile)
            # the .prj file sits alongside the shapefile (this previously
            # sliced the sfrlines DataFrame instead of the file name)
            prj_file = sfrlinesfile[:-4] + '.prj'
            sfrlines.sort_values(by=['iseg', 'ireach'], inplace=True)
            # NOTE(review): assigning this Series below aligns on index
            # labels, not on position -- confirm that the indices of
            # sfrlines and of each kstpkper group match
            geoms = sfrlines.geometry

        for kstp, kper in kstpkper:
            print('stress period {}, timestep {}'.format(kper, kstp))
            dfp = groups.get_group((kstp, kper)).copy()
            if geoms is not None:
                dfp['geometry'] = geoms
            dfp['stp'] = [t[0] for t in dfp['kstpkper']]
            dfp['per'] = [t[1] for t in dfp['kstpkper']]
            dfp.drop('kstpkper', axis=1, inplace=True)  # geopandas doesn't like tuples
            outfile = '{}/sfrout_per{}_stp{}{}.shp'.format(
                shps_dir, kper, kstp, suffix)
            export_shapefile(outfile, dfp, modelgrid=grid, prj=prj_file)
            outfiles.append(outfile)

    if pdfs:
        # need to add a scale that addresses units
        for kstp, kper in kstpkper:
            print('stress period {}, timestep {}'.format(kper, kstp))
            df = groups.get_group((kstp, kper)).copy()
            bf_outfile = '{}/baseflow_per{}_stp{}{}.pdf'.format(
                pdfs_dir, kper, kstp, suffix)
            sfr_baseflow_pdf(bf_outfile, df, pointsize=pointsize,
                             verbose=verbose)
            # include the suffix, as for the other outputs (the file name
            # previously had one placeholder too few, so it was dropped)
            qaq_outfile = '{}/qaquifer_per{}_stp{}{}.pdf'.format(
                pdfs_dir, kper, kstp, suffix)
            sfr_qaquifer_pdf(qaq_outfile, df, pointsize=pointsize,
                             verbose=verbose)
            outfiles += [bf_outfile, qaq_outfile]
    return outfiles
def locate_sites(site_data,
                 reach_data,
                 active_area_shapefile=None,
                 x_column_in_data=None,
                 y_column_in_data=None,
                 reach_id_col='rno',
                 site_number_col='site_no',
                 keep_columns=None,
                 perimeter_buffer=1000,
                 distance_threshold=1000):
    """Get SFR reach locations corresponding to x, y points
    (e.g. measurement site locations).

    Parameters
    ----------
    site_data: DataFrame, ESRI shapefile or list of shapefiles
        DataFrame or shapefile with point locations and attribute data for
        stream flow observation sites. Point locations can be specified
        in a DataFrame by either x_column_in_data and y_column_in_data, or
        a 'geometry' column of shapely points. If shapefiles are provided
        for both site_data and reach_data, they can be in any CRS, but both
        must have .prj files.
    reach_data: DataFrame or ESRI shapefile
        SFRData.reach_data DataFrame, or shapefile equivalent
        with line-arcs representing all segments and/or reaches.
        If shapefiles are provided for both site_data and reach_data,
        they can be in any CRS, but both must have .prj files.
    active_area_shapefile: ESRI shapefile or shapely polygon (optional)
        Shapefile or polygon, in same CRS as reach_data,
        defining areal extent (perimeter) of SFR network. If a shapefile
        is given, its first feature is used.
    x_column_in_data : str (optional)
        Column in data with site x-coordinates (in same CRS as SFR network).
    y_column_in_data : str (optional)
        Column in data with site y-coordinates (in same CRS as SFR network).
    reach_id_col: str
        Column with unique number for each stream line-arc.
        default "rno"
    site_number_col : str
        Name of column in a site_data shapefile with number identifying
        each site to be located. default "site_no"
    keep_columns: list of strings
        List of columns in site_data to retain in the output. The reach id,
        'segment', 'reach' and 'geometry' columns are always retained where
        present. By default, all columns are retained. The list that is
        passed in is not modified.
    perimeter_buffer : scalar
        Exclude flows within this distance of perimeter defined
        by active_area_shapefile. For example, a value of 1000 would
        mean that sites must be at least 1 km inside of the active area
        perimeter to be included.
    distance_threshold : scalar
        Only consider sites within this distance of a stream line-arc.

    Returns
    -------
    locs : DataFrame
        The retained columns of the site data, with the reach id column and
        a 'distance' column added, and 'segment' and 'reach' columns if
        reach_data has an 'iseg' column.
    """
    sfrproj4 = None
    locsproj4 = None

    # read in sfr lines
    # (anything that is not a DataFrame is treated as a shapefile)
    if isinstance(reach_data, pd.DataFrame):
        sfrlines = reach_data.copy()
    else:
        sfrlines = shp2df(reach_data)
        sfrproj4 = get_proj_str(reach_data)
    sfrlines.index = sfrlines[reach_id_col]

    # sites to locate
    if isinstance(site_data, pd.DataFrame):
        locs = site_data.copy()
    else:
        locs = shp2df(site_data)
        if isinstance(site_data, list):
            locsproj4 = get_proj_str(site_data[0])
        else:
            locsproj4 = get_proj_str(site_data)
        locs['site_no'] = locs[site_number_col]  # str_ids(locs.site_no)

    # reproject if crs are available
    if locsproj4 is not None and sfrproj4 is not None:
        locs['geometry'] = project(locs.geometry.values, locsproj4, sfrproj4)

    # get the x and y coordinates
    if x_column_in_data is not None and y_column_in_data is not None:
        x = locs[x_column_in_data]
        y = locs[y_column_in_data]
    else:
        x = [p.x for p in locs.geometry]
        y = [p.y for p in locs.geometry]

    ids, distances = get_closest_reach(x, y, sfrlines,
                                       rno_column=reach_id_col)
    reach_id_col = reach_id_col.lower()
    locs[reach_id_col] = ids
    locs['distance'] = distances
    if 'iseg' in sfrlines.columns:
        locs['segment'] = sfrlines.loc[ids, 'iseg'].values
        locs['reach'] = sfrlines.loc[ids, 'ireach'].values
    locs = locs.loc[locs.distance <= distance_threshold]

    # cull observations to those on reaches that are inside of
    # the active area perimeter by at least the buffer distance
    if active_area_shapefile is not None:
        active_area = active_area_shapefile
        if not isinstance(active_area_shapefile, Polygon):
            active_area = shp2df(active_area_shapefile).geometry[0]
        # buffering the exterior ring gives a band along the perimeter;
        # the hole in the band is the area inside of the buffer
        perimeter = active_area.exterior.buffer(perimeter_buffer)
        perimeter_inside_buffer = Polygon(perimeter.interiors[0])

        keep = []
        for rn in locs[reach_id_col]:
            geom = sfrlines.loc[rn, 'geometry']
            keep.append(geom.within(perimeter_inside_buffer))
    else:
        keep = slice(None)

    if keep_columns is None:
        keep_columns = locs.columns.tolist()
    else:
        # copy, so that the caller's list is not appended to below
        keep_columns = list(keep_columns)
    for c in [reach_id_col, 'segment', 'reach', 'geometry']:
        if c not in keep_columns and c in locs.columns:
            keep_columns.append(c)
    locs = locs.loc[keep, keep_columns]
    return locs
def read_wdnr_monthly_water_use(wu_file, wu_points, model,
                                active_area=None,
                                drop_ids=None,
                                minimum_layer_thickness=2
                                ):
    """Read water use data from a master file generated from
    WDNR_wu_data.ipynb. Cull data to area of model. Reshape
    to one month-year-site value per row.

    Parameters
    ----------
    wu_file : csv file
        Water use data output from the WDNR_wu_data.ipynb.
    wu_points : point shapefile
        Water use locations, generated in the WDNR_wu_data.ipynb.
        Read with ``shp2df``; the model grid CRS is offered to ``shp2df``
        as ``dest_crs`` (via ``get_input_arguments``).
    model : flopy.modflow.Modflow instance
        Must have a valid ``modelgrid`` attribute (its ``crs``, ``bbox`` and
        ``extent`` are used) and a ``dis`` package with ``top`` and ``botm``
        arrays. The model grid is also used for row/column lookup.
    active_area : str (shapefile path), shapely Polygon or MultiPolygon
        Polygon denoting active area of the model. If specified,
        wells are culled to this area instead of the model bounding box.
        (default None)
    drop_ids : sequence, optional
        Site numbers to exclude from both the water use table and
        the well locations. (default None)
    minimum_layer_thickness : scalar
        Minimum layer thickness to have pumping.

    Returns
    -------
    well_info : DataFrame
        One row per site, with well construction information, location
        (x, y, i, j) and layer assignments.
    monthly_data : DataFrame
        One row per site, year and month, with columns
        'site_no', 'year', 'month', 'gallons' and 'datetime'.

    Both return values are None if no wells fall within the culling area.

    Raises
    ------
    TypeError
        If active_area is not None, a str, or a shapely (Multi)Polygon.
    """
    col_fmt = '{}_wdrl_gpm_amt'
    data_renames = {'site_seq_no': 'site_no',
                    'wdrl_year': 'year'}

    df = pd.read_csv(wu_file)
    drop_cols = [c for c in df.columns if 'unnamed' in c.lower()]
    drop_cols += ['objectid']
    df.drop(drop_cols, axis=1, inplace=True, errors='ignore')
    df.rename(columns=data_renames, inplace=True)
    if drop_ids is not None:
        df = df.loc[~df.site_no.isin(drop_ids)].copy()

    # implement automatic reprojection in gis-utils
    # maintaining backwards compatibility
    kwargs = {'dest_crs': model.modelgrid.crs}
    kwargs = get_input_arguments(kwargs, shp2df)
    locs = shp2df(wu_points, **kwargs)
    site_seq_col = [c for c in locs if 'site_se' in c.lower()]
    locs_renames = {c: 'site_no' for c in site_seq_col}
    locs.rename(columns=locs_renames, inplace=True)
    if drop_ids is not None:
        locs = locs.loc[~locs.site_no.isin(drop_ids)].copy()

    if active_area is None:
        # cull the data to the model bounds
        features = model.modelgrid.bbox
        txt = "No wells are inside the model bounds of {}"\
            .format(model.modelgrid.extent)
    elif isinstance(active_area, str):
        # implement automatic reprojection in gis-utils
        # maintaining backwards compatibility
        kwargs = {'dest_crs': model.modelgrid.crs}
        kwargs = get_input_arguments(kwargs, shp2df)
        features = shp2df(active_area, **kwargs).geometry.tolist()
        if len(features) > 1:
            features = MultiPolygon(features)
        else:
            features = Polygon(features[0])
        txt = "No wells are inside the area of {}"\
            .format(active_area)
    elif isinstance(active_area, (Polygon, MultiPolygon)):
        features = active_area
        # previously 'txt' was left undefined on this branch, so a failing
        # assertion below surfaced as an UnboundLocalError instead
        txt = "No wells are inside the supplied active area polygon"
    else:
        raise TypeError(
            'unrecognized input for "active_area": {}'.format(active_area))

    within = [g.within(features) for g in locs.geometry]
    # fails only if there were no well locations to test at all;
    # the case of no wells inside the area is handled just below
    assert len(within) > 0, txt
    locs = locs.loc[within].copy()
    if len(locs) == 0:
        print('No wells within model area:\n{}\n{}'.format(wu_file, wu_points))
        return None, None
    # sort_values returns a new frame, so no view of the original is mutated
    df = df.loc[df.site_no.isin(locs.site_no)].sort_values(by=['site_no', 'year'])

    # create separate dataframe with well info
    well_info = df[['site_no',
                    'well_radius_mm',
                    'borehole_radius_mm',
                    'well_depth_m',
                    'elev_open_int_top_m',
                    'elev_open_int_bot_m',
                    'screen_length_m',
                    'screen_midpoint_elev_m']].copy()
    # groupby site number to cull duplicate information
    well_info = well_info.groupby('site_no').first()
    well_info['site_no'] = well_info.index

    # add top elevation, screen midpoint elev, row, column and layer
    points = dict(zip(locs['site_no'], locs.geometry))
    well_info['x'] = [points[sn].x for sn in well_info.site_no]
    well_info['y'] = [points[sn].y for sn in well_info.site_no]

    print('intersecting wells with model grid...')
    t0 = time.time()
    i, j = get_ij(model.modelgrid, well_info.x.values, well_info.y.values)
    print("took {:.2f}s\n".format(time.time() - t0))

    top = model.dis.top.array
    botm = model.dis.botm.array
    thickness = get_layer_thicknesses(top, botm)
    well_info['i'] = i
    well_info['j'] = j
    well_info['elv_m'] = top[i, j]
    well_info['elv_top_m'] = well_info.elev_open_int_top_m
    well_info['elv_botm_m'] = well_info.elev_open_int_bot_m
    well_info['elv_mdpt_m'] = well_info.screen_midpoint_elev_m
    well_info['k'] = get_layer(botm, i, j, elev=well_info['elv_mdpt_m'].values)
    well_info['laythick'] = thickness[well_info.k.values, i, j]
    well_info['ktop'] = get_layer(botm, i, j, elev=well_info['elv_top_m'].values)
    well_info['kbotm'] = get_layer(botm, i, j, elev=well_info['elv_botm_m'].values)

    # for wells in a layer below minimum thickness
    # move to layer with screen top, then screen botm,
    # put remainder in layer 1 and hope for the best
    # (the caller's threshold is passed through; it used to be hard-coded to 2.)
    well_info = wells.assign_layers_from_screen_top_botm(
        well_info, model,
        flux_col='q',
        screen_top_col='elv_top_m',
        screen_botm_col='elv_botm_m',
        across_layers=False,
        distribute_by='transmissivity',
        minimum_layer_thickness=minimum_layer_thickness)
    isthin = well_info.laythick < minimum_layer_thickness
    assert not np.any(isthin)

    # make a datetime column
    monthlyQ_cols = [col_fmt.format(calendar.month_abbr[month]).lower()
                     for month in range(1, 13)]
    # copy so that renaming the columns does not touch a view of df
    monthly_data = df[['site_no', 'year'] + monthlyQ_cols].copy()
    monthly_data.columns = ['site_no', 'year'] + np.arange(1, 13).tolist()

    # stack the data
    # so that each row is a site number, year, month
    # reset the index to move multi-index levels back out to columns
    stacked = monthly_data.set_index(['site_no', 'year']).stack().reset_index()
    stacked.columns = ['site_no', 'year', 'month', 'gallons']
    stacked['datetime'] = pd.to_datetime(
        ['{}-{:02d}'.format(y, m)
         for y, m in zip(stacked.year, stacked.month)])
    monthly_data = stacked
    return well_info, monthly_data
def get_flowline_routing(plusflow_file, dest_routing_file):
    """Write the FROMCOMID/TOCOMID columns of a PlusFlow table to a CSV file.

    Nothing is read or written if `dest_routing_file` already exists.

    Parameters
    ----------
    plusflow_file : str
        File read with ``gisutils.shp2df``; must contain
        'FROMCOMID' and 'TOCOMID' columns.
    dest_routing_file : str
        Path of the CSV file to create (written without the index).
    """
    if os.path.exists(dest_routing_file):
        return
    plusflow = gisutils.shp2df(plusflow_file)
    plusflow[['FROMCOMID', 'TOCOMID']].to_csv(dest_routing_file, index=False)
def get_inflow_locations_from_parent_model(parent_reach_data, inset_reach_data,
                                           inset_grid, active_area=None):
    """Get places in an inset model SFR network where the parent SFR network crosses
    the inset model boundary, using common line ID numbers from parent and inset reach datasets.
    MF2005 or MF6 supported; if either dataset contains only reach numbers (is MODFLOW-6),
    the reach numbers are used as segment numbers, with each segment only having one reach.

    Parameters
    ----------
    parent_reach_data : str (filepath) or DataFrame
        SFR reach data for parent model. Must include columns:
        line_id : int; unique identifier for hydrography line that each reach is based on
        rno : int; unique identifier for each reach.
        iseg : int; unique identifier for each segment. Filled from rno if absent.
        ireach : int; unique identifier for each reach. Set to 1 if iseg is absent.
        outreach : int; rno of the next reach downstream (read in the routing loop)
        geometry : shapely.geometry object representing location of each reach
    inset_reach_data : str (filepath) or DataFrame
        SFR reach data for inset model. Filepaths ending in '.shp' are read
        as shapefiles; other filepaths are read as CSV files.
        Same columns as parent_reach_data, except a geometry column isn't needed;
        a 'name' column is also read. line_id values must correspond to
        same source hydrography as those in parent_reach_data.
    inset_grid : str (filepath) or flopy.discretization.grid.Grid instance
        Describes the inset model grid. Must be in same coordinate system
        as geometries in parent_reach_data. Only used (and only validated)
        if active_area is None, in which case the bounding box of the grid
        extent is used as the active area.
    active_area : shapely.geometry.Polygon object
        Describes the area of the inset model where SFR is applied. Used to find
        inset reaches from parent model. Must be in same coordinate system
        as geometries in parent_reach_data. Required only if inset_grid is None.

    Returns
    -------
    locations : DataFrame
        One row per inset model inlet reach. Columns:
        line_id : unique identifier for hydrography line that each reach is based on
        name : value of the 'name' column in inset_reach_data
        rno, iseg, ireach : inset model reach number, segment and reach
        parent_rno : parent model reach number
        parent_iseg : parent model segment
        parent_ireach : parent model reach

    Raises
    ------
    ValueError
        If parent_reach_data or inset_reach_data is of an unrecognized type,
        or if active_area is None and inset_grid is of an unrecognized type.
    """
    # the inset grid is only needed to derive a default active area
    if active_area is None:
        if isinstance(inset_grid, str):
            grid = load_modelgrid(inset_grid)
        elif isinstance(inset_grid, flopy.discretization.grid.Grid):
            grid = inset_grid
        else:
            raise ValueError('Unrecognized input for inset_grid')
        xmin, xmax, ymin, ymax = grid.extent
        active_area = box(xmin, ymin, xmax, ymax)

    # parent and inset reach data
    if isinstance(parent_reach_data, str):
        prd = shp2df(parent_reach_data)
    elif isinstance(parent_reach_data, pd.DataFrame):
        prd = parent_reach_data.copy()
    else:
        raise ValueError('Unrecognized input for parent_reach_data')
    if 'rno' in prd.columns and 'iseg' not in prd.columns:
        prd['iseg'] = prd['rno']
        prd['ireach'] = 1
    missing = {'line_id', 'rno', 'iseg', 'ireach', 'geometry'}.difference(prd.columns)
    assert not missing, \
        'parent_reach_data is missing columns: {}'.format(sorted(missing))

    if isinstance(inset_reach_data, str):
        if inset_reach_data.endswith('.shp'):
            ird = shp2df(inset_reach_data)
        else:
            ird = pd.read_csv(inset_reach_data)
    elif isinstance(inset_reach_data, pd.DataFrame):
        ird = inset_reach_data.copy()
    else:
        raise ValueError('Unrecognized input for inset_reach_data')
    if 'rno' in ird.columns and 'iseg' not in ird.columns:
        ird['iseg'] = ird['rno']
        ird['ireach'] = 1
    missing = {'line_id', 'rno', 'iseg', 'ireach'}.difference(ird.columns)
    assert not missing, \
        'inset_reach_data is missing columns: {}'.format(sorted(missing))

    graph = make_graph(ird.rno.values, ird.outreach.values, one_to_many=False)

    # cull parent reach data to only lines that cross or are just upstream of inset boundary
    buffered = active_area.buffer(5000, cap_style=2)
    close = [g.intersects(buffered) for g in prd.geometry]
    prd = prd.loc[close]
    prd.index = prd.rno
    boundary = active_area.exterior
    inset_line_id_connections = {}  # parent rno: inset line_id
    for _, reach in prd.iterrows():
        # skip reaches whose downstream reach was culled (or doesn't exist)
        if reach.outreach not in prd.index:
            continue
        downstream_line = prd.loc[reach.outreach, 'geometry']
        # only reaches that route to a line within the active area
        # can be inflow locations
        if not downstream_line.within(active_area):
            continue
        if reach.geometry.intersects(boundary):
            # lines that cross route to their counterpart in inset model
            inset_line_id_connections[reach.rno] = reach.line_id
        elif not reach.geometry.within(active_area):
            # lines that route to a line within the inset model
            # route to that line's inset counterpart
            inset_line_id_connections[reach.rno] = prd.loc[reach.outreach, 'line_id']

    prd = prd.loc[prd.rno.isin(list(inset_line_id_connections.keys()))]

    # parent rno lookup
    parent_rno_lookup = {v: k for k, v in inset_line_id_connections.items()}

    # inlet reaches in inset model
    ird = ird.loc[ird.ireach == 1]
    ird = ird.loc[ird.line_id.isin(list(inset_line_id_connections.values()))]

    # for each reach in ird (potential inset inlets)
    # check that there isn't another inlet downstream
    candidate_inlets = set(ird.rno)  # loop-invariant; build once
    drop_reaches = []
    for rno in ird.rno:
        path = find_path(graph, rno)
        # path[0] is the reach itself
        if candidate_inlets.intersection(path[1:]):
            drop_reaches.append(rno)

    ird = ird.loc[~ird.rno.isin(drop_reaches)]
    # cull parent flows to outlet reaches
    parent_outlet_iseg_ireach = dict(zip(prd.rno, zip(prd.iseg, prd.ireach)))
    df = ird[['line_id', 'name', 'rno', 'iseg', 'ireach']].copy()
    df['parent_rno'] = [parent_rno_lookup[lid] for lid in df['line_id']]
    df['parent_iseg'] = [parent_outlet_iseg_ireach[rno][0] for rno in df['parent_rno']]
    df['parent_ireach'] = [parent_outlet_iseg_ireach[rno][1] for rno in df['parent_rno']]
    return df.reset_index(drop=True)
def rasterize(feature, grid, id_column=None, include_ids=None,
              epsg=None, proj4=None, dtype=np.float32):
    """Rasterize a feature onto the model grid, using
    the rasterio.features.rasterize method. Features are intersected
    if they contain the cell center.

    Parameters
    ----------
    feature : str (shapefile path), list of shapefile paths,
        list of shapely objects, single shapely object,
        or dataframe with geometry column
    grid : grid.StructuredGrid instance
        Supplies the affine transform, nrow, ncol and the CRS
        (proj_str / epsg) that features are reprojected to.
    id_column : str
        Column with unique integer identifying each feature; values
        from this column will be assigned to the output raster.
        Non-numeric IDs (e.g. strings) are replaced by consecutive integers
        starting at 1, in order of first appearance. If None, features
        are numbered consecutively starting at 1.
    include_ids : sequence, optional
        If given together with id_column, only features whose id_column
        value is in include_ids are rasterized.
    epsg : int
        EPSG code for feature coordinate reference system. Optional,
        but an epsg code or proj4 string must be supplied if feature
        isn't a shapefile, and isn't in the same CRS as the model.
    proj4 : str
        Proj4 string for feature CRS (optional)
    dtype : dtype
        Datatype for the output array

    Returns
    -------
    2D numpy array with intersected values, or None (after printing a
    message) if rasterio is not installed.
    """
    try:
        from rasterio import features
    except ImportError:
        print('This method requires rasterio.')
        return
    # the ABC aliases were removed from the collections namespace in Python 3.10
    from collections.abc import Iterable

    trans = grid.transform

    if isinstance(feature, str):
        proj4 = get_proj_str(feature)
        df = shp2df(feature)
    elif isinstance(feature, pd.DataFrame):
        df = feature.copy()
    elif isinstance(feature, Iterable):
        # list of shapefiles
        if isinstance(feature[0], str):
            proj4 = get_proj_str(feature[0])
            df = shp2df(feature)
        else:
            df = pd.DataFrame({'geometry': feature})
    else:
        # single (non-iterable) geometry
        df = pd.DataFrame({'geometry': [feature]})

    # handle shapefiles in different CRS than model grid
    reproject = False
    if proj4 is not None:
        if proj4 != grid.proj_str:
            reproject = True
    elif epsg is not None and grid.epsg is not None:
        if epsg != grid.epsg:
            reproject = True
            from fiona.crs import to_string, from_epsg
            proj4 = to_string(from_epsg(epsg))
    if reproject:
        df['geometry'] = project(df.geometry.values, proj4, grid.proj_str)

    # subset to include_ids
    if id_column is not None and include_ids is not None:
        df = df.loc[df[id_column].isin(include_ids)].copy()

    # create list of GeoJSON features, with unique value for each feature
    if id_column is None:
        numbers = range(1, len(df) + 1)
    # if IDs are strings, get a number for each one
    # pd.DataFrame.unique() generally preserves order
    # (the previous check, isinstance(dtype, np.object), was true for every
    # dtype and fails outright on NumPy >= 1.24, where np.object is gone)
    elif not pd.api.types.is_numeric_dtype(df[id_column]):
        unique_values = df[id_column].unique()
        values = dict(zip(unique_values, range(1, len(unique_values) + 1)))
        numbers = [values[n] for n in df[id_column]]
    else:
        numbers = df[id_column].tolist()

    geoms = list(zip(df.geometry, numbers))
    result = features.rasterize(geoms,
                                out_shape=(grid.nrow, grid.ncol),
                                transform=trans)
    assert result.sum(axis=(0, 1)) != 0, "Nothing was intersected!"
    return result.astype(dtype)
def load_nhdplus_v2(NHDPlus_paths=None,
                    NHDFlowlines=None, PlusFlowlineVAA=None, PlusFlow=None,
                    elevslope=None,
                    filter=None,
                    epsg=None, proj_str=None, prjfile=None):
    """Load NHDPlus v2 flowlines and attribute tables into a single DataFrame.

    Parameters
    ----------
    NHDPlus_paths : optional
        If given, passed to ``get_nhdplus_v2_filepaths`` to obtain
        NHDFlowlines, PlusFlowlineVAA, PlusFlow and elevslope,
        replacing any values supplied for those arguments.
    NHDFlowlines : str or list of strings.
        Shapefile or list of NHDFlowline shapefiles containing
        feature geometries (line arcs) for stream network. Must contain
        the following attribute fields:
        COMID : common identifier number
    PlusFlowlineVAA : str or list of strings.
        DBF file or list of DBF files with NHDPlus attribute information.
        Must contain the following attribute fields:
        COMID : common identifier number
    PlusFlow : str or list of strings.
        DBF file or list of DBF files with NHDPlus routing information.
        Must contain the following attribute fields:
        COMID : common identifier number
    elevslope : str or list of strings.
        DBF file or list of DBF files with end elevations for each
        line arc in NHDFlowlines. Must contain the following attribute fields:
        COMID : common identifier number
    filter : tuple, str (filepath), shapely Polygon or GeoJSON polygon
        Bounding box (tuple) or polygon feature of model stream network area.
        Shapefiles will be reprojected to the CRS of the flowlines; all other
        feature types must be supplied in same CRS as flowlines.
    epsg, proj_str, prjfile : optional
        Passed to ``crs`` to describe the flowline coordinate reference
        system. If prjfile is None, it is looked up with ``get_prj_file``.

    Returns
    -------
    df : DataFrame
        Inner join of the selected flowline, value-added attribute and
        elevation columns, plus a 'tocomid' routing column.
    """
    print("\nloading NHDPlus v2 hydrography data...")
    start_time = time.time()

    if NHDPlus_paths is not None:
        (NHDFlowlines, PlusFlowlineVAA,
         PlusFlow, elevslope) = get_nhdplus_v2_filepaths(NHDPlus_paths)

    # crs information comes from the flowline projection file by default
    if prjfile is None:
        prjfile = get_prj_file(NHDPlus_paths, NHDFlowlines)
    nhdcrs = crs(epsg=epsg, proj_str=proj_str, prjfile=prjfile)

    # anything other than a bounding box tuple (shapefile, shapely Polygon,
    # GeoJSON polygon) is converted to a bounding box in the flowline crs
    filter_is_feature = filter is not None and not isinstance(filter, tuple)
    if filter_is_feature:
        filter = get_bbox(filter, dest_crs=nhdcrs)

    # columns retained from each table; columns that are not retained:
    # flowlines: FCODE, FDATE, FLOWDIR, FTYPE, GNIS_ID,
    #            REACHCODE, RESOLUTION, WBAREACOMI
    # PlusFlowlineVAA: Hydroseq, DnHydroseq, LevelPathI
    flowline_columns = ['COMID', 'GNIS_NAME', 'LENGTHKM', 'geometry']
    vaa_columns = ['ArbolateSu', 'StreamOrde']
    elevation_columns = ['MAXELEVSMO', 'MINELEVSMO']

    # read flowlines and attribute tables into dataframes
    flowlines = read_nhdplus(NHDFlowlines, bbox_filter=filter)
    vaa = read_nhdplus(PlusFlowlineVAA)
    plusflow = shp2df(PlusFlow)
    elevations = read_nhdplus(elevslope)

    # join flowline and attribute dataframes
    df = (flowlines[flowline_columns].copy()
          .join(vaa[vaa_columns], how='inner')
          .join(elevations[elevation_columns], how='inner'))
    print("\nload finished in {:.2f}s".format(time.time() - start_time))

    # add routing information from PlusFlow table
    df['tocomid'] = get_tocomids(plusflow, df.index.tolist())
    return df
def rasterize(feature, grid, id_column=None, include_ids=None,
              crs=None, epsg=None, proj4=None,
              dtype=np.float32, **kwargs):
    """Rasterize a feature onto the model grid, using
    the rasterio.features.rasterize method. Features are intersected
    if they contain the cell center.

    Parameters
    ----------
    feature : str (shapefile path), list of shapefile paths,
        list of shapely objects, single shapely object,
        or dataframe with geometry column
    grid : grid.StructuredGrid instance
        Supplies the affine transform, nrow, ncol and the CRS
        that features are reprojected to.
    id_column : str
        Column with unique integer identifying each feature; values
        from this column will be assigned to the output raster.
        Non-numeric IDs (e.g. strings) are replaced by consecutive integers
        starting at 1, in order of first appearance. If None, features
        are numbered consecutively starting at 1.
    include_ids : sequence, optional
        If given together with id_column, only features whose id_column
        value is in include_ids are rasterized.
    crs : obj
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to :meth:`pyproj.crs.CRS.from_user_input`
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class
    epsg : int
        Deprecated; use crs instead.
    proj4 : str
        Deprecated; use crs instead.
    dtype : dtype
        Datatype for the output array
    **kwargs : keyword arguments to rasterio.features.rasterize()
        https://rasterio.readthedocs.io/en/stable/api/rasterio.features.html

    Returns
    -------
    2D numpy array with intersected values, or None (after printing a
    message) if rasterio is not installed.

    Raises
    ------
    ValueError
        If crs is given and the installed gisutils is older than 0.2.
    """
    try:
        from rasterio import features
    except ImportError:
        print('This method requires rasterio.')
        return
    # the ABC aliases were removed from the collections namespace in Python 3.10
    from collections.abc import Iterable

    if epsg is not None:
        warnings.warn("The epsg argument is deprecated. Use crs instead, "
                      "which requires gisutils >= 0.2",
                      DeprecationWarning)
    if proj4 is not None:
        # this warning previously named the epsg argument
        warnings.warn("The proj4 argument is deprecated. Use crs instead, "
                      "which requires gisutils >= 0.2",
                      DeprecationWarning)
    if crs is not None:
        if version.parse(gisutils.__version__) < version.parse('0.2.0'):
            raise ValueError("The crs argument requires gisutils >= 0.2")
        from gisutils import get_authority_crs
        crs = get_authority_crs(crs)

    trans = grid.transform

    # arguments for shp2df are kept separate from **kwargs, which are
    # reserved for rasterio (they used to be overwritten here and lost)
    shp2df_kwargs = {}
    if isinstance(feature, str):
        proj4 = get_proj_str(feature)
        shp2df_kwargs = get_input_arguments({'dest_crs': grid.crs}, shp2df)
        df = shp2df(feature, **shp2df_kwargs)
    elif isinstance(feature, pd.DataFrame):
        df = feature.copy()
    elif isinstance(feature, Iterable):
        # list of shapefiles
        if isinstance(feature[0], str):
            proj4 = get_proj_str(feature[0])
            shp2df_kwargs = get_input_arguments({'dest_crs': grid.crs}, shp2df)
            df = shp2df(feature, **shp2df_kwargs)
        else:
            df = pd.DataFrame({'geometry': feature})
    else:
        # single (non-iterable) geometry
        df = pd.DataFrame({'geometry': [feature]})

    # handle shapefiles in different CRS than model grid
    # (skipped when dest_crs was handed to shp2df above)
    if 'dest_crs' not in shp2df_kwargs:
        reproject = False
        # todo: consolidate rasterize reprojection to just use crs
        # NOTE(review): if both crs and proj4/epsg are supplied, both
        # reprojections below can run -- confirm that is never intended
        if crs is not None:
            if crs != grid.crs:
                df['geometry'] = project(df.geometry.values, crs, grid.crs)
        if proj4 is not None:
            if proj4 != grid.proj_str:
                reproject = True
        elif epsg is not None and grid.epsg is not None:
            if epsg != grid.epsg:
                reproject = True
                from fiona.crs import from_epsg, to_string
                proj4 = to_string(from_epsg(epsg))
        if reproject:
            df['geometry'] = project(df.geometry.values, proj4, grid.proj_str)

    # subset to include_ids
    if id_column is not None and include_ids is not None:
        df = df.loc[df[id_column].isin(include_ids)].copy()

    # create list of GeoJSON features, with unique value for each feature
    if id_column is None:
        numbers = range(1, len(df) + 1)
    # if IDs are strings, get a number for each one
    # pd.DataFrame.unique() generally preserves order
    # (the previous check, isinstance(dtype, np.object), was true for every
    # dtype and fails outright on NumPy >= 1.24, where np.object is gone)
    elif not pd.api.types.is_numeric_dtype(df[id_column]):
        unique_values = df[id_column].unique()
        values = dict(zip(unique_values, range(1, len(unique_values) + 1)))
        numbers = [values[n] for n in df[id_column]]
    else:
        numbers = df[id_column].tolist()

    geoms = list(zip(df.geometry, numbers))
    rasterize_kwargs = {'out_shape': (grid.nrow, grid.ncol),
                        'transform': trans}
    rasterize_kwargs.update(kwargs)
    result = features.rasterize(geoms, **rasterize_kwargs)
    assert result.sum(axis=(0, 1)) != 0, "Nothing was intersected!"
    return result.astype(dtype)
def setup_structured_grid(xoff=None, yoff=None, xul=None, yul=None,
                          nrow=None, ncol=None, nlay=None,
                          dxy=None, delr=None, delc=None,
                          top=None, botm=None,
                          rotation=0.,
                          parent_model=None, snap_to_NHG=False,
                          features=None, features_shapefile=None,
                          id_column=None, include_ids=None,
                          buffer=1000, crs=None,
                          epsg=None, model_length_units=None,
                          grid_file='grid.json',
                          bbox_shapefile=None, **kwargs):
    """Set up a structured model grid, either from an origin and dimensions
    (option 1) or from the buffered bounding box of one or more features
    (option 2), optionally aligned with a parent model grid.

    Parameters
    ----------
    xoff, yoff : scalar
        Grid origin. If both are given, option 1 is used: nrow and ncol are
        required, and the upper left corner is set to (xoff, yoff + height).
    xul, yul : scalar
        Accepted for interface compatibility; recomputed by both options.
    nrow, ncol, nlay : int
        Grid dimensions. nrow and ncol are recomputed when a regular grid
        is aligned to a parent model.
    dxy : scalar
        Uniform cell spacing, in model length units.
    delr, delc : scalar or array
        Column and row spacings, in model length units. Arrays holding a
        single unique value are treated as a regular (uniform) spacing.
    top, botm : optional
        Passed to the grid configuration (omitted from the json file).
    rotation : scalar
        Only 0 is supported; a non-zero value raises NotImplementedError
        in option 1, and option 2 always resets it to 0.
    parent_model : optional
        Model with ``dis`` and ``modelgrid`` attributes. Inset spacings must
        be factors of the parent spacings. Unless snap_to_NHG is set, the
        upper left corner is snapped to a parent cell corner and the grid
        dimensions are rounded up to multiples of the parent spacing.
    snap_to_NHG : bool
        Option 1 only: move (xoff, yoff) to a point returned by
        ``get_point_on_national_hydrogeologic_grid``.
    features : shapely object or list of shapely objects
        Option 2: feature(s) whose bounding box defines the grid area.
    features_shapefile : str
        Option 2: shapefile to read features from, if features is None.
    id_column, include_ids : optional
        Subset the features read from features_shapefile.
    buffer : scalar
        Option 2: distance added around the feature bounding box.
    crs, epsg : optional
        Coordinate reference system for the grid; epsg takes precedence.
        If neither is given, the parent model grid crs is used, if available.
    model_length_units : str
        Passed to ``convert_length_units`` to convert spacings to meters.
    grid_file : str
        Path of the json file that the grid configuration is written to.
    bbox_shapefile : str, optional
        If given, a shapefile of the grid bounding box is written here.
    **kwargs :
        Additional entries for the grid configuration.

    Returns
    -------
    modelgrid : MFsetupGrid instance

    Raises
    ------
    ValueError
        If an inset spacing is not a factor of the parent spacing.
    NotImplementedError
        If a non-zero rotation is specified with option 1.
    """
    print('setting up model grid...')
    t0 = time.time()

    # conversions for model/parent model units to meters
    # set regular flag for handling delc/delr
    to_meters_inset = convert_length_units(model_length_units, 'meters')
    regular = True
    if dxy is not None:
        delr_m = np.round(dxy * to_meters_inset, 4)  # dxy is specified in model units
        delc_m = delr_m
    if delr is not None:
        delr_m = np.round(delr * to_meters_inset, 4)  # delr is specified in model units
        if not np.isscalar(delr_m):
            # an array with a single unique value is a regular spacing
            # (the set itself used to be compared to 1, which is never true)
            if len(set(delr_m)) == 1:
                delr_m = delr_m[0]
            else:
                regular = False
    if delc is not None:
        delc_m = np.round(delc * to_meters_inset, 4)  # delc is specified in model units
        if not np.isscalar(delc_m):
            if len(set(delc_m)) == 1:
                delc_m = delc_m[0]
            else:
                regular = False

    if parent_model is not None:
        to_meters_parent = convert_length_units(
            get_model_length_units(parent_model), 'meters')
        # parent model grid spacing in meters
        parent_delr_m = np.round(
            parent_model.dis.delr.array[0] * to_meters_parent, 4)
        if not parent_delr_m % delr_m == 0:
            raise ValueError(
                'inset delr spacing of {} must be factor of parent spacing of {}'
                .format(delr_m, parent_delr_m))
        parent_delc_m = np.round(
            parent_model.dis.delc.array[0] * to_meters_parent, 4)
        if not parent_delc_m % delc_m == 0:
            raise ValueError(
                'inset delc spacing of {} must be factor of parent spacing of {}'
                .format(delc_m, parent_delc_m))

    if epsg is not None:
        crs = pyproj.crs.CRS.from_epsg(epsg)
    elif crs is not None:
        from gisutils import get_authority_crs
        crs = get_authority_crs(crs)
    elif parent_model is not None:
        crs = parent_model.modelgrid.crs

    # option 1: make grid from xoff, yoff and specified dimensions
    if xoff is not None and yoff is not None:
        assert nrow is not None and ncol is not None, \
            "Need to specify nrow and ncol if specifying xoffset and yoffset."
        if regular:
            height_m = np.round(delc_m * nrow, 4)
            width_m = np.round(delr_m * ncol, 4)
        else:
            height_m = np.sum(delc_m)
            width_m = np.sum(delr_m)

        # optionally align grid with national hydrologic grid
        # grids snapping to NHD must have spacings that are a factor of 1 km
        if snap_to_NHG:
            assert regular and np.allclose(1000 % delc_m, 0, atol=1e-4)
            x, y = get_point_on_national_hydrogeologic_grid(xoff, yoff,
                                                            offset='edge',
                                                            op=np.floor)
            xoff = x
            yoff = y
            rotation = 0.

        # need to specify xul, yul in case snapping to parent
        # todo: allow snapping to parent grid on xoff, yoff
        if rotation != 0:
            raise NotImplementedError('Rotated grids not supported.')
        xul = xoff
        yul = yoff + height_m

    # option 2: make grid using buffered feature bounding box
    else:
        if features is None and features_shapefile is not None:
            # Make sure shapefile and bbox filter are in dest (model) CRS
            # TODO: CRS wrangling could be added to shp2df as a feature
            reproject_filter = False
            try:
                from gisutils import get_shapefile_crs
                features_crs = get_shapefile_crs(features_shapefile)
                if features_crs != crs:
                    reproject_filter = True
            except Exception:
                # fall back to the projection string of the shapefile
                # (e.g. if gisutils doesn't provide get_shapefile_crs)
                features_crs = get_proj_str(features_shapefile)
                reproject_filter = True
            bbox_filter = None
            if parent_model is not None:
                if reproject_filter:
                    bbox_filter = project(parent_model.modelgrid.bbox,
                                          parent_model.modelgrid.crs,
                                          features_crs).bounds
                else:
                    bbox_filter = parent_model.modelgrid.bbox.bounds
            shp2df_kwargs = {'dest_crs': crs}
            shp2df_kwargs = get_input_arguments(shp2df_kwargs, shp2df)
            df = shp2df(features_shapefile,
                        filter=bbox_filter, **shp2df_kwargs)

            # optionally subset shapefile data to specified features
            if id_column is not None and include_ids is not None:
                df = df.loc[df[id_column].isin(include_ids)]
            # use all features by default
            features = df.geometry.tolist()

        # convert multiple features to a MultiPolygon
        if isinstance(features, list):
            if len(features) > 1:
                features = MultiPolygon(features)
            else:
                features = features[0]

        # size the grid based on the bbox for features
        x1, y1, x2, y2 = features.bounds
        L = buffer  # distance from area of interest to boundary
        xul = x1 - L
        yul = y2 + L
        height_m = np.round(yul - (y1 - L), 4)  # initial model height from buffer distance
        width_m = np.round((x2 + L) - xul, 4)
        rotation = 0.  # rotation not supported with this option

    # align model with parent grid if there is a parent model
    # (and not snapping to national hydrologic grid)
    # NOTE(review): nrow and ncol are only recomputed inside this block;
    # option 2 without a parent model appears to rely on the caller
    # supplying them -- confirm
    if parent_model is not None and not snap_to_NHG:

        # get location of coinciding cell in parent model for upper left
        pi, pj = parent_model.modelgrid.intersect(xul, yul)
        verts = np.array(parent_model.modelgrid.get_cell_vertices(pi, pj))
        xul, yul = verts[:, 0].min(), verts[:, 1].max()

        # adjust the dimensions to align remaining corners
        def roundup(number, increment):
            return int(np.ceil(number / increment) * increment)
        # height spans rows (delc); width spans columns (delr)
        # (the two parent spacings used to be swapped here)
        height = roundup(height_m, parent_delc_m)
        width = roundup(width_m, parent_delr_m)

        # update nrow, ncol after snapping to parent grid
        if regular:
            nrow = int(height / delc_m)  # h is in meters
            ncol = int(width / delr_m)

    # set the grid configuration dictionary
    # spacing is in meters (consistent with projected CRS)
    # (modelgrid object will be updated automatically from this dictionary)
    grid_cfg = {'nrow': int(nrow), 'ncol': int(ncol),
                'nlay': nlay,
                'delr': delr_m, 'delc': delc_m,
                'xoff': xoff, 'yoff': yoff,
                'xul': xul, 'yul': yul,
                'rotation': rotation,
                'lenuni': 2
                }
    if regular:
        grid_cfg['delr'] = np.ones(grid_cfg['ncol'], dtype=float) * grid_cfg['delr']
        grid_cfg['delc'] = np.ones(grid_cfg['nrow'], dtype=float) * grid_cfg['delc']
    grid_cfg['delr'] = grid_cfg['delr'].tolist()  # for serializing to json
    grid_cfg['delc'] = grid_cfg['delc'].tolist()

    # renames for flopy modelgrid
    renames = {'rotation': 'angrot'}
    for k, v in renames.items():
        if k in grid_cfg:
            grid_cfg[v] = grid_cfg.pop(k)

    # add epsg or wkt if there isn't an epsg
    if epsg is not None:
        grid_cfg['epsg'] = epsg
    elif crs is not None:
        if 'epsg' in crs.srs.lower():
            grid_cfg['epsg'] = int(crs.srs.split(':')[1])
        else:
            grid_cfg['wkt'] = crs.srs
    else:
        warnings.warn('No coordinate system reference provided for model grid! '
                      'Model input data may not be mapped correctly.')

    # set up the model grid instance
    grid_cfg['top'] = top
    grid_cfg['botm'] = botm
    grid_cfg.update(kwargs)  # update with any kwargs from function call
    kwargs = get_input_arguments(grid_cfg, MFsetupGrid)
    modelgrid = MFsetupGrid(**kwargs)
    modelgrid.cfg = grid_cfg

    # write grid info to json, and shapefile of bbox
    # omit top and botm arrays from json representation of grid
    # (just for horizontal disc.)
    del grid_cfg['top']
    del grid_cfg['botm']

    fileio.dump(grid_file, grid_cfg)
    if bbox_shapefile is not None:
        write_bbox_shapefile(modelgrid, bbox_shapefile)
    print("finished in {:.2f}s\n".format(time.time() - t0))
    return modelgrid