def _load_shapefile(self, shp, index_field, convert_coordinates, remove_offset, simplify):
    """Read a shapefile into a dataframe and prepare its geometries.

    Parameters
    ----------
    shp : str
        Shapefile path; passed to ``shp2df`` and ``get_proj4``.
    index_field : str or None
        Column whose values become the dataframe index. ``None`` keeps the
        index returned by ``shp2df``.
    convert_coordinates : float
        Multiplier applied to the x and y coordinates of every geometry.
    remove_offset : bool
        If True, shift every geometry by ``-self.extent_proj[0]`` in x and
        ``-self.extent_proj[1]`` in y (so that llcorner = 0,0).
    simplify : float
        Tolerance handed to each geometry's ``simplify``; values <= 0 skip
        simplification.

    Returns
    -------
    df : DataFrame
        The shapefile records with the processed ``geometry`` column.
    """
    df = shp2df(shp)

    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid of z values if the
    # shapefile has them. Every geometry is inspected (not only the first),
    # so mixed 2D/3D layers are handled and an empty layer does not raise.
    has_z = any(getattr(g, 'has_z', False) for g in df.geometry)
    if convert_coordinates != 1 or has_z:

        def scale_xy(x, y, z=None):
            # returning only x and y drops any z coordinate
            return x * convert_coordinates, y * convert_coordinates

        df['geometry'] = [transform(scale_xy, g) for g in df.geometry]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        xoff = -1 * self.extent_proj[0]
        yoff = -1 * self.extent_proj[1]
        df['geometry'] = [translate(g, xoff, yoff) for g in df.geometry]

    if simplify > 0:
        df['geometry'] = [g.simplify(simplify) for g in df.geometry]
    return df
def _load_shapefile(self, shp, index_field, convert_coordinates, remove_offset, simplify):
    """Load a shapefile as a dataframe with geometries in this object's frame.

    The records are read with ``shp2df``, reprojected to ``self.proj4`` when
    the shapefile's projection string differs, optionally rescaled, shifted
    and simplified.

    Parameters
    ----------
    shp : str
        Shapefile path; passed to ``shp2df`` and ``get_proj4``.
    index_field : str or None
        Column to use as the dataframe index (``None`` leaves it unchanged).
    convert_coordinates : float
        Factor by which x and y coordinates are multiplied.
    remove_offset : bool
        If True, translate geometries by minus the first two entries of
        ``self.extent_proj`` (llcorner = 0,0).
    simplify : float
        Simplification tolerance; not applied unless greater than zero.

    Returns
    -------
    df : DataFrame
        Records of the shapefile with the adjusted ``geometry`` column.
    """
    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    source_proj4 = get_proj4(shp)
    if source_proj4 != self.proj4:
        df['geometry'] = projectdf(df, source_proj4, self.proj4)

    # convert projected coordinate units and/or get rid of z values.
    # Checking all geometries (the original looked at row 0 only) strips z
    # from mixed layers and avoids an IndexError on an empty layer.
    needs_transform = convert_coordinates != 1 or any(
        getattr(geom, 'has_z', False) for geom in df.geometry
    )
    if needs_transform:
        df['geometry'] = [
            transform(
                lambda x, y, z=None: (x * convert_coordinates, y * convert_coordinates),
                geom,
            )
            for geom in df.geometry
        ]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        df['geometry'] = [
            translate(geom, -1 * self.extent_proj[0], -1 * self.extent_proj[1])
            for geom in df.geometry
        ]

    if simplify > 0:
        df['geometry'] = [geom.simplify(simplify) for geom in df.geometry]

    return df
def __init__(
    self,
    NHDFlowline,
    PlusFlowlineVAA,
    PlusFlow,
    mf_grid=None,
    mf_grid_node_col=None,
    nrows=None,
    ncols=None,
    mfdis=None,
    xul=None,
    yul=None,
    rot=0,
    model_domain=None,
    flowlines_proj4=None,
    mfgrid_proj4=None,
    domain_proj4=None,
    mf_units_mult=1,
):
    """Class for working with information from NHDPlus v2.
    See the user's guide for more information:
    <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

    Parameters
    ----------
    NHDFlowline : str, list of strings or dataframe
        Shapefile, list of shapefiles, or dataframe defining SFR network;
        assigned to the Flowline attribute.
    PlusFlowlineVAA : str, list of strings or dataframe
        DBF file, list of DBF files with NHDPlus attribute information;
        assigned to PlusFlowlineVAA attribute.
    PlusFlow : str, list of strings or dataframe
        DBF file, list of DBF files with routing information;
        assigned to PlusFlow attribute.
    mf_grid : str or dataframe
        Shapefile or dataframe containing MODFLOW grid
    mf_grid_node_col : str
        Column in grid shapefile or dataframe with unique node numbers.
        In case the grid isn't sorted!
        (which will result in mixup if rows and columns are assigned later
        using the node numbers)
    nrows : int
        (structured grids) Number of model rows
    ncols : int
        (structured grids) Number of model columns
    mfdis : str
        MODFLOW discretization file (not yet supported for this class)
    xul : float, optional
        x offset of upper left corner of grid. Only needed if using mfdis
        instead of shapefile
    yul : float, optional
        y offset of upper left corner of grid. Only needed if using mfdis
        instead of shapefile
    rot : float, optional (default 0)
        Grid rotation; only needed if using mfdis instead of shapefile.
    model_domain : str (shapefile) or shapely polygon, optional
        Polygon defining area in which to create SFR cells.
        Default is to create SFR at all intersections between the model grid
        and NHD flowlines. When None, ``self.domain`` is set to None.
    flowlines_proj4 : str, optional
        Proj4 string for coordinate system of NHDFlowlines.
        Only needed if flowlines are supplied in a dataframe.
    mfgrid_proj4 : str, optional
        Proj4 string for the model grid. If None and mf_grid is not a
        dataframe, it is read from mf_grid with ``get_proj4``.
    domain_proj4 : str, optional
        Proj4 string for coordinate system of model_domain.
        Only needed if model_domain is supplied as a polygon.
    mf_units_mult : float
        multiplier to convert GIS units to MODFLOW units

    Raises
    ------
    ProjectionError
        If the grid projection string declares ``proj=longlat``.
    """
    self.Flowline = NHDFlowline
    self.PlusFlowlineVAA = PlusFlowlineVAA
    self.PlusFlow = PlusFlow
    self.fl_cols = [
        "COMID",
        "FCODE",
        "FDATE",
        "FLOWDIR",
        "FTYPE",
        "GNIS_ID",
        "GNIS_NAME",
        "LENGTHKM",
        "REACHCODE",
        "RESOLUTION",
        "WBAREACOMI",
        "geometry",
    ]
    self.pfvaa_cols = ["ArbolateSu", "Hydroseq", "DnHydroseq", "LevelPathI", "StreamOrde"]

    self.mf_grid = mf_grid
    self.model_domain = model_domain
    self.nrows = nrows
    self.ncols = ncols
    self.mfdis = mfdis
    self.xul = xul
    self.yul = yul
    self.rot = rot

    self.mf_units_mult = mf_units_mult
    self.GISunits = None
    self.to_km = None  # converts GIS units to km for arbolate sum

    self.fl_proj4 = flowlines_proj4
    self.mf_grid_proj4 = mfgrid_proj4
    self.domain_proj4 = domain_proj4

    # single-argument print(...) behaves the same on Python 2 and 3
    print("Reading input...")

    # handle dataframes or shapefiles as arguments
    sources = {"fl": NHDFlowline, "pf": PlusFlow, "pfvaa": PlusFlowlineVAA, "grid": mf_grid}
    for attr, source in sources.items():
        if isinstance(source, pd.DataFrame):
            setattr(self, attr, source)
        else:
            setattr(self, attr, shp2df(source))

    if isinstance(model_domain, Polygon):
        self.domain = model_domain
    elif model_domain is None:
        # documented default: no domain polygon supplied
        self.domain = None
    else:
        # read the first feature and close the file afterwards
        with fiona.open(model_domain) as src:
            self.domain = shape(next(iter(src))["geometry"])
        self.domain_proj4 = get_proj4(model_domain)

    # sort and pair down the grid
    if mf_grid_node_col is not None:
        self.grid.sort_values(by=mf_grid_node_col, inplace=True)
        self.grid.index = self.grid[mf_grid_node_col].values
    self.grid = self.grid[["geometry"]]

    # get projections
    if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
        self.mf_grid_proj4 = get_proj4(mf_grid)
    if self.fl_proj4 is None:
        if isinstance(NHDFlowline, list):
            self.fl_proj4 = get_proj4(NHDFlowline[0])
        elif not isinstance(NHDFlowline, pd.DataFrame):
            self.fl_proj4 = get_proj4(NHDFlowline)

    # set the indices
    for attr, index in {"fl": "COMID", "pfvaa": "ComID"}.items():
        table = getattr(self, attr)
        if table.index.name != index:
            table.index = table[index]

    # first check that grid is in projected units
    if self.mf_grid_proj4.split("proj=")[1].split()[0].strip() == "longlat":
        raise ProjectionError(self.mf_grid)

    # set GIS units from modflow grid projection (used for arbolate sum computation)
    # assumes either m or ft!
    self.GISunits = parse_proj4_units(self.mf_grid_proj4)
    self.to_km = 0.001 if self.GISunits == "m" else 0.001 / 0.3048

    # reproject the NHD Flowlines and model domain to model grid if they aren't
    # (prob a better way to check for same projection)
    if different_projections(self.fl_proj4, self.mf_grid_proj4):
        print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
        self.fl["geometry"] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

    if model_domain is not None and different_projections(self.domain_proj4, self.mf_grid_proj4):
        print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
        self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)
def make_collection(self, shp, index_field=None,
                    s=20, fc='0.8', ec='k', lw=0.5, alpha=0.5,
                    color_field=None,
                    cbar=False, clim=(), cmap='jet', cbar_label=None,
                    simplify_patches=100,
                    zorder=5,
                    convert_coordinates=1,
                    remove_offset=True,
                    collection_name=None,
                    **kwargs):
    """Build a matplotlib collection from the features of a shapefile.

    Polygon layers become a PatchCollection, line layers a LineCollection,
    and anything else is drawn with ``self.ax.scatter``. The layer type is
    decided from the first feature.

    Parameters
    ----------
    shp : str
        Shapefile path.
    index_field : str, optional
        Column to use as the dataframe index.
    s : float
        Marker size (point layers only).
    fc, ec, lw, alpha :
        Face color, edge color, line width and transparency.
    color_field, cbar, clim, cbar_label :
        Accepted but not used by this method.
    cmap : str
        Colormap given to the PatchCollection.
    simplify_patches : float
        Tolerance for geometry simplification; <= 0 disables it.
    zorder : int
        Drawing order for line and point layers.
    convert_coordinates : float
        Multiplier applied to x and y coordinates.
    remove_offset : bool
        If True, shift geometries by minus the first two entries of
        ``self.extent_proj`` (llcorner = 0,0).
    collection_name : str, optional
        Key under which results are stored; defaults to the shapefile's
        base name up to the first '.'.
    **kwargs
        Forwarded to LineCollection or ``scatter``.

    Returns
    -------
    collection
        The collection that was built. The dataframe, the collection and
        the index label of each drawn part are also stored in
        ``self.layers``, ``self.collections`` and ``self.collection_inds``.
    """
    if collection_name is None:
        collection_name = os.path.split(shp)[-1].split('.')[0]

    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid z values if the shapefile has them
    if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:
        df['geometry'] = [transform(lambda x, y, z=None: (x * convert_coordinates,
                                                          y * convert_coordinates), g)
                          for g in df.geometry]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        df['geometry'] = [translate(g,
                                    -1 * self.extent_proj[0],
                                    -1 * self.extent_proj[1]) for g in df.geometry]

    if simplify_patches > 0:
        df['geometry'] = [g.simplify(simplify_patches) for g in df.geometry]

    # positional access: a label lookup of 0 fails once index_field
    # replaces the default index
    first_type = df.iloc[0]['geometry'].geom_type

    if 'Polygon' in first_type:
        print("building PatchCollection...")
        inds = []
        patches = []
        for i, g in df.geometry.items():
            # one patch per polygon part; every part keeps its feature's label
            parts = g.geoms if g.geom_type == 'MultiPolygon' else [g]
            for part in parts:
                inds.append(i)
                patches.append(PolygonPatch(part))

        collection = PatchCollection(patches, cmap=cmap,
                                     facecolor=fc, linewidth=lw, edgecolor=ec, alpha=alpha,
                                     )

    elif 'LineString' in first_type:
        print("building LineCollection...")
        inds = []
        lines = []
        for i, g in df.geometry.items():
            # plot each line in a multilinestring; .geoms is the supported
            # way to iterate the parts of a multi-part geometry
            parts = g.geoms if 'Multi' in g.geom_type else [g]
            for part in parts:
                x, y = part.xy
                inds.append(i)
                lines.append(list(zip(x, y)))

        collection = LineCollection(lines, colors=ec, linewidths=lw, alpha=alpha,
                                    zorder=zorder, **kwargs)

        # set the color scheme (could set line thickness by same procedure)
        # NOTE(review): fc (not color_field) is tested as the column name
        # here; confirm whether color_field was intended.
        if fc in df.columns:
            colors = np.array([df[fc][ind] for ind in inds])
            collection.set_array(colors)

    else:
        print("plotting points...")
        x = np.array([g.x for g in df.geometry])
        y = np.array([g.y for g in df.geometry])

        collection = self.ax.scatter(x, y, s=s, c=fc, ec=ec, lw=lw, alpha=alpha,
                                     zorder=zorder, **kwargs)
        inds = list(range(len(x)))

    self.layers[collection_name] = df
    self.collections[collection_name] = collection
    self.collection_inds[collection_name] = inds

    return collection
def make_collection(self, shp, index_field=None,
                    s=20, fc='0.8', ec='k', lw=0.5, alpha=0.5,
                    color_field=None,
                    cbar=False, clim=(), cmap='jet', cbar_label=None,
                    simplify_patches=100,
                    zorder=5,
                    convert_coordinates=1,
                    remove_offset=True,
                    collection_name=None,
                    **kwargs):
    """Read a shapefile and turn its features into a matplotlib collection.

    The geometry type of the first feature selects the output: a
    PatchCollection for polygons, a LineCollection for lines, otherwise a
    scatter plot on ``self.ax``.

    Parameters
    ----------
    shp : str
        Shapefile path.
    index_field : str, optional
        Column to use as the dataframe index.
    s : float
        Marker size (point layers only).
    fc, ec, lw, alpha :
        Face color, edge color, line width and transparency.
    color_field, cbar, clim, cbar_label :
        Accepted but not used by this method.
    cmap : str
        Colormap given to the PatchCollection.
    simplify_patches : float
        Tolerance for geometry simplification; <= 0 disables it.
    zorder : int
        Drawing order for line and point layers.
    convert_coordinates : float
        Multiplier applied to x and y coordinates.
    remove_offset : bool
        If True, shift geometries by minus the first two entries of
        ``self.extent_proj`` (llcorner = 0,0).
    collection_name : str, optional
        Key under which results are stored; defaults to the shapefile's
        base name up to the first '.'.
    **kwargs
        Forwarded to LineCollection or ``scatter``.

    Returns
    -------
    collection
        The collection that was built; it is also stored, together with
        the dataframe and the index label of each drawn part, in
        ``self.collections``, ``self.layers`` and ``self.collection_inds``.
    """
    if collection_name is None:
        collection_name = os.path.split(shp)[-1].split('.')[0]

    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid z values if the shapefile has them
    if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:
        df['geometry'] = [
            transform(lambda x, y, z=None: (x * convert_coordinates,
                                            y * convert_coordinates), g)
            for g in df.geometry
        ]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        df['geometry'] = [
            translate(g, -1 * self.extent_proj[0], -1 * self.extent_proj[1])
            for g in df.geometry
        ]

    if simplify_patches > 0:
        df['geometry'] = [
            g.simplify(simplify_patches) for g in df.geometry
        ]

    # Use positional access for the first feature in both branches; the
    # label 0 may not exist after index_field replaces the index.
    layer_type = df.iloc[0]['geometry'].geom_type

    if 'Polygon' in layer_type:
        print("building PatchCollection...")
        inds = []
        patches = []
        for label, geom in df.geometry.items():
            if geom.geom_type != 'MultiPolygon':
                inds.append(label)
                patches.append(PolygonPatch(geom))
            else:
                # one patch per member polygon, all tagged with the same label
                for part in geom.geoms:
                    inds.append(label)
                    patches.append(PolygonPatch(part))

        collection = PatchCollection(
            patches, cmap=cmap,
            facecolor=fc, linewidth=lw, edgecolor=ec, alpha=alpha,
        )

    elif 'LineString' in layer_type:
        print("building LineCollection...")
        inds = []
        lines = []
        for label, geom in df.geometry.items():
            if 'Multi' not in geom.geom_type:
                x, y = geom.xy
                inds.append(label)
                lines.append(list(zip(x, y)))
            # plot each line in a multilinestring
            else:
                # iterate the parts through .geoms, as the polygon branch does
                for line in geom.geoms:
                    x, y = line.xy
                    inds.append(label)
                    lines.append(list(zip(x, y)))

        collection = LineCollection(lines, colors=ec, linewidths=lw, alpha=alpha,
                                    zorder=zorder, **kwargs)

        # set the color scheme (could set line thickness by same procedure)
        # NOTE(review): the column name tested is fc, while color_field is
        # never read; confirm which one callers are expected to pass.
        if fc in df.columns:
            colors = np.array([df[fc][ind] for ind in inds])
            collection.set_array(colors)

    else:
        print("plotting points...")
        x = np.array([g.x for g in df.geometry])
        y = np.array([g.y for g in df.geometry])

        collection = self.ax.scatter(x, y, s=s, c=fc, ec=ec, lw=lw, alpha=alpha,
                                     zorder=zorder, **kwargs)
        inds = list(range(len(x)))

    self.layers[collection_name] = df
    self.collections[collection_name] = collection
    self.collection_inds[collection_name] = inds

    return collection
def __init__(self, NHDFlowline, PlusFlowlineVAA, PlusFlow,
             mf_grid=None, mf_grid_node_col=None,
             nrows=None, ncols=None,
             mfdis=None, xul=None, yul=None, rot=0,
             model_domain=None,
             flowlines_proj4=None, mfgrid_proj4=None, domain_proj4=None,
             mf_units_mult=1):
    """Class for working with information from NHDPlus v2.
    See the user's guide for more information:
    <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

    Parameters
    ----------
    NHDFlowline : str, list of strings or dataframe
        Shapefile, list of shapefiles, or dataframe defining SFR network;
        assigned to the Flowline attribute.
    PlusFlowlineVAA : str, list of strings or dataframe
        DBF file, list of DBF files with NHDPlus attribute information;
        assigned to PlusFlowlineVAA attribute.
    PlusFlow : str, list of strings or dataframe
        DBF file, list of DBF files with routing information;
        assigned to PlusFlow attribute.
    mf_grid : str or dataframe
        Shapefile or dataframe containing MODFLOW grid
    mf_grid_node_col : str
        Column in grid shapefile or dataframe with unique node numbers.
        In case the grid isn't sorted!
        (which will result in mixup if rows and columns are assigned later
        using the node numbers)
    nrows : int
        (structured grids) Number of model rows
    ncols : int
        (structured grids) Number of model columns
    mfdis : str
        MODFLOW discretization file (not yet supported for this class)
    xul : float, optional
        x offset of upper left corner of grid. Only needed if using mfdis
        instead of shapefile
    yul : float, optional
        y offset of upper left corner of grid. Only needed if using mfdis
        instead of shapefile
    rot : float, optional (default 0)
        Grid rotation; only needed if using mfdis instead of shapefile.
    model_domain : str (shapefile) or shapely polygon, optional
        Polygon defining area in which to create SFR cells.
        Default is to create SFR at all intersections between the model grid
        and NHD flowlines (``self.domain`` is None and no clipping is done).
    flowlines_proj4 : str, optional
        Proj4 string for coordinate system of NHDFlowlines.
        Only needed if flowlines are supplied in a dataframe.
    mfgrid_proj4 : str, optional
        Proj4 string for the model grid. If None and mf_grid is not a
        dataframe, it is read from mf_grid with ``get_proj4``.
    domain_proj4 : str, optional
        Proj4 string for coordinate system of model_domain.
        Only needed if model_domain is supplied as a polygon.
    mf_units_mult : float
        multiplier to convert GIS units to MODFLOW units

    Raises
    ------
    ProjectionError
        If the grid projection string declares ``proj=longlat``.
    """
    self.Flowline = NHDFlowline
    self.PlusFlowlineVAA = PlusFlowlineVAA
    self.PlusFlow = PlusFlow
    self.fl_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR',
                    'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
                    'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
    self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq',
                       'LevelPathI', 'StreamOrde']

    self.mf_grid = mf_grid
    self.model_domain = model_domain
    self.nrows = nrows
    self.ncols = ncols
    self.mfdis = mfdis
    self.xul = xul
    self.yul = yul
    self.rot = rot

    self.mf_units_mult = mf_units_mult
    self.GISunits = None
    self.to_km = None  # converts GIS units to km for arbolate sum

    self.fl_proj4 = flowlines_proj4
    self.mf_grid_proj4 = mfgrid_proj4
    self.domain_proj4 = domain_proj4

    # single-argument print(...) behaves the same on Python 2 and 3
    print("Reading input...")

    # handle dataframes or shapefiles as arguments
    sources = {'fl': NHDFlowline, 'pf': PlusFlow, 'pfvaa': PlusFlowlineVAA,
               'grid': mf_grid}
    for attr, source in sources.items():
        if isinstance(source, pd.DataFrame):
            setattr(self, attr, source)
        else:
            setattr(self, attr, shp2df(source))

    if isinstance(model_domain, Polygon):
        self.domain = model_domain
    elif isinstance(model_domain, str):
        # read the first feature and close the file afterwards
        with fiona.open(model_domain) as src:
            self.domain = shape(next(iter(src))['geometry'])
        self.domain_proj4 = get_proj4(model_domain)
    else:
        # no domain supplied: to_sfr() skips clipping when this is None.
        # (The original branch held only commented-out code, which is a
        # syntax error and left self.domain undefined.)
        self.domain = None

    # sort and pair down the grid
    if mf_grid_node_col is not None:
        self.grid.sort_values(by=mf_grid_node_col, inplace=True)
        self.grid.index = self.grid[mf_grid_node_col].values
    self.grid = self.grid[['geometry']]

    # get projections
    if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
        self.mf_grid_proj4 = get_proj4(mf_grid)
    if self.fl_proj4 is None:
        if isinstance(NHDFlowline, list):
            self.fl_proj4 = get_proj4(NHDFlowline[0])
        elif not isinstance(NHDFlowline, pd.DataFrame):
            self.fl_proj4 = get_proj4(NHDFlowline)

    # set the indices
    for attr, index in {'fl': 'COMID', 'pfvaa': 'ComID'}.items():
        table = getattr(self, attr)
        if table.index.name != index:
            table.index = table[index]

    # first check that grid is in projected units
    if self.mf_grid_proj4.split('proj=')[1].split()[0].strip() == 'longlat':
        raise ProjectionError(self.mf_grid)

    # set GIS units from modflow grid projection (used for arbolate sum computation)
    # assumes either m or ft!
    self.GISunits = parse_proj4_units(self.mf_grid_proj4)
    self.to_km = 0.001 if self.GISunits == 'm' else 0.001 / 0.3048

    # reproject the NHD Flowlines and model domain to model grid if they aren't
    # (prob a better way to check for same projection)
    if different_projections(self.fl_proj4, self.mf_grid_proj4):
        print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
        self.fl['geometry'] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

    if model_domain is not None \
            and different_projections(self.domain_proj4, self.mf_grid_proj4):
        print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
        self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)


def list_updown_comids(self):
    """Add ``dncomids`` and ``upcomids`` list columns to ``self.df``.

    For every COMID in the index of ``self.df`` the PlusFlow table
    (``self.pf``) is searched for the COMIDs it flows to and from.
    """
    # setup local variables and cull plusflow table to comids in model
    comids = self.df.index.tolist()
    in_model = self.pf.FROMCOMID.isin(comids) | self.pf.TOCOMID.isin(comids)
    pf = self.pf.loc[in_model].copy()

    # subset PlusFlow entries for comids that are not in flowlines dataset
    # comids may be missing because they are outside of the model
    # or if the flowlines dataset was edited (resulting in breaks in the routing)
    missing_tocomids = ~pf.TOCOMID.isin(comids) & (pf.TOCOMID != 0)
    if missing_tocomids.any():
        missing = pf.loc[missing_tocomids, ['FROMCOMID', 'TOCOMID']].copy()
        # recursively crawl the PlusFlow table
        # to try to find a downstream comid in the flowlines dataset
        missing['nextCOMID'] = [find_next(tc, self.pf, comids) for tc in missing.TOCOMID]
        pf.loc[missing_tocomids, 'TOCOMID'] = missing.nextCOMID

    # set any remaining comids not in model to zero
    # (outlets or inlets from outside model)
    pf.loc[~pf.FROMCOMID.isin(comids), 'FROMCOMID'] = 0

    tocomid = pf.TOCOMID.values
    fromcomid = pf.FROMCOMID.values
    self.df['dncomids'] = [tocomid[fromcomid == c].tolist() for c in comids]
    self.df['upcomids'] = [fromcomid[tocomid == c].tolist() for c in comids]


def assign_segments(self):
    """Number the flowlines as segments and derive their routing.

    Adds ``segment`` (1-based, in COMID order), ``upsegs``, ``dnsegs`` and
    ``outseg`` columns to ``self.df`` and leaves it sorted by segment.
    Requires the ``dncomids``/``upcomids`` columns made by
    ``list_updown_comids``.
    """
    # create segment numbers; order by the COMID column values directly,
    # which is unambiguous when the index is also named 'COMID'
    order = np.argsort(self.df['COMID'].values, kind='mergesort')
    self.df = self.df.iloc[order].copy()
    self.df['segment'] = np.arange(len(self.df)) + 1

    # reduce dncomids to 1 per segment
    levelpath_of = dict(zip(self.df['COMID'], self.df['LevelPathI']))
    reduced = []
    for levelpath, dncomids in zip(self.df['LevelPathI'], self.df['dncomids']):
        if len(dncomids) > 1:
            # select the dncomid(s) with a matching levelpath. Each
            # candidate is looked up individually, so the result does not
            # depend on the candidates being in dataframe order.
            matching = [c for c in dncomids if levelpath_of.get(c) == levelpath]
            # if none match, select the first dncomid
            dncomids = matching if matching else [dncomids[0]]
        reduced.append(dncomids)
    self.df['dncomids'] = reduced

    # assign upsegs and outsegs based on NHDPlus routing
    segment_of = self.df['segment']  # indexed like self.df (by COMID)
    self.df['upsegs'] = [[segment_of.loc[c] if c != 0 else 0 for c in comids]
                         for comids in self.df.upcomids]
    self.df['dnsegs'] = [[segment_of.loc[c] if c != 0 else 0 for c in comids]
                         for comids in self.df.dncomids]

    # make a column of outseg integers
    # (0 when a segment has no downstream entry at all)
    self.df['outseg'] = [d[0] if d else 0 for d in self.df.dnsegs]
    self.df.sort_values(by='segment', inplace=True)


def to_sfr(self, roughness=0.037, streambed_thickness=1, streambedK=1,
           icalc=1,
           iupseg=0, iprior=0, nstrpts=0, flow=0, runoff=0, etsw=0, pptsw=0,
           roughch=0, roughbk=0, cdepth=0, fdepth=0, awdth=0, bwdth=0):
    """Intersect the flowlines with the grid and build the reach and
    segment tables, stored as ``self.m1`` and ``self.m2``.

    Parameters
    ----------
    roughness, streambed_thickness, streambedK :
        Constant values written to the ``roughness``, ``sbthick`` and
        ``sbK`` columns of ``self.m1``.
    icalc :
        Constant value written to the ``icalc`` column of ``self.m2``.
    iupseg, iprior, nstrpts, flow, runoff, etsw, pptsw, roughch, roughbk,
    cdepth, fdepth, awdth, bwdth :
        Accepted but not used by this method.
    """
    # create a working dataframe
    self.df = self.fl[self.fl_cols].join(self.pfvaa[self.pfvaa_cols], how='inner')

    if self.domain is not None:
        print('\nclipping flowlines to active area...')
        inside = [g.intersects(self.domain) for g in self.df.geometry]
        self.df = self.df.loc[inside].copy()
    order = np.argsort(self.df['COMID'].values, kind='mergesort')
    self.df = self.df.iloc[order].copy()
    flowline_geoms = self.df.geometry.tolist()
    grid_geoms = self.grid.geometry.tolist()

    print("intersecting flowlines with grid cells...")
    grid_intersections = GISops.intersect_rtree(grid_geoms, flowline_geoms)

    print("setting up segments...")
    self.list_updown_comids()
    self.assign_segments()
    fl_segments = self.df.segment.tolist()
    fl_comids = self.df.COMID.tolist()

    m1 = make_mat1(flowline_geoms, fl_segments, fl_comids, grid_intersections, grid_geoms)

    print("computing widths...")
    m1['length'] = np.array([g.length for g in m1.geometry])
    lengths = m1[['segment', 'length']].copy()
    groups = lengths.groupby('segment')
    # within each segment: summed length from each reach to the segment end
    reach_asums = np.concatenate([np.cumsum(grp['length'].values[::-1])[::-1]
                                  for s, grp in groups])
    # self.df is sorted by segment, so position s-1 belongs to segment s
    segment_asums = np.array([self.df.ArbolateSu.values[s - 1] for s in m1.segment.values])
    reach_asums = -1 * self.to_km * reach_asums + segment_asums  # arbolate sums are computed in km
    width = width_from_arbolate(reach_asums)  # widths are returned in m
    if self.GISunits != 'm':
        width = width / 0.3048

    m1['width'] = width * self.mf_units_mult
    m1['length'] = m1['length'] * self.mf_units_mult

    m1['roughness'] = roughness
    m1['sbthick'] = streambed_thickness
    m1['sbK'] = streambedK
    m1['sbtop'] = 0

    # NOTE(review): the row formula is right for 0-based node numbers and
    # the column formula for 1-based ones; confirm which numbering
    # make_mat1 returns before relying on both.
    if self.nrows is not None:
        m1['row'] = np.floor(m1.node / self.ncols) + 1
    if self.ncols is not None:
        column = m1.node.values % self.ncols
        column[column == 0] = self.ncols  # last column has remainder of 0
        m1['column'] = column
    m1['layer'] = 1

    self.m1 = m1

    print("setting up Mat2...")
    # copy so adding icalc does not write into a slice of self.df
    self.m2 = self.df[['segment', 'outseg']].copy()
    self.m2['icalc'] = icalc
    self.m2.index = self.m2.segment
    print('Done')


def write_tables(self, basename='SFR'):
    """Write tables with SFR reach (Mat1) and segment (Mat2) information out to csv files.

    Parameters
    ----------
    basename: string
        e.g. Mat1 is written to <basename>Mat1.csv
    """
    m1_cols = ['node', 'layer', 'segment', 'reach', 'sbtop', 'width', 'length',
               'sbthick', 'sbK', 'roughness', 'reachID']
    m2_cols = ['segment', 'icalc', 'outseg']
    if self.nrows is not None:
        m1_cols.insert(1, 'row')

    if self.ncols is not None:
        m1_cols.insert(2, 'column')
    print("writing Mat1 to {0}{1}, Mat2 to {0}{2}".format(basename, 'Mat1.csv', 'Mat2.csv'))
    self.m1[m1_cols].to_csv(basename + 'Mat1.csv', index=False)
    self.m2[m2_cols].to_csv(basename + 'Mat2.csv', index=False)


def write_linework_shapefile(self, basename='SFR'):
    """Write a shapefile containing linework for each SFR reach,
    with segment, reach, model node number, and NHDPlus COMID attribute information

    Parameters
    ----------
    basename: string
        Output will be written to <basename>.shp
    """
    print("writing reach geometries to {}".format(basename + '.shp'))
    df2shp(self.m1[['reachID', 'node', 'segment', 'reach', 'comid', 'geometry']],
           basename + '.shp', proj4=self.mf_grid_proj4)
def baseflow_summary(self, field_measurements=None, q90_window=20, output_proj4=None):
    """Pair each field measurement with flows at nearby index stations.

    For every row of the field measurements, the five daily-value sites
    closest to the measurement site (by ``geometry_utm`` distance) are
    selected. Each selected site whose record contains the measurement
    date contributes one output row with that day's flow and the 0.1
    quantile of its flows within the window around the date.

    Parameters
    ----------
    field_measurements : DataFrame, optional
        If given, replaces ``self.field_measurements``. Columns read:
        site_no, measurement_dt, discharge_va, measured_rating_diff,
        chan_width, chan_material.
    q90_window : float
        Total width of the quantile window in years (half on each side of
        the measurement date; a year is taken as 365.2425 days).
    output_proj4 : str, optional
        If given, ``field_sites`` geometries are reprojected from
        ``self.proj4`` to this projection before X and Y are extracted.

    Returns
    -------
    df : DataFrame
        Columns: site_no, datetime, Qm, quality, est_error, idx_station,
        indexQr, indexQ90, chan_width, chan_material, drn_area,
        station_nm, X, Y.
    """
    if field_measurements is not None:
        self.field_measurements = field_measurements
    fm = self.field_measurements
    if not pd.api.types.is_datetime64_any_dtype(fm['measurement_dt']):
        fm['measurement_dt'] = pd.to_datetime(fm['measurement_dt'])

    field_sites = self.field_sites.copy()

    # reproject the output X, Y coordinates
    if output_proj4 is not None:
        print('reprojecting output from\n{}\nto\n{}...'.format(self.proj4, output_proj4))
        field_sites['geometry'] = projectdf(field_sites, self.proj4, output_proj4)

    # pd.Timedelta no longer accepts unit='Y'; 365.2425 days is the length
    # that unit used to stand for
    half_window = pd.Timedelta(days=0.5 * q90_window * 365.2425)
    index_points = list(zip(self.dv_sites.index, self.dv_sites['geometry_utm']))

    columns = ['site_no', 'station_nm', 'datetime', 'Qm', 'quality',
               'chan_width', 'chan_material', 'drn_area', 'idx_station',
               'indexQr', 'indexQ90', 'X', 'Y']
    records = []
    measurements = zip(fm['site_no'].tolist(),
                       fm['measurement_dt'].tolist(),
                       fm['discharge_va'].tolist(),
                       fm['measured_rating_diff'].tolist(),
                       fm['chan_width'].tolist(),
                       fm['chan_material'].tolist())
    for fm_site, mdt, qm, rating, chan_width, chan_material in measurements:
        Dt = dt.datetime(mdt.year, mdt.month, mdt.day)

        # Find the five closest stations
        print(fm_site)
        site_pt = self.field_sites.loc[fm_site, 'geometry_utm']
        ranked = sorted(index_points, key=lambda item: item[1].distance(site_pt))
        nearest = set(index for index, _ in ranked[:5])

        site_info = field_sites.loc[fm_site]
        geometry = site_info['geometry']

        for index_site, data in self.dvs.items():
            # First check if in the list of five closest points
            if index_site not in nearest:
                continue

            # check if index station covers measurement date
            try:
                dv = data.loc[Dt]
            except KeyError:
                continue
            station = dv.site_no
            DDcd = [k for k in data.keys() if '00060' in k and 'cd' not in k][0]
            try:
                Qr = float(dv[DDcd])
            except (TypeError, ValueError):
                # handle ice and other non numbers
                continue

            # get q90 values for window
            q90start = pd.Timestamp(Dt) - half_window
            q90end = pd.Timestamp(Dt) + half_window
            values = pd.to_numeric(data.loc[q90start:q90end, DDcd], errors='coerce')
            q90 = values.quantile(q=0.1)

            # one complete record at a time keeps all columns the same length
            records.append({'site_no': fm_site,
                            'station_nm': site_info['station_nm'],
                            'datetime': mdt,
                            'Qm': qm,
                            'quality': rating,
                            'chan_width': chan_width,
                            'chan_material': chan_material,
                            'drn_area': site_info['drain_area_va'],
                            'idx_station': station,
                            'indexQr': Qr,
                            'indexQ90': q90,
                            'X': geometry.xy[0][0],
                            'Y': geometry.xy[1][0]})

    # explicit columns so an empty result still has every column
    df = pd.DataFrame(records, columns=columns)
    # str() lets missing (non-string) quality codes fall back to the default
    df['est_error'] = [self.est_error.get(str(q).lower(), self.default_error)
                       for q in df.quality]
    df = df[['site_no', 'datetime', 'Qm', 'quality', 'est_error',
             'idx_station', 'indexQr', 'indexQ90', 'chan_width', 'chan_material',
             'drn_area', 'station_nm', 'X', 'Y']]
    return df
def __init__(self, NHDFlowline=None, PlusFlowlineVAA=None, PlusFlow=None, NHDFcode=None,
             elevslope=None,
             mf_grid=None, mf_grid_node_col=None,
             nrows=None, ncols=None,
             mfdis=None, xul=None, yul=None, rot=0,
             model_domain=None,
             flowlines_proj4=None, mfgrid_proj4=None, domain_proj4=None,
             mf_units='feet'):
    """Class for working with information from NHDPlus v2.
    See the user's guide for more information:
    <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

    Parameters
    ----------
    NHDFlowline : str, list of strings or dataframe
        Shapefile, list of shapefiles, or dataframe defining SFR network;
        assigned to the Flowline attribute.
    PlusFlowlineVAA : str, list of strings or dataframe
        DBF file, list of DBF files with NHDPlus attribute information;
        assigned to PlusFlowlineVAA attribute.
    PlusFlow : str, list of strings or dataframe
        DBF file, list of DBF files with routing information;
        assigned to PlusFlow attribute.
    NHDFcode :
        Accepted but not used by this constructor.
    elevslope : str, list of strings or dataframe
        Table read like the other inputs; indexed by its COMID column, and
        its MAXELEVSMO and MINELEVSMO columns are converted to the
        ``Max`` and ``Min`` columns of ``self.elevs``.
    mf_grid : str or dataframe
        Shapefile or dataframe containing MODFLOW grid
    mf_grid_node_col : str
        Column in grid shapefile or dataframe with unique node numbers.
        In case the grid isn't sorted!
        (which will result in mixup if rows and columns are assigned later
        using the node numbers)
    nrows : int
        (structured grids) Number of model rows
    ncols : int
        (structured grids) Number of model columns
    mfdis : str
        MODFLOW discretization file (not yet supported for this class)
    xul : float, optional
        x offset of upper left corner of grid. Only needed if using mfdis
        instead of shapefile
    yul : float, optional
        y offset of upper left corner of grid. Only needed if using mfdis
        instead of shapefile
    rot : float, optional (default 0)
        Grid rotation; only needed if using mfdis instead of shapefile.
    model_domain : str (shapefile) or shapely polygon, optional
        Polygon defining area in which to create SFR cells.
        Default is to create SFR at all intersections between the model grid
        and NHD flowlines (the domain is then the union of the grid cells).
    flowlines_proj4 : str, optional
        Proj4 string for coordinate system of NHDFlowlines.
        Only needed if flowlines are supplied in a dataframe.
    mfgrid_proj4 : str, optional
        Proj4 string for the model grid. If None and mf_grid is not a
        dataframe, it is read from mf_grid with ``get_proj4``.
    domain_proj4 : str, optional
        Proj4 string for coordinate system of model_domain.
        Only needed if model_domain is supplied as a polygon.
    mf_units : str, 'feet' or 'meters'
        Length units of MODFLOW model

    Raises
    ------
    ProjectionError
        If the grid projection string declares ``proj=longlat``.
    """
    self.Flowline = NHDFlowline
    self.PlusFlowlineVAA = PlusFlowlineVAA
    self.PlusFlow = PlusFlow
    self.elevslope = elevslope
    self.fl_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR',
                    'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
                    'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
    self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq',
                       'LevelPathI', 'StreamOrde']

    self.mf_grid = mf_grid
    self.model_domain = model_domain
    self.nrows = nrows
    self.ncols = ncols
    self.mfdis = mfdis
    self.xul = xul
    self.yul = yul
    self.rot = rot

    # unit conversions (set below after grid projection is verified)
    self.mf_units = mf_units
    self.mf_units_mult = 1.0  # go from GIS units to model units
    self.GISunits = None
    self.to_km = None  # converts GIS units to km for arbolate sum

    self.fl_proj4 = flowlines_proj4
    self.mf_grid_proj4 = mfgrid_proj4
    self.domain_proj4 = domain_proj4

    print("Reading input...")
    # handle dataframes or shapefiles as arguments
    sources = {'fl': NHDFlowline, 'pf': PlusFlow, 'pfvaa': PlusFlowlineVAA,
               'elevs': elevslope, 'grid': mf_grid}
    for attr, source in sources.items():
        if isinstance(source, pd.DataFrame):
            setattr(self, attr, source)
        else:
            setattr(self, attr, shp2df(source))

    if isinstance(model_domain, Polygon):
        self.domain = model_domain
    elif isinstance(model_domain, str):
        # read the first feature through a plain iterator and close the
        # file afterwards
        with fiona.open(model_domain) as src:
            self.domain = shape(next(iter(src))['geometry'])
        self.domain_proj4 = get_proj4(model_domain)
    else:
        print('setting model domain to extent of grid '
              'by performing unary union of grid cell geometries...\n'
              '(may take a few minutes for large grids)')
        # add tiny buffer to overcome floating point errors in gridcell geometries
        # (otherwise a multipolygon feature may be returned)
        geoms = [g.buffer(0.001) for g in self.grid.geometry.tolist()]
        self.domain = unary_union(geoms)

    # sort and pair down the grid
    if mf_grid_node_col is not None:
        self.grid.sort_values(by=mf_grid_node_col, inplace=True)
        self.grid.index = self.grid[mf_grid_node_col].values
    else:
        print('Warning: Node field for grid shape file not supplied. '
              'Node numbers will be assigned using index. '
              'This may result in incorrect location of SFR reaches.')
    self.grid = self.grid[['geometry']]

    # get projections
    if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
        self.mf_grid_proj4 = get_proj4(mf_grid)
    if self.fl_proj4 is None:
        if isinstance(NHDFlowline, list):
            self.fl_proj4 = get_proj4(NHDFlowline[0])
        elif not isinstance(NHDFlowline, pd.DataFrame):
            self.fl_proj4 = get_proj4(NHDFlowline)

    # set the indices
    for attr, index in {'fl': 'COMID', 'pfvaa': 'ComID', 'elevs': 'COMID'}.items():
        table = getattr(self, attr)
        if table.index.name != index:
            table.index = table[index]

    # first check that grid is in projected units
    if self.mf_grid_proj4.split('proj=')[1].split()[0].strip() == 'longlat':
        raise ProjectionError(self.mf_grid)

    # set GIS units from modflow grid projection (used for arbolate sum computation)
    # assumes either m or ft!
    self.GISunits = parse_proj4_units(self.mf_grid_proj4)
    gis_in_meters = self.GISunits == 'm'
    if gis_in_meters and self.mf_units == 'feet':
        self.mf_units_mult = 1 / 0.3048
    elif not gis_in_meters and self.mf_units == 'meters':
        self.mf_units_mult = 0.3048
    else:
        self.mf_units_mult = 1.0
    self.to_km = 0.001 if gis_in_meters else 0.001 / 0.3048

    # convert the elevations from elevslope table
    elev_mult = self.convert_elevslope_to_model_units[self.mf_units]
    self.elevs['Max'] = self.elevs.MAXELEVSMO * elev_mult
    self.elevs['Min'] = self.elevs.MINELEVSMO * elev_mult

    # reproject the NHD Flowlines and model domain to model grid if they aren't
    # (prob a better way to check for same projection)
    if different_projections(self.fl_proj4, self.mf_grid_proj4):
        print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
        self.fl['geometry'] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

    if model_domain is not None \
            and different_projections(self.domain_proj4, self.mf_grid_proj4):
        print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
        self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)
def baseflow_summary(self, field_measurements=None, dvs=None, q90_window=20,
                     output_proj4=None):
    """Pair each field measurement with flows at the index stations.

    Every index station whose daily-value record contains the measurement
    date contributes one output row holding that day's flow and the 0.1
    quantile of its flows within the window around the date.

    Parameters
    ----------
    field_measurements : DataFrame, optional
        Defaults to ``self.field_measurements``. Columns read: site_no,
        measurement_dt, discharge_va, measured_rating_diff. The dataframe
        is not modified.
    dvs : dict, optional
        Daily-value dataframes keyed by site; defaults to ``self.dvs``.
    q90_window : float
        Total width of the quantile window in years (half on each side of
        the measurement date; a year is taken as 365.2425 days).
    output_proj4 : str, optional
        If given, ``field_sites`` geometries are reprojected from
        ``self.proj4`` to this projection before X and Y are extracted.

    Returns
    -------
    df : DataFrame
        Columns: site_no, datetime, Qm, quality, est_error, idx_station,
        indexQr, indexQ90, drn_area, station_nm, X, Y.
    """
    fm = self.field_measurements if field_measurements is None else field_measurements
    if dvs is None:
        dvs = self.dvs

    # convert into a local series, leaving the caller's dataframe untouched
    measurement_dates = fm['measurement_dt']
    if not pd.api.types.is_datetime64_any_dtype(measurement_dates):
        measurement_dates = pd.to_datetime(measurement_dates)

    field_sites = self.field_sites.copy()

    # reproject the output X, Y coordinates
    if output_proj4 is not None:
        print('reprojecting output from\n{}\nto\n{}...'.format(self.proj4, output_proj4))
        field_sites['geometry'] = projectdf(field_sites, self.proj4, output_proj4)

    # pd.Timedelta no longer accepts unit='Y'; 365.2425 days is the length
    # that unit used to stand for
    half_window = pd.Timedelta(days=0.5 * q90_window * 365.2425)

    columns = ['site_no', 'station_nm', 'datetime', 'Qm', 'quality',
               'drn_area', 'idx_station', 'indexQr', 'indexQ90', 'X', 'Y']
    records = []
    measurements = zip(fm['site_no'].tolist(),
                       measurement_dates.tolist(),
                       fm['discharge_va'].tolist(),
                       fm['measured_rating_diff'].tolist())
    for fm_site, mdt, qm, rating in measurements:
        Dt = dt.datetime(mdt.year, mdt.month, mdt.day)

        for _, data in dvs.items():
            # check if index station covers measurement date
            try:
                dv = data.loc[Dt]
            except KeyError:
                continue
            station = dv.site_no
            DDcd = [k for k in data.keys() if '00060' in k and 'cd' not in k][0]
            try:
                Qr = float(dv[DDcd])
            except (TypeError, ValueError):
                # handle ice and other non numbers
                continue

            # get q90 values for window
            q90start = pd.Timestamp(Dt) - half_window
            q90end = pd.Timestamp(Dt) + half_window
            values = pd.to_numeric(data.loc[q90start:q90end, DDcd], errors='coerce')
            q90 = values.quantile(q=0.1)

            # one complete record at a time keeps all columns the same length
            site_info = field_sites.loc[fm_site]
            geometry = site_info['geometry']
            records.append({'site_no': fm_site,
                            'station_nm': site_info['station_nm'],
                            'datetime': mdt,
                            'Qm': qm,
                            'quality': rating,
                            'drn_area': site_info['drain_area_va'],
                            'idx_station': station,
                            'indexQr': Qr,
                            'indexQ90': q90,
                            'X': geometry.xy[0][0],
                            'Y': geometry.xy[1][0]})

    # explicit columns so an empty result still has every column
    df = pd.DataFrame(records, columns=columns)
    # str() lets missing (non-string) quality codes fall back to the default
    df['est_error'] = [self.est_error.get(str(q).lower(), self.default_error)
                       for q in df.quality]
    df = df[['site_no', 'datetime', 'Qm', 'quality', 'est_error',
             'idx_station', 'indexQr', 'indexQ90', 'drn_area', 'station_nm', 'X', 'Y']]
    return df