def to_geojson_features(shapefilepath):
    '''reads the given shape file ('.shp') and returns it as a list of geojson
    features of the form:
    ```
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [125.6, 10.1]
      },
      "properties": {
        "name": "Dinagat Islands"
      }
    }
    ```
    '''
    shp = Reader(shapefilepath)  # open the shapefile
    shapes = shp.shapes()  # get all the polygons (class shapefile._Shape)
    records = shp.records()
    fields = [field[0] for field in shp.fields[1:]]
    assert len(shapes) == len(records)
    # Reminder: geojson syntax: http://geojson.org/:
    return [
        {
            "type": "Feature",
            'geometry': mapping(shape(s)),  # https://stackoverflow.com/a/40631091
            'properties': dict(zip(fields, r))
        }
        for s, r in zip(shapes, records)
    ]
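# A minimal usage sketch for the function above; 'points.shp' is a hypothetical
# path, and the module defining to_geojson_features is assumed to have the
# usual imports in scope:
#   from shapefile import Reader              # pyshp
#   from shapely.geometry import shape, mapping
features = to_geojson_features('points.shp')
print(features[0]['geometry']['type'])    # e.g. 'Point' or 'Polygon'
print(features[0]['properties'])          # {field name: value, ...}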
def get_coast_polygons(resolution):
    polymeta = []
    polybounds = []
    for level in [1, 2, 3, 5]:
        filename = os.path.join(GSHHS_DIR, 'GSHHS_shp/', resolution,
                                'GSHHS_{}_L{}'.format(resolution, level))
        print(filename)
        shf = Reader(filename)
        fields = shf.fields
        try:
            shf.shapeRecords()
        except:
            continue
        for shprec in shf.shapeRecords():
            shp = shprec.shape
            rec = shprec.record
            parts = shp.parts.tolist()
            if parts != [0]:
                print('multipart polygon')
                raise SystemExit
            verts = shp.points
            lons, lats = list(zip(*verts))
            north = max(lats)
            south = min(lats)
            attdict = {}
            for r, key in zip(rec, fields[1:]):
                attdict[key[0]] = r
            area = attdict['area']
            id = attdict['id']
            polymeta.append([level, area, south, north, len(lons), id])
            b = np.empty((len(lons), 2), np.float32)
            b[:, 0] = lons
            b[:, 1] = lats
            if lsd is not None:
                b = quantize(b, lsd)
            polybounds.append(b)

        # Manual fix for incorrect Antarctica polygons at full resolution
        # This issue is only present in the shapefile version and may be fixed
        # in future versions of GSHHS!
        if resolution == 'f' and level == 5:
            i = [item[-1] for item in polymeta].index('4-E')
            coords = polybounds[i][2:-1, :]
            coords = np.vstack([coords,
                                [180.0, -90.0],
                                [0.0, -90.0]]).astype(np.float32)
            polybounds[i] = coords
            polymeta[i][-2] = len(coords)

            j = [item[-1] for item in polymeta].index('4-W')
            coords = polybounds[j][3:, :]
            np.savetxt('coordinates.txt', coords)
            coords = np.vstack([coords,
                                [0.0, coords[-1][1]],
                                [0.0, -90.0],
                                [-180.0, -90.0],
                                coords[0]]).astype(np.float32)
            polybounds[j] = coords
            polymeta[j][-2] = len(coords)
    return polybounds, polymeta
def get_wdb_boundaries(resolution, level, rivers=False):
    polymeta = []
    polybounds = []
    if rivers:
        filename = "WDBII_shp/%s/WDBII_river_%s_L%02i" % (resolution,
                                                          resolution, level)
    else:
        filename = "WDBII_shp/%s/WDBII_border_%s_L%s" % (resolution,
                                                         resolution, level)
    print(filename)
    shf = Reader(filename)
    fields = shf.fields
    for shprec in shf.shapeRecords():
        shp = shprec.shape
        rec = shprec.record
        parts = shp.parts.tolist()
        if parts != [0]:
            print("multipart polygon")
            raise SystemExit
        verts = shp.points
        lons, lats = list(zip(*verts))
        north = max(lats)
        south = min(lats)
        attdict = {}
        for r, key in zip(rec, fields[1:]):
            attdict[key[0]] = r
        area = -1
        id = attdict["id"]
        polymeta.append([-1, -1, south, north, len(lons), id])
        b = np.empty((len(lons), 2), np.float32)
        b[:, 0] = lons
        b[:, 1] = lats
        if lsd is not None:
            b = quantize(b, lsd)
        polybounds.append(b)
    return polybounds, polymeta
def merge_shapes(inputfile,
                 outputfile=None,
                 overwrite=False,
                 verbose=True,
                 vverbose=False,
                 ):
    """
    Merges all the shapes in a shapefile into a single shape.
    """

    if outputfile is None:
        outputfile = '{}/merged'.format(os.getcwd())

    if os.path.isfile(outputfile + '.shp') and not overwrite:
        if verbose:
            print('combined watershed shapefile {} exists'.format(outputfile))
        return

    if verbose:
        print('combining shapes from {}\n'.format(inputfile) +
              'this may take a while...\n')

    # start by copying the projection files

    shutil.copy(inputfile + '.prj', outputfile + '.prj')

    # load the catchment and flowline shapefiles

    r = Reader(inputfile, shapeType=5)

    try:
        combined = combine_shapes(r.shapes(), verbose=vverbose)
    except:
        print('error: unable to combine shapes')
        raise

    # create the new file with the merged shapes

    w = Writer(shapeType=5)

    w.poly(shapeType=5, parts=[combined])

    # copy the fields from the original and then the first record; note this
    # can be adapted as needed

    for field in r.fields:
        w.field(*field)
    w.record(*r.record(0))

    w.save(outputfile)

    if verbose:
        its = inputfile, outputfile
        print('successfully combined shapes from {} to {}\n'.format(*its))
def extract_catchments(self,
                       source,
                       destination,
                       flowlinefile,
                       verbose=True,
                       ):
    """
    Extracts the catchments from the source data file to the destination
    using the list of comids for the query.
    """

    # make a list of the comids

    comids = self.get_comids(flowlinefile)

    # open the catchment shapefile

    if verbose: print('reading the catchment shapefile\n')

    shapefile = Reader(source)

    # get the index of the feature id, which links to the flowline comid

    featureid_index = shapefile.fields.index(['FEATUREID', 'N', 9, 0]) - 1

    # go through the comids from the flowlines and add the corresponding
    # catchment to the catchment list

    if verbose: print('searching the catchments in the watershed\n')

    records = shapefile.records()
    indices = []

    i = 0
    for record in records:
        if record[featureid_index] in comids: indices.append(i)
        i += 1

    if len(indices) == 0:
        print('query returned no values, returning\n')
        raise

    # create the new shapefile

    if verbose: print('writing the new catchment shapefile\n')

    w = Writer()

    for field in shapefile.fields: w.field(*field)

    for i in indices:
        shape = shapefile.shape(i)
        w.poly(shapeType=5, parts=[shape.points])
        w.record(*records[i])

    w.save(destination)
def load(self):

    def _get_points(shape):
        points = shape.points
        if hasattr(shape, "z"):
            x, y = zip(*points)
            return list(zip(x, y, shape.z))
        return points

    self._columns = {}
    self._data = []

    path = as_path(self.url)
    if path is None:
        return False

    # get SRID
    srid = -1
    srid_vertical = -1
    # TODO try to discover SRID from .prj file

    # get geometries & data
    geo_column = "Geometry"
    sf = Reader(path)
    names = []
    for idx, name in enumerate(sf.fields[1:]):
        self._columns[idx + 1] = name[0]
        names.append(name[0])
    while geo_column in names:
        geo_column = geo_column + "_"

    shp_type = sf.shape(0).shapeType
    shapes_wkt = {  # WKT formatting for geometric shapes
        1: "POINT(%s)",
        3: "LINESTRING(%s)",
        5: "POLYGON((%s))",
        8: "MULTIPOINT(%s)",
        11: "POINTZ(%s)",
        13: "LINESTRINGZ(%s)",
        15: "POLYGONZ((%s))",
        18: "MULTIPOINTZ(%s)",
        21: "POINTM(%s)",
        23: "LINESTRINGM(%s)",
        25: "POLYGONM((%s))",
        28: "MULTIPOINTM(%s)",
    }
    if shp_type not in shapes_wkt:
        raise Exception("Unrecognized shapefile type")

    geometries = [shapes_wkt[shp_type] % ", ".join(
                      [" ".join([str(p) for p in point])
                       for point in _get_points(shape)])
                  for shape in sf.shapes()]
    # geometries = [wkt definition, ...] in order of records

    for i, record in enumerate(sf.records()):
        self._data.append(dict([(idx, DString(str(record[idx - 1]).strip()))
                                for idx in self._columns]))
        self._data[-1][0] = DGeometry(geometries[i], srid=srid,
                                      srid_vertical=srid_vertical)
    self._columns[0] = geo_column
    return True
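# To make the WKT templating above concrete: a small standalone illustration
# (independent of the class) that applies the same join logic to a made-up
# list of coordinate pairs.
points = [(4.35, 50.85), (4.40, 50.86), (4.42, 50.90)]   # hypothetical vertices
wkt = "LINESTRING(%s)" % ", ".join(" ".join(str(p) for p in pt) for pt in points)
print(wkt)  # LINESTRING(4.35 50.85, 4.4 50.86, 4.42 50.9)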
def set_metadata(self,
                 gagefile,
                 ):
    """
    Opens the gage file with the station metadata.
    """

    # metadata for stations

    self.gages = []
    self.day1s = []
    self.dayns = []
    self.drains = []
    self.states = []
    self.sites = []
    self.nwiss = []
    self.aves = []
    self.names = []

    gagereader = Reader(gagefile, shapeType=1)

    # get the fields with pertinent info

    day1_index = gagereader.fields.index(['DAY1', 'N', 19, 0]) - 1
    dayn_index = gagereader.fields.index(['DAYN', 'N', 19, 0]) - 1
    drain_index = gagereader.fields.index(['DA_SQ_MILE', 'N', 19, 2]) - 1
    HUC8_index = gagereader.fields.index(['HUC', 'C', 8, 0]) - 1
    state_index = gagereader.fields.index(['STATE', 'C', 2, 0]) - 1
    site_index = gagereader.fields.index(['SITE_NO', 'C', 15, 0]) - 1
    nwis_index = gagereader.fields.index(['NWISWEB', 'C', 75, 0]) - 1
    ave_index = gagereader.fields.index(['AVE', 'N', 19, 3]) - 1
    name_index = gagereader.fields.index(['STATION_NM', 'C', 60, 0]) - 1

    # iterate through the records

    for r in gagereader.records():

        gage = r[site_index]
        day1 = r[day1_index]
        dayn = r[dayn_index]
        drain = r[drain_index]
        state = r[state_index]
        nwis = r[nwis_index]
        ave = r[ave_index]
        name = r[name_index]
        site = r[site_index]

        self.gages.append(gage)
        self.day1s.append(day1)
        self.dayns.append(dayn)
        self.drains.append(drain)
        self.states.append(state)
        self.sites.append(site)
        self.nwiss.append(nwis)
        self.aves.append(ave)
        self.names.append(name)
def extract_flowlines(self, source, destination, HUC8, verbose=True):
    """Extracts flowlines from the source datafile to the destination using
    the HUC8 for the query."""

    # open the flowline file

    if verbose: print('reading the flowline file\n')

    shapefile = Reader(source, shapeType=3)
    records = shapefile.records()

    # figure out which field codes are the Reach code and comid

    reach_index = shapefile.fields.index(['REACHCODE', 'C', 14, 0]) - 1

    # go through the reach indices and add them to the list of flowlines
    # if in the watershed; also make a list of the corresponding comids

    if verbose: print('searching for flowlines in the watershed\n')

    indices = []

    i = 0
    for record in records:
        if record[reach_index][:8] == HUC8: indices.append(i)
        i += 1

    if len(indices) == 0:
        if verbose: print('error: query returned no values')
        raise

    # write the data from the HUC8 to a new shapefile

    w = Writer(shapeType=3)

    for field in shapefile.fields: w.field(*field)

    for i in indices:
        shape = shapefile.shape(i)
        w.poly(shapeType=3, parts=[shape.points])

        record = records[i]

        # little work around for blank GNIS_ID and GNIS_NAME values

        if isinstance(record[3], bytes):
            record[3] = record[3].decode('utf-8')
        if isinstance(record[4], bytes):
            record[4] = record[4].decode('utf-8')

        w.record(*record)

    w.save(destination)

    if verbose:
        l = len(indices)
        print('queried {} flowlines from original shapefile\n'.format(l))
def pull_data():
    roads = Reader(shproot + "roads_wgs.shp")
    print("HEADER:", "\n".join(str(f) for f in roads.fields), sep="\n")
    shaperecs = roads.shapeRecords()
    with open(projectroot + "foutkm.csv") as infl:
        lines = [line.split("\t") for line in infl]
    return shaperecs
def extract_nsrdb(directory, HUC8, start, end, space=0.1,
                  plot=True, verbose=True, vverbose=False):
    """Makes pickled instances of the GageStation class for all the gages
    meeting the calibration criteria for an 8-digit watershed."""

    if verbose: print('\nextracting solar radiation data from NREL\n')

    # paths for the watershed shapefiles

    boundaryfile = '{0}/{1}/{1}boundaries'.format(directory, HUC8)
    solarfile = '{0}/{1}/{1}solarstations'.format(directory, HUC8)

    # make a folder for the files

    d = '{0}/{1}/NSRDB'.format(directory, HUC8)
    if not os.path.isdir(d): os.mkdir(d)

    boundaryreader = Reader(boundaryfile)

    stations = []
    while len(stations) == 0:
        bbox = get_boundaries(boundaryreader.shapes(), space=space)
        stations = find_nsrdb(bbox, dates=(start, end))
        space += 0.2

    # download the data

    print('')
    for station in stations:
        if not os.path.isfile('{}/{}'.format(d, station.usaf)):
            station.download_data(d, dates=(start, end))

    # plot it up

    from pyhspf.preprocessing.climateplots import plot_nsrdb

    for station in stations:
        p = '{}/{}'.format(d, station.usaf)
        if not os.path.isfile(p + '.png'):
            with open(p, 'rb') as f: s = pickle.load(f)
            try:
                plot_nsrdb(s, start, end, output=p)
            except:
                print('unable to plot', s.station)
def extract_precip3240(directory, HUC8, start, end,
                       NCDC='ftp://ftp.ncdc.noaa.gov/pub/data',
                       clean=False, space=0.2, verbose=True):
    """Makes a point shapefile of the stations from a csv file of hourly
    precipitation data from NCDC within the bounding box of the watershed."""

    if os.name == 'nt':
        decompress = decompress7z
    else:
        decompress = decompresszcat

    d = '{}/{}/precip3240'.format(directory, HUC8)
    if not os.path.isdir(d): os.mkdir(d)

    # open up the bounding box for the watershed

    boundaryfile = '{0}/{1}/{1}boundaries'.format(directory, HUC8)
    boundaryreader = Reader(boundaryfile)

    bbox = get_boundaries(boundaryreader.shapes(), space=space)

    # find the precipitation stations in the bounding box

    stations = find_precip3240(bbox, verbose=verbose)

    if verbose: print('')

    # make a list of all the states since that's how the NCDC data are stored

    states = list(set([s.code for s in stations]))

    # download the state data for each year

    for state in states:
        download_state_precip3240(state, d, verbose=verbose)

    archives = ['{}/{}'.format(d, a) for a in os.listdir(d)
                if a[-6:] == '.tar.Z']

    for a in archives:

        # decompress the archive

        if not os.path.isfile(a[:-2]): decompress(a, d)

    if verbose: print('')

    # import the data

    for station in stations:
        station.import_data(d, start, end)
def extract_raw(source, destination, HUC8, plot=True, save=True, verbose=True):
    """Extracts the grid data for the HUC8."""

    # make a new directory for the HUC8

    d = '{}/{}/NRCM'.format(destination, HUC8)
    if not os.path.isdir(d): os.mkdir(d)

    # make a "raw directory" for the unaltered info

    raw = '{}/raw'.format(d)
    if not os.path.isdir(raw): os.mkdir(raw)

    if verbose: print('extracting NRCM predictions...\n')

    # use the boundary file to find the bounding box for the grid points

    boundaryfile = '{0}/{1}/{1}boundaries'.format(destination, HUC8)
    subbasinfile = '{0}/{1}/{1}subbasins'.format(destination, HUC8)

    space = 0.1

    sf = Reader(boundaryfile)
    bbox = get_boundaries(sf.shapes(), space=space)

    xmin, ymin, xmax, ymax = bbox

    if verbose and not os.path.isdir(raw):
        print('bounding box =', xmin, ymin, xmax, ymax, '\n')

    lats, lons = [], []
    for f in os.listdir(source):
        i = f.index('_')
        lon = float(f[:i])
        lat = float(f[i + 1:])

        if inside_box([xmin, ymin], [xmax, ymax], [lon, lat]):
            lats.append(lat)
            lons.append(lon)
            if not os.path.isfile('{}/{}'.format(raw, f)):
                shutil.copy('{}/{}'.format(source, f), '{}/{}'.format(raw, f))

    if plot:
        if save:
            output = '{}/gridpoints'.format(d)
        else:
            output = None
        if not os.path.isfile(output):
            plot_NRCM(lons, lats, bfile=boundaryfile, sfile=subbasinfile,
                      output=output, show=False)
def city_shape_from_record(reader: shapefile.Reader, city: str) -> dict:
    """
    Get a geoJSON for a city from a shapefile

    :param reader: A shapefile reader
    :param city: The name of a city in the shapefile
    :return: A geoJSON of the city shape
    """
    for i, r in enumerate(reader.iterRecords()):
        if r[3] == city:
            return reader.shape(i).__geo_interface__
    raise KeyError(f'{city} does not exist in the shapefile')
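# A hedged usage sketch for the function above, assuming a hypothetical
# 'places.shp' whose fourth record column holds the city name (as the r[3]
# lookup implies); both names are made up for illustration.
import shapefile  # pyshp

with shapefile.Reader("places.shp") as reader:
    geom = city_shape_from_record(reader, "Springfield")
    print(geom["type"])  # e.g. 'Polygon'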
def clip_value(in_file, ot_dir, min_height, max_height):
    """
    Online training 4: filtering vector data

    Runs a program that filters flood / landslide vector data using an
    attribute value of the GIS data.

    Function   : clip_value
    Argument 1 : flood / landslide vector data (*.shp)
    Argument 2 : output directory name
    Argument 3 : minimum value to include in the output
    Argument 4 : maximum value to include in the output
    """
    # Get actual file path
    in_file = path.join(DATA_PATH_BASE, in_file)
    ot_dir = path.join(DATA_PATH_BASE, ot_dir)
    makedirs(ot_dir, exist_ok=True)
    ot_file = path.join(
        ot_dir, "{0}v.tif".format(path.splitext(path.basename(in_file))[0]))

    reader = ShpReader(in_file, encoding='cp932')
    writer = ShpWriter(ot_file, encoding='cp932')

    # Create DBF schema
    height_col_id = None
    for i, col in enumerate(
            (col for col in reader.fields if col[0] != "DeletionFlag")):
        writer.field(col[0], col[1], col[2], col[3])
        if col[0] == "height":
            height_col_id = i

    if height_col_id is None:
        print("height column not found in polygon shapefile")
        return

    # Filtering
    n_mesh = reader.numRecords
    cnt_mesh = 0
    for data in reader.iterShapeRecords():
        height = data.record[height_col_id]
        if (height is not None) and (min_height <= height <= max_height):
            # This polygon is output target.
            writer.shape(data.shape)
            writer.record(*data.record)

        cnt_mesh = cnt_mesh + 1
        if cnt_mesh % 100000 == 0:
            print("{0}K / {1}K".format(cnt_mesh / 1000, n_mesh / 1000))

    writer.close()
def find_flowlines(gagefile, flowfile):
    """Determines the COMIDS of the flowlines in the flowline shapefile that
    correspond to the USGS gages from the gage shapefile.
    """

    flowlines = Reader(flowfile, shapeType=3)
    outlets = Reader(gagefile, shapeType=1)

    points = [outlet.points[0] for outlet in outlets.shapes()]
    records = outlets.records()

    lines = flowlines.shapes()

    # find the indices of closest flowline for each point

    indices = [closest_index(point, lines) for point in points]

    # make a dictionary linking the outlet site index numbers to the
    # corresponding flowline comids

    comid_index = flowlines.fields.index(['COMID', 'N', 9, 0]) - 1

    comids = []
    for i in indices:
        if i is not None:
            comids.append(flowlines.record(i)[comid_index])
        else:
            comids.append(None)

    return comids
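# closest_index is referenced above but not defined in this snippet. A minimal
# sketch of what such a helper might look like, assuming it picks the flowline
# whose nearest vertex is closest to the gage point; the threshold name and
# value are invented for illustration and the real implementation may differ.
def closest_index(point, lines, max_distance=0.1):
    """Hypothetical helper: index of the flowline nearest to 'point'."""
    x, y = point
    distances = [min((px - x) ** 2 + (py - y) ** 2 for px, py in line.points)
                 for line in lines]
    i = distances.index(min(distances))
    # return None if nothing is reasonably close (threshold is arbitrary)
    return i if distances[i] < max_distance ** 2 else None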
def get_wdb_boundaries(resolution, level, rivers=False):
    polymeta = []
    polybounds = []
    if rivers:
        filename = os.path.join(
            GSHHS_DIR, 'WDBII_shp', resolution,
            'WDBII_river_{}_L{:02}'.format(resolution, level))
    else:
        filename = os.path.join(
            GSHHS_DIR, 'WDBII_shp', resolution,
            'WDBII_border_{}_L{}'.format(resolution, level))
    print(filename)
    shf = Reader(filename)
    fields = shf.fields
    for shprec in shf.shapeRecords():
        shp = shprec.shape
        rec = shprec.record
        parts = shp.parts.tolist()
        if parts != [0]:
            print('multipart polygon')
            raise SystemExit
        verts = shp.points
        # Detect degenerate lines that are actually points...
        if len(verts) == 2 and np.allclose(verts[0], verts[1]):
            print('Skipping degenerate line...')
            continue
        lons, lats = list(zip(*verts))
        north = max(lats)
        south = min(lats)
        attdict = {}
        for r, key in zip(rec, fields[1:]):
            attdict[key[0]] = r
        area = -1
        poly_id = attdict['id']
        b = np.empty((len(lons), 2), np.float32)
        b[:, 0] = lons
        b[:, 1] = lats
        if not rivers:
            b = interpolate_long_segments(b, resolution)
        if lsd is not None:
            b = quantize(b, lsd)
        polymeta.append([-1, -1, south, north, len(b), poly_id])
        polybounds.append(b)

    return polybounds, polymeta
def __main__():
    shpdir = '/Users/Theo/' + \
        'Instituten-Groepen-Overleggen/HYGEA/Consult/2017/' + \
        'DEME-julianakanaal/REGIS/Limburg - REGIS II v2.2/shapes'

    shpnm = 'steilrandstukken.shp'
    shpnm = 'Steilrand.shp'
    shpnm = 'SteilrandGebieden.dbf'

    shapefileName = os.path.join(shpdir, shpnm)

    rdr = Reader(shapefileName)
    fldNms = [p[0] for p in rdr.fields][1:]
    print(fldNms)

    kwargs = {'title': os.path.basename(shapefileName),
              'grid': True,
              'xticks': 1000.,
              'yticks': 1000.,
              'xlabel': 'x RD [m]',
              'ylabel': 'y RD [m]',
              'edgecolor': 'y',
              'facecolor': 'r',
              'alpha': 0.5}

    plotshapes(rdr, **kwargs)
def _extract_full_geom(shp: shapefile.Reader) -> MultiPolygon:
    """
    Extracts a full geom from a shp reader

    :param shp: shapefile.Reader
    :return: Multipolygon
    """
    return shape(shp.shapeRecord(0).shape.__geo_interface__)
def _get_recinfo(shp: shapefile.Reader) -> Tuple[List[str], List[np.dtype]]:
    field_list = shp.fields[1:]
    labels, type_strings, nbytes, decimals = zip(*field_list)
    record0 = shp.record(0)
    types_from_data = [type(k) for k in record0]
    type_list = [_extract_type(t, l) for t, l in zip(types_from_data, nbytes)]
    return labels, type_list
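# _extract_type is not shown in this snippet. A plausible sketch, assuming it
# maps the Python type of a sample record value (plus the DBF field width) to
# a NumPy dtype; the actual helper may use different rules entirely.
import numpy as np


def _extract_type(py_type, nbytes):
    """Hypothetical mapping from a record value's Python type to a dtype."""
    if py_type is int:
        return np.dtype(np.int64)
    if py_type is float:
        return np.dtype(np.float64)
    # fall back to a fixed-width string sized from the DBF field length
    return np.dtype(f"U{nbytes}")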
def get_comids(self, flowlinefile):
    """Finds the comids from the flowline file."""

    # open the file

    shapefile = Reader(flowlinefile)

    # find the index of the comids

    comid_index = shapefile.fields.index(['COMID', 'N', 9, 0]) - 1

    # make a list of the comids

    comids = [r[comid_index] for r in shapefile.records()]

    return comids
def load():
    # Determine paths.
    file_dir = os.path.dirname(os.path.abspath(__file__))
    locs_path = os.path.join(file_dir, 'data', 'IDSTA.shp')
    data_path = os.path.join(file_dir, 'data', 'HRtemp2006.txt')

    # Load locations.
    sf = Reader(locs_path)
    names, lons, lats = [], [], []
    for sr in sf.shapeRecords():
        name = sr.record.as_dict()['IDT_AK']
        lon, lat = sr.shape.points[0]
        names.append(name)
        lons.append(lon)
        lats.append(lat)
    locs = pd.DataFrame({'lon': lons, 'lat': lats},
                        index=pd.Index(names, name='node'))

    # Read data.
    df = pd.read_csv(data_path, sep='\t')

    # Rename things.
    df = pd.DataFrame({'node': df['IDT_AK'],
                       'date': df['DATE'],
                       'temp': df['MDTEMP']})

    # Make columns nodes.
    df = df.set_index(['date', 'node']).unstack('node')['temp']

    # Drop outputs with missing values, which are only a few.
    df = df.dropna(axis=1)

    # Parse dates and convert to day in the year 2006.
    xs = [datetime.datetime.strptime(x, '%Y-%m-%d') for x in df.index]
    start = datetime.datetime(year=2006, month=1, day=1)
    df['day'] = [(x - start).total_seconds() / 3600 / 24 + 1 for x in xs]
    df = df.set_index('day').sort_index()

    # Filter locations by kept nodes.
    locs = locs.reindex(df.columns, axis=0)

    return locs, df
def main(shp):
    shp, _ = os.path.splitext(shp)
    with IO() as shpio, IO() as dbfio:  # Don't overwrite existing .shp, .dbf
        with Reader(shp) as r, Writer(shp=shpio, dbf=dbfio, shx=shp + '.shx') as w:
            w.fields = r.fields[1:]  # skip first deletion field
            for rec in r.iterShapeRecords():
                w.record(*rec.record)
                w.shape(rec.shape)
def create_buffer(reader: shapefile.Reader) -> list:
    fields = reader.fields[1:]
    field_names = [field[0] for field in fields]
    buffer = []
    for shape_record in reader.shapeRecords():
        buffer.append(create_feature(shape_record, field_names))
    return buffer
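# create_feature is assumed to exist elsewhere. A minimal sketch of one way it
# could build a GeoJSON-style feature dict from a pyshp ShapeRecord; the name
# and structure are assumptions, not the original implementation.
def create_feature(shape_record, field_names):
    """Hypothetical helper: build a GeoJSON-like feature from a ShapeRecord."""
    return {
        "type": "Feature",
        "geometry": shape_record.shape.__geo_interface__,
        "properties": dict(zip(field_names, shape_record.record)),
    }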
def extraction_poste(chemin):
    '''Source : https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5

    File name : laposte_hexasmal
    Data      : commune name
                INSEE code
                locality (lieu-dit) detail
                postal code of the commune
                routing label (libellé d'acheminement)
                lieu-dit
    '''
    sf = Reader(chemin)
    shapeRecs = sf.shapeRecords()
    codes_postaux = []
    for com in shapeRecs:
        info = com.record
        codes_postaux.append(info)
    return codes_postaux
def extract_shapefile(self, shapefile, output):
    """Extracts the dams within the bounding box of the shapefile."""

    if not os.path.isfile(output + '.shp'):

        r = Reader(shapefile)

        bboxes = [r.shape(i).bbox for i in range(len(r.records()))]

        xmin = min([w for w, x, y, z in bboxes])
        ymin = min([x for w, x, y, z in bboxes])
        xmax = max([y for w, x, y, z in bboxes])
        ymax = max([z for w, x, y, z in bboxes])

        self.extract_bbox([xmin, ymin, xmax, ymax], output)

    else:
        print('dam shapefile exists\n')
def _convert_csv_to_dbf(input_file, output_file, mapping_file=None,
                        mapping_from=None, mapping_to=None,
                        print_data_dict=False):
    if output_file is None:
        name = new_file_ending(input_file.name, '.dbf')
        output_file = open(name, 'w')

    if mapping_file:
        # Read this file and map it
        try:
            from shapefile import Reader
        except ImportError:
            print("pyshp required for mapping feature")
            raise
        dbfr = Reader(dbf=mapping_file)
        # find the field that has the mapping_from name
        # use -1 because pyshp adds a column for flagging deleted fields
        name_i = _find_field_index_dbf(dbfr.fields, mapping_from) - 1
        map_values = [rec[name_i] for rec in dbfr.iterRecords()]

    # Parse the csv.
    parser = csv_parser(handle=input_file)
    header, fieldspecs, records = parser.parse()

    if mapping_file:
        csv_name_i = header.index(mapping_to)
        # be conservative and make sure they match
        if len(records) != len(map_values):
            raise Exception('mapping records lengths must match')
        # reorder the records so they match the original
        # This will raise an error if something does not map
        mapped_records = [None] * len(map_values)
        for i in range(len(map_values)):
            mv = map_values[i]
            try:
                old_i = collect(records, csv_name_i).index(mv)
            except ValueError:
                raise ValueError('Could not find record name %s in csv' % mv)
            mapped_records[i] = records[old_i]
        records = mapped_records

    # Write to dbf.
    dbfwriter(output_file, header, fieldspecs, records)

    if print_data_dict:
        parser.write_dd(input_file.name, output_file)
def extraction_geofla_commune(chemin):
    sf = Reader(chemin)
    shapeRecs = sf.shapeRecords()
    points = shapeRecs[5].shape.points
    record = shapeRecs[5].record
    liste_communes = []
    for com in shapeRecs:
        info = com.record
        insee_com = info[2]
        nom_com_maj = info[3]
        nom_com_min = info[3]
        if b"Capitale d'\xe9tat" == info[4]:
            status = "capitale"
        elif b"Pr\xe9fecture de d\xe9partement" == info[4]:
            status = "prefecture_departement"
        elif "Commune simple" == info[4]:
            status = "commune_simple"
        elif b'Sous-pr\xe9fecture' == info[4]:
            status = "sous-prefecture"
        elif b'Pr\xe9fecture de r\xe9gion' == info[4]:
            status = "prefecture_region"
        x_chf_lieu, y_chf_lieu = transform(lambert_93, wgs_84, info[5], info[6])
        x_centroid, y_centroid = transform(lambert_93, wgs_84, info[7], info[8])
        z_moyen = info[9]
        superficie = info[10]
        population = info[11]
        code_arr = info[12]
        code_dep = info[13]
        code_reg = info[15]
        limite = conversion_lambert93_wgs84(com.shape.points)
        limite_com = creer_commune_json(info, limite, [y_centroid, x_centroid])
        commune = {"insee_com": insee_com, "nom_com_maj": nom_com_maj,
                   "nom_com_min": nom_com_min, "status": status,
                   "x_chf_lieu": x_chf_lieu, "y_chf_lieu": y_chf_lieu,
                   "x_centroid": x_centroid, "y_centroid": y_centroid,
                   "z_min": z_moyen, "z_max": z_moyen,
                   "z_moyen": z_moyen, "superficie": superficie,
                   "population": population, "code_arr": code_arr,
                   "code_dep": code_dep, "code_reg": code_reg,
                   "limite_com": limite_com, "code_postal": insee_com}
        #print(commune)
        liste_communes.append(commune)
    return liste_communes
def extraction_shp_departement(chemin):
    '''Structure of info_departement
    0 : department number
    1 : department name in upper case
    2 : geographic code of the prefecture
    3 : region code
    4 : boundary in WGS84 coordinates'''
    sf = Reader(chemin)
    shapeRecs = sf.shapeRecords()
    points = shapeRecs[5].shape.points
    record = shapeRecs[5].record
    info_departement = []
    for dep in shapeRecs:
        info = dep.record
        limite = conversion_lambert93_wgs84_dep(dep.shape.points)
        departement_json = creer_departement_json(info, limite)
        info_departement.append([info[1], info[2], info[3], info[9],
                                 departement_json])
    return info_departement
def extraction_geofla_departement(chemin):
    sf = Reader(chemin)
    shapeRecs = sf.shapeRecords()
    points = shapeRecs[5].shape.points
    record = shapeRecs[5].record
    liste_departements = []
    for dep in shapeRecs:
        info = dep.record
        code_dep = info[1]
        nom_dep_maj = info[2]
        nom_dep_min = info[2]
        numero_insee_prefecture = info[1] + info[3]
        x_centroid, y_centroid = transform(lambert_93, wgs_84, info[7], info[8])
        code_reg = info[9]
        limite = conversion_lambert93_wgs84(dep.shape.points)
        limite_dep = creer_departement_json(info, limite, [y_centroid, x_centroid])
        departement = {"code_dep": code_dep, "nom_dep_maj": nom_dep_maj,
                       "nom_dep_min": nom_dep_min,
                       "numero_insee_prefecture": numero_insee_prefecture,
                       "x_centroid": x_centroid, "y_centroid": y_centroid,
                       "code_reg": code_reg, "limite_dep": limite_dep}
        liste_departements.append(departement)
    return liste_departements
def plot_NRCM(lons, lats, bfile=None, sfile=None, space=0.05,
              show=False, output=None):

    fig = pyplot.figure()
    sub = fig.add_subplot(111, aspect='equal')
    sub.set_title('Nested Regional Climate Model Grid Points')
    sub.scatter(lons, lats, marker='+', c='r', s=40)

    if bfile is not None:
        sf = Reader(bfile)
        boundary = sf.shape(0).points
        sub.add_patch(make_patch(boundary, (1, 0, 0, 0), width=1.2))

    if sfile is not None:
        sf = Reader(sfile)
        for s in sf.shapes():
            boundary = s.points
            sub.add_patch(make_patch(boundary, (1, 0, 0, 0), width=0.2))

    sub.set_xlabel('Longitude, Decimal Degrees', size=13)
    sub.set_ylabel('Latitude, Decimal Degrees', size=13)

    xmin, ymin, xmax, ymax = get_boundaries(sf.shapes(), space=space)

    pyplot.xlim([xmin, xmax])
    pyplot.ylim([ymin, ymax])

    if output is not None: pyplot.savefig(output)
    if show: pyplot.show()

    pyplot.clf()
    pyplot.close()
def get_coast_polygons(resolution):
    polymeta = []
    polybounds = []
    for level in [1, 2, 3, 4]:
        filename = "GSHHS_shp/%s/GSHHS_%s_L%s" % (resolution, resolution, level)
        # filename = 'WDBII_shp/%s/WDBII_border_%s_L%s' % (resolution, resolution, level)
        print(filename)
        shf = Reader(filename)
        fields = shf.fields
        try:
            shf.shapeRecords()
        except:
            continue
        for shprec in shf.shapeRecords():
            shp = shprec.shape
            rec = shprec.record
            parts = shp.parts.tolist()
            if parts != [0]:
                print("multipart polygon")
                raise SystemExit
            verts = shp.points
            lons, lats = list(zip(*verts))
            north = max(lats)
            south = min(lats)
            attdict = {}
            for r, key in zip(rec, fields[1:]):
                attdict[key[0]] = r
            area = attdict["area"]
            id = attdict["id"]
            polymeta.append([level, area, south, north, len(lons), id])
            b = np.empty((len(lons), 2), np.float32)
            b[:, 0] = lons
            b[:, 1] = lats
            if lsd is not None:
                b = quantize(b, lsd)
            polybounds.append(b)
    return polybounds, polymeta
def extract_shapefile(self, shapefile, output):
    """Extracts the dams within the bounding box of the shapefile."""

    if not os.path.isfile(output + '.shp'):

        if os.path.isfile(shapefile + '.shp'):
            r = Reader(shapefile)
        else:
            print('error: shapefile {} does not exist'.format(shapefile))
            raise

        self.extract_bbox(r.bbox, output)

    else:
        print('dam shapefile {} exists\n'.format(output))
def find_NED(self, catchmentfile):
    """Parses the elevation rasters to find the one where the HUC8
    is located."""

    shapefile = Reader(catchmentfile)

    f = None
    for nedfile in self.nedfiles:

        t, v = get_raster_table(nedfile, shapefile.bbox, 'int32', quiet=True)

        if t is not None:
            f = nedfile
            break

    if f is not None:
        return f
    else:
        print('warning: unable to find NED file')
        raise
def trim_shapefile(
    in_path: Union[Path, str],
    join_on: str,
    include: list,
    out_path: Union[Path, str, None] = None,
) -> Union[Path, str]:
    """Trims a shapefile to only include shapes that match the given criteria.

    Shapes will be discarded unless their 'join_on' property is contained in
    the 'include' list.
    """
    # Resolve the shapefile path (allows in_path to point to directory with
    # same name as nested shapefile)
    in_path = resolve_shapefile_path(in_path)

    # Construct new name if it was not provided
    if out_path is None:
        out_path = in_path.with_name(f"{in_path.name}_trimmed{in_path.suffix}")

    with Reader(str(in_path)) as r, Writer(str(out_path)) as w:
        w.fields = r.fields[1:]  # don't copy deletion field
        if join_on not in [f[0] for f in w.fields]:
            raise ValueError(f"'join_on'={join_on} not in shapefile fields: {w.fields}")

        # Copy features if they match the criteria
        for feature in r.iterShapeRecords():
            if feature.record[join_on] in include:
                w.record(*feature.record)
                w.shape(feature.shape)

    # PyShp doesn't manage .prj file, must copy manually.
    in_prj = in_path.with_suffix(".prj")
    if in_prj.exists():
        out_prj = out_path.with_suffix(".prj")
        shutil.copy(in_prj, out_prj)

    return out_path
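# A hedged usage sketch for trim_shapefile, assuming a hypothetical
# 'data/states' shapefile with a 'STUSPS' attribute field; both the path and
# the field name are invented for illustration.
trimmed = trim_shapefile(
    "data/states",              # hypothetical input path
    join_on="STUSPS",           # hypothetical attribute field
    include=["MD", "VA", "DE"],
)
print(f"wrote {trimmed}")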
# let's get the daily tmin, tmax, dewpoint, wind speed and solar

tmax = processor.aggregate("GSOD", "tmax", start, end)
tmin = processor.aggregate("GSOD", "tmin", start, end)
dewt = processor.aggregate("GSOD", "dewpoint", start, end)
wind = processor.aggregate("GSOD", "wind", start, end)
solar = processor.aggregate("NSRDB", "metstat", start, end)

# use the ETCalculator to estimate the evapotranspiration time series

calculator = ETCalculator()

# some of the parameters in the Penman-Monteith Equation depend on the
# geographic location so get the average longitude, latitude, and elevation

sf = Reader(filename)

# make a list of the fields for each shape

fields = [f[0] for f in sf.fields]

# get the area, centroid and elevation of each shape

areas = [r[fields.index("AreaSqKm") - 1] for r in sf.records()]
xs = [r[fields.index("CenX") - 1] for r in sf.records()]
ys = [r[fields.index("CenY") - 1] for r in sf.records()]
zs = [r[fields.index("AvgElevM") - 1] for r in sf.records()]

# get the areal-weighted averages

lon = sum([a * x for a, x in zip(areas, xs)]) / sum(areas)
# the extractor can also extract data to a new shapefile using a bounding box

bbox = -78, 38, -75, 40

# output file name

bboxfile = 'bbox'

nidextractor.extract_bbox(bbox, bboxfile)

# let's use pyshp to open up the patuxent shapefile and get some info about the
# dams that we downloaded

from shapefile import Reader

sf = Reader(damfile)

# these are the attributes of each dam stored in the NID

name_index = sf.fields.index(['DAM_NAME', 'C', 65, 0]) - 1
nid_index = sf.fields.index(['NIDID', 'C', 7, 0]) - 1
lon_index = sf.fields.index(['LONGITUDE', 'N', 19, 11]) - 1
lat_index = sf.fields.index(['LATITUDE', 'N', 19, 11]) - 1
river_index = sf.fields.index(['RIVER', 'C', 65, 0]) - 1
owner_index = sf.fields.index(['OWN_NAME', 'C', 65, 0]) - 1
type_index = sf.fields.index(['DAM_TYPE', 'C', 10, 0]) - 1
purp_index = sf.fields.index(['PURPOSES', 'C', 254, 0]) - 1
year_index = sf.fields.index(['YR_COMPL', 'C', 10, 0]) - 1
high_index = sf.fields.index(['NID_HEIGHT', 'N', 19, 11]) - 1
mstor_index = sf.fields.index(['MAX_STOR', 'N', 19, 11]) - 1
nstor_index = sf.fields.index(['NORMAL_STO', 'N', 19, 11]) - 1
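# an illustration (not part of the original script) of how the field indices
# above could be used to summarize each dam record
for record in sf.records():
    # name, river, and NID height pulled out by index
    print(record[name_index], record[river_index], record[high_index])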
calculator.add_timeseries('tmin', 'daily', start, tmin)
calculator.add_timeseries('tmax', 'daily', start, tmax)
calculator.add_timeseries('dewpoint', 'daily', start, dewt)
calculator.add_timeseries('wind', 'daily', start, wind)
calculator.add_timeseries('solar', 'daily', start, solar)

# the temperature and dewpoint are assumed to be in C, wind speed in m/s, and
# solar radiation in W/m2; these are the units supplied by the other classes
# in PyHSPF already so no manipulation is needed

# some of the parameters in the Penman-Monteith Equation depend on the
# geographic location so let's use the information in the shapefile to
# provide the average longitude, latitude, and elevation

sf = Reader(filename)

# make a list of the fields for each shape

fields = [f[0] for f in sf.fields]

# get the area, centroid and elevation of each shape

areas = [r[fields.index('AreaSqKm') - 1] for r in sf.records()]
xs = [r[fields.index('CenX') - 1] for r in sf.records()]
ys = [r[fields.index('CenY') - 1] for r in sf.records()]
zs = [r[fields.index('AvgElevM') - 1] for r in sf.records()]

# get the areal-weighted averages

lon = sum([a * x for a, x in zip(areas, xs)]) / sum(areas)
cfpu = f.variables["fpu"][:]

for c in ["maize", "wheat", "soy", "rice"]:
    careas[c] = f.variables["area_" + c][:]

# find valid fpus
tarea = 100 * (111.2 / 2) ** 2 * cos(pi * lats / 180)
tarea = resize(tarea, (nlons, nlats)).T
validfpus = []
for i in range(nfpu):
    hareafpu = harea[fpumap == fpu[i]].sum()
    tareafpu = tarea[fpumap == fpu[i]].sum()
    if hareafpu / tareafpu > percent / 100.0:
        validfpus.append(fpu[i])

# load shape file
r = Reader(shapefile)
shapes = r.shapes()
records = r.records()

models = ["epic", "gepic", "lpj-guess", "lpjml", "pdssat", "pegasus"]  # exclude image
gcms = ["gfdl-esm2m", "hadgem2-es", "ipsl-cm5a-lr", "miroc-esm-chem", "noresm1-m"]
crops = ["maize", "wheat", "soy", "rice"] if crop == "all" else [crop]
co2s = ["co2", "noco2"]

hadgemidx = gcms.index("hadgem2-es")

nm, ng, ncr, nco2 = len(models), len(gcms), len(crops), len(co2s)

# variables
sh = (nm, ng, ncr, 3, nfpu, nco2)

dy26arr = masked_array(zeros(sh), mask=ones(sh))
def merge_shapes(inputfile, outputfile=None, overwrite=False,
                 verbose=True, vverbose=False):
    """Merges all the shapes in a shapefile into a single shape."""

    if outputfile is None:
        outputfile = '{}/merged'.format(os.getcwd())

    if os.path.isfile(outputfile + '.shp') and not overwrite:
        if verbose:
            print('combined watershed shapefile %s exists' % outputfile)
        return

    if verbose:
        print('combining shapes from {}\n'.format(inputfile) +
              'this may take a while...\n')

    # start by copying the projection files

    shutil.copy(inputfile + '.prj', outputfile + '.prj')

    # load the catchment and flowline shapefiles

    r = Reader(inputfile, shapeType=5)
    n = len(r.records())

    try:
        shapes = []
        records = []
        bboxes = []

        for i in range(n):
            shape = r.shape(i)
            record = r.record(i)

            shape_list = format_shape(shape.points)

            for sh in shape_list:
                shapes.append(sh)
                records.append(record)
                bboxes.append(shape.bbox)

        try:
            combined = combine_shapes(shapes, bboxes, verbose=vverbose)
        except:
            if verbose: print('trying alternate trace method')
            combined = combine_shapes(shapes, bboxes, skip=True,
                                      verbose=vverbose)

    except:
        if verbose: print('trying alternate trace method')
        shapes = []
        records = []
        bboxes = []

        for i in range(n):
            shape = r.shape(i)
            record = r.record(i)

            shape_list = format_shape(shape.points, omit=True)

            for sh in shape_list:
                shapes.append(sh)
                records.append(record)
                bboxes.append(shape.bbox)

        try:
            combined = combine_shapes(shapes, bboxes, verbose=vverbose)
        except:
            if verbose: print('trying alternate trace method')
            combined = combine_shapes(shapes, bboxes, skip=True,
                                      verbose=vverbose)

    # create the new file with the merged shapes

    w = Writer(shapeType=5)

    w.poly(shapeType=5, parts=[combined])

    # copy the fields from the original and then the first record; note this
    # can be adapted as needed

    for field in r.fields:
        w.field(*field)
    w.record(*r.record(0))

    w.save(outputfile)

    if verbose:
        print('successfully combined shapes from %s to %s\n' %
              (inputfile, outputfile))
# download the metadata for all NWIS gages (will be skipped if it exists)

extractor.download_metadata()

# download all the data for the gage, including measured values of stage,
# discharge, channel width, and channel area and save it to "gageid" file
# (will be skipped if it already exists)

extractor.download_gagedata(gageid, start, end, output=gageid)

# need to know the reach length; so find the location of the gage, then find
# the flowline in the shapefile and use the record info to get the length

# first use the NWIS metadata file to get the latitude and longitude of the gage

reader = Reader('{}/USGS_Streamgages-NHD_Locations.shp'.format(NWIS))

# find the record index for the NWIS gage ids

i = [f[0] for f in reader.fields].index('SITE_NO') - 1

# find the index of the gage

j = [r[i] for r in reader.records()].index(gageid)

# use the index to get the latitude and longitude of the station

x, y = reader.shape(j).points[0]

print('location of gage {}: {:.4f}, {:.4f}\n'.format(gageid, x, y))
def plot_gage_subbasin(self, hspfmodel, folder):
    """Makes a plot of the subbasin area."""

    subbasinfile = '{}/subbasins'.format(folder)
    boundaryfile = '{}/boundary'.format(folder)
    flowfile = '{}/flowlines'.format(folder)
    combinedfile = '{}/combined'.format(folder)
    watershedplot = '{}/watershed.png'.format(folder)

    # make a shapefile of the subbasins for the watershed

    f = '{0}/{1}/{1}subbasins'.format(self.directory, self.HUC8)
    for out in (subbasinfile, boundaryfile, flowfile, combinedfile):
        if not os.path.isfile(out + '.prj'):
            shutil.copy(f + '.prj', out + '.prj')

    if not os.path.isfile(subbasinfile + '.shp'):

        subshapes = []
        subrecords = []
        for subbasin in hspfmodel.subbasins:

            f = '{0}/{1}/{2}/combined'.format(self.directory, self.HUC8,
                                              subbasin)
            s = Reader(f, shapeType=5)

            subshapes.append(s.shape(0).points)
            subrecords.append(s.record(0))

        w = Writer(shapeType=5)

        for field in s.fields:
            w.field(*field)
        for record in subrecords:
            w.record(*record)
        for shape in subshapes:
            w.poly(shapeType=5, parts=[shape])

        w.save(subbasinfile)

    if not os.path.isfile(combinedfile + '.shp'):

        fshapes = []
        frecords = []
        for subbasin in hspfmodel.subbasins:

            f = '{0}/{1}/{2}/combined_flowline'.format(self.directory,
                                                       self.HUC8,
                                                       subbasin)
            r = Reader(f, shapeType=3)

            fshapes.append(r.shape(0).points)
            frecords.append(r.record(0))

        w = Writer(shapeType=3)

        for field in r.fields:
            w.field(*field)
        for record in frecords:
            w.record(*record)
        for shape in fshapes:
            w.poly(shapeType=3, parts=[shape])

        w.save(combinedfile)

    # merge the shapes into a watershed

    if not os.path.exists(boundaryfile + '.shp'):
        merge_shapes(subbasinfile, outputfile=boundaryfile)

    # make a flowline file for the subbasins for the watershed

    if not os.path.isfile(flowfile + '.shp'):

        shapes = []
        records = []
        for subbasin in hspfmodel.subbasins:
            f = '{0}/{1}/{2}/flowlines'.format(self.directory, self.HUC8,
                                               subbasin)
            r = Reader(f, shapeType=3)
            for shape in r.shapes():
                shapes.append(shape.points)
            for record in r.records():
                records.append(record)

        w = Writer(shapeType=3)

        for field in r.fields:
            w.field(*field)
        for record in records:
            w.record(*record)
        for shape in shapes:
            w.poly(shapeType=3, parts=[shape])

        w.save(flowfile)

    if not os.path.isfile(watershedplot):
        plot_gage_subbasin(folder, self.HUC8, self.gageid, hspfmodel,
                           output=watershedplot)
def climate(self, HUC8, s, e, verbose=True):

    subbasinfile = '{}/subbasin_catchments'.format(self.hydrography)
    climatedata = '{}/{}/climate'.format(self.output, HUC8)

    # make a directory for the climate data and time series

    if not os.path.isdir(climatedata): os.mkdir(climatedata)

    # use the Climateprocessor to get the data

    climateprocessor = ClimateProcessor()
    climateprocessor.download_shapefile(subbasinfile, s, e, climatedata,
                                        space=0.5)

    # make directories for hourly and daily aggregated timeseries

    hourly = '{}/hourly'.format(climatedata)
    daily = '{}/daily'.format(climatedata)

    if not os.path.isdir(hourly): os.mkdir(hourly)
    if not os.path.isdir(daily): os.mkdir(daily)

    # aggregate the daily GSOD tmin, tmax, dewpoint, and wind data

    tmin = '{}/tmin'.format(daily)
    tmax = '{}/tmax'.format(daily)
    dewt = '{}/dewpoint'.format(daily)
    wind = '{}/wind'.format(daily)

    if not os.path.isfile(tmin):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'tmin', s, e)
        with open(tmin, 'wb') as f: pickle.dump(ts, f)
    if not os.path.isfile(tmax):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'tmax', s, e)
        with open(tmax, 'wb') as f: pickle.dump(ts, f)
    if not os.path.isfile(dewt):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'dewpoint', s, e)
        with open(dewt, 'wb') as f: pickle.dump(ts, f)
    if not os.path.isfile(wind):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'wind', s, e)
        with open(wind, 'wb') as f: pickle.dump(ts, f)

    # aggregate the daily GHCND snowfall and snowdepth data

    snowfall = '{}/snowfall'.format(daily)
    snowdepth = '{}/snowdepth'.format(daily)

    if not os.path.isfile(snowfall):
        ts = s, 1440, climateprocessor.aggregate('GHCND', 'snowfall', s, e)
        with open(snowfall, 'wb') as f: pickle.dump(ts, f)
    if not os.path.isfile(snowdepth):
        ts = s, 1440, climateprocessor.aggregate('GHCND', 'snowdepth', s, e)
        with open(snowdepth, 'wb') as f: pickle.dump(ts, f)

    # find stations with pan evaporation data from GHCND

    evapstations = []
    for k, v in climateprocessor.metadata.ghcndstations.items():

        # check if the station has any evaporation data

        if v['evap'] > 0:

            # open up the file and get the data

            with open(k, 'rb') as f: station = pickle.load(f)

            data = station.make_timeseries('evaporation', s, e)

            # ignore datasets with no observations during the period

            observations = [v for v in data if v is not None]

            if len(observations) > 0: evapstations.append(k)

    # aggregate the hourly NSRDB metstat data

    hsolar = '{}/solar'.format(hourly)
    if not os.path.isfile(hsolar):
        ts = s, 60, climateprocessor.aggregate('NSRDB', 'metstat', s, e)
        with open(hsolar, 'wb') as f: pickle.dump(ts, f)

    # aggregate the hourly solar to daily

    dsolar = '{}/solar'.format(daily)
    if not os.path.isfile(dsolar):
        with open(hsolar, 'rb') as f: t, tstep, data = pickle.load(f)
        ts = s, 1440, [sum(data[i:i+24]) / 24
                       for i in range(0, 24 * (e - s).days, 24)]
        with open(dsolar, 'wb') as f: pickle.dump(ts, f)

    # aggregate the hourly precipitation for each subbasin using IDWA

    precip = '{}/hourlyprecipitation'.format(climatedata)
    if not os.path.isdir(precip): os.mkdir(precip)

    # use the subbasin shapefile to get the location of the centroids

    sf = Reader(subbasinfile)

    # index of the comid, latitude, and longitude records

    comid_index = [f[0] for f in sf.fields].index('ComID') - 1
    lon_index = [f[0] for f in sf.fields].index('CenX') - 1
    lat_index = [f[0] for f in sf.fields].index('CenY') - 1
    elev_index = [f[0] for f in sf.fields].index('AvgElevM') - 1
    area_index = [f[0] for f in sf.fields].index('AreaSqKm') - 1

    # iterate through the shapefile records and aggregate the timeseries for
    # each subbasin

    for i in range(len(sf.records())):

        record = sf.record(i)
        comid = record[comid_index]
        lon = record[lon_index]
        lat = record[lat_index]

        # check if the aggregated time series exists or calculate it

        subbasinprecip = '{}/{}'.format(precip, comid)
        if not os.path.isfile(subbasinprecip):

            if verbose:
                i = comid, lon, lat
                print('aggregating timeseries for comid ' +
                      '{} at {}, {}\n'.format(*i))

            p = climateprocessor.aggregate('precip3240', 'precip', s, e,
                                           method='IDWA', longitude=lon,
                                           latitude=lat)
            ts = s, 60, p
            with open(subbasinprecip, 'wb') as f: pickle.dump(ts, f)

    # make a directory for the evapotranspiration time series

    evapotranspiration = '{}/evapotranspiration'.format(climatedata)
    if not os.path.isdir(evapotranspiration): os.mkdir(evapotranspiration)

    # use the ETCalculator to calculate the ET time series

    etcalculator = ETCalculator()

    # get the centroid of the watershed from the subbasin shapefile

    areas = [r[area_index] for r in sf.records()]
    xs = [r[lon_index] for r in sf.records()]
    ys = [r[lat_index] for r in sf.records()]
    zs = [r[elev_index] for r in sf.records()]

    # get the areal-weighted averages

    lon = sum([a * x for a, x in zip(areas, xs)]) / sum(areas)
    lat = sum([a * y for a, y in zip(areas, ys)]) / sum(areas)
    elev = sum([a * z for a, z in zip(areas, zs)]) / sum(areas)

    # add them to the ETCalculator

    etcalculator.add_location(lon, lat, elev)

    # check if the daily RET exists; otherwise calculate it

    dRET = '{}/dailyRET'.format(evapotranspiration)
    if not os.path.isfile(dRET):

        # add the daily time series to the calculator

        with open(tmin, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('tmin', tstep, t, data)

        with open(tmax, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('tmax', tstep, t, data)

        with open(dewt, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('dewpoint', tstep, t, data)

        with open(wind, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('wind', tstep, t, data)

        with open(dsolar, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('solar', tstep, t, data)

        # calculate the daily RET

        etcalculator.penman_daily(s, e)

        ts = s, 1440, etcalculator.daily['RET'][1]
        with open(dRET, 'wb') as f: pickle.dump(ts, f)

    # disaggregate the daily temperature time series to hourly

    hourlytemp = '{}/temperature'.format(hourly)
    if not os.path.isfile(hourlytemp):

        if etcalculator.daily['tmin'] is None:
            with open(tmin, 'rb') as f: t, tstep, data = pickle.load(f)
            etcalculator.add_timeseries('tmin', tstep, t, data)

        if etcalculator.daily['tmax'] is None:
            with open(tmax, 'rb') as f: t, tstep, data = pickle.load(f)
            etcalculator.add_timeseries('tmax', tstep, t, data)

        data = etcalculator.interpolate_temperatures(s, e)
        tstep = 60
        ts = t, tstep, data
        with open(hourlytemp, 'wb') as f: pickle.dump(ts, f)
        etcalculator.add_timeseries('temperature', tstep, t, data)

    # disaggregate the dewpoint and wind speed time series to hourly

    hourlydewt = '{}/dewpoint'.format(hourly)
    if not os.path.isfile(hourlydewt):

        if etcalculator.daily['dewpoint'] is None:
            with open(dewt, 'rb') as f: t, tstep, data = pickle.load(f)
        else:
            t, data = etcalculator.daily['dewpoint']

        tstep = 60
        data = [v for v in data for i in range(24)]
        ts = t, tstep, data
        with open(hourlydewt, 'wb') as f: pickle.dump(ts, f)
        etcalculator.add_timeseries('dewpoint', tstep, t, data)

    hourlywind = '{}/wind'.format(hourly)
    if not os.path.isfile(hourlywind):

        if etcalculator.daily['wind'] is None:
            with open(wind, 'rb') as f: t, tstep, data = pickle.load(f)
        else:
            t, data = etcalculator.daily['wind']

        tstep = 60
        data = [v for v in data for i in range(24)]
        ts = t, tstep, data
        with open(hourlywind, 'wb') as f: pickle.dump(ts, f)
        etcalculator.add_timeseries('wind', tstep, t, data)

    # check if the hourly RET exists; otherwise calculate it

    hRET = '{}/hourlyRET'.format(evapotranspiration)
    if not os.path.isfile(hRET):

        required = 'temperature', 'solar', 'dewpoint', 'wind'
        for tstype in required:
            if etcalculator.hourly[tstype] is None:
                name = '{}/{}'.format(hourly, tstype)
                with open(name, 'rb') as f: t, tstep, data = pickle.load(f)
                etcalculator.add_timeseries(tstype, tstep, t, data)

        # calculate and save the hourly RET

        etcalculator.penman_hourly(s, e)

        ts = s, 60, etcalculator.hourly['RET'][1]
        with open(hRET, 'wb') as f: pickle.dump(ts, f)

        # add the daily time series for the plot

        required = 'tmin', 'tmax', 'dewpoint', 'wind', 'solar'
        for tstype in required:
            if etcalculator.daily[tstype] is None:
                name = '{}/{}'.format(daily, tstype)
                with open(name, 'rb') as f: t, tstep, data = pickle.load(f)
                etcalculator.add_timeseries(tstype, tstep, t, data)

        # aggregate the hourly to daily for plotting

        hRET = etcalculator.hourly['RET'][1]
        dRET = [sum(hRET[i:i+24]) for i in range(0, len(hRET), 24)]
        etcalculator.add_timeseries('RET', 'daily', s, dRET)

        name = '{}/referenceET'.format(evapotranspiration)
        etcalculator.plotET(stations=evapstations, output=name, show=False)

        name = '{}/dayofyearET'.format(evapotranspiration)
        etcalculator.plotdayofyear(stations=evapstations, output=name,
                                   show=False)

    # calculate hourly PET for different land use categories

    lucs = ('corn', 'soybeans', 'grains', 'alfalfa', 'fallow', 'pasture',
            'wetlands', 'others',
            )
    colors = ('yellow', 'green', 'brown', 'lime', 'gray', 'orange', 'blue',
              'black',
              )
    pdates = (datetime.datetime(2000, 4, 15),
              datetime.datetime(2000, 5, 15),
              datetime.datetime(2000, 4, 15),
              datetime.datetime(2000, 5, 15),
              datetime.datetime(2000, 3, 1),
              datetime.datetime(2000, 3, 1),
              datetime.datetime(2000, 3, 1),
              datetime.datetime(2000, 3, 1),
              )
    ems = (30, 20, 20, 10, 10, 10, 10, 10,
           )
    gs = (50, 30, 30, 10, 10, 10, 10, 10,
          )
    fs = (60, 60, 60, 120, 240, 240, 240, 240,
          )
    ls = (40, 30, 40, 10, 10, 10, 10, 10,
          )
    Kis = (0.30, 0.40, 0.30, 0.30, 0.30, 0.30, 1.00, 1.00,
           )
    Kms = (1.15, 1.15, 1.15, 0.95, 0.30, 0.85, 1.20, 1.00,
           )
    Kls = (0.40, 0.55, 0.40, 0.90, 0.30, 0.30, 1.00, 1.00,
           )

    # add the hourly RET time series if it isn't present

    if etcalculator.hourly['RET'] is None:
        with open(hRET, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('RET', tstep, t, data)

    # iterate through the land use categories and calculate PET

    for i in zip(lucs, colors, pdates, ems, gs, fs, ls, Kis, Kms, Kls):

        crop, c, plant, emergence, growth, full, late, Ki, Km, Kl = i

        # add the information and calculate the PET time series

        etcalculator.add_crop(crop, plant, emergence, growth, full, late,
                              Ki, Km, Kl,
                              )
        etcalculator.hourly_PET(crop, s, e)

        # get the PET time series

        t, PET = etcalculator.hourlyPETs[crop]
        ts = t, 60, PET

        # save it

        name = '{}/{}'.format(evapotranspiration, crop)
        with open(name, 'wb') as f: pickle.dump(ts, f)
def extract_bbox(self, bbox, output, verbose = True):
    """Extracts the NID dam locations within a bounding box from the dam
    shapefile into a new shapefile.
    """

    self.download_compressed()

    xmin, ymin, xmax, ymax = bbox

    # copy the projection files

    if verbose: print('copying the projections from the NID source\n')

    projection = self.source + '.prj'

    shutil.copy(projection, output + '.prj')

    # get the dams within the watershed

    if verbose: print('reading the dam file\n')

    sf = Reader(self.source, shapeType = 1)

    # work around for issues with pyshp

    damrecords = []
    for i in range(len(sf.shapes())):
        try:    damrecords.append(sf.record(i))
        except: damrecords.append([-100 for i in range(len(sf.fields))])

    name_index  = sf.fields.index(['DAM_NAME',   'C', 65,   0]) - 1
    nid_index   = sf.fields.index(['NIDID',      'C', 7,    0]) - 1
    long_index  = sf.fields.index(['LONGITUDE',  'N', 19,  11]) - 1
    lat_index   = sf.fields.index(['LATITUDE',   'N', 19,  11]) - 1
    river_index = sf.fields.index(['RIVER',      'C', 65,   0]) - 1
    owner_index = sf.fields.index(['OWN_NAME',   'C', 65,   0]) - 1
    type_index  = sf.fields.index(['DAM_TYPE',   'C', 10,   0]) - 1
    purp_index  = sf.fields.index(['PURPOSES',   'C', 254,  0]) - 1
    year_index  = sf.fields.index(['YR_COMPL',   'C', 10,   0]) - 1
    high_index  = sf.fields.index(['NID_HEIGHT', 'N', 19,  11]) - 1
    mstor_index = sf.fields.index(['MAX_STOR',   'N', 19,  11]) - 1
    nstor_index = sf.fields.index(['NORMAL_STO', 'N', 19,  11]) - 1
    area_index  = sf.fields.index(['SURF_AREA',  'N', 19,  11]) - 1

    # iterate through the records and determine which points are in the box

    if verbose: print('extracting dams into new file\n')

    dam_indices = []

    i = 0
    for record in damrecords:

        lat = record[lat_index]
        lon = record[long_index]

        if self.inside_box([xmin, ymin], [xmax, ymax], [lon, lat]):
            dam_indices.append(i)
        i += 1

    # write the data from the bbox to a new shapefile

    w = Writer(shapeType = 1)

    for field in sf.fields: w.field(*field)

    for i in dam_indices:

        point = sf.shape(i).points[0]
        w.point(*point)

        values = damrecords[i]

        rs = []
        for value in values:
            if isinstance(value, bytes): value = value.decode('utf-8')
            rs.append(value)

        w.record(*rs)

    w.save(output)

    if verbose:
        print('successfully extracted NID dam locations to new file\n')
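# the bounding box test above is delegated to self.inside_box, which is not
# shown in this section; it presumably reduces to a simple comparison like
# the standalone sketch below (assumed behavior, illustrative only).

def inside_box(p1, p2, p3):
    """Return True if the point p3 = [x, y] falls inside the box defined by
    the opposite corners p1 and p2 (all given as [longitude, latitude])."""

    xmin, xmax = min(p1[0], p2[0]), max(p1[0], p2[0])
    ymin, ymax = min(p1[1], p2[1]), max(p1[1], p2[1])

    return xmin <= p3[0] <= xmax and ymin <= p3[1] <= ymax

# e.g., a dam at (-91.5, 42.1) inside a box over eastern Iowa

print(inside_box([-92.0, 41.5], [-91.0, 42.5], [-91.5, 42.1]))    # True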
def build_watershed(self, subbasinfile, flowfile, outletfile, damfile, gagefile, landfiles, VAAfile, years, HUC8, filename, plotname = None, ): # create a dictionary to store subbasin data subbasins = {} # create a dictionary to keep track of subbasin inlets inlets = {} # read in the flow plane data into an instance of the FlowPlane class sf = Reader(subbasinfile, shapeType = 5) comid_index = sf.fields.index(['ComID', 'N', 9, 0]) - 1 len_index = sf.fields.index(['PlaneLenM', 'N', 8, 2]) - 1 slope_index = sf.fields.index(['PlaneSlope', 'N', 9, 6]) - 1 area_index = sf.fields.index(['AreaSqKm', 'N', 10, 2]) - 1 cx_index = sf.fields.index(['CenX', 'N', 12, 6]) - 1 cy_index = sf.fields.index(['CenY', 'N', 12, 6]) - 1 elev_index = sf.fields.index(['AvgElevM', 'N', 8, 2]) - 1 for record in sf.records(): comid = '{}'.format(record[comid_index]) length = record[len_index] slope = record[slope_index] tot_area = record[area_index] centroid = [record[cx_index], record[cy_index]] elevation = record[elev_index] subbasin = Subbasin(comid) subbasin.add_flowplane(length, slope, centroid, elevation) subbasins[comid] = subbasin # read in the flowline data to an instance of the Reach class sf = Reader(flowfile) outcomid_index = sf.fields.index(['OutComID', 'N', 9, 0]) - 1 gnis_index = sf.fields.index(['GNIS_NAME', 'C', 65, 0]) - 1 reach_index = sf.fields.index(['REACHCODE', 'C', 8, 0]) - 1 incomid_index = sf.fields.index(['InletComID', 'N', 9, 0]) - 1 maxelev_index = sf.fields.index(['MaxElev', 'N', 9, 2]) - 1 minelev_index = sf.fields.index(['MinElev', 'N', 9, 2]) - 1 slopelen_index = sf.fields.index(['SlopeLenKM', 'N', 6, 2]) - 1 slope_index = sf.fields.index(['Slope', 'N', 8, 5]) - 1 inflow_index = sf.fields.index(['InFlowCFS', 'N', 8, 3]) - 1 outflow_index = sf.fields.index(['OutFlowCFS', 'N', 8, 3]) - 1 velocity_index = sf.fields.index(['VelFPS', 'N', 7, 4]) - 1 traveltime_index = sf.fields.index(['TravTimeHR', 'N', 8, 2]) - 1 for record in sf.records(): outcomid = '{}'.format(record[outcomid_index]) gnis = record[gnis_index] reach = record[reach_index] incomid = '{}'.format(record[incomid_index]) maxelev = record[maxelev_index] / 100 minelev = record[minelev_index] / 100 slopelen = record[slopelen_index] slope = record[slope_index] inflow = record[inflow_index] outflow = record[outflow_index] velocity = record[velocity_index] traveltime = record[traveltime_index] if isinstance(gnis, bytes): gnis = '' subbasin = subbasins[outcomid] flow = (inflow + outflow) / 2 subbasin.add_reach(gnis, maxelev, minelev, slopelen, flow = flow, velocity = velocity, traveltime = traveltime) inlets[outcomid] = incomid # open up the outlet file and see if the subbasin has a gage or dam sf = Reader(outletfile) records = sf.records() comid_index = sf.fields.index(['COMID', 'N', 9, 0]) - 1 nid_index = sf.fields.index(['NIDID', 'C', 7, 0]) - 1 nwis_index = sf.fields.index(['SITE_NO', 'C', 15, 0]) - 1 nids = {'{}'.format(r[comid_index]):r[nid_index] for r in records if isinstance(r[nid_index], str)} nwiss = {'{}'.format(r[comid_index]):r[nwis_index] for r in records if r[nwis_index] is not None} # open up the dam file and read in the information for the dams sf = Reader(damfile) records = sf.records() name_index = sf.fields.index(['DAM_NAME', 'C', 65, 0]) - 1 nid_index = sf.fields.index(['NIDID', 'C', 7, 0]) - 1 long_index = sf.fields.index(['LONGITUDE', 'N', 19, 11]) - 1 lat_index = sf.fields.index(['LATITUDE', 'N', 19, 11]) - 1 river_index = sf.fields.index(['RIVER', 'C', 65, 0]) - 1 owner_index = sf.fields.index(['OWN_NAME', 
'C', 65, 0]) - 1 type_index = sf.fields.index(['DAM_TYPE', 'C', 10, 0]) - 1 purp_index = sf.fields.index(['PURPOSES', 'C', 254, 0]) - 1 year_index = sf.fields.index(['YR_COMPL', 'C', 10, 0]) - 1 high_index = sf.fields.index(['NID_HEIGHT', 'N', 19, 11]) - 1 mstor_index = sf.fields.index(['MAX_STOR', 'N', 19, 11]) - 1 nstor_index = sf.fields.index(['NORMAL_STO', 'N', 19, 11]) - 1 area_index = sf.fields.index(['SURF_AREA', 'N', 19, 11]) - 1 # iterate through the subbasins and see if they have a dam for comid, subbasin in subbasins.items(): if comid in nids: # if the subbasin has a dam, find the data info in the file nid = nids[comid] r = records[[r[nid_index] for r in records].index(nid)] subbasin.add_dam(nid, r[name_index], r[long_index], r[lat_index], r[river_index], r[owner_index], r[type_index], r[purp_index], r[year_index], r[high_index], r[mstor_index], r[nstor_index], r[area_index], ) # read in the landuse data from the csv files for year in years: csvfile = '{}/{}landuse.csv'.format(landfiles, year) with open(csvfile, 'r') as f: reader = csv.reader(f) rows = [r for r in reader] # organize the data comids = [r[0] for r in rows[3:]] categories = rows[2][2:] emptys = [r[1] for r in rows[3:]] data = [r[2:] for r in rows[3:]] for comid, subbasin in subbasins.items(): i = comids.index(comid) subbasin.add_landuse(year, categories, data[i]) # create an instance of the Watershed class watershed = Watershed(HUC8, subbasins) # open up the flowline VAA file to use to establish mass linkages with open(VAAfile, 'rb') as f: flowlines = pickle.load(f) # create a dictionary to connect the comids to hydroseqs hydroseqs = {'{}'.format(flowlines[f].comid): flowlines[f].hydroseq for f in flowlines} # establish the mass linkages using a dictionary "updown" and a list of # head water subbasins updown = {} for comid, subbasin in watershed.subbasins.items(): # get the flowline instance for the outlet comid flowline = flowlines[hydroseqs[comid]] # check if the subbasin is a watershed inlet or a headwater source inlet = hydroseqs[inlets[comid]] if flowlines[inlet].up in flowlines: i = '{}'.format(flowlines[flowlines[inlet].up].comid) subbasin.add_inlet(i) elif flowlines[inlet].up != 0: watershed.add_inlet(comid) else: watershed.add_headwater(comid) # check if the subbasin is a watershed outlet, and if it is not # then find the downstream reach if flowline.down in flowlines: flowline = flowlines[flowline.down] while '{}'.format(flowline.comid) not in subbasins: flowline = flowlines[flowline.down] updown[comid] = '{}'.format(flowline.comid) else: updown[comid] = 0 watershed.add_outlet('{}'.format(comid)) # add the updown dictionary to show mass linkage in the reaches watershed.add_mass_linkage(updown) with open(filename, 'wb') as f: pickle.dump(watershed, f) if plotname is not None and not os.path.isfile(plotname + '.png'): self.plot_mass_flow(watershed, plotname)
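# the "updown" dictionary built above maps each subbasin's comid to the comid
# immediately downstream (or 0 at the watershed outlet). the sketch below is
# not a PyHSPF method, just an illustration of how that mass linkage can be
# traversed to list the flow path from any subbasin down to the outlet.

def flow_path(updown, comid):
    """Follow the downstream linkages from a starting comid to the outlet."""

    path = [comid]
    while updown.get(comid, 0) != 0:       # 0 marks the watershed outlet
        comid = updown[comid]
        path.append(comid)
    return path

# e.g., a toy three-subbasin chain

print(flow_path({'101': '102', '102': '103', '103': 0}, '101'))
# ['101', '102', '103']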
def plot_landuse(self, landuse, catchments, attribute, categoryfile, output = None, datatype = 'raw', overwrite = False, pixels = 1000, border = 0.02, lw = 0.5, show = False, verbose = True, vverbose = False ): """ Makes a plot of the landuse of a catchment shapefile on top of a raster landuse file. """ if self.order is None: print('error: no landuse aggregation file information provided\n') raise self.read_categoryfile(categoryfile) if verbose: print('generating a {} land use plot\n'.format(datatype)) # make the figure fig = pyplot.figure() subplot = fig.add_subplot(111, aspect = 'equal') subplot.tick_params(axis = 'both', which = 'major', labelsize = 11) # add the title if datatype == 'results': title = 'Land Use Fractions' else: title = 'Raw Land Use Data' subplot.set_title(title, size = 14) # open the shapefile and get the bounding box s = Reader(catchments, shapeType = 5) xmin, ymin, xmax, ymax = s.bbox # get the index of the field for the attribute matching index = [f[0] for f in s.fields].index(attribute) - 1 # set up a custom colormap using the rgbs supplied in the aggregate file color_table = [(self.reds[g] / 255, self.greens[g] / 255, self.blues[g] / 255) for g in self.order] cmap = colors.ListedColormap(color_table) # provide the cutoff boundaries for the mapping of values to the table bounds = [i-0.5 for i in range(len(self.order)+1)] # create a norm to map the bounds to the colors norm = colors.BoundaryNorm(bounds, cmap.N) # get the pixel width and origin w = (xmax - xmin) / pixels # calculate the image array height and the height of a pixel height = int(numpy.ceil((ymax - ymin) / (xmax - xmin)) * pixels) h = (ymax - ymin) / height # set up the image array image_array = numpy.zeros((height, pixels), dtype = 'uint8') # get the land use fraction for each category if datatype == 'results': # iterate through the shapes and make patches for i in range(len(s.records())): comid = s.record(i)[index] points = numpy.array(s.shape(i).points) # convert the shape to pixel coordinates pixel_polygon = [(get_pixel(x, xmin, w), get_pixel(y, ymin, h)) for x, y in points] # make a PIL image to use as a mask rasterpoly = Image.new('L', (pixels, height), 1) rasterize = ImageDraw.Draw(rasterpoly) # rasterize the polygon rasterize.polygon(pixel_polygon, 0) # convert the PIL array to numpy boolean to use as a mask mask = 1 - numpy.array(rasterpoly) # get the total number of pixels in the shape tot = mask.sum() # iterate from left to right and get the fraction of the total # area inside the shape as a function of x (takes into account # the depth) fractions = [column.sum() / tot for column in mask.transpose()] area_cdf = [sum(fractions[:i+1]) for i in range(len(fractions))] # convert the land use fractions into a land use cdf fractions = [self.landuse[comid][g] for g in self.order] land_cdf = [sum(fractions[:i+1]) for i in range(len(fractions))] # use the area cdf to determine the break points for the land # use patches. note this array does not account for the masking # of the patch. thus there are n+1 vertical bands. the first # and last are the "empty" (first in the aggregate file). in # between the break points are determined from the area cdf. 
color_array = numpy.zeros(len(mask[0]), dtype = 'uint8') # find the break point for each band by looping through the land # ues cdf and filling from left to right i = 0 for p, n in zip(land_cdf, range(len(self.order))): # move from left to right nuntil the area_cdf exceeds # the land area cdf while area_cdf[i] <= p: color_array[i] = n if i < len(area_cdf) - 1: i += 1 else: break # multiply the color band array by the mask to get the img sub_img = mask * color_array # add the new mask to the watershed image image_array = image_array + sub_img # add a patch for the shape boundary subplot.add_patch(self.make_patch(points, (1,0,0,0), width=lw)) # show the bands bbox = s.bbox[0], s.bbox[2], s.bbox[1], s.bbox[3] im = subplot.imshow(image_array, extent = bbox, origin = 'upper left', interpolation = 'nearest', cmap = cmap, norm = norm) # adjust the plot bounding box xmin, xmax = xmin-border * (xmax-xmin), xmax + border * (xmax-xmin) ymin, ymax = ymin-border * (ymax-ymin), ymax + border * (ymax-ymin) else: # adjust the plot bounding box xmin, xmax = xmin-border * (xmax-xmin), xmax + border * (xmax-xmin) ymin, ymax = ymin-border * (ymax-ymin), ymax + border * (ymax-ymin) # pixel width in latitude pw = (xmax - xmin) / pixels # calculate the image height in pixels ny = int(numpy.ceil((ymax - ymin) / (xmax - xmin) * pixels)) # note the height of pixels = width of pixels # and image width in pixels is "pixels" xs = numpy.array([xmin + (i + 0.5) * pw for i in range(pixels)]) ys = numpy.array([ymin + (i + 0.5) * pw for i in range(ny)]) # set up an array of values for the image zs = numpy.zeros((ny, pixels)) for i in range(len(ys)): ps = [(x, ys[i]) for x in xs] zs[i, :] = numpy.array(get_raster(landuse, ps, quiet = True)) zs = zs.astype(int) tot = zs.size for v in numpy.unique(zs): group = self.groups[v] i = self.order.index(group) zs[numpy.where(zs == v)] = i # plot the grid im = subplot.imshow(zs, interpolation = 'nearest', origin = 'upper left', extent = [xmin, xmax, ymin, ymax], norm = norm, cmap = cmap, ) # add patch for the shape boundary for shape in s.shapes(): points = numpy.array(shape.points) subplot.add_patch(self.make_patch(points, (1,0,0,0), width=0.5)) # add the legend using a dummy box to make patches for the legend dummybox = [[0,0], [0,1], [1,1], [1,0], [0,0]] handles, labels = [], [] for group, color in zip(self.order[1:], color_table[1:]): p = self.make_patch(dummybox, facecolor = color, width = 0) handles.append(subplot.add_patch(p)) labels.append(group) leg = subplot.legend(handles, labels, bbox_to_anchor = (1.0, 0.5), loc = 'center left', title = 'Land Use Categories') legtext = leg.get_texts() pyplot.setp(legtext, fontsize = 10) subplot.set_position([0.125, 0.1, 0.6, 0.8]) # add the labels and set the limits subplot.set_xlabel('Longitude, Decimal Degrees', size = 13) subplot.set_ylabel('Latitude, Decimal Degrees', size = 13) subplot.set_xlim([xmin, xmax]) subplot.set_ylim([ymin, ymax]) subplot.xaxis.set_major_locator(ticker.MaxNLocator(8)) subplot.yaxis.set_major_locator(ticker.MaxNLocator(8)) subplot.xaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f')) subplot.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f')) # show it if output is not None: pyplot.savefig(output) if show: pyplot.show() pyplot.clf() pyplot.close()
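# the rasterization above relies on a get_pixel helper to convert map
# coordinates into image column/row indices; it is not shown in this section,
# but it presumably reduces to the linear transform sketched below (assumed
# implementation, given the pixel width w and height h computed above).

def get_pixel(x, xmin, width):
    """Convert a map coordinate into a pixel index using the coordinate of
    the image origin and the width of one pixel (assumed behavior)."""

    return int((x - xmin) // width)

# e.g., with 0.5-degree pixels, a point 10 degrees from the origin is pixel 20

print(get_pixel(12.0, 2.0, 0.5))    # 20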
def calculate_landuse(self,
                      rasterfile,
                      shapefile,
                      aggregatefile,
                      attribute,
                      csvfile = None,
                      ):
    """
    Calculates the land use for the given year for the "attribute"
    feature attribute in the polygon shapefile using the aggregate
    mapping provided in the "aggregatefile."
    """

    # make sure the files exist

    for f in rasterfile, shapefile + '.shp', aggregatefile:
        if not os.path.isfile(f):
            print('error, {} does not exist\n'.format(f))
            raise FileNotFoundError(f)

    # read the aggregate file

    self.read_aggregatefile(aggregatefile)

    # open the shapefile

    sf = Reader(shapefile, shapeType = 5)

    attributes = [f[0] for f in sf.fields]

    try:
        index = attributes.index(attribute) - 1
    except:
        print('error: attribute ' +
              '{} is not in the shapefile fields'.format(attribute))
        raise

    # iterate through the shapes, get the fractions and save them

    for i in range(len(sf.records())):

        points = numpy.array(sf.shape(i).points)
        record = sf.record(i)

        k = record[index]

        # store the results

        self.landuse[k] = {r: 0 for r in self.order}

        try:

            values, origin = get_raster_in_poly(rasterfile, points,
                                                verbose = False)
            values = values.flatten()
            values = values[values.nonzero()]

            tot_pixels = len(values)

            # count the number of pixels of each land use type

            for v in numpy.unique(values):

                # find all the indices for each pixel value

                pixels = numpy.argwhere(values == v)

                # normalize by the total number of pixels

                f = len(values[pixels]) / tot_pixels

                # add the land use to the aggregated value

                self.landuse[k][self.groups[v]] += f

        # work around for small shapes

        except: self.landuse[k][self.groups[0]] = 1

    if csvfile is not None: self.make_csv(attribute, csvfile)

    return self.landuse
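# the per-shape loop above amounts to counting how often each raster value
# occurs inside the polygon and normalizing by the pixel count. the same
# bookkeeping can be written more compactly with numpy.unique; the sketch
# below assumes the masked pixel values have already been flattened into an
# array and that "groups" maps raster values to aggregated categories.

import numpy

def landuse_fractions(values, groups):
    """Return {aggregated category: area fraction} for a flat array of
    raster values using the value-to-category mapping "groups"."""

    values = values[values.nonzero()]
    uniques, counts = numpy.unique(values, return_counts = True)

    fractions = {}
    for v, n in zip(uniques, counts):
        group = groups[v]
        fractions[group] = fractions.get(group, 0) + n / values.size
    return fractions

# e.g., six pixels of three raster classes mapped onto three categories

vals = numpy.array([11, 11, 11, 21, 21, 82])
print(landuse_fractions(vals, {11: 'water', 21: 'developed', 82: 'crops'}))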
import csv, pandas

from shapefile import Reader

sf = 'C:/HSPF_data/07080106/hydrography/subbasin_catchments'

# read the areas from the shapefile into a lookup dictionary

r = Reader(sf)

comid_index = [f[0] for f in r.fields].index('ComID') - 1
area_index  = [f[0] for f in r.fields].index('AreaSqKm') - 1

areas = {row[comid_index]: row[area_index] for row in r.records()}

# directory to the land use data

p = 'C:/HSPF_new/07080106/landuse'

# store the results in a data structure

rows = [['Year']]

for y in range(2000, 2011):

    # expand the structure for the next file

    rows.append([y])

    # land use csv file for the year (contains the fractions for each comid)
# few more stations

space = 0.5

# download/set the location of the data using the "download_shapefile" method

processor.download_shapefile(filename, start, end, output,
                             datasets = ['precip3240'], space = 0.5)

# the ClimateProcessor's aggregate method can be used with inverse-distance
# weighted average (IDWA) to interpolate between the stations at a given
# point using the "method," "latitude," and "longitude" keyword arguments.
# the result is the same as the previous example. as before, the
# subbasin_catchments shapefile that contains the centroid for each
# aggregation will be used.

sf = Reader(filename)

# index of the comid, latitude, and longitude records

comid_index = [f[0] for f in sf.fields].index('ComID') - 1
lon_index   = [f[0] for f in sf.fields].index('CenX')  - 1
lat_index   = [f[0] for f in sf.fields].index('CenY')  - 1

# iterate through the shapefile records and aggregate the timeseries

for i in range(len(sf.records())):

    record = sf.record(i)
    comid  = record[comid_index]
    lon    = record[lon_index]
    lat    = record[lat_index]
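# inverse-distance weighting itself is a simple calculation: each station
# value is weighted by the reciprocal of its distance to the target point
# (commonly squared) and the weights are normalized. the function below is a
# minimal standalone sketch, not the ClimateProcessor implementation, which
# also handles missing data and uses geographic rather than planar distances.

def idwa(stations, lon, lat, power = 2):
    """Inverse-distance weighted average of station values at (lon, lat).

    "stations" is a list of (station_lon, station_lat, value) tuples; the
    planar distance and the exponent of 2 are simplifying assumptions."""

    weights, values = [], []
    for slon, slat, value in stations:
        d = ((slon - lon)**2 + (slat - lat)**2)**0.5
        if d == 0: return value           # the point is exactly at a station
        weights.append(1 / d**power)
        values.append(value)

    return sum(w * v for w, v in zip(weights, values)) / sum(weights)

# e.g., three precipitation gages around a subbasin centroid

gages = [(-91.6, 42.0, 1.2), (-91.4, 42.1, 0.8), (-91.5, 41.9, 1.0)]
print(idwa(gages, -91.5, 42.0))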
def plot_HUC8(self, flowfile, cfile, bfile, VAAfile, elevfile, patchcolor = None, resolution = 400, colormap = 'gist_earth', grid = False, title = None, verbose = True, output = None, show = False, ): """Makes a plot of the raw NHDPlus data.""" if verbose: print('generating plot of the watershed\n') fig = pyplot.figure() subplot = fig.add_subplot(111, aspect = 'equal') subplot.tick_params(axis = 'both', which = 'major', labelsize = 10) # add the title if title is not None: subplot.set_title(title, fontsize = 14) if patchcolor is None: facecolor = (1,0,0,0.) else: facecolor = patchcolor # open up and show the boundary b = Reader(bfile, shapeType = 5) boundary = b.shape(0) points = numpy.array(boundary.points) subplot.add_patch(self.make_patch(points, facecolor, width = 0.5)) # open up and show the catchments c = Reader(cfile, shapeType = 5) extent = self.get_boundaries(c.shapes(), space = 0.02) xmin, ymin, xmax, ymax = extent # figure out how far one foot is on the map points_per_width = 72 * 8 ft_per_km = 3280.84 scale_factor = (points_per_width / self.get_distance([xmin, ymin], [xmax, ymin]) / ft_per_km) # make patches of the catchment area for i in range(len(c.records())): catchment = c.shape(i) points = numpy.array(catchment.points) subplot.add_patch(self.make_patch(points, facecolor, width = 0.1)) # get the flowline attributes, make an "updown" dictionary to follow # flow, and change the keys to comids with open(VAAfile, 'rb') as f: flowlineVAAs = pickle.load(f) updown = {} for f in flowlineVAAs: if flowlineVAAs[f].down in flowlineVAAs: updown[flowlineVAAs[f].comid] = \ flowlineVAAs[flowlineVAAs[f].down].comid flowlineVAAs = {flowlineVAAs[f].comid:flowlineVAAs[f] for f in flowlineVAAs} # open up and show the flowfiles f = Reader(flowfile, shapeType = 3) comid_index = f.fields.index(['COMID', 'N', 9, 0]) - 1 all_comids = [r[comid_index] for r in f.records()] # get the flows and velocities from the dictionary widths = [] comids = [] for comid in all_comids: if comid in flowlineVAAs: flow = flowlineVAAs[comid].flow velocity = flowlineVAAs[comid].velocity # estimate flow width (ft) assuming triangular 90 d channel comids.append(comid) widths.append(numpy.sqrt(4 * flow / velocity)) # convert widths in feet to points on the figure; exaggerated by 10 widths = [w * scale_factor * 20 for w in widths] # get the flowline and the corresponding catchment for comid, w in zip(comids, widths): i = all_comids.index(comid) flowline = numpy.array(f.shape(i).points) # plot it subplot.plot(flowline[:, 0], flowline[:, 1], 'b', lw = w) subplot.set_xlabel('Longitude, Decimal Degrees', size = 13) subplot.set_ylabel('Latitude, Decimal Degrees', size = 13) # add the NED raster im = self.add_raster(subplot, elevfile, resolution, extent, colormap, 100) divider = make_axes_locatable(subplot) cax = divider.append_axes('right', size = 0.16, pad = 0.16) colorbar = fig.colorbar(im, cax = cax, orientation = 'vertical') colorbar.set_label('Elevation, m', size = 12) cbax = pyplot.axes(colorbar.ax) for t in cbax.get_yaxis().get_majorticklabels(): t.set_fontsize(10) subplot.xaxis.set_major_locator(ticker.MultipleLocator(0.2)) subplot.yaxis.set_major_locator(ticker.MultipleLocator(0.2)) if grid: subplot.xaxis.grid(True, 'minor', linestyle = '-', linewidth = 0.5) subplot.yaxis.grid(True, 'minor', linestyle = '-', linewidth = 0.5) # show it pyplot.tight_layout() if output is not None: pyplot.savefig(output) if show: pyplot.show() pyplot.close() pyplot.clf()
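# the line widths above come from the estimate sqrt(4 * flow / velocity) for
# the top width of a 90-degree triangular channel: continuity gives the
# cross-sectional area A = Q / v, a 90-degree V-shaped section with depth d
# has A = (1/2) * (2d) * d = d**2, and the top width is 2d = sqrt(4 * Q / v).
# a quick numerical check of that identity with illustrative values:

import math

flow, velocity = 200.0, 2.5            # illustrative values in cfs and ft/s

area  = flow / velocity                # continuity: A = Q / v (ft2)
depth = math.sqrt(area)                # 90-degree V-notch: A = d**2
width = 2 * depth                      # top width of the triangle (ft)

print(width, math.sqrt(4 * flow / velocity))    # both 17.888...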
def extract_HUC8(self, HUC8, output, gagefile = 'gagestations',
                 verbose = True):
    """Extracts the USGS gage stations for a watershed from the gage
    station shapefile into a shapefile for the 8-digit hydrologic unit
    code of interest.
    """

    # make sure the metadata exist locally

    self.download_metadata()

    # make sure the output destination exists

    if not os.path.isdir(output): os.mkdir(output)

    sfile = '{}/{}'.format(output, gagefile)
    if not os.path.isfile(sfile + '.shp'):

        # copy the projection

        shutil.copy(self.NWIS + '.prj', sfile + '.prj')

        # read the file

        gagereader  = Reader(self.NWIS, shapeType = 1)
        gagerecords = gagereader.records()

        # pull out the HUC8 record to parse the dataset

        HUC8_index = gagereader.fields.index(['HUC', 'C', 8, 0]) - 1

        # iterate through the field and find gages in the watershed

        its = HUC8, sfile
        print('extracting gage stations in {} to {}\n'.format(*its))

        gage_indices = []

        i = 0
        for record in gagerecords:
            if record[HUC8_index] == HUC8: gage_indices.append(i)
            i += 1

        # write the data from the HUC8 to a new shapefile

        w = Writer(shapeType = 1)

        for field in gagereader.fields: w.field(*field)

        for i in gage_indices:
            point = gagereader.shape(i).points[0]
            w.point(*point)
            w.record(*gagerecords[i])

        w.save(sfile)

        if verbose:
            print('successfully extracted NWIS gage stations\n')

    elif verbose: print('gage station file {} exists\n'.format(sfile))

    self.set_metadata(sfile)
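# note that the Writer usage above follows the older pyshp 1.x API
# (Writer(shapeType = 1) followed by w.save(target)). if this were run with
# pyshp 2.x, the equivalent pattern is, to the best of my knowledge, to pass
# the target to the constructor and call close(); a sketch of the same
# filtering step in that style (function name and structure are illustrative).

from shapefile import Reader, Writer, POINT

def extract_gages_huc8(source, target, HUC8):
    """Copy the gage points whose 'HUC' attribute equals HUC8 (pyshp 2.x)."""

    r = Reader(source)

    # skip the DeletionFlag entry when working with the field list

    huc_index = [f[0] for f in r.fields[1:]].index('HUC')

    w = Writer(target, shapeType = POINT)
    for field in r.fields[1:]: w.field(*field)

    for shaperec in r.shapeRecords():
        if shaperec.record[huc_index] == HUC8:
            x, y = shaperec.shape.points[0]
            w.point(x, y)
            w.record(*shaperec.record)

    w.close()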
def make_timeseries(directory, HUC8, start, end, evapstations = None,
                    plot = True):
    """Makes an hourly timeseries of the reference evapotranspiration using
    the ASCE hourly Penman-Monteith Equation."""

    nrcm = '{}/{}/NRCM'.format(directory, HUC8)

    # start and end datetime instances

    s = datetime.datetime(start, 1, 1)
    e = datetime.datetime(end, 1, 1)

    # average the time series together from the NRCM simulation

    average_timeseries(nrcm)

    # open the watershed info to use to make subbasin precipitation

    watershedfile = '{}/{}/watershed'.format(directory, HUC8)

    with open(watershedfile, 'rb') as f: watershed = pickle.load(f)

    make_precipitation(watershed.subbasins, nrcm)

    # convert temperature and humidity to dewpoint

    make_dewpoint('{}/{}/NRCM/averages'.format(directory, HUC8))

    # open the 3-hr temperature, solar, and dewpoint, and daily wind files

    tempfile  = '{}/averages/average_temperature'.format(nrcm)
    solarfile = '{}/averages/average_solar'.format(nrcm)
    dewfile   = '{}/averages/average_dewpoint'.format(nrcm)
    windfile  = '{}/averages/average_wind'.format(nrcm)

    # watershed timeseries

    output = '{}/watershedtimeseries'.format(nrcm)

    if not os.path.isdir(output): os.mkdir(output)

    hourlytemp  = '{}/hourlytemperature'.format(output)
    hourlysolar = '{}/hourlysolar'.format(output)
    dailydew    = '{}/dewpoint'.format(output)
    dailywind   = '{}/wind'.format(output)
    hourlyRET   = '{}/hourlyRET'.format(output)
    hourlyPETs  = '{}/hourlyPETs'.format(output)

    if not os.path.isfile(hourlyRET):

        print('calculating an hourly time series for the reference ET...\n')

        # open the bounding box and get the mean lat, lon, and elevation

        f = '{0}/{1}/{1}boundaries'.format(directory, HUC8)

        sh = Reader(f)

        record = sh.record(0)
        lon, lat, elev = record[-3:]

        with open(windfile, 'rb') as f:  ts, Ws   = zip(*pickle.load(f))
        with open(tempfile, 'rb') as f:  ts, Ts   = zip(*pickle.load(f))
        with open(solarfile, 'rb') as f: ts, Ss   = zip(*pickle.load(f))
        with open(dewfile, 'rb') as f:   ts, dews = zip(*pickle.load(f))

        # dump the daily series

        with open(dailydew, 'wb') as f:
            pickle.dump((s, 1440, list(dews)), f)
        with open(dailywind, 'wb') as f:
            pickle.dump((s, 1440, list(Ws)), f)

        # dump all the hourly series and convert the solar radiation
        # from Watts/m2 to MJ/hour/m2

        temp  = [T for T in Ts for i in range(3)]
        solar = [S for S in Ss for i in range(3)]

        with open(hourlysolar, 'wb') as f: pickle.dump((s, 60, solar), f)
        with open(hourlytemp, 'wb') as f:  pickle.dump((s, 60, temp), f)

        # convert to hourly numpy arrays

        temp     = numpy.array(temp)
        solar    = numpy.array(solar) * 3600 / 10**6
        wind     = numpy.array([w for w in Ws for i in range(24)])
        dewpoint = numpy.array([T for T in dews for i in range(24)])

        # dates

        dates = [s + i * datetime.timedelta(hours = 1)
                 for i in range(len(solar))]

        RET = penman_hourly(lat, lon, elev, dates, temp, dewpoint, solar,
                            wind, verbose = False)

        # dump the timeseries

        with open(hourlyRET, 'wb') as f: pickle.dump((s, 60, RET), f)

    if not os.path.isfile(hourlyRET + '.png'):

        with open('{}/hourlytemperature'.format(output), 'rb') as f:
            s, t, temp = pickle.load(f)
        with open('{}/dewpoint'.format(output), 'rb') as f:
            s, t, dewpoint = pickle.load(f)
        with open('{}/wind'.format(output), 'rb') as f:
            s, t, wind = pickle.load(f)
        with open('{}/hourlysolar'.format(output), 'rb') as f:
            s, t, solar = pickle.load(f)
        with open(hourlyRET, 'rb') as f:
            s, t, hRET = pickle.load(f)

        # Watts/m2 to kW hr/m2

        solar = [s * 0.024 for s in solar]

        if evapstations is not None:
            with open(evapstations, 'rb') as f:
                evaporations = pickle.load(f)
        else:
            evaporations = {}

        plot_hourlyET(HUC8, s, e, evaporations, [hRET], temp, dewpoint,
                      wind, solar, fill = True,
                      colors = ['green', 'yellow', 'orange', 'red'],
                      output = hourlyRET)

    if not os.path.isfile(hourlyPETs):
        calculate_cropPET(directory, HUC8, s, e, output = output,
                          evaporations = False)
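# two small conventions in make_timeseries are easy to miss: the 3-hourly
# NRCM series are disaggregated to hourly simply by repeating each value, and
# the solar radiation is converted from W/m2 to MJ/m2/hour with a factor of
# 3600 / 10**6. a compact illustration of both steps with made-up values:

import numpy

three_hourly_watts = [250.0, 400.0, 180.0]    # illustrative W/m2 values

# repeat each 3-hour value three times to get an hourly series

hourly_watts = [v for v in three_hourly_watts for i in range(3)]

# 1 W/m2 sustained for an hour is 3600 J/m2 = 3600 / 10**6 MJ/m2

hourly_MJ = numpy.array(hourly_watts) * 3600 / 10**6

print(len(hourly_watts), hourly_MJ[:3])       # 9 values; the first three 0.9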
def plot_climate(HUC8, sfile, bfile, pfile = None, efile = None, tfile = None, snowfile = None, centroids = True, radius = None, patchcolor = None, solarfile = None, windfile = None, output = None, show = False, verbose = True): """Makes a plot of all the hourly precipitation stations of a watershed defined by "bfile" with subbasin defined by "sfile" from the source precipitation shapefile "pfile".""" if verbose: print('generating plot of watershed %s NCDC stations\n' % HUC8) fig = pyplot.figure() subplot = fig.add_subplot(111, aspect = 'equal') subplot.tick_params(axis = 'both', which = 'major', labelsize = 10) # add the title description = 'Climate Data Stations' title = 'Cataloging Unit %s\n%s' % (HUC8, description) subplot.set_title(title, fontsize = 14) # open up and show the catchments if patchcolor is None: facecolor = (1,0,0,0.) else: facecolor = patchcolor b = Reader(bfile, shapeType = 5) points = np.array(b.shape(0).points) subplot.add_patch(make_patch(points, facecolor = facecolor, width = 1.)) extent = get_boundaries(b.shapes(), space = 0.02) xmin, ymin, xmax, ymax = extent # add the subbasin file s = Reader(sfile, shapeType = 5) # make patches of the subbasins for i in range(len(s.records())): shape = s.shape(i) points = np.array(shape.points) subplot.add_patch(make_patch(points, facecolor, width = 0.15)) plots = [] # keep track of the scatterplots names = [] # keep track of names for the legend # add the subbasin centroids if centroids: cx_index = s.fields.index(['CenX', 'N', 12, 6]) - 1 cy_index = s.fields.index(['CenY', 'N', 12, 6]) - 1 centroids = [[r[cx_index], r[cy_index]] for r in s.records()] xs, ys = zip(*centroids) cplot = subplot.scatter(xs, ys, marker = '+', c = 'pink', s = 15) plots.append(cplot) names.append('Centroids') # add a circle showing around subbasin "radius" showing the gages within # the radius for a given subbasin if radius is not None: comid_index = s.fields.index(['ComID', 'N', 9, 0]) - 1 cx_index = s.fields.index(['CenX', 'N', 12, 6]) - 1 cy_index = s.fields.index(['CenY', 'N', 12, 6]) - 1 area_index = s.fields.index(['AreaSqKm', 'N', 10, 2]) - 1 comids = ['{}'.format(r[comid_index]) for r in s.records()] cxs = [r[cx_index] for r in s.records()] cys = [r[cy_index] for r in s.records()] areas = [r[area_index] for r in s.records()] try: i = comids.index(radius) except: i = 0 c = [cxs[i], cys[i]] radii = [math.sqrt(a / math.pi) for a in areas] # scale kms to degrees km = get_distance([xmin, ymin], [xmax, ymax]) deg = math.sqrt((xmin - xmax)**2 + (ymax - ymin)**2) r = sum(radii) / len(radii) * deg / km * 5 circle = pyplot.Circle(c, radius = r, edgecolor = 'black', facecolor = 'yellow', alpha = 0.5) subplot.add_patch(circle) subplot.scatter(c[0], c[1], marker = '+', c = 'black') # add the precipitation gage points if pfile is not None: with open(pfile, 'rb') as f: precips = pickle.load(f) gage_points = [(p.longitude, p.latitude) for p in precips.values()] x1, y1 = zip(*gage_points) plots.append(subplot.scatter(x1, y1, marker = 'o', c = 'b')) names.append('Precipitation') # add the pan evaporation points if efile is not None: with open(efile, 'rb') as f: evaps = pickle.load(f) gage_points = [(e.longitude, e.latitude) for e in evaps.values()] x2, y2 = zip(*gage_points) eplot = subplot.scatter(x2, y2, s = evaps, marker = 'o', c = 'g') plots.append(eplot) names.append('Pan Evaporation') # add the temperature station points if tfile is not None: with open(tfile, 'rb') as f: temps = pickle.load(f) gage_points = [(t.longitude, t.latitude) for t in 
temps.values()] x2, y2 = zip(*gage_points) plots.append(subplot.scatter(x2, y2, marker = 's', c = 'red')) names.append('Temperature') # add the snowdepth station points if snowfile is not None: with open(snowfile, 'rb') as f: snows = pickle.load(f) snow_points = [(s.longitude, s.latitude) for s in snows.values()] x2, y2 = zip(*snow_points) plots.append(subplot.scatter(x2, y2, marker = 'o', c = 'gray', alpha = 0.5)) names.append('Snow') # add the solar radiation files if solarfile is not None: with open(solarfile, 'rb') as f: solar = pickle.load(f) points = [(s.longitude, s.latitude) for s in solar.values()] x2, y2 = zip(*points) plots.append(subplot.scatter(x2, y2, marker = 'o', c = 'orange')) names.append('Solar') # add the wind files if windfile is not None: with open(windfile, 'rb') as f: wind = pickle.load(f) points = [(w.longitude, w.latitude) for w in wind.values()] x2, y2 = zip(*points) plots.append(subplot.scatter(x2, y2, marker = 'o', c = 'pink')) names.append('Wind') # add a legend leg = subplot.legend(plots, names, loc = 'upper center', ncol = 3, bbox_to_anchor = (0.5, -0.15)) legtext = leg.get_texts() pyplot.setp(legtext, fontsize = 10) #subplot.set_position([0.125, 0.1, 0.6, 0.8]) # set the labels subplot.set_xlabel('Longitude, Decimal Degrees', size = 13) subplot.set_ylabel('Latitude, Decimal Degrees', size = 13) # show it if output is not None: pyplot.savefig(output) if show: pyplot.show() pyplot.clf() pyplot.close()
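# the highlight circle above converts a radius in kilometers to map degrees
# with the rough ratio of the plot diagonal measured both ways and a fivefold
# exaggeration. the sketch below mirrors that conversion; the haversine
# formula is only an assumption about what get_distance computes, and the
# extent, radius, and factor of 5 are illustrative.

import math

def haversine_km(p1, p2):
    """Great-circle distance in km between two (lon, lat) points."""

    lon1, lat1, lon2, lat2 = map(math.radians, (*p1, *p2))
    a = (math.sin((lat2 - lat1) / 2)**2 +
         math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2)**2)
    return 2 * 6371 * math.asin(math.sqrt(a))

xmin, ymin, xmax, ymax = -92.0, 41.5, -91.0, 42.5    # illustrative extent

km  = haversine_km((xmin, ymin), (xmax, ymax))        # diagonal in km
deg = math.sqrt((xmax - xmin)**2 + (ymax - ymin)**2)  # same diagonal, degrees

radius_km  = 10.0                                     # mean subbasin radius
radius_deg = radius_km * deg / km * 5                 # exaggerated, as above

print(round(radius_deg, 3))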