def merge_shapes(inputfile,
                 outputfile = None,
                 overwrite = False,
                 verbose = True,
                 vverbose = False,
                 ):
    """
    Merges all the shapes in a shapefile into a single shape.
    """

    # note: assign to "outputfile" (not "output") so the default is used below

    if outputfile is None: outputfile = '{}/merged'.format(os.getcwd())

    if os.path.isfile(outputfile + '.shp') and not overwrite:
        if verbose:
            print('combined watershed shapefile {} exists'.format(outputfile))
        return

    if verbose:
        print('combining shapes from {}\n'.format(inputfile) +
              'this may take a while...\n')

    # start by copying the projection files

    shutil.copy(inputfile + '.prj', outputfile + '.prj')

    # load the catchment and flowline shapefiles

    r = Reader(inputfile, shapeType = 5)

    try:
        combined = combine_shapes(r.shapes(), verbose = vverbose)
    except:
        print('error: unable to combine shapes')
        raise

    # create the new file with the merged shapes

    w = Writer(shapeType = 5)

    w.poly(shapeType = 5, parts = [combined])

    # copy the fields from the original and then the first record; note this
    # can be adapted as needed

    for field in r.fields: w.field(*field)
    w.record(*r.record(0))

    w.save(outputfile)

    if verbose:

        its = inputfile, outputfile
        print('successfully combined shapes from {} to {}\n'.format(*its))
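# a minimal usage sketch for merge_shapes (hypothetical file names; assumes
# "catchments.shp" and "catchments.prj" exist in the working directory and
# that the merged boundary should be written to "boundary.shp"):
#
#     merge_shapes('catchments', outputfile = 'boundary', verbose = True)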
def make_timeseries(directory, HUC8, start, end, evapstations = None,
                    plot = True):
    """Makes an hourly timeseries of the reference evapotranspiration
    using the ASCE hourly Penman-Monteith Equation."""

    nrcm = '{}/{}/NRCM'.format(directory, HUC8)

    # start and end datetime instances

    s = datetime.datetime(start, 1, 1)
    e = datetime.datetime(end, 1, 1)

    # average the time series together from the NRCM simulation

    average_timeseries(nrcm)

    # open the watershed info to use to make subbasin precipitation

    watershedfile = '{}/{}/watershed'.format(directory, HUC8)

    with open(watershedfile, 'rb') as f: watershed = pickle.load(f)

    make_precipitation(watershed.subbasins, nrcm)

    # convert temperature and humidity to dewpoint

    make_dewpoint('{}/{}/NRCM/averages'.format(directory, HUC8))

    # open the 3-hr temperature, solar, and dewpoint, and daily wind files

    tempfile  = '{}/averages/average_temperature'.format(nrcm)
    solarfile = '{}/averages/average_solar'.format(nrcm)
    dewfile   = '{}/averages/average_dewpoint'.format(nrcm)
    windfile  = '{}/averages/average_wind'.format(nrcm)

    # watershed timeseries

    output = '{}/watershedtimeseries'.format(nrcm)

    if not os.path.isdir(output): os.mkdir(output)

    hourlytemp  = '{}/hourlytemperature'.format(output)
    hourlysolar = '{}/hourlysolar'.format(output)
    dailydew    = '{}/dewpoint'.format(output)
    dailywind   = '{}/wind'.format(output)
    hourlyRET   = '{}/hourlyRET'.format(output)
    hourlyPETs  = '{}/hourlyPETs'.format(output)

    if not os.path.isfile(hourlyRET):

        print('calculating an hourly time series for the reference ET...\n')

        # open the bounding box and get the mean lat, lon, and elevation

        f  = '{0}/{1}/{1}boundaries'.format(directory, HUC8)
        sh = Reader(f)

        record = sh.record(0)
        lon, lat, elev = record[-3:]

        with open(windfile, 'rb')  as f: ts, Ws   = zip(*pickle.load(f))
        with open(tempfile, 'rb')  as f: ts, Ts   = zip(*pickle.load(f))
        with open(solarfile, 'rb') as f: ts, Ss   = zip(*pickle.load(f))
        with open(dewfile, 'rb')   as f: ts, dews = zip(*pickle.load(f))

        # dump the daily series

        with open(dailydew, 'wb')  as f: pickle.dump((s, 1440, list(dews)), f)
        with open(dailywind, 'wb') as f: pickle.dump((s, 1440, list(Ws)), f)

        # dump all the hourly series and convert the solar radiation
        # from Watts/m2 to MJ/hour/m2

        temp  = [T for T in Ts for i in range(3)]
        solar = [S for S in Ss for i in range(3)]

        with open(hourlysolar, 'wb') as f: pickle.dump((s, 60, solar), f)
        with open(hourlytemp, 'wb')  as f: pickle.dump((s, 60, temp), f)

        # convert to hourly numpy arrays

        temp     = numpy.array(temp)
        solar    = numpy.array(solar) * 3600 / 10**6
        wind     = numpy.array([w for w in Ws   for i in range(24)])
        dewpoint = numpy.array([T for T in dews for i in range(24)])

        # dates

        dates = [s + i * datetime.timedelta(hours = 1)
                 for i in range(len(solar))]

        RET = penman_hourly(lat, lon, elev, dates, temp, dewpoint, solar,
                            wind, verbose = False)

        # dump the timeseries

        with open(hourlyRET, 'wb') as f: pickle.dump((s, 60, RET), f)

    if not os.path.isfile(hourlyRET + '.png'):

        with open('{}/hourlytemperature'.format(output), 'rb') as f:
            s, t, temp = pickle.load(f)
        with open('{}/dewpoint'.format(output), 'rb') as f:
            s, t, dewpoint = pickle.load(f)
        with open('{}/wind'.format(output), 'rb') as f:
            s, t, wind = pickle.load(f)
        with open('{}/hourlysolar'.format(output), 'rb') as f:
            s, t, solar = pickle.load(f)
        with open(hourlyRET, 'rb') as f:
            s, t, hRET = pickle.load(f)

        # Watts/m2 to kW hr/m2 (use "v" to avoid shadowing the start date "s")

        solar = [v * 0.024 for v in solar]

        if evapstations is not None:
            with open(evapstations, 'rb') as f: evaporations = pickle.load(f)
        else:
            evaporations = {}

        plot_hourlyET(HUC8, s, e, evaporations, [hRET], temp, dewpoint,
                      wind, solar, fill = True,
                      colors = ['green', 'yellow', 'orange', 'red'],
                      output = hourlyRET)

    if not os.path.isfile(hourlyPETs):
        calculate_cropPET(directory, HUC8, s, e, output = output,
                          evaporations = False)
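# a usage sketch for make_timeseries (hypothetical directory, HUC8, and
# simulation period; assumes the NRCM output already exists on disk):
#
#     make_timeseries('data', '07080106', 1988, 2010, evapstations = None)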
def plot_gage_subbasin(self, hspfmodel, folder):
    """Makes a plot of the subbasin area."""

    subbasinfile  = '{}/subbasins'.format(folder)
    boundaryfile  = '{}/boundary'.format(folder)
    flowfile      = '{}/flowlines'.format(folder)
    combinedfile  = '{}/combined'.format(folder)
    watershedplot = '{}/watershed.png'.format(folder)

    # make a shapefile of the subbasins for the watershed

    f = '{0}/{1}/{1}subbasins'.format(self.directory, self.HUC8)
    for out in (subbasinfile, boundaryfile, flowfile, combinedfile):
        if not os.path.isfile(out + '.prj'):
            shutil.copy(f + '.prj', out + '.prj')

    if not os.path.isfile(subbasinfile + '.shp'):

        subshapes  = []
        subrecords = []
        for subbasin in hspfmodel.subbasins:

            f = '{0}/{1}/{2}/combined'.format(self.directory, self.HUC8,
                                              subbasin)
            s = Reader(f, shapeType = 5)

            subshapes.append(s.shape(0).points)
            subrecords.append(s.record(0))

        w = Writer(shapeType = 5)

        for field in s.fields:    w.field(*field)
        for record in subrecords: w.record(*record)
        for shape in subshapes:   w.poly(shapeType = 5, parts = [shape])

        w.save(subbasinfile)

    if not os.path.isfile(combinedfile + '.shp'):

        fshapes  = []
        frecords = []
        for subbasin in hspfmodel.subbasins:

            f = '{0}/{1}/{2}/combined_flowline'.format(self.directory,
                                                       self.HUC8,
                                                       subbasin)
            r = Reader(f, shapeType = 3)

            fshapes.append(r.shape(0).points)
            frecords.append(r.record(0))

        w = Writer(shapeType = 3)

        for field in r.fields:  w.field(*field)
        for record in frecords: w.record(*record)
        for shape in fshapes:   w.poly(shapeType = 3, parts = [shape])

        w.save(combinedfile)

    # merge the shapes into a watershed boundary

    if not os.path.exists(boundaryfile + '.shp'):
        merge_shapes(subbasinfile, outputfile = boundaryfile)

    # make a flowline file for the subbasins for the watershed

    if not os.path.isfile(flowfile + '.shp'):

        shapes  = []
        records = []
        for subbasin in hspfmodel.subbasins:

            f = '{0}/{1}/{2}/flowlines'.format(self.directory, self.HUC8,
                                               subbasin)
            r = Reader(f, shapeType = 3)
            for shape in r.shapes():   shapes.append(shape.points)
            for record in r.records(): records.append(record)

        w = Writer(shapeType = 3)

        for field in r.fields: w.field(*field)
        for record in records: w.record(*record)
        for shape in shapes:   w.poly(shapeType = 3, parts = [shape])

        w.save(flowfile)

    if not os.path.isfile(watershedplot):

        # note: this calls an externally-defined plotting routine that
        # shares this method's name (presumably imported at module level),
        # not the method itself

        plot_gage_subbasin(folder, self.HUC8, self.gageid, hspfmodel,
                           output = watershedplot)
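# a usage sketch for the method above (hypothetical names; "processor" is an
# instance of the enclosing class and "hspfmodel" a built HSPF model):
#
#     processor.plot_gage_subbasin(hspfmodel, 'output/05472500')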
# the result is the same as the previous example. as before, the
# subbasin_catchments shapefile that contains the centroid for each
# aggregation will be used

sf = Reader(filename)

# index of the comid, latitude, and longitude records

comid_index = [f[0] for f in sf.fields].index('ComID') - 1
lon_index   = [f[0] for f in sf.fields].index('CenX')  - 1
lat_index   = [f[0] for f in sf.fields].index('CenY')  - 1

# iterate through the shapefile records and aggregate the timeseries

for i in range(len(sf.records())):

    record = sf.record(i)
    comid  = record[comid_index]
    lon    = record[lon_index]
    lat    = record[lat_index]

    # use "its" for the print tuple to avoid clobbering the loop variable

    its = comid, lon, lat
    print('aggregating timeseries for comid {} at {}, {}\n'.format(*its))

    precipitation = processor.aggregate('precip3240', 'precip', start, end,
                                        method = 'IDWA', longitude = lon,
                                        latitude = lat)

    mean = sum(precipitation) / (end - start).days * 365.25

    print('aggregated annual average precipitation: {:.1f} in\n'.format(mean))
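# for reference, a standalone sketch of the inverse-distance weighted
# averaging (IDWA) idea behind the aggregate call above (hypothetical helper,
# not the library implementation; the exponent p = 2 is a common choice but
# an assumption here):

def idwa(values, distances, p = 2):
    """Weights each station value by 1 / distance**p and normalizes."""

    weights = [1 / d**p for d in distances]
    return sum(w * v for w, v in zip(weights, values)) / sum(weights)

# e.g., three stations 5, 10, and 20 km from the centroid

print(idwa([1.2, 0.8, 1.5], [5, 10, 20]))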
def climate(self,
            HUC8,
            s,
            e,
            verbose = True,
            ):

    subbasinfile = '{}/subbasin_catchments'.format(self.hydrography)

    climatedata = '{}/{}/climate'.format(self.output, HUC8)

    # make a directory for the climate data and time series

    if not os.path.isdir(climatedata): os.mkdir(climatedata)

    # use the ClimateProcessor to get the data

    climateprocessor = ClimateProcessor()
    climateprocessor.download_shapefile(subbasinfile, s, e, climatedata,
                                        space = 0.5)

    # make directories for hourly and daily aggregated timeseries

    hourly = '{}/hourly'.format(climatedata)
    daily  = '{}/daily'.format(climatedata)

    if not os.path.isdir(hourly): os.mkdir(hourly)
    if not os.path.isdir(daily):  os.mkdir(daily)

    # aggregate the daily GSOD tmin, tmax, dewpoint, and wind data

    tmin = '{}/tmin'.format(daily)
    tmax = '{}/tmax'.format(daily)
    dewt = '{}/dewpoint'.format(daily)
    wind = '{}/wind'.format(daily)

    if not os.path.isfile(tmin):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'tmin', s, e)
        with open(tmin, 'wb') as f: pickle.dump(ts, f)

    if not os.path.isfile(tmax):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'tmax', s, e)
        with open(tmax, 'wb') as f: pickle.dump(ts, f)

    if not os.path.isfile(dewt):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'dewpoint', s, e)
        with open(dewt, 'wb') as f: pickle.dump(ts, f)

    if not os.path.isfile(wind):
        ts = s, 1440, climateprocessor.aggregate('GSOD', 'wind', s, e)
        with open(wind, 'wb') as f: pickle.dump(ts, f)

    # aggregate the daily GHCND snowfall and snowdepth data

    snowfall  = '{}/snowfall'.format(daily)
    snowdepth = '{}/snowdepth'.format(daily)

    if not os.path.isfile(snowfall):
        ts = s, 1440, climateprocessor.aggregate('GHCND', 'snowfall', s, e)
        with open(snowfall, 'wb') as f: pickle.dump(ts, f)

    if not os.path.isfile(snowdepth):
        ts = s, 1440, climateprocessor.aggregate('GHCND', 'snowdepth', s, e)
        with open(snowdepth, 'wb') as f: pickle.dump(ts, f)

    # find stations with pan evaporation data from GHCND

    evapstations = []
    for k, v in climateprocessor.metadata.ghcndstations.items():

        # check if the station has any evaporation data

        if v['evap'] > 0:

            # open up the file and get the data

            with open(k, 'rb') as f: station = pickle.load(f)

            data = station.make_timeseries('evaporation', s, e)

            # ignore datasets with no observations during the period

            observations = [o for o in data if o is not None]

            if len(observations) > 0: evapstations.append(k)

    # aggregate the hourly NSRDB metstat data

    hsolar = '{}/solar'.format(hourly)
    if not os.path.isfile(hsolar):
        ts = s, 60, climateprocessor.aggregate('NSRDB', 'metstat', s, e)
        with open(hsolar, 'wb') as f: pickle.dump(ts, f)

    # aggregate the hourly solar to daily

    dsolar = '{}/solar'.format(daily)
    if not os.path.isfile(dsolar):

        with open(hsolar, 'rb') as f: t, tstep, data = pickle.load(f)

        ts = s, 1440, [sum(data[i:i+24]) / 24
                       for i in range(0, 24 * (e - s).days, 24)]

        with open(dsolar, 'wb') as f: pickle.dump(ts, f)

    # aggregate the hourly precipitation for each subbasin using IDWA

    precip = '{}/hourlyprecipitation'.format(climatedata)
    if not os.path.isdir(precip): os.mkdir(precip)

    # use the subbasin shapefile to get the location of the centroids

    sf = Reader(subbasinfile)

    # index of the comid, latitude, and longitude records

    comid_index = [f[0] for f in sf.fields].index('ComID')    - 1
    lon_index   = [f[0] for f in sf.fields].index('CenX')     - 1
    lat_index   = [f[0] for f in sf.fields].index('CenY')     - 1
    elev_index  = [f[0] for f in sf.fields].index('AvgElevM') - 1
    area_index  = [f[0] for f in sf.fields].index('AreaSqKm') - 1

    # iterate through the shapefile records and aggregate the timeseries

    for i in range(len(sf.records())):

        record = sf.record(i)
        comid  = record[comid_index]
        lon    = record[lon_index]
        lat    = record[lat_index]

        # check if the aggregated time series exists or calculate it

        subbasinprecip = '{}/{}'.format(precip, comid)
        if not os.path.isfile(subbasinprecip):

            if verbose:
                its = comid, lon, lat
                print('aggregating timeseries for comid ' +
                      '{} at {}, {}\n'.format(*its))

            p = climateprocessor.aggregate('precip3240', 'precip', s, e,
                                           method = 'IDWA', longitude = lon,
                                           latitude = lat)

            ts = s, 60, p
            with open(subbasinprecip, 'wb') as f: pickle.dump(ts, f)

    # make a directory for the evapotranspiration time series

    evapotranspiration = '{}/evapotranspiration'.format(climatedata)
    if not os.path.isdir(evapotranspiration): os.mkdir(evapotranspiration)

    # use the ETCalculator to calculate the ET time series

    etcalculator = ETCalculator()

    # get the centroid of the watershed from the subbasin shapefile

    areas = [r[area_index] for r in sf.records()]
    xs    = [r[lon_index]  for r in sf.records()]
    ys    = [r[lat_index]  for r in sf.records()]
    zs    = [r[elev_index] for r in sf.records()]

    # get the areal-weighted averages

    lon  = sum([a * x for a, x in zip(areas, xs)]) / sum(areas)
    lat  = sum([a * y for a, y in zip(areas, ys)]) / sum(areas)
    elev = sum([a * z for a, z in zip(areas, zs)]) / sum(areas)

    # add them to the ETCalculator

    etcalculator.add_location(lon, lat, elev)

    # check if the daily RET exists; otherwise calculate it

    dRET = '{}/dailyRET'.format(evapotranspiration)
    if not os.path.isfile(dRET):

        # add the daily time series to the calculator

        with open(tmin, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('tmin', tstep, t, data)

        with open(tmax, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('tmax', tstep, t, data)

        with open(dewt, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('dewpoint', tstep, t, data)

        with open(wind, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('wind', tstep, t, data)

        with open(dsolar, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('solar', tstep, t, data)

        # calculate the daily RET

        etcalculator.penman_daily(s, e)

        ts = s, 1440, etcalculator.daily['RET'][1]
        with open(dRET, 'wb') as f: pickle.dump(ts, f)

    # disaggregate the daily temperature time series to hourly

    hourlytemp = '{}/temperature'.format(hourly)
    if not os.path.isfile(hourlytemp):

        if etcalculator.daily['tmin'] is None:
            with open(tmin, 'rb') as f: t, tstep, data = pickle.load(f)
            etcalculator.add_timeseries('tmin', tstep, t, data)

        if etcalculator.daily['tmax'] is None:
            with open(tmax, 'rb') as f: t, tstep, data = pickle.load(f)
            etcalculator.add_timeseries('tmax', tstep, t, data)

        # the disaggregated hourly series starts at s

        data  = etcalculator.interpolate_temperatures(s, e)
        tstep = 60
        ts    = s, tstep, data

        with open(hourlytemp, 'wb') as f: pickle.dump(ts, f)

        etcalculator.add_timeseries('temperature', tstep, s, data)

    # disaggregate the dewpoint and wind speed time series to hourly

    hourlydewt = '{}/dewpoint'.format(hourly)
    if not os.path.isfile(hourlydewt):

        if etcalculator.daily['dewpoint'] is None:
            with open(dewt, 'rb') as f: t, tstep, data = pickle.load(f)
        else:
            t, data = etcalculator.daily['dewpoint']

        tstep = 60
        data  = [v for v in data for i in range(24)]
        ts    = t, tstep, data

        with open(hourlydewt, 'wb') as f: pickle.dump(ts, f)

        etcalculator.add_timeseries('dewpoint', tstep, t, data)

    hourlywind = '{}/wind'.format(hourly)
    if not os.path.isfile(hourlywind):

        if etcalculator.daily['wind'] is None:
            with open(wind, 'rb') as f: t, tstep, data = pickle.load(f)
        else:
            t, data = etcalculator.daily['wind']

        tstep = 60
        data  = [v for v in data for i in range(24)]
        ts    = t, tstep, data

        with open(hourlywind, 'wb') as f: pickle.dump(ts, f)

        etcalculator.add_timeseries('wind', tstep, t, data)

    # check if the hourly RET exists; otherwise calculate it

    hRET = '{}/hourlyRET'.format(evapotranspiration)
    if not os.path.isfile(hRET):

        required = 'temperature', 'solar', 'dewpoint', 'wind'
        for tstype in required:

            if etcalculator.hourly[tstype] is None:

                name = '{}/{}'.format(hourly, tstype)
                with open(name, 'rb') as f: t, tstep, data = pickle.load(f)
                etcalculator.add_timeseries(tstype, tstep, t, data)

        # calculate and save the hourly RET

        etcalculator.penman_hourly(s, e)

        ts = s, 60, etcalculator.hourly['RET'][1]
        with open(hRET, 'wb') as f: pickle.dump(ts, f)

        # add the daily time series for the plot

        required = 'tmin', 'tmax', 'dewpoint', 'wind', 'solar'
        for tstype in required:

            if etcalculator.daily[tstype] is None:

                name = '{}/{}'.format(daily, tstype)
                with open(name, 'rb') as f: t, tstep, data = pickle.load(f)
                etcalculator.add_timeseries(tstype, tstep, t, data)

        # aggregate the hourly to daily for plotting

        hRET = etcalculator.hourly['RET'][1]
        dRET = [sum(hRET[i:i+24]) for i in range(0, len(hRET), 24)]
        etcalculator.add_timeseries('RET', 'daily', s, dRET)

        name = '{}/referenceET'.format(evapotranspiration)
        etcalculator.plotET(stations = evapstations, output = name,
                            show = False)

        name = '{}/dayofyearET'.format(evapotranspiration)
        etcalculator.plotdayofyear(stations = evapstations, output = name,
                                   show = False)

    # calculate hourly PET for different land use categories

    lucs = ('corn',
            'soybeans',
            'grains',
            'alfalfa',
            'fallow',
            'pasture',
            'wetlands',
            'others',
            )

    colors = ('yellow',
              'green',
              'brown',
              'lime',
              'gray',
              'orange',
              'blue',
              'black',
              )

    # plant dates and growth stage lengths (days)

    pdates = (datetime.datetime(2000, 4, 15),
              datetime.datetime(2000, 5, 15),
              datetime.datetime(2000, 4, 15),
              datetime.datetime(2000, 5, 15),
              datetime.datetime(2000, 3, 1),
              datetime.datetime(2000, 3, 1),
              datetime.datetime(2000, 3, 1),
              datetime.datetime(2000, 3, 1),
              )

    ems = (30, 20, 20,  10,  10,  10,  10,  10, )
    gs  = (50, 30, 30,  10,  10,  10,  10,  10, )
    fs  = (60, 60, 60, 120, 240, 240, 240, 240, )
    ls  = (40, 30, 40,  10,  10,  10,  10,  10, )

    # initial, mid-season, and late-season crop coefficients

    Kis = (0.30, 0.40, 0.30, 0.30, 0.30, 0.30, 1.00, 1.00, )
    Kms = (1.15, 1.15, 1.15, 0.95, 0.30, 0.85, 1.20, 1.00, )
    Kls = (0.40, 0.55, 0.40, 0.90, 0.30, 0.30, 1.00, 1.00, )

    # add the hourly RET time series if it isn't present

    if etcalculator.hourly['RET'] is None:

        with open(hRET, 'rb') as f: t, tstep, data = pickle.load(f)
        etcalculator.add_timeseries('RET', tstep, t, data)

    # iterate through the land use categories and calculate PET

    for i in zip(lucs, colors, pdates, ems, gs, fs, ls, Kis, Kms, Kls):

        crop, c, plant, emergence, growth, full, late, Ki, Km, Kl = i

        # add the information and calculate the PET time series

        etcalculator.add_crop(crop,
                              plant,
                              emergence,
                              growth,
                              full,
                              late,
                              Ki,
                              Km,
                              Kl,
                              )

        etcalculator.hourly_PET(crop, s, e)

        # get the PET time series

        t, PET = etcalculator.hourlyPETs[crop]
        ts = t, 60, PET

        # save it

        name = '{}/{}'.format(evapotranspiration, crop)
        with open(name, 'wb') as f: pickle.dump(ts, f)
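# a usage sketch for the climate method (hypothetical instance and HUC8;
# assumes the subbasin_catchments shapefile from earlier steps exists):

import datetime

s = datetime.datetime(2001, 1, 1)
e = datetime.datetime(2011, 1, 1)

preprocessor.climate('07080106', s, e)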
def extract_bbox(self, bbox, output, verbose = True):
    """Extracts the NID dam locations within a bounding box from the
    national dam shapefile."""

    self.download_compressed()

    xmin, ymin, xmax, ymax = bbox

    # copy the projection files

    if verbose: print('copying the projections from the NID source\n')

    projection = self.source + '.prj'

    shutil.copy(projection, output + '.prj')

    # get the dams within the watershed

    if verbose: print('reading the dam file\n')

    sf = Reader(self.source, shapeType = 1)

    # work around for issues with pyshp

    damrecords = []
    for i in range(len(sf.shapes())):
        try:    damrecords.append(sf.record(i))
        except: damrecords.append([-100 for j in range(len(sf.fields))])

    name_index  = sf.fields.index(['DAM_NAME',   'C', 65,   0]) - 1
    nid_index   = sf.fields.index(['NIDID',      'C', 7,    0]) - 1
    long_index  = sf.fields.index(['LONGITUDE',  'N', 19,  11]) - 1
    lat_index   = sf.fields.index(['LATITUDE',   'N', 19,  11]) - 1
    river_index = sf.fields.index(['RIVER',      'C', 65,   0]) - 1
    owner_index = sf.fields.index(['OWN_NAME',   'C', 65,   0]) - 1
    type_index  = sf.fields.index(['DAM_TYPE',   'C', 10,   0]) - 1
    purp_index  = sf.fields.index(['PURPOSES',   'C', 254,  0]) - 1
    year_index  = sf.fields.index(['YR_COMPL',   'C', 10,   0]) - 1
    high_index  = sf.fields.index(['NID_HEIGHT', 'N', 19,  11]) - 1
    mstor_index = sf.fields.index(['MAX_STOR',   'N', 19,  11]) - 1
    nstor_index = sf.fields.index(['NORMAL_STO', 'N', 19,  11]) - 1
    area_index  = sf.fields.index(['SURF_AREA',  'N', 19,  11]) - 1

    # iterate through the records and determine which points are in the box

    if verbose: print('extracting dams into new file\n')

    dam_indices = []

    i = 0
    for record in damrecords:

        lat = record[lat_index]
        lon = record[long_index]

        if self.inside_box([xmin, ymin], [xmax, ymax], [lon, lat]):
            dam_indices.append(i)
        i += 1

    # write the data from the bbox to a new shapefile

    w = Writer(shapeType = 1)

    for field in sf.fields: w.field(*field)

    for i in dam_indices:

        point = sf.shape(i).points[0]
        w.point(*point)

        values = damrecords[i]

        rs = []

        for value in values:

            if isinstance(value, bytes): value = value.decode('utf-8')
            rs.append(value)

        w.record(*rs)

    w.save(output)

    if verbose:

        print('successfully extracted NID dam locations to new file\n')
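# a usage sketch for extract_bbox (hypothetical instance, constructor, and
# bounding box in decimal degrees, ordered xmin, ymin, xmax, ymax):
#
#     nid = NIDExtractor('NID')  # hypothetical: path to the NID data
#     bbox = -94.0, 41.0, -92.0, 43.0
#     nid.extract_bbox(bbox, 'damfile')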
def plot_landuse(self,
                 landuse,
                 catchments,
                 attribute,
                 categoryfile,
                 output = None,
                 datatype = 'raw',
                 overwrite = False,
                 pixels = 1000,
                 border = 0.02,
                 lw = 0.5,
                 show = False,
                 verbose = True,
                 vverbose = False,
                 ):
    """
    Makes a plot of the landuse of a catchment shapefile on top of a
    raster landuse file.
    """

    if self.order is None:
        raise RuntimeError('error: no landuse aggregation file ' +
                           'information provided\n')

    self.read_categoryfile(categoryfile)

    if verbose: print('generating a {} land use plot\n'.format(datatype))

    # make the figure

    fig = pyplot.figure()
    subplot = fig.add_subplot(111, aspect = 'equal')
    subplot.tick_params(axis = 'both', which = 'major', labelsize = 11)

    # add the title

    if datatype == 'results': title = 'Land Use Fractions'
    else:                     title = 'Raw Land Use Data'

    subplot.set_title(title, size = 14)

    # open the shapefile and get the bounding box

    s = Reader(catchments, shapeType = 5)

    xmin, ymin, xmax, ymax = s.bbox

    # get the index of the field for the attribute matching

    index = [f[0] for f in s.fields].index(attribute) - 1

    # set up a custom colormap using the rgbs supplied in the aggregate file

    color_table = [(self.reds[g] / 255, self.greens[g] / 255,
                    self.blues[g] / 255) for g in self.order]

    cmap = colors.ListedColormap(color_table)

    # provide the cutoff boundaries for the mapping of values to the table

    bounds = [i - 0.5 for i in range(len(self.order) + 1)]

    # create a norm to map the bounds to the colors

    norm = colors.BoundaryNorm(bounds, cmap.N)

    # get the pixel width and origin

    w = (xmax - xmin) / pixels

    # calculate the image array height and the height of a pixel

    height = int(numpy.ceil((ymax - ymin) / (xmax - xmin) * pixels))
    h = (ymax - ymin) / height

    # set up the image array

    image_array = numpy.zeros((height, pixels), dtype = 'uint8')

    # get the land use fraction for each category

    if datatype == 'results':

        # iterate through the shapes and make patches

        for i in range(len(s.records())):

            comid  = s.record(i)[index]
            points = numpy.array(s.shape(i).points)

            # convert the shape to pixel coordinates

            pixel_polygon = [(get_pixel(x, xmin, w), get_pixel(y, ymin, h))
                             for x, y in points]

            # make a PIL image to use as a mask

            rasterpoly = Image.new('L', (pixels, height), 1)
            rasterize  = ImageDraw.Draw(rasterpoly)

            # rasterize the polygon

            rasterize.polygon(pixel_polygon, 0)

            # convert the PIL array to numpy boolean to use as a mask

            mask = 1 - numpy.array(rasterpoly)

            # get the total number of pixels in the shape

            tot = mask.sum()

            # iterate from left to right and get the fraction of the total
            # area inside the shape as a function of x (takes into account
            # the depth)

            fractions = [column.sum() / tot for column in mask.transpose()]
            area_cdf  = [sum(fractions[:i+1])
                         for i in range(len(fractions))]

            # convert the land use fractions into a land use cdf

            fractions = [self.landuse[comid][g] for g in self.order]
            land_cdf  = [sum(fractions[:i+1])
                         for i in range(len(fractions))]

            # use the area cdf to determine the break points for the land
            # use patches. note this array does not account for the masking
            # of the patch. thus there are n+1 vertical bands. the first
            # and last are the "empty" (first in the aggregate file). in
            # between, the break points are determined from the area cdf.

            color_array = numpy.zeros(len(mask[0]), dtype = 'uint8')

            # find the break point for each band by looping through the
            # land use cdf and filling from left to right

            i = 0
            for p, n in zip(land_cdf, range(len(self.order))):

                # move from left to right until the area cdf exceeds
                # the land use cdf

                while area_cdf[i] <= p:

                    color_array[i] = n
                    if i < len(area_cdf) - 1: i += 1
                    else: break

            # multiply the color band array by the mask to get the img

            sub_img = mask * color_array

            # add the new mask to the watershed image

            image_array = image_array + sub_img

            # add a patch for the shape boundary

            subplot.add_patch(self.make_patch(points, (1,0,0,0), width = lw))

        # show the bands (row 0 of the image corresponds to ymin, so use
        # origin "lower"; "upper left" is not a valid matplotlib origin)

        bbox = s.bbox[0], s.bbox[2], s.bbox[1], s.bbox[3]
        im = subplot.imshow(image_array, extent = bbox, origin = 'lower',
                            interpolation = 'nearest', cmap = cmap,
                            norm = norm)

        # adjust the plot bounding box

        xmin, xmax = xmin - border * (xmax-xmin), xmax + border * (xmax-xmin)
        ymin, ymax = ymin - border * (ymax-ymin), ymax + border * (ymax-ymin)

    else:

        # adjust the plot bounding box

        xmin, xmax = xmin - border * (xmax-xmin), xmax + border * (xmax-xmin)
        ymin, ymax = ymin - border * (ymax-ymin), ymax + border * (ymax-ymin)

        # pixel width in latitude

        pw = (xmax - xmin) / pixels

        # calculate the image height in pixels; note the height of the
        # pixels equals the width of the pixels and the image width in
        # pixels is "pixels"

        ny = int(numpy.ceil((ymax - ymin) / (xmax - xmin) * pixels))

        xs = numpy.array([xmin + (i + 0.5) * pw for i in range(pixels)])
        ys = numpy.array([ymin + (i + 0.5) * pw for i in range(ny)])

        # set up an array of values for the image

        zs = numpy.zeros((ny, pixels))

        for i in range(len(ys)):
            ps = [(x, ys[i]) for x in xs]
            zs[i, :] = numpy.array(get_raster(landuse, ps, quiet = True))

        zs = zs.astype(int)

        tot = zs.size

        for v in numpy.unique(zs):
            group = self.groups[v]
            i = self.order.index(group)
            zs[numpy.where(zs == v)] = i

        # plot the grid (row 0 corresponds to ymin, so use origin "lower")

        im = subplot.imshow(zs,
                            interpolation = 'nearest',
                            origin = 'lower',
                            extent = [xmin, xmax, ymin, ymax],
                            norm = norm,
                            cmap = cmap,
                            )

        # add patch for the shape boundary

        for shape in s.shapes():
            points = numpy.array(shape.points)
            subplot.add_patch(self.make_patch(points, (1,0,0,0),
                                              width = 0.5))

    # add the legend using a dummy box to make patches for the legend

    dummybox = [[0,0], [0,1], [1,1], [1,0], [0,0]]

    handles, labels = [], []
    for group, color in zip(self.order[1:], color_table[1:]):

        p = self.make_patch(dummybox, facecolor = color, width = 0)
        handles.append(subplot.add_patch(p))
        labels.append(group)

    leg = subplot.legend(handles, labels, bbox_to_anchor = (1.0, 0.5),
                         loc = 'center left', title = 'Land Use Categories')
    legtext = leg.get_texts()
    pyplot.setp(legtext, fontsize = 10)
    subplot.set_position([0.125, 0.1, 0.6, 0.8])

    # add the labels and set the limits

    subplot.set_xlabel('Longitude, Decimal Degrees', size = 13)
    subplot.set_ylabel('Latitude, Decimal Degrees', size = 13)

    subplot.set_xlim([xmin, xmax])
    subplot.set_ylim([ymin, ymax])

    subplot.xaxis.set_major_locator(ticker.MaxNLocator(8))
    subplot.yaxis.set_major_locator(ticker.MaxNLocator(8))

    subplot.xaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))
    subplot.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))

    # show it

    if output is not None: pyplot.savefig(output)
    if show: pyplot.show()

    pyplot.clf()
    pyplot.close()
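# a usage sketch for plot_landuse (hypothetical file names; "calculator" is
# an instance of the enclosing class with the land use fractions calculated):
#
#     calculator.plot_landuse('nlcd_2001.tif', 'catchments', 'ComID',
#                             'categories.csv', output = 'landuse',
#                             datatype = 'raw')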
def calculate_landuse(self,
                      rasterfile,
                      shapefile,
                      aggregatefile,
                      attribute,
                      csvfile = None,
                      ):
    """
    Calculates the land use for the given year for the "attribute"
    feature attribute in the polygon shapefile using the aggregate
    mapping provided in the "aggregatefile."
    """

    # make sure the files exist

    for f in rasterfile, shapefile + '.shp', aggregatefile:
        if not os.path.isfile(f):
            raise FileNotFoundError('error, {} does not exist\n'.format(f))

    # read the aggregate file

    self.read_aggregatefile(aggregatefile)

    # open the shapefile

    sf = Reader(shapefile, shapeType = 5)

    attributes = [f[0] for f in sf.fields]

    try:
        index = attributes.index(attribute) - 1
    except:
        print('error: attribute ' +
              '{} is not in the shapefile fields'.format(attribute))
        raise

    # iterate through the shapes, get the fractions and save them

    for i in range(len(sf.records())):

        points = numpy.array(sf.shape(i).points)
        record = sf.record(i)

        k = record[index]

        # store the results

        self.landuse[k] = {r:0 for r in self.order}

        try:

            values, origin = get_raster_in_poly(rasterfile, points,
                                                verbose = False)
            values = values.flatten()
            values = values[values.nonzero()]

            tot_pixels = len(values)

            # count the number of pixels of each land use type

            for v in numpy.unique(values):

                # find all the indices for each pixel value

                pixels = numpy.argwhere(values == v)

                # normalize by the total # of pixels

                f = len(values[pixels]) / tot_pixels

                # add the landuse to the aggregated value

                self.landuse[k][self.groups[v]] += f

        # work around for small shapes

        except: self.landuse[k][self.groups[0]] = 1

    if csvfile is not None: self.make_csv(attribute, csvfile)

    return self.landuse
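# a usage sketch for calculate_landuse (hypothetical file names; the
# aggregate file maps raw raster codes to the aggregated categories):
#
#     landuse = calculator.calculate_landuse('nlcd_2001.tif', 'catchments',
#                                            'aggregate.csv', 'ComID',
#                                            csvfile = 'landuse.csv')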
# find the flowline bounding boxes that contain the gage point; note the
# bbox ordering is (xmin, ymin, xmax, ymax), so the y test uses b[1] and b[3]

contains = [i for i, b in enumerate(bboxes)
            if b[0] <= x and x <= b[2] and b[1] <= y and y <= b[3]]

# find the distances between all the overlapping shapes points and the gage

distances = [min([(x1 - x)**2 + (y1 - y)**2
                  for x1, y1 in reader.shape(i).points])
             for i in contains]

# find the shape with the point closest to the gage

closest = contains[distances.index(min(distances))]

# read the record for the flowline

record = reader.record(closest)

# find the record indices of the comid and reach length in km in the file

i = [f[0] for f in reader.fields].index('LENGTHKM') - 1
j = [f[0] for f in reader.fields].index('COMID') - 1

# get the reach length and common identifier

length = record[i]
comid  = record[j]

it = comid, length
print('comid {} is closest to the gage and has a length of ' \
      '{} km\n'.format(*it))

# make an instance of the FtableCalculator to use for the data from the file
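# the containment test above is just a point-in-bounding-box check; a small
# equivalent helper (illustrative only) makes the coordinate ordering explicit:

def inside_bbox(b, x, y):
    """Returns True if (x, y) falls inside b = (xmin, ymin, xmax, ymax)."""

    return b[0] <= x <= b[2] and b[1] <= y <= b[3]

# e.g., contains = [i for i, b in enumerate(bboxes) if inside_bbox(b, x, y)]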
def merge_shapes(inputfile,
                 outputfile = None,
                 overwrite = False,
                 verbose = True,
                 vverbose = False):
    """Merges all the shapes in a shapefile into a single shape."""

    # note: assign to "outputfile" (not "output") so the default is used below

    if outputfile is None: outputfile = '{}/merged'.format(os.getcwd())

    if os.path.isfile(outputfile + '.shp') and not overwrite:
        if verbose:
            print('combined watershed shapefile {} exists'.format(outputfile))
        return

    if verbose:
        print('combining shapes from {}\n'.format(inputfile) +
              'this may take a while...\n')

    # start by copying the projection files

    shutil.copy(inputfile + '.prj', outputfile + '.prj')

    # load the catchment and flowline shapefiles

    r = Reader(inputfile, shapeType = 5)
    n = len(r.records())

    try:

        shapes  = []
        records = []
        bboxes  = []

        for i in range(n):

            shape  = r.shape(i)
            record = r.record(i)

            shape_list = format_shape(shape.points)

            for sh in shape_list:

                shapes.append(sh)
                records.append(record)
                bboxes.append(shape.bbox)

        try:
            combined = combine_shapes(shapes, bboxes, verbose = vverbose)
        except:
            if verbose: print('trying alternate trace method')
            combined = combine_shapes(shapes, bboxes, skip = True,
                                      verbose = vverbose)

    except:

        if verbose: print('trying alternate trace method')

        shapes  = []
        records = []
        bboxes  = []

        for i in range(n):

            shape  = r.shape(i)
            record = r.record(i)

            shape_list = format_shape(shape.points, omit = True)

            for sh in shape_list:

                shapes.append(sh)
                records.append(record)
                bboxes.append(shape.bbox)

        try:
            combined = combine_shapes(shapes, bboxes, verbose = vverbose)
        except:
            if verbose: print('trying alternate trace method')
            combined = combine_shapes(shapes, bboxes, skip = True,
                                      verbose = vverbose)

    # create the new file with the merged shapes

    w = Writer(shapeType = 5)

    w.poly(shapeType = 5, parts = [combined])

    # copy the fields from the original and then the first record; note this
    # can be adapted as needed

    for field in r.fields: w.field(*field)
    w.record(*r.record(0))

    w.save(outputfile)

    if verbose:

        its = inputfile, outputfile
        print('successfully combined shapes from {} to {}\n'.format(*its))
def extract_aquifers(directory, HUC8, aquifers, pad = 0.2, verbose = True):
    """Extracts aquifers from the source datafile to the destination
    using the HUC8 boundaries for the query."""

    start = time.time()

    # open up the HUC8 boundary shapefile and use it to get the bounding box

    shapefile = Reader(directory + '/%s/%scatchments' % (HUC8, HUC8))

    xmin, ymin, xmax, ymax = get_boundaries(shapefile.shapes())

    # convert to bounding corners for testing

    p1 = [xmin - pad * (xmax - xmin), ymin - pad * (ymax - ymin)]
    p2 = [xmax + pad * (xmax - xmin), ymax + pad * (ymax - ymin)]

    shapefile = None

    # start by copying the projection files

    if verbose: print('\ncopying the projections\n')

    shutil.copy(directory + '/%s/%scatchments.prj' % (HUC8, HUC8),
                directory + '/%s/%saquifers.prj' % (HUC8, HUC8))

    # open the aquifer file

    if verbose: print('reading the aquifer file\n')

    shapefile = Reader(aquifers, shapeType = 5)

    # work around for issues with pyshp

    records = []
    for i in range(len(shapefile.shapes())):
        try:    records.append(shapefile.record(i))
        except: records.append('')

    # use the bounding boxes to see if the shapes are in the watershed area

    if verbose: print('searching for aquifers in the watershed\n')

    bboxes = [shapefile.shape(i).bbox for i in range(len(records))]

    corners = [[[b[0], b[1]], [b[0], b[3]], [b[2], b[1]], [b[2], b[3]]]
               for b in bboxes]

    indices = [i for i, c in zip(range(len(corners)), corners) if
               any([inside_box(p1, p2, p) for p in c]) or
               all([inside_box(c[0], c[3], p1),
                    inside_box(c[0], c[3], p2)])]

    # remove any non aquifers

    indices = [i for i in indices if shapefile.record(i)[4] != 999]

    # find a record for the non aquifer

    i = 0
    while shapefile.record(i)[4] != 999: i += 1

    nonrecord = shapefile.record(i)
    nonrecord[1] = nonrecord[1].decode('utf-8')
    nonrecord[5] = 0
    nonrecord[6] = 0

    if len(indices) == 0:
        if verbose: print('query returned no values, returning\n')
        return

    # write the data from the HUC8 to a new shapefile

    w = Writer(shapeType = 5)

    for field in shapefile.fields: w.field(*field)

    for i in indices:

        shape = shapefile.shape(i)

        # check for multiple parts; append the endpoint to the part start
        # indices so the final part is included in the slices

        if len(shape.parts) > 1:
            starts = list(shape.parts) + [len(shape.points)]
            parts = [shape.points[i:j]
                     for i, j in zip(starts[:-1], starts[1:])]
        else: parts = [shape.points]

        record = records[i]

        # little work around for blank binary values

        if isinstance(record[1], bytes):
            record[1] = record[1].decode('utf-8')

        w.poly(shapeType = 5, parts = parts)
        w.record(*record)

    # add a shape for the bounding box showing no aquifer locations

    part = [p1, [p1[0], p2[1]], p2, [p2[0], p1[1]]]

    w.poly(shapeType = 5, parts = [part])
    w.record(*nonrecord)

    w.save(directory + '/%s/%saquifers' % (HUC8, HUC8))

    end = time.time()

    if verbose:
        print('successfully queried data in %.2f seconds\n' % (end - start))
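# a usage sketch for extract_aquifers (hypothetical paths; "aquifers" points
# to the national aquifer shapefile without the .shp extension):
#
#     extract_aquifers('data', '07080106', 'data/aquifers/us_aquifers')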