def main(): """ Main BRAT Run """ parser = argparse.ArgumentParser( description='Run brat against a pre-existing sqlite db:', # epilog="This is an epilog" ) parser.add_argument('project', help='Riverscapes project folder or project xml file', type=str, default=None) parser.add_argument( '--csv_dir', help='(optional) directory where we can find updated lookup tables', action='store_true', default=False) parser.add_argument('--verbose', help='(optional) a little extra logging ', action='store_true', default=False) parser.add_argument( '--debug', help= '(optional) more output about things like memory usage. There is a performance cost', action='store_true', default=False) args = dotenv.parse_args_env(parser) if os.path.isfile(args.project): logpath = os.path.dirname(args.project) elif os.path.isdir(args.project): logpath = args.project else: raise Exception( 'You must supply a valid path to a riverscapes project') log = Logger('BRAT Run') log.setup(logPath=os.path.join(logpath, "brat_run.log"), verbose=args.verbose) log.title('BRAT Run Tool') try: if args.debug is True: from rscommons.debug import ThreadRun memfile = os.path.join(logpath, 'brat_run_memusage.log') retcode, max_obj = ThreadRun(brat_run, memfile, args.project, args.csv_dir) log.debug('Return code: {}, [Max process usage] {}'.format( retcode, max_obj)) else: brat_run(args.project, args.csv_dir) except Exception as e: log.error(e) traceback.print_exc(file=sys.stdout) sys.exit(1) sys.exit(0)
def main():
    parser = argparse.ArgumentParser(description='RVD')
    parser.add_argument('huc', help='HUC identifier', type=str)
    parser.add_argument('flowlines', help='Segmented flowlines input.', type=str)
    parser.add_argument('existing', help='National existing vegetation raster', type=str)
    parser.add_argument('historic', help='National historic vegetation raster', type=str)
    parser.add_argument('valley_bottom', help='Valley bottom (.shp, .gpkg/layer_name)', type=str)
    parser.add_argument('output_folder', help='Output folder input', type=str)
    parser.add_argument('--reach_codes', help='Comma delimited reach codes (FCode) to retain when filtering features. Omitting this option retains all features.', type=str)
    parser.add_argument('--flow_areas', help='(optional) path to the flow area polygon feature class containing artificial paths', type=str)
    parser.add_argument('--waterbodies', help='(optional) waterbodies input', type=str)
    parser.add_argument('--meta', help='riverscapes project metadata as comma separated key=value pairs', type=str)
    parser.add_argument('--verbose', help='(optional) a little extra logging', action='store_true', default=False)
    parser.add_argument('--debug', help='(optional) save intermediate outputs for debugging', action='store_true', default=False)

    args = dotenv.parse_args_env(parser)

    reach_codes = args.reach_codes.split(',') if args.reach_codes else None
    meta = parse_metadata(args.meta)

    # Initiate the log file
    log = Logger("RVD")
    log.setup(logPath=os.path.join(args.output_folder, "rvd.log"), verbose=args.verbose)
    log.title('RVD For HUC: {}'.format(args.huc))

    try:
        if args.debug is True:
            from rscommons.debug import ThreadRun
            # NOTE: the argparse argument is output_folder (there is no args.output_dir)
            memfile = os.path.join(args.output_folder, 'rvd_mem.log')
            retcode, max_obj = ThreadRun(rvd, memfile,
                                         args.huc, args.flowlines, args.existing, args.historic,
                                         args.valley_bottom, args.output_folder,
                                         reach_codes, args.flow_areas, args.waterbodies, meta=meta)
            log.debug('Return code: {}, [Max process usage] {}'.format(retcode, max_obj))
        else:
            rvd(args.huc, args.flowlines, args.existing, args.historic,
                args.valley_bottom, args.output_folder,
                reach_codes, args.flow_areas, args.waterbodies, meta=meta)
    except Exception as e:
        log.error(e)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)

    sys.exit(0)
def get_metadata(database):
    log = Logger('Database')
    log.debug('Retrieving metadata')

    conn = sqlite3.connect(database)
    curs = conn.cursor()
    curs.execute('SELECT KeyInfo, ValueInfo FROM MetaData')

    meta = {}
    for row in curs.fetchall():
        meta[row[0]] = row[1]

    conn.close()
    return meta
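# A minimal usage sketch for get_metadata(). The database path below is
# hypothetical; any SQLite database with a MetaData(KeyInfo, ValueInfo)
# table should work the same way.
def _example_get_metadata():
    meta = get_metadata('/data/brat/17060304/brat.sqlite')  # hypothetical path
    for key, value in meta.items():
        print('{} = {}'.format(key, value))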
def safe_remove_dir(dir_path):
    """Remove a directory without throwing an error

    Args:
        dir_path (str): path of the directory to remove
    """
    log = Logger("safe_remove_dir")
    try:
        shutil.rmtree(dir_path, ignore_errors=True)
        log.debug('Directory removed: {}'.format(dir_path))
    except Exception as e:
        log.error(str(e))
def safe_remove_file(file_path):
    """Remove a file without throwing an error

    Args:
        file_path (str): path of the file to remove
    """
    log = Logger("safe_remove_file")
    try:
        if not os.path.isfile(file_path):
            log.warning('File not found: {}'.format(file_path))
            return
        os.remove(file_path)
        log.debug('File removed: {}'.format(file_path))
    except Exception as e:
        log.error(str(e))
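# Usage sketch for the two safe_remove helpers above. The paths are
# hypothetical; both calls log rather than raise when the target is missing.
safe_remove_file('/tmp/rs_scratch/old_output.tif')   # hypothetical path
safe_remove_dir('/tmp/rs_scratch')                   # hypothetical path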
def ThreadRun(callback, memlogfile: str, *args, **kwargs):
    log = Logger('Debug')
    memmon = MemoryMonitor(memlogfile, 1)
    with ThreadPoolExecutor() as executor:
        mem_thread = executor.submit(memmon.measure_usage)
        try:
            fn_thread = executor.submit(callback, *args, **kwargs)
            result = fn_thread.result()
        finally:
            memmon.keep_measuring = False
            max_obj = mem_thread.result()
            log.debug('MaxStats: {}'.format(max_obj))
    memmon.write_plot(os.path.splitext(memlogfile)[0] + '.png')
    return result, max_obj.toString()
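# Usage sketch for ThreadRun(): wrap any long-running callable to capture a
# memory-usage log and plot alongside its return value. slow_task and the
# log path are hypothetical stand-ins.
def slow_task(n):
    return sum(i * i for i in range(n))

result, max_usage = ThreadRun(slow_task, '/tmp/slow_task_memusage.log', 10_000_000)
print(result, max_usage)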
def rasterize(in_lyr_path, out_raster_path, template_path):
    """Rasterize an input vector layer

    Args:
        in_lyr_path (str): path to the input vector layer
        out_raster_path (str): path for the output raster
        template_path (str): raster whose transform and bounds the output will match
    """
    log = Logger('VBETRasterize')
    ds_path, lyr_path = VectorBase.path_sorter(in_lyr_path)

    progbar = ProgressBar(100, 50, "Rasterizing ")

    with rasterio.open(template_path) as raster:
        t = raster.transform
        raster_bounds = raster.bounds

    def poly_progress(progress, _msg, _data):
        progbar.update(int(progress * 100))

    # Rasterize the features (roads, rail etc) and calculate a raster of Euclidean distance from these features
    progbar.update(0)

    # Rasterize the polygon to a temporary file
    with TempRaster('vbet_rasterize') as tempfile:
        log.debug('Temporary file: {}'.format(tempfile.filepath))
        gdal.Rasterize(
            tempfile.filepath,
            ds_path,
            layers=[lyr_path],
            xRes=t[0], yRes=t[4],
            burnValues=1,
            outputType=gdal.GDT_Int16,
            creationOptions=['COMPRESS=LZW'],
            # outputBounds --- assigned output bounds: [minx, miny, maxx, maxy]
            outputBounds=[raster_bounds.left, raster_bounds.bottom, raster_bounds.right, raster_bounds.top],
            callback=poly_progress
        )
        progbar.finish()

        # Now mask the output correctly
        mask_rasters_nodata(tempfile.filepath, template_path, out_raster_path)
def deleteRaster(sFullPath):
    """Delete a raster file from disk using its GDAL driver

    :param sFullPath: full path to the raster to delete
    :return: None
    """
    log = Logger("Delete Raster")

    if path.isfile(sFullPath):
        try:
            # Delete the raster properly
            driver = gdal.GetDriverByName('GTiff')
            gdal.Driver.Delete(driver, sFullPath)
            log.debug("Raster Successfully Deleted: {0}".format(sFullPath))
        except Exception as e:
            log.error("Failed to remove existing clipped raster at {0}".format(sFullPath))
            raise e
    else:
        log.debug("No raster file to delete at {0}".format(sFullPath))
def download_unzip(url, download_folder, unzip_folder=None, force_download=False, retries=3):
    """A wrapper for download_file() and unzip(). We do these things together often
    enough that it makes sense, and retrying needs to be handled in a centralized way.

    Arguments:
        url (str): URL of the file to download
        download_folder (str): folder in which to save the downloaded file

    Keyword Arguments:
        unzip_folder (str): (optional) specific directory to extract files into
            (we still create a subfolder with the zip file's name though)
        force_download (bool): re-download even if the file exists (default: False)
        retries (int): number of download attempts (default: 3)

    Returns:
        str: the folder the archive was unzipped into
    """
    log = Logger('Download')

    # If we specified an unzip path then use it, otherwise just unzip into the folder
    # with the same name as the file (minus the '.zip' extension)
    dl_retry = 0
    dl_success = False
    while not dl_success and dl_retry < retries:
        try:
            zipfilepath = download_file(url, download_folder, force_download)
            dl_success = True
        except Exception as e:
            log.debug(e)
            log.warning('download failed. retrying...')
            dl_retry += 1

    if not dl_success:
        raise Exception('Downloading of file failed after {} attempts'.format(retries))

    final_unzip_folder = unzip_folder if unzip_folder is not None else os.path.splitext(zipfilepath)[0]

    unzip(zipfilepath, final_unzip_folder, force_download, retries)

    return final_unzip_folder
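# Usage sketch for download_unzip(). The URL and folders are hypothetical;
# the helper returns the folder the archive was extracted into.
unzipped = download_unzip(
    'https://example.com/data/NHDPLUS_H_1706_HU4_GDB.zip',  # hypothetical URL
    '/tmp/downloads',
    unzip_folder=None,       # default: alongside the zip, minus '.zip'
    force_download=False,
    retries=3)
print('Extracted to: {}'.format(unzipped))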
def print_geom_size(logger: Logger, geom_obj: BaseGeometry):
    try:
        size_str = sizeof_fmt(get_obj_size(geom_obj.wkb))
        logger.debug('Byte Size of output object: {} Type: {} IsValid: {} Length: {} Area: {}'.format(
            size_str, geom_obj.type, geom_obj.is_valid, geom_obj.length, geom_obj.area))
    except Exception as e:
        logger.debug(e)
        logger.debug('Byte Size of output object could not be determined')
def file_compare(file_a, file_b, md5=True):
    """Compare two files, starting with file size and optionally finishing with md5

    Args:
        file_a (str): path to the first file
        file_b (str): path to the second file
        md5 (bool): also compare md5 hashes when the sizes match (default: True)

    Returns:
        bool: True if the files match
    """
    log = Logger("file_compare")
    log.debug('Comparing: {} {}'.format(file_a, file_b))
    try:
        # If the file sizes aren't the same then there's
        # no reason to do anything more
        a_stats = os.stat(file_a)
        b_stats = os.stat(file_b)
        if a_stats.st_size != b_stats.st_size:
            log.debug('Files are NOT the same size: {:,} vs. {:,}'.format(a_stats.st_size, b_stats.st_size))
            return False

        # If we want this to be a quick-compare and not do MD5 then we just
        # do the file size and leave it at that
        if not md5:
            return True

        with open(file_a, 'rb') as afile:
            hasher1 = hashlib.md5()
            buf1 = afile.read()
            hasher1.update(buf1)
            md5_a = str(hasher1.hexdigest())

        with open(file_b, 'rb') as bfile:
            hasher2 = hashlib.md5()
            buf2 = bfile.read()
            hasher2.update(buf2)
            md5_b = str(hasher2.hexdigest())

        # Compare md5
        if md5_a == md5_b:
            log.debug('File MD5 hashes match')
            return True
        else:
            log.debug('File MD5 hashes DO NOT match')
            return False
    except Exception as e:
        log.error('Error comparing files: {}'.format(str(e)))
        return False
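# Usage sketch for file_compare(). Passing md5=False does a fast size-only
# comparison; the default also hashes both files. The paths are hypothetical.
if file_compare('/data/a.tif', '/data/b.tif', md5=False):
    print('Same size; checking hashes...')
    print('Identical' if file_compare('/data/a.tif', '/data/b.tif') else 'Different')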
class Raster:

    def __init__(self, sfilename):
        self.filename = sfilename
        self.log = Logger("Raster")
        self.errs = ""
        try:
            if path.isfile(self.filename):
                src_ds = gdal.Open(self.filename)
            else:
                self.log.error('Missing file: {}'.format(self.filename))
                raise Exception('Could not find raster file: {}'.format(path.basename(self.filename)))
        except RuntimeError:
            raise Exception('Raster file exists but has problems: {}'.format(path.basename(self.filename)))

        try:
            # Read Raster Properties
            srcband = src_ds.GetRasterBand(1)
            self.bands = src_ds.RasterCount
            self.driver = src_ds.GetDriver().LongName
            self.gt = src_ds.GetGeoTransform()
            self.nodata = srcband.GetNoDataValue()

            # Turn a raster with a single band into a 2D [x,y] = v array
            self.array = srcband.ReadAsArray()

            # Now mask out any NAN or nodata values (we do both for consistency)
            if self.nodata is not None:
                # To get over the issue where self.nodata may be imprecisely set we may need to use the array's
                # true nodata, taken directly from the array
                workingNodata = self.nodata
                self.min = np.nanmin(self.array)
                if isclose(self.min, self.nodata, rel_tol=1e-03):
                    workingNodata = self.min
                self.array = np.ma.array(self.array, mask=(np.isnan(self.array) | (self.array == workingNodata)))

            self.dataType = srcband.DataType
            self.min = np.nanmin(self.array)
            self.max = np.nanmax(self.array)
            self.proj = src_ds.GetProjection()

            # Remember:
            # [0]/* top left x */
            # [1]/* w-e pixel resolution */
            # [2]/* rotation, 0 if image is "north up" */
            # [3]/* top left y */
            # [4]/* rotation, 0 if image is "north up" */
            # [5]/* n-s pixel resolution */
            self.left = self.gt[0]
            self.cellWidth = self.gt[1]
            self.top = self.gt[3]
            self.cellHeight = self.gt[5]
            self.cols = src_ds.RasterXSize
            self.rows = src_ds.RasterYSize
            # Important to throw away the srcband
            srcband.FlushCache()
            srcband = None
        except RuntimeError as e:
            print('Could not retrieve meta Data for %s' % self.filename, e)
            raise e

    def __enter__(self) -> 'Raster':
        """Behaviour on open when using the "with Raster():" syntax
        """
        return self

    def __exit__(self, _type, _value, _traceback):
        """Behaviour on close when using the "with Raster():" syntax.
        Nothing to clean up: the GDAL dataset is released in __init__
        """

    def getBottom(self):
        return self.top + (self.cellHeight * self.rows)

    def getRight(self):
        return self.left + (self.cellWidth * self.cols)

    def getWidth(self):
        return self.getRight() - self.left

    def getHeight(self):
        return self.top - self.getBottom()

    def getBoundaryShape(self):
        return Polygon([
            (self.left, self.top),
            (self.getRight(), self.top),
            (self.getRight(), self.getBottom()),
            (self.left, self.getBottom()),
        ])

    def boundsContains(self, bounds, pt):
        return (bounds[0] < pt.coords[0][0]
                and bounds[1] < pt.coords[0][1]
                and bounds[2] > pt.coords[0][0]
                and bounds[3] > pt.coords[0][1])

    def rasterMaskLayer(self, shapefile, fieldname=None):
        """Return a masked array that corresponds to the input polygon

        :param shapefile: path to the ShapeFile to rasterize
        :param fieldname: (optional) attribute field to burn in
        :return: 2D array of the rasterized layer
        """
        # Create a memory raster to rasterize into.
        target_ds = gdal.GetDriverByName('MEM').Create('', self.cols, self.rows, 1, gdal.GDT_Byte)
        target_ds.SetGeoTransform(self.gt)

        assert len(shapefile) > 0, "The ShapeFile path is empty"

        # Create a memory layer to rasterize from.
        driver = ogr.GetDriverByName("ESRI Shapefile")
        src_ds = driver.Open(shapefile, 0)
        src_lyr = src_ds.GetLayer()

        # Run the algorithm.
        options = ['ALL_TOUCHED=TRUE']
        if fieldname and len(fieldname) > 0:
            options.append('ATTRIBUTE=' + fieldname)

        err = gdal.RasterizeLayer(target_ds, [1], src_lyr, options=options)
        if err:
            print(err)

        # Get the array:
        band = target_ds.GetRasterBand(1)
        return band.ReadAsArray()

    def getPixelVal(self, pt):
        # Convert from map to pixel coordinates.
        # Only works for geotransforms with no rotation.
        px = int((pt[0] - self.left) / self.cellWidth)   # x pixel
        py = int((pt[1] - self.top) / self.cellHeight)   # y pixel
        val = self.array[py, px]
        if isclose(val, self.nodata, rel_tol=1e-07) or val is np.ma.masked:
            return np.nan
        return val

    def lookupRasterValues(self, points):
        """Given an array of points with real-world coordinates, lookup values in raster
        then mask out any nan/nodata values

        :param points: list of shapely points
        :return: dict with the input 'points' and their masked 'values'
        """
        pointsdict = {"points": points, "values": []}

        for pt in pointsdict['points']:
            pointsdict['values'].append(self.getPixelVal(pt.coords[0]))

        # Mask out the np.nan values
        pointsdict['values'] = np.ma.masked_invalid(pointsdict['values'])

        return pointsdict

    def write(self, outputRaster):
        """Write this raster object to a file. The Raster is closed after this so keep that in mind
        You won't be able to access the raster data after you run this.

        :param outputRaster: path of the file to write
        :return: None
        """
        if path.isfile(outputRaster):
            deleteRaster(outputRaster)

        driver = gdal.GetDriverByName('GTiff')
        outRaster = driver.Create(outputRaster, self.cols, self.rows, 1, self.dataType, ['COMPRESS=DEFLATE'])

        # Remember:
        # [0]/* top left x */
        # [1]/* w-e pixel resolution */
        # [2]/* rotation, 0 if image is "north up" */
        # [3]/* top left y */
        # [4]/* rotation, 0 if image is "north up" */
        # [5]/* n-s pixel resolution */
        outRaster.SetGeoTransform([self.left, self.cellWidth, 0, self.top, 0, self.cellHeight])
        outband = outRaster.GetRasterBand(1)

        # Set nans to the original No Data Value
        outband.SetNoDataValue(self.nodata)
        self.array.data[np.isnan(self.array)] = self.nodata
        # Any mask that gets passed in here should have masked out elements set to
        # Nodata Value
        if isinstance(self.array, np.ma.MaskedArray):
            np.ma.set_fill_value(self.array, self.nodata)
            outband.WriteArray(self.array.filled())
        else:
            outband.WriteArray(self.array)

        spatialRef = osr.SpatialReference()
        spatialRef.ImportFromWkt(self.proj)

        outRaster.SetProjection(spatialRef.ExportToWkt())
        outband.FlushCache()
        # Important to throw away the outband
        outband = None
        self.log.debug("Finished Writing Raster: {0}".format(outputRaster))

    def setArray(self, incomingArray, copy=False):
        """You can use the self.array directly but if you want to copy from one array
        into a raster we suggest you do it this way

        :param incomingArray: array to copy in
        :return: None
        """
        masked = isinstance(self.array, np.ma.MaskedArray)
        if copy:
            if masked:
                self.array = np.ma.copy(incomingArray)
            else:
                self.array = np.ma.masked_invalid(incomingArray, copy=True)
        else:
            if masked:
                self.array = incomingArray
            else:
                self.array = np.ma.masked_invalid(incomingArray)

        self.rows = self.array.shape[0]
        self.cols = self.array.shape[1]
        self.min = np.nanmin(self.array)
        self.max = np.nanmax(self.array)
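# Usage sketch for the Raster class above: read a raster, scale its values
# and write the result back out. The file paths are hypothetical and this
# assumes the source raster has a NoData value set (write() requires one).
with Raster('/data/dem.tif') as dem:
    print('Size: {} x {}'.format(dem.cols, dem.rows))
    print('Range: {} to {}'.format(dem.min, dem.max))
    dem.setArray(dem.array * 0.5, copy=True)    # halve every cell value
    dem.write('/data/dem_halved.tif')           # hypothetical output path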
def get_geometry_unary_union(in_layer_path: str, epsg: int = None, spatial_ref: osr.SpatialReference = None,
                             attribute_filter: str = None, clip_shape: BaseGeometry = None,
                             clip_rect: List[float] = None) -> BaseGeometry:
    """Load all features from a ShapeFile and union them together into a single geometry

    Args:
        in_layer_path (str): path to layer
        epsg (int, optional): EPSG to project to. Defaults to None.
        spatial_ref (osr.SpatialReference, optional): Spatial Ref to project to. Defaults to None.
        attribute_filter (str, optional): Filter to a set of attributes. Defaults to None.
        clip_shape (BaseGeometry, optional): Clip to a specified shape. Defaults to None.
        clip_rect (List[float], optional): Iterate over a subset by clipping to a rectangle
            [minx, miny, maxx, maxy]. Defaults to None.

    Raises:
        VectorBaseException: if both epsg and spatial_ref are specified

    Returns:
        BaseGeometry: the unioned geometry, or None if the layer had no usable features
    """
    log = Logger('get_geometry_unary_union')

    if epsg is not None and spatial_ref is not None:
        raise VectorBaseException('Specify either an EPSG or a spatial_ref. Not both')

    with get_shp_or_gpkg(in_layer_path) as in_layer:
        transform = None
        if epsg is not None:
            _outref, transform = VectorBase.get_transform_from_epsg(in_layer.spatial_ref, epsg)
        elif spatial_ref is not None:
            transform = in_layer.get_transform(in_layer.spatial_ref, spatial_ref)

        geom_list = []

        for feature, _counter, progbar in in_layer.iterate_features("Unary Unioning features", attribute_filter=attribute_filter, clip_shape=clip_shape, clip_rect=clip_rect):
            new_geom = feature.GetGeometryRef()
            geo_type = new_geom.GetGeometryType()

            # We can't union non-valid shapes but sometimes a buffer by 0 can help
            if not new_geom.IsValid():
                progbar.erase()  # get around the progressbar
                log.warning('Invalid shape with FID={} trying the Buffer0 technique...'.format(feature.GetFID()))
                try:
                    new_geom = new_geom.Buffer(0)
                    if not new_geom.IsValid():
                        log.warning('   Still invalid. Skipping this geometry')
                        continue
                except Exception:
                    log.warning('Exception raised during buffer 0 technique. Skipping this geometry')
                    continue

            if new_geom is None:
                progbar.erase()  # get around the progressbar
                log.warning('Feature with FID={} has no geometry. Skipping'.format(feature.GetFID()))
            # Filter out zero-length lines
            elif geo_type in VectorBase.LINE_TYPES and new_geom.Length() == 0:
                progbar.erase()  # get around the progressbar
                log.warning('Zero Length for shape with FID={}'.format(feature.GetFID()))
            # Filter out zero-area polys
            elif geo_type in VectorBase.POLY_TYPES and new_geom.Area() == 0:
                progbar.erase()  # get around the progressbar
                log.warning('Zero Area for shape with FID={}'.format(feature.GetFID()))
            else:
                geom_list.append(VectorBase.ogr2shapely(new_geom, transform))

                # If we get past a certain size then run the union
                if len(geom_list) >= 500:
                    geom_list = [unary_union(geom_list)]
            new_geom = None

        log.debug('finished iterating with list of size: {}'.format(len(geom_list)))

        if len(geom_list) > 1:
            log.debug('Starting final union of geom_list of size: {}'.format(len(geom_list)))
            # Do a final union to clean up anything that might still be in the list
            geom_union = unary_union(geom_list)
        elif len(geom_list) == 0:
            log.warning('No geometry found to union')
            return None
        else:
            log.debug('FINAL Unioning geom_list of size {}'.format(len(geom_list)))
            geom_union = geom_list[0]
            log.debug('  done')

    print_geom_size(log, geom_union)
    log.debug('Complete')
    # Return a shapely object
    return geom_union
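# Usage sketch for get_geometry_unary_union(): dissolve a watershed boundary
# layer into a single shapely geometry. The layer path and EPSG are hypothetical.
boundary = get_geometry_unary_union('/data/hydrology.gpkg/WBDHU8', epsg=4326)
if boundary is not None:
    print('Dissolved boundary area: {}'.format(boundary.area))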
def intersect_geometry_with_feature_class(geometry: BaseGeometry, in_layer_path: str, output_geom_type: int,
                                          epsg: int = None, attribute_filter: str = None) -> BaseGeometry:
    """Intersect a shapely geometry with all the features of a layer

    Args:
        geometry (BaseGeometry): geometry to intersect
        in_layer_path (str): path to the feature class to intersect with
        output_geom_type (int): ogr.wkbMultiPoint or ogr.wkbMultiLineString
        epsg (int, optional): EPSG to project to. Defaults to None.
        attribute_filter (str, optional): Filter to a set of attributes. Defaults to None.

    Raises:
        VectorBaseException: if the output geometry type is unsupported
        VectorBaseException: if the intersection does not match the requested type

    Returns:
        BaseGeometry: the intersection, or None if it is empty
    """
    log = Logger('intersect_geometry_with_feature_class')
    if output_geom_type not in [ogr.wkbMultiPoint, ogr.wkbMultiLineString]:
        raise VectorBaseException('Unsupported ogr type for geometry intersection: "{}"'.format(output_geom_type))

    log.debug('Intersection with feature class: Performing unary union on input: {}'.format(in_layer_path))
    geom_union = get_geometry_unary_union(in_layer_path, epsg=epsg, attribute_filter=attribute_filter, clip_shape=geometry)

    # Nothing to do if there were no features in the feature class
    if not geom_union:
        return None

    log.debug('Finding intersections (may take a few minutes)...')
    tmr = Timer()
    geom_inter = geometry.intersection(geom_union)
    log.debug('Intersection done in {:.1f} seconds'.format(tmr.ellapsed()))

    # Nothing to do if the intersection is empty
    if geom_inter.is_empty:
        return None

    # Single features and collections need to be converted into Multi-features
    if output_geom_type == ogr.wkbMultiPoint and not isinstance(geom_inter, MultiPoint):
        if isinstance(geom_inter, Point):
            geom_inter = MultiPoint([geom_inter])
        elif isinstance(geom_inter, LineString):
            # Break this linestring down into its end points
            geom_inter = MultiPoint([geom_inter.coords[0], geom_inter.coords[-1]])
        elif isinstance(geom_inter, MultiLineString):
            # Break all the linestrings down into their end points
            geom_inter = MultiPoint(reduce(lambda acc, ls: acc + [ls.coords[0], ls.coords[-1]], list(geom_inter.geoms), []))
        elif isinstance(geom_inter, GeometryCollection):
            geom_inter = MultiPoint([geom for geom in geom_inter.geoms if isinstance(geom, Point)])
    elif output_geom_type == ogr.wkbMultiLineString and not isinstance(geom_inter, MultiLineString):
        if isinstance(geom_inter, LineString):
            geom_inter = MultiLineString([geom_inter])
        else:
            raise VectorBaseException('Unsupported ogr type: "{}" does not match shapely type of "{}"'.format(output_geom_type, geom_inter.type))

    return geom_inter
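# Usage sketch for intersect_geometry_with_feature_class(): clip a road network
# to a watershed boundary polygon. Both layer paths are hypothetical.
clip = get_geometry_unary_union('/data/hydrology.gpkg/WBDHU8', epsg=4326)
road_segments = intersect_geometry_with_feature_class(
    clip,                                # any shapely BaseGeometry
    '/data/transportation/roads.shp',    # hypothetical path
    ogr.wkbMultiLineString,
    epsg=4326)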
def main():
    parser = argparse.ArgumentParser(description='Riverscapes Context Tool')
    parser.add_argument('huc', help='HUC identifier', type=str)
    parser.add_argument('existing', help='National existing vegetation raster', type=str)
    parser.add_argument('historic', help='National historic vegetation raster', type=str)
    parser.add_argument('ownership', help='National land ownership shapefile', type=str)
    parser.add_argument('fairmarket', help='National fair market value raster', type=str)
    parser.add_argument('ecoregions', help='National EcoRegions shapefile', type=str)
    parser.add_argument('prism', help='Folder containing PRISM rasters in BIL format', type=str)
    parser.add_argument('output', help='Path to the output folder', type=str)
    parser.add_argument('download', help='Temporary folder for downloading data. Different HUCs may share this', type=str)
    parser.add_argument('--force', help='(optional) force a download even when the files already exist', action='store_true', default=False)
    parser.add_argument('--parallel', help='(optional) for running multiple instances of this at the same time', action='store_true', default=False)
    parser.add_argument('--temp_folder', help='(optional) cache folder for downloading files', type=str)
    parser.add_argument('--meta', help='riverscapes project metadata as comma separated key=value pairs', type=str)
    parser.add_argument('--verbose', help='(optional) a little extra logging', action='store_true', default=False)
    parser.add_argument('--debug', help='(optional) more output about things like memory usage. There is a performance cost', action='store_true', default=False)

    args = dotenv.parse_args_env(parser)

    # Initiate the log file
    log = Logger("RS Context")
    log.setup(logPath=os.path.join(args.output, "rs_context.log"), verbose=args.verbose)
    log.title('Riverscapes Context For HUC: {}'.format(args.huc))

    log.info('HUC: {}'.format(args.huc))
    log.info('EPSG: {}'.format(cfg.OUTPUT_EPSG))
    log.info('Existing veg: {}'.format(args.existing))
    log.info('Historical veg: {}'.format(args.historic))
    log.info('Ownership: {}'.format(args.ownership))
    log.info('Fair Market Value Raster: {}'.format(args.fairmarket))
    log.info('Output folder: {}'.format(args.output))
    log.info('Download folder: {}'.format(args.download))
    log.info('Force download: {}'.format(args.force))

    # This is a general place for unzipping downloaded files and other temporary work.
    # We use GUIDs to make it specific to a particular run of the tool to avoid unzip collisions
    parallel_code = "-" + str(uuid.uuid4()) if args.parallel is True else ""
    scratch_dir = args.temp_folder if args.temp_folder else os.path.join(args.download, 'scratch', 'rs_context{}'.format(parallel_code))
    safe_makedirs(scratch_dir)

    meta = parse_metadata(args.meta)

    try:
        if args.debug is True:
            from rscommons.debug import ThreadRun
            memfile = os.path.join(args.output, 'rs_context_memusage.log')
            retcode, max_obj = ThreadRun(rs_context, memfile,
                                         args.huc, args.existing, args.historic, args.ownership, args.fairmarket,
                                         args.ecoregions, args.prism, args.output, args.download,
                                         scratch_dir, args.parallel, args.force, meta)
            log.debug('Return code: {}, [Max process usage] {}'.format(retcode, max_obj))
        else:
            rs_context(args.huc, args.existing, args.historic, args.ownership, args.fairmarket,
                       args.ecoregions, args.prism, args.output, args.download,
                       scratch_dir, args.parallel, args.force, meta)
    except Exception as e:
        log.error(e)
        traceback.print_exc(file=sys.stdout)
        # Cleaning up the scratch folder is essential
        safe_remove_dir(scratch_dir)
        sys.exit(1)

    # Cleaning up the scratch folder is essential
    safe_remove_dir(scratch_dir)
    sys.exit(0)
def rs_context(huc, existing_veg, historic_veg, ownership, fair_market, ecoregions, prism_folder, output_folder, download_folder, scratch_dir, parallel, force_download, meta: Dict[str, str]):
    """Download riverscapes context layers for the specified HUC and organize them as a Riverscapes project

    :param huc: Four, eight, 10 or 12 digit HUC identification number
    :param existing_veg: Path to the existing vegetation conditions raster
    :param historic_veg: Path to the historical vegetation conditions raster
    :param ownership: Path to the national land ownership Shapefile
    :param ecoregions: Path to the national EcoRegions Shapefile
    :param prism_folder: folder containing PRISM rasters in *.bil format
    :param output_folder: Output location for the riverscapes context project
    :param download_folder: Temporary folder where downloads are cached. This can be shared between rs_context processes
    :param scratch_dir: Temporary folder for unzipping and intermediate work
    :param parallel: True when multiple instances may run at once (enables scratch cleanup)
    :param force_download: If false then downloads can be skipped if the files already exist
    :param meta (Dict[str,str]): dictionary of riverscapes metadata key: value pairs
    :return: dict of the key output paths
    """
    log = Logger("RS Context")
    log.info('Starting RSContext v.{}'.format(cfg.version))

    try:
        int(huc)
    except ValueError:
        raise Exception('Invalid HUC identifier "{}". Must be an integer'.format(huc))

    if not (len(huc) in [4, 8, 10, 12]):
        raise Exception('Invalid HUC identifier. Must be 4, 8, 10 or 12 digit integer')

    safe_makedirs(output_folder)
    safe_makedirs(download_folder)

    # We need a temporary folder for slope rasters, stitching inputs, intermediary products, etc.
    scratch_dem_folder = os.path.join(scratch_dir, 'rs_context', huc)
    safe_makedirs(scratch_dem_folder)

    project, realization = create_project(huc, output_folder)
    hydrology_gpkg_path = os.path.join(output_folder, LayerTypes['HYDROLOGY'].rel_path)

    dem_node, dem_raster = project.add_project_raster(realization, LayerTypes['DEM'])
    _node, hill_raster = project.add_project_raster(realization, LayerTypes['HILLSHADE'])
    _node, flow_accum = project.add_project_raster(realization, LayerTypes['FA'])
    _node, drain_area = project.add_project_raster(realization, LayerTypes['DA'])
    hand_node, hand_raster = project.add_project_raster(realization, LayerTypes['HAND'])
    _node, slope_raster = project.add_project_raster(realization, LayerTypes['SLOPE'])
    _node, existing_clip = project.add_project_raster(realization, LayerTypes['EXVEG'])
    _node, historic_clip = project.add_project_raster(realization, LayerTypes['HISTVEG'])
    _node, fair_market_clip = project.add_project_raster(realization, LayerTypes['FAIR_MARKET'])

    # Download the four digit NHD archive containing the flow lines and watershed boundaries
    log.info('Processing NHD')

    # Incorporate project metadata to the riverscapes project
    if meta is not None:
        project.add_metadata(meta)

    nhd_download_folder = os.path.join(download_folder, 'nhd', huc[:4])
    nhd_unzip_folder = os.path.join(scratch_dir, 'nhd', huc[:4])

    nhd, db_path, huc_name, nhd_url = clean_nhd_data(huc, nhd_download_folder, nhd_unzip_folder, os.path.join(output_folder, 'hydrology'), cfg.OUTPUT_EPSG, False)

    # Clean up the unzipped files. We won't need them again
    if parallel:
        safe_remove_dir(nhd_unzip_folder)

    project.add_metadata({'Watershed': huc_name})
    boundary = 'WBDHU{}'.format(len(huc))

    # For coarser rasters than the DEM we need to buffer our clip polygon to include enough pixels.
    # This shouldn't be too much more data because these are usually integer rasters that are much lower res.
    buffered_clip_path100 = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BUFFEREDCLIP100'].rel_path)
    copy_feature_class(nhd[boundary], buffered_clip_path100, epsg=cfg.OUTPUT_EPSG, buffer=100)

    buffered_clip_path500 = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BUFFEREDCLIP500'].rel_path)
    copy_feature_class(nhd[boundary], buffered_clip_path500, epsg=cfg.OUTPUT_EPSG, buffer=500)

    # PRISM climate rasters
    mean_annual_precip = None
    bil_files = glob.glob(os.path.join(prism_folder, '**', '*.bil'))
    if len(bil_files) == 0:
        raise Exception('Could not find any .bil files in the prism folder')

    for ptype in PrismTypes:
        try:
            # next() should always be guarded
            source_raster_path = next(x for x in bil_files if ptype.lower() in os.path.basename(x).lower())
        except StopIteration:
            raise Exception('Could not find .bil file corresponding to "{}"'.format(ptype))

        _node, project_raster_path = project.add_project_raster(realization, LayerTypes[ptype])
        raster_warp(source_raster_path, project_raster_path, cfg.OUTPUT_EPSG, buffered_clip_path500, {"cutlineBlend": 1})

        # Use the mean annual precipitation to calculate bankfull width
        if ptype.lower() == 'ppt':
            polygon = get_geometry_unary_union(nhd[boundary], epsg=cfg.OUTPUT_EPSG)
            mean_annual_precip = raster_buffer_stats2({1: polygon}, project_raster_path)[1]['Mean']
            log.info('Mean annual precipitation for HUC {} is {} mm'.format(huc, mean_annual_precip))
            project.add_metadata({'mean_annual_precipitation_mm': str(mean_annual_precip)})

            calculate_bankfull_width(nhd['NHDFlowline'], mean_annual_precip)

    # Add the DB record to the Project XML
    db_lyr = RSLayer('NHD Tables', 'NHDTABLES', 'SQLiteDB', os.path.relpath(db_path, output_folder))
    sqlite_el = project.add_dataset(realization, db_path, db_lyr, 'SQLiteDB')
    project.add_metadata({'origin_url': nhd_url}, sqlite_el)

    # Add any results to project XML
    for name, file_path in nhd.items():
        lyr_obj = RSLayer(name, name, 'Vector', os.path.relpath(file_path, output_folder))
        vector_nod, _fpath = project.add_project_vector(realization, lyr_obj)
        project.add_metadata({'origin_url': nhd_url}, vector_nod)

    states = get_nhd_states(nhd[boundary])

    # Download the NTD archive containing roads and rail
    log.info('Processing NTD')
    ntd_raw = {}
    ntd_unzip_folders = []
    ntd_urls = get_ntd_urls(states)
    for state, ntd_url in ntd_urls.items():
        ntd_download_folder = os.path.join(download_folder, 'ntd', state.lower())
        # A little awkward, but we need a folder for this and this was the best name available
        ntd_unzip_folder = os.path.join(scratch_dir, 'ntd', state.lower(), 'unzipped')
        ntd_raw[state] = download_shapefile_collection(ntd_url, ntd_download_folder, ntd_unzip_folder, force_download)
        ntd_unzip_folders.append(ntd_unzip_folder)

    ntd_clean = clean_ntd_data(ntd_raw, nhd['NHDFlowline'], nhd[boundary], os.path.join(output_folder, 'transportation'), cfg.OUTPUT_EPSG)

    # Clean up the NTD unzip folders. We won't need them again
    if parallel:
        for unzip_path in ntd_unzip_folders:
            safe_remove_dir(unzip_path)

    # Write transportation layers to project file
    log.info('Write transportation layers to project file')

    # Add any results to project XML
    for name, file_path in ntd_clean.items():
        lyr_obj = RSLayer(name, name, 'Vector', os.path.relpath(file_path, output_folder))
        ntd_node, _fpath = project.add_project_vector(realization, lyr_obj)
        project.add_metadata({**ntd_urls}, ntd_node)

    # Download the HAND raster
    huc6 = huc[0:6]
    hand_download_folder = os.path.join(download_folder, 'hand')
    _hpath, hand_url = download_hand(huc6, cfg.OUTPUT_EPSG, hand_download_folder, nhd[boundary], hand_raster, warp_options={"cutlineBlend": 1})
    project.add_metadata({'origin_url': hand_url}, hand_node)

    # Download contributing DEM rasters, mosaic and reproject into a compressed GeoTIFF
    ned_download_folder = os.path.join(download_folder, 'ned')
    ned_unzip_folder = os.path.join(scratch_dir, 'ned')
    dem_rasters, urls = download_dem(nhd[boundary], cfg.OUTPUT_EPSG, 0.01, ned_download_folder, ned_unzip_folder, force_download)

    need_dem_rebuild = force_download or not os.path.exists(dem_raster)
    if need_dem_rebuild:
        raster_vrt_stitch(dem_rasters, dem_raster, cfg.OUTPUT_EPSG, clip=nhd[boundary], warp_options={"cutlineBlend": 1})
        verify_areas(dem_raster, nhd[boundary])

    # Calculate slope rasters separately and then stitch them
    slope_parts = []
    hillshade_parts = []

    need_slope_build = need_dem_rebuild or not os.path.isfile(slope_raster)
    need_hs_build = need_dem_rebuild or not os.path.isfile(hill_raster)

    project.add_metadata({
        'num_rasters': str(len(urls)),
        'origin_urls': json.dumps(urls)
    }, dem_node)

    for dem_r in dem_rasters:
        slope_part_path = os.path.join(scratch_dem_folder, 'SLOPE__' + os.path.basename(dem_r).split('.')[0] + '.tif')
        hs_part_path = os.path.join(scratch_dem_folder, 'HS__' + os.path.basename(dem_r).split('.')[0] + '.tif')
        slope_parts.append(slope_part_path)
        hillshade_parts.append(hs_part_path)

        if force_download or need_dem_rebuild or not os.path.exists(slope_part_path):
            gdal_dem_geographic(dem_r, slope_part_path, 'slope')
            need_slope_build = True

        if force_download or need_dem_rebuild or not os.path.exists(hs_part_path):
            gdal_dem_geographic(dem_r, hs_part_path, 'hillshade')
            need_hs_build = True

    if need_slope_build:
        raster_vrt_stitch(slope_parts, slope_raster, cfg.OUTPUT_EPSG, clip=nhd[boundary], clean=parallel, warp_options={"cutlineBlend": 1})
        verify_areas(slope_raster, nhd[boundary])
    else:
        log.info('Skipping slope build because nothing has changed.')

    if need_hs_build:
        raster_vrt_stitch(hillshade_parts, hill_raster, cfg.OUTPUT_EPSG, clip=nhd[boundary], clean=parallel, warp_options={"cutlineBlend": 1})
        verify_areas(hill_raster, nhd[boundary])
    else:
        log.info('Skipping hillshade build because nothing has changed.')

    # Remove the unzipped rasters. We won't need them anymore
    if parallel:
        safe_remove_dir(ned_unzip_folder)

    # Calculate flow accumulation raster based on the DEM
    log.info('Running flow accumulation and converting to drainage area.')
    flow_accumulation(dem_raster, flow_accum, dinfinity=False, pitfill=True)
    flow_accum_to_drainage_area(flow_accum, drain_area)

    # Clip and re-project the existing and historic vegetation
    log.info('Processing existing and historic vegetation rasters.')
    clip_vegetation(buffered_clip_path100, existing_veg, existing_clip, historic_veg, historic_clip, cfg.OUTPUT_EPSG)

    log.info('Process the Fair Market Value Raster.')
    raster_warp(fair_market, fair_market_clip, cfg.OUTPUT_EPSG, clip=buffered_clip_path500, warp_options={"cutlineBlend": 1})

    # Clip the landownership Shapefile to a 10km buffer around the watershed boundary
    own_path = os.path.join(output_folder, LayerTypes['OWNERSHIP'].rel_path)
    project.add_dataset(realization, own_path, LayerTypes['OWNERSHIP'], 'Vector')
    clip_ownership(nhd[boundary], ownership, own_path, cfg.OUTPUT_EPSG, 10000)

    #######################################################
    # Segmentation
    #######################################################

    # For now let's just make a copy of the NHD Flowlines
    tmr = Timer()
    rs_segmentation(nhd['NHDFlowline'], ntd_clean['Roads'], ntd_clean['Rail'], own_path, hydrology_gpkg_path, SEGMENTATION['Max'], SEGMENTATION['Min'], huc)
    log.debug('Segmentation done in {:.1f} seconds'.format(tmr.ellapsed()))
    project.add_project_geopackage(realization, LayerTypes['HYDROLOGY'])

    # Add Bankfull Buffer Polygons
    bankfull_path = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BANKFULL_CHANNEL'].rel_path)
    bankfull_buffer(os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['NETWORK'].rel_path), cfg.OUTPUT_EPSG, bankfull_path)

    # TODO Add nhd/bankfull union when merge feature classes in vector.ops works with Geopackage layers
    # bankfull_nhd_path = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['COMPOSITE_CHANNEL_AREA'].rel_path)
    # clip_path = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BUFFEREDCLIP500'].rel_path)
    # bankfull_nhd_area(bankfull_path, nhd['NHDArea'], clip_path, cfg.OUTPUT_EPSG, hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['COMPOSITE_CHANNEL_AREA'].rel_path)

    # Filter the ecoregions Shapefile to only include attributes that intersect with our HUC
    eco_path = os.path.join(output_folder, 'ecoregions', 'ecoregions.shp')
    project.add_dataset(realization, eco_path, LayerTypes['ECOREGIONS'], 'Vector')
    filter_ecoregions(nhd[boundary], ecoregions, eco_path, cfg.OUTPUT_EPSG, 10000)

    report_path = os.path.join(project.project_dir, LayerTypes['REPORT'].rel_path)
    project.add_report(realization, LayerTypes['REPORT'], replace=True)

    report = RSContextReport(report_path, project, output_folder)
    report.write()

    log.info('Process completed successfully.')
    return {
        'DEM': dem_raster,
        'Slope': slope_raster,
        'ExistingVeg': existing_veg,
        'HistoricVeg': historic_veg,
        'NHD': nhd
    }
def _rough_convert_metres_to_dataset_units(in_spatial_ref, extent, distance):
    """DO NOT USE THIS FOR ACCURATE DISTANCES. IT'S GOOD FOR A QUICK CALCULATION
    WHEN DISTANCE PRECISION ISN'T THAT IMPORTANT

    Arguments:
        in_spatial_ref {osr.SpatialReference} -- spatial reference of the dataset
        extent {tuple} -- (min_x, max_x, min_y, max_y) extent of the dataset
        distance {float} -- distance in metres to convert

    Returns:
        float -- distance in dataset units
    """
    log = Logger('_rough_convert_metres_to_dataset_units')
    # If the ShapeFile uses a projected coordinate system in meters then simply return the distance.
    # If it's projected but in some other units then throw an exception.
    # If it's in degrees then continue with the code below to convert it to metres.
    if in_spatial_ref.IsProjected() == 1:
        if in_spatial_ref.GetAttrValue('unit').lower() in ['meter', 'metre', 'm']:
            return distance
        else:
            raise Exception('Unhandled projected coordinate system linear units: {}'.format(in_spatial_ref.GetAttrValue('unit')))

    # Get the centroid of the Shapefile spatial extent
    extent_ring = ogr.Geometry(ogr.wkbLinearRing)
    extent_ring.AddPoint(extent[0], extent[2])
    extent_ring.AddPoint(extent[1], extent[2])
    extent_ring.AddPoint(extent[1], extent[3])
    extent_ring.AddPoint(extent[0], extent[3])
    extent_ring.AddPoint(extent[0], extent[2])
    extent_poly = ogr.Geometry(ogr.wkbPolygon)
    extent_poly.AddGeometry(extent_ring)
    extent_centroid = extent_poly.Centroid()

    # Go diagonally across the extent rectangle
    pt1_orig = Point(extent[0], extent[2])
    pt2_orig = Point(extent[1], extent[3])
    orig_dist = pt1_orig.distance(pt2_orig)

    # Determine the UTM zone by locating the centroid of the shapefile extent
    # Then get the transformation required to convert the Shapefile to this UTM zone
    utm_epsg = get_utm_zone_epsg(extent_centroid.GetX())
    in_spatial_ref.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    out_spatial_ref = osr.SpatialReference()
    out_spatial_ref.ImportFromEPSG(int(utm_epsg))
    out_spatial_ref.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    log.debug('Original spatial reference is : \n      {0} (AxisMappingStrategy:{1})'.format(*get_srs_debug(in_spatial_ref)))
    log.debug('Transform spatial reference is : \n      {0} (AxisMappingStrategy:{1})'.format(*get_srs_debug(out_spatial_ref)))

    transformFwd = osr.CoordinateTransformation(in_spatial_ref, out_spatial_ref)

    pt1_ogr = ogr.CreateGeometryFromWkb(pt1_orig.wkb)
    pt2_ogr = ogr.CreateGeometryFromWkb(pt2_orig.wkb)
    pt1_ogr.Transform(transformFwd)
    pt2_ogr.Transform(transformFwd)

    pt1_proj = wkbload(pt1_ogr.ExportToWkb())
    pt2_proj = wkbload(pt2_ogr.ExportToWkb())

    proj_dist = pt1_proj.distance(pt2_proj)

    output_distance = (orig_dist / proj_dist) * distance

    log.info('{}m distance converts to {:.10f} using UTM EPSG {}'.format(distance, output_distance, utm_epsg))

    if output_distance > 360:
        raise Exception('Projection Error: \'{:,}\' is larger than the maximum allowed value'.format(output_distance))

    return output_distance
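# Usage sketch for _rough_convert_metres_to_dataset_units(): convert a 100 m
# buffer distance into degrees for a geographic (EPSG:4326) dataset. The
# extent values are hypothetical.
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
extent = (-116.5, -116.0, 45.0, 45.5)  # (min_x, max_x, min_y, max_y)
buffer_degrees = _rough_convert_metres_to_dataset_units(srs, extent, 100)
print('100 m is roughly {:.6f} degrees here'.format(buffer_degrees))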
def vbet(huc, flowlines_orig, flowareas_orig, orig_slope, json_transforms, orig_dem, hillshade, max_hand, min_hole_area_m, project_folder, reach_codes: List[str], meta: Dict[str, str]):
    """Run the Valley Bottom Extraction Tool (VBET) for a single watershed

    Args:
        huc (str): HUC identifier
        flowlines_orig (str): path to the input flow lines layer
        flowareas_orig (str): path to the input flow areas layer
        orig_slope (str): path to the slope raster
        json_transforms (str): JSON defining the transform functions
        orig_dem (str): path to the DEM raster
        hillshade (str): path to the hillshade raster
        max_hand (float): maximum HAND value to consider as valley bottom evidence
        min_hole_area_m (float): minimum area (square metres) below which holes are removed
        project_folder (str): output project folder
        reach_codes (List[str]): NHD reach codes for features to include in outputs
        meta (Dict[str,str]): dictionary of riverscapes metadata key: value pairs
    """
    log = Logger('VBET')
    log.info('Starting VBET v.{}'.format(cfg.version))

    project, _realization, proj_nodes = create_project(huc, project_folder)

    # Incorporate project metadata to the riverscapes project
    if meta is not None:
        project.add_metadata(meta)

    # Copy the input rasters into the project
    _proj_slope_node, proj_slope = project.add_project_raster(proj_nodes['Inputs'], LayerTypes['SLOPE_RASTER'], orig_slope)
    _proj_dem_node, proj_dem = project.add_project_raster(proj_nodes['Inputs'], LayerTypes['DEM'], orig_dem)
    _hillshade_node, hillshade = project.add_project_raster(proj_nodes['Inputs'], LayerTypes['HILLSHADE'], hillshade)

    # Copy input shapes to a geopackage
    inputs_gpkg_path = os.path.join(project_folder, LayerTypes['INPUTS'].rel_path)
    intermediates_gpkg_path = os.path.join(project_folder, LayerTypes['INTERMEDIATES'].rel_path)

    flowlines_path = os.path.join(inputs_gpkg_path, LayerTypes['INPUTS'].sub_layers['FLOWLINES'].rel_path)
    flowareas_path = os.path.join(inputs_gpkg_path, LayerTypes['INPUTS'].sub_layers['FLOW_AREA'].rel_path)

    # Make sure we're starting with a fresh slate of new geopackages
    GeopackageLayer.delete(inputs_gpkg_path)
    GeopackageLayer.delete(intermediates_gpkg_path)

    copy_feature_class(flowlines_orig, flowlines_path, epsg=cfg.OUTPUT_EPSG)
    copy_feature_class(flowareas_orig, flowareas_path, epsg=cfg.OUTPUT_EPSG)

    project.add_project_geopackage(proj_nodes['Inputs'], LayerTypes['INPUTS'])

    # Create a copy of the flow lines with just the perennial and also connectors inside flow areas
    network_path = os.path.join(intermediates_gpkg_path, LayerTypes['INTERMEDIATES'].sub_layers['VBET_NETWORK'].rel_path)
    vbet_network(flowlines_path, flowareas_path, network_path, cfg.OUTPUT_EPSG, reach_codes)

    # Generate HAND from dem and vbet_network
    # TODO make a place for this temporary folder. It can be removed after HAND is generated.
    temp_hand_dir = os.path.join(project_folder, "intermediates", "hand_processing")
    safe_makedirs(temp_hand_dir)

    hand_raster = os.path.join(project_folder, LayerTypes['HAND_RASTER'].rel_path)
    create_hand_raster(proj_dem, network_path, temp_hand_dir, hand_raster)

    project.add_project_raster(proj_nodes['Intermediates'], LayerTypes['HAND_RASTER'])

    # Build Transformation Tables
    with sqlite3.connect(intermediates_gpkg_path) as conn:
        cursor = conn.cursor()

        # Build tables
        with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), '..', 'database', 'vbet_schema.sql')) as sqlfile:
            sql_commands = sqlfile.read()
            cursor.executescript(sql_commands)
            conn.commit()

        # Load tables
        for sqldata in glob.glob(os.path.join(os.path.abspath(os.path.dirname(__file__)), '..', 'database', 'data', '**', '*.sql'), recursive=True):
            with open(sqldata) as sqlfile:
                sql_commands = sqlfile.read()
                cursor.executescript(sql_commands)
                conn.commit()

    # Load transforms from table
    transforms = load_transform_functions(json_transforms, intermediates_gpkg_path)

    # Get raster resolution as min buffer and apply bankfull width buffer to reaches
    with rasterio.open(proj_slope) as raster:
        t = raster.transform
        min_buffer = (t[0] + abs(t[4])) / 2

    log.info("Buffering polyline by bankfull width buffers")

    network_path_buffered = os.path.join(intermediates_gpkg_path, LayerTypes['INTERMEDIATES'].sub_layers['VBET_NETWORK_BUFFERED'].rel_path)
    buffer_by_field(network_path, network_path_buffered, "BFwidth", cfg.OUTPUT_EPSG, min_buffer)

    # Rasterize the channel polygon and write to raster
    log.info('Writing channel raster using slope as a template')
    flow_area_raster = os.path.join(project_folder, LayerTypes['FLOW_AREA_RASTER'].rel_path)
    channel_buffer_raster = os.path.join(project_folder, LayerTypes['CHANNEL_BUFFER_RASTER'].rel_path)

    rasterize(network_path_buffered, channel_buffer_raster, proj_slope)
    project.add_project_raster(proj_nodes['Intermediates'], LayerTypes['CHANNEL_BUFFER_RASTER'])

    rasterize(flowareas_path, flow_area_raster, proj_slope)
    project.add_project_raster(proj_nodes['Intermediates'], LayerTypes['FLOW_AREA_RASTER'])

    channel_dist_raster = os.path.join(project_folder, LayerTypes['CHANNEL_DISTANCE'].rel_path)
    fa_dist_raster = os.path.join(project_folder, LayerTypes['FLOW_AREA_DISTANCE'].rel_path)
    proximity_raster(channel_buffer_raster, channel_dist_raster)
    proximity_raster(flow_area_raster, fa_dist_raster)

    project.add_project_raster(proj_nodes["Intermediates"], LayerTypes['CHANNEL_DISTANCE'])
    project.add_project_raster(proj_nodes["Intermediates"], LayerTypes['FLOW_AREA_DISTANCE'])

    slope_transform_raster = os.path.join(project_folder, LayerTypes['NORMALIZED_SLOPE'].rel_path)
    hand_transform_raster = os.path.join(project_folder, LayerTypes['NORMALIZED_HAND'].rel_path)
    chan_dist_transform_raster = os.path.join(project_folder, LayerTypes['NORMALIZED_CHANNEL_DISTANCE'].rel_path)
    fa_dist_transform_raster = os.path.join(project_folder, LayerTypes['NORMALIZED_FLOWAREA_DISTANCE'].rel_path)
    topo_evidence_raster = os.path.join(project_folder, LayerTypes['EVIDENCE_TOPO'].rel_path)
    channel_evidence_raster = os.path.join(project_folder, LayerTypes['EVIDENCE_CHANNEL'].rel_path)
    evidence_raster = os.path.join(project_folder, LayerTypes['VBET_EVIDENCE'].rel_path)

    # Open evidence rasters concurrently. We're looping over windows so this
    # shouldn't affect memory consumption too much
    with rasterio.open(proj_slope) as slp_src, \
            rasterio.open(hand_raster) as hand_src, \
            rasterio.open(channel_dist_raster) as cdist_src, \
            rasterio.open(fa_dist_raster) as fadist_src:
        # All these rasters should have the same extent and properties. They differ only in dtype
        out_meta = slp_src.meta
        # Rasterio can't write back to a VRT so reset the driver and number of bands for the output
        out_meta['driver'] = 'GTiff'
        out_meta['count'] = 1
        out_meta['compress'] = 'deflate'

        # We use this to buffer the output
        cell_size = abs(slp_src.get_transform()[1])

        with rasterio.open(evidence_raster, 'w', **out_meta) as dest_evidence, \
                rasterio.open(topo_evidence_raster, 'w', **out_meta) as dest, \
                rasterio.open(channel_evidence_raster, 'w', **out_meta) as dest_channel, \
                rasterio.open(slope_transform_raster, 'w', **out_meta) as slope_ev_out, \
                rasterio.open(hand_transform_raster, 'w', **out_meta) as hand_ev_out, \
                rasterio.open(chan_dist_transform_raster, 'w', **out_meta) as chan_dist_ev_out, \
                rasterio.open(fa_dist_transform_raster, 'w', **out_meta) as fa_dist_ev_out:

            progbar = ProgressBar(len(list(slp_src.block_windows(1))), 50, "Calculating evidence layer")
            counter = 0
            # Again, these rasters should be orthogonal so their windows should also line up
            for _ji, window in slp_src.block_windows(1):
                progbar.update(counter)
                counter += 1
                slope_data = slp_src.read(1, window=window, masked=True)
                hand_data = hand_src.read(1, window=window, masked=True)
                cdist_data = cdist_src.read(1, window=window, masked=True)
                fadist_data = fadist_src.read(1, window=window, masked=True)

                slope_transform = np.ma.MaskedArray(transforms["Slope"](slope_data.data), mask=slope_data.mask)
                hand_transform = np.ma.MaskedArray(transforms["HAND"](hand_data.data), mask=hand_data.mask)
                channel_dist_transform = np.ma.MaskedArray(transforms["Channel"](cdist_data.data), mask=cdist_data.mask)
                fa_dist_transform = np.ma.MaskedArray(transforms["Flow Areas"](fadist_data.data), mask=fadist_data.mask)

                fvals_topo = slope_transform * hand_transform
                fvals_channel = np.maximum(channel_dist_transform, fa_dist_transform)
                fvals_evidence = np.maximum(fvals_topo, fvals_channel)

                # Fill the masked values with the appropriate nodata vals
                # Unthresholded in the base band (mostly for debugging)
                dest.write(np.ma.filled(np.float32(fvals_topo), out_meta['nodata']), window=window, indexes=1)

                slope_ev_out.write(slope_transform.astype('float32').filled(out_meta['nodata']), window=window, indexes=1)
                hand_ev_out.write(hand_transform.astype('float32').filled(out_meta['nodata']), window=window, indexes=1)
                chan_dist_ev_out.write(channel_dist_transform.astype('float32').filled(out_meta['nodata']), window=window, indexes=1)
                fa_dist_ev_out.write(fa_dist_transform.astype('float32').filled(out_meta['nodata']), window=window, indexes=1)

                dest_channel.write(np.ma.filled(np.float32(fvals_channel), out_meta['nodata']), window=window, indexes=1)
                dest_evidence.write(np.ma.filled(np.float32(fvals_evidence), out_meta['nodata']), window=window, indexes=1)
            progbar.finish()

    # The remaining rasters get added to the project
    project.add_project_raster(proj_nodes["Intermediates"], LayerTypes['NORMALIZED_SLOPE'])
    project.add_project_raster(proj_nodes["Intermediates"], LayerTypes['NORMALIZED_HAND'])
    project.add_project_raster(proj_nodes["Intermediates"], LayerTypes['NORMALIZED_CHANNEL_DISTANCE'])
    project.add_project_raster(proj_nodes["Intermediates"], LayerTypes['NORMALIZED_FLOWAREA_DISTANCE'])
    project.add_project_raster(proj_nodes['Intermediates'], LayerTypes['EVIDENCE_TOPO'])
    project.add_project_raster(proj_nodes['Intermediates'], LayerTypes['EVIDENCE_CHANNEL'])
    project.add_project_raster(proj_nodes['Outputs'], LayerTypes['VBET_EVIDENCE'])

    # Get the length of a meter (roughly)
    degree_factor = GeopackageLayer.rough_convert_metres_to_raster_units(proj_slope, 1)
    buff_dist = cell_size
    min_hole_degrees = min_hole_area_m * (degree_factor ** 2)

    # Get the full paths to the geopackages
    intermed_gpkg_path = os.path.join(project_folder, LayerTypes['INTERMEDIATES'].rel_path)
    vbet_path = os.path.join(project_folder, LayerTypes['VBET_OUTPUTS'].rel_path)

    # thresh_vals is a module-level mapping of threshold labels to threshold values
    for str_val, thr_val in thresh_vals.items():
        plgnize_id = 'THRESH_{}'.format(str_val)
        with TempRaster('vbet_raw_thresh_{}'.format(plgnize_id)) as tmp_raw_thresh, \
                TempRaster('vbet_cleaned_thresh_{}'.format(plgnize_id)) as tmp_cleaned_thresh:

            log.debug('Temporary threshold raster: {}'.format(tmp_raw_thresh.filepath))
            threshold(evidence_raster, thr_val, tmp_raw_thresh.filepath)

            raster_clean(tmp_raw_thresh.filepath, tmp_cleaned_thresh.filepath, buffer_pixels=1)

            plgnize_lyr = RSLayer('Raw Threshold at {}%'.format(str_val), plgnize_id, 'Vector', plgnize_id.lower())
            # Add a project node for this thresholded vector
            LayerTypes['INTERMEDIATES'].add_sub_layer(plgnize_id, plgnize_lyr)

            vbet_id = 'VBET_{}'.format(str_val)
            vbet_lyr = RSLayer('Threshold at {}%'.format(str_val), vbet_id, 'Vector', vbet_id.lower())
            # Add a project node for this thresholded vector
            LayerTypes['VBET_OUTPUTS'].add_sub_layer(vbet_id, vbet_lyr)

            # Now polygonize the raster
            log.info('Polygonizing')
            polygonize(tmp_cleaned_thresh.filepath, 1, '{}/{}'.format(intermed_gpkg_path, plgnize_lyr.rel_path), cfg.OUTPUT_EPSG)
            log.info('Done')

        # Now the final sanitization
        sanitize(str_val, '{}/{}'.format(intermed_gpkg_path, plgnize_lyr.rel_path), '{}/{}'.format(vbet_path, vbet_lyr.rel_path), buff_dist, network_path)
        log.info('Completed thresholding at {}'.format(thr_val))

    # Now add our Geopackages to the project XML
    project.add_project_geopackage(proj_nodes['Intermediates'], LayerTypes['INTERMEDIATES'])
    project.add_project_geopackage(proj_nodes['Outputs'], LayerTypes['VBET_OUTPUTS'])

    report_path = os.path.join(project.project_dir, LayerTypes['REPORT'].rel_path)
    project.add_report(proj_nodes['Outputs'], LayerTypes['REPORT'], replace=True)
    report = VBETReport(report_path, project)
    report.write()

    log.info('VBET Completed Successfully')
def segment_network(inpath: str, outpath: str, interval: float, minimum: float, watershed_id: str, create_layer=False):
    """
    Chop the lines in a polyline feature class at the specified interval unless
    this would create a line less than the minimum in which case the line is not segmented.
    :param inpath: Original network feature class
    :param outpath: Output segmented network feature class
    :param interval: Distance at which to segment each line feature (map units)
    :param minimum: Minimum length below which lines are not segmented (map units)
    :param watershed_id: Give this watershed an id (str)
    :param create_layer: This layer may be created earlier. We can choose to create it. Defaults to False (bool)
    :return: None
    """
    log = Logger('Segment Network')

    if interval <= 0:
        log.info('Skipping segmentation.')
    else:
        log.info('Segmenting network to {}m, with minimum feature length of {}m'.format(interval, minimum))
        log.info('Segmenting network from {0}'.format(inpath))

    # NOTE: Remember to always open the 'write' layer first in case it's the same geopackage
    with get_shp_or_gpkg(outpath, write=True) as out_lyr, get_shp_or_gpkg(inpath) as in_lyr:
        # Get the input NHD flow lines layer
        srs = in_lyr.spatial_ref
        feature_count = in_lyr.ogr_layer.GetFeatureCount()
        log.info('Input feature count {:,}'.format(feature_count))

        # Get the closest EPSG possible to calculate length.
        # GetExtent() returns (min_x, max_x, min_y, max_y); build the extent
        # polygon so we can locate its centroid and pick a UTM zone
        extent = in_lyr.ogr_layer.GetExtent()
        extent_ring = ogr.Geometry(ogr.wkbLinearRing)
        extent_ring.AddPoint(extent[0], extent[2])
        extent_ring.AddPoint(extent[1], extent[2])
        extent_ring.AddPoint(extent[1], extent[3])
        extent_ring.AddPoint(extent[0], extent[3])
        extent_ring.AddPoint(extent[0], extent[2])
        extent_poly = ogr.Geometry(ogr.wkbPolygon)
        extent_poly.AddGeometry(extent_ring)
        extent_centroid = extent_poly.Centroid()
        utm_epsg = get_utm_zone_epsg(extent_centroid.GetX())
        transform_ref, transform = VectorBase.get_transform_from_epsg(in_lyr.spatial_ref, utm_epsg)

        # In order to get accurate lengths we are going to need to project into some coordinate system
        transform_back = osr.CoordinateTransformation(transform_ref, srs)

        # Create the output layer
        if create_layer is True:
            out_lyr.create_layer_from_ref(in_lyr)

            # We add two fields to this
            out_lyr.create_fields({
                'ReachID': ogr.OFTInteger,
                'WatershedID': ogr.OFTString
            })

        # Retrieve all input features keeping track of which ones have GNIS names or not
        named_features = {}
        all_features = []
        junctions = []

        # Omit pipelines with FCode 428**
        attribute_filter = 'FCode < 42800 OR FCode > 42899'
        log.info('Filtering out pipelines ({})'.format(attribute_filter))

        for in_feature, _counter, _progbar in in_lyr.iterate_features("Loading Network", attribute_filter=attribute_filter):
            # Store relevant items as a tuple:
            # (name, FID, StartPt, EndPt, Length, FCode)
            s_feat = SegmentFeature(in_feature, transform)

            # Add the end points of all lines to a single list
            junctions.extend([s_feat.start, s_feat.end])

            if not s_feat.name or len(s_feat.name) < 1 or interval <= 0:
                # Add features without a GNIS name to the list. Also add to the list if not segmenting
                all_features.append(s_feat)
            else:
                # Build separate lists for each unique GNIS name
                if s_feat.name not in named_features:
                    named_features[s_feat.name] = [s_feat]
                else:
                    named_features[s_feat.name].append(s_feat)

        # Loop over all features with the same GNIS name.
        # Only merge them if they meet at a junction where no other lines meet.
        log.info('Merging simple features with the same GNIS name...')
        for name, features in named_features.items():
            log.debug(' {} x{}'.format(name.encode('utf-8'), len(features)))
            all_features.extend(features)

        log.info('{:,} features after merging. Starting segmentation...'.format(len(all_features)))

        # Segment the features at the desired interval
        log.info('Segmenting Network...')
        progbar = ProgressBar(len(all_features), 50, "Segmenting")
        counter = 0

        for orig_feat in all_features:
            counter += 1
            progbar.update(counter)

            old_feat = in_lyr.ogr_layer.GetFeature(orig_feat.fid)
            old_geom = old_feat.GetGeometryRef()
            # Anything that would produce a reach shorter than the minimum just gets added whole.
            # Also just add features if not segmenting
            if orig_feat.length_m < (interval + minimum) or interval <= 0:
                new_ogr_feat = ogr.Feature(out_lyr.ogr_layer_def)
                copy_fields(old_feat, new_ogr_feat, in_lyr.ogr_layer_def, out_lyr.ogr_layer_def)
                new_ogr_feat.SetField("GNIS_NAME", orig_feat.name)
                new_ogr_feat.SetField("WatershedID", watershed_id)
                new_ogr_feat.SetGeometry(old_geom)
                out_lyr.ogr_layer.CreateFeature(new_ogr_feat)
            else:
                # From here on out we use shapely and project to UTM.
                # We'll transform back before writing to disk.
                new_geom = old_geom.Clone()
                new_geom.Transform(transform)

                remaining = LineString(new_geom.GetPoints())
                while remaining and remaining.length >= (interval + minimum):
                    part1shply, part2shply = cut(remaining, interval)
                    remaining = part2shply

                    new_ogr_feat = ogr.Feature(out_lyr.ogr_layer_def)
                    copy_fields(old_feat, new_ogr_feat, in_lyr.ogr_layer_def, out_lyr.ogr_layer_def)
                    new_ogr_feat.SetField("GNIS_NAME", orig_feat.name)
                    new_ogr_feat.SetField("WatershedID", watershed_id)
                    geo = ogr.CreateGeometryFromWkt(part1shply.wkt)
                    geo.Transform(transform_back)
                    new_ogr_feat.SetGeometry(geo)
                    out_lyr.ogr_layer.CreateFeature(new_ogr_feat)

                # Add any remaining line to the output
                if remaining:
                    new_ogr_feat = ogr.Feature(out_lyr.ogr_layer_def)
                    copy_fields(old_feat, new_ogr_feat, in_lyr.ogr_layer_def, out_lyr.ogr_layer_def)
                    new_ogr_feat.SetField("GNIS_NAME", orig_feat.name)
                    new_ogr_feat.SetField("WatershedID", watershed_id)
                    geo = ogr.CreateGeometryFromWkt(remaining.wkt)
                    geo.Transform(transform_back)
                    new_ogr_feat.SetGeometry(geo)
                    out_lyr.ogr_layer.CreateFeature(new_ogr_feat)
        progbar.finish()

        log.info('{:,} features written to {:}'.format(out_lyr.ogr_layer.GetFeatureCount(), outpath))

    log.info('Process completed successfully.')
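# Usage sketch for segment_network(): chop a flow line network into ~300 m
# reaches, skipping any cut that would leave a piece under 50 m. The layer
# paths and watershed id are hypothetical.
segment_network(
    '/data/hydrology.gpkg/network',          # hypothetical input layer
    '/data/hydrology.gpkg/network_300m',     # hypothetical output layer
    interval=300, minimum=50,
    watershed_id='17060304',
    create_layer=True)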
def unzip(file_path, destination_folder, force_overwrite=False, retries=3):
    """Unzip an archive, skipping any files that have already been extracted.

    Args:
        file_path: Full path to an existing zip archive
        destination_folder: Path where the zip archive will be unzipped
        force_overwrite (bool, optional): Force overwrite of a file if it's already there. Defaults to False.
        retries (int, optional): Number of retries on a single file. Defaults to 3.

    Raises:
        Exception: if the archive does not exist, a member cannot be extracted
            after all retries, or the archive is corrupt.
    """
    log = Logger('Unzipper')

    if not os.path.isfile(file_path):
        raise Exception('Unzip error: file not found: {}'.format(file_path))

    try:
        log.info('Attempting unzip: {} ==> {}'.format(file_path, destination_folder))
        zip_ref = zipfile.ZipFile(file_path, 'r')

        # Only unzip files we don't already have
        safe_makedirs(destination_folder)
        log.info('Extracting: {}'.format(file_path))

        # Only unzip things we haven't already unzipped
        for fitem in zip_ref.filelist:
            uz_success = False
            uz_retry = 0
            while not uz_success and uz_retry < retries:
                try:
                    outfile = os.path.join(destination_folder, fitem.filename)
                    if fitem.is_dir():
                        if not os.path.isdir(outfile):
                            zip_ref.extract(fitem, destination_folder)
                            log.debug(' (creating) {}'.format(fitem.filename))
                        else:
                            log.debug(' (skipping) {}'.format(fitem.filename))
                    else:
                        # Extract when forced, when the file is missing, or when the
                        # existing file is noticeably smaller than the archived one.
                        # Checking existence first keeps os.path.getsize from throwing
                        # on a missing file, and the file_size > 0 guard avoids a
                        # division by zero on empty archive members.
                        if force_overwrite or not os.path.isfile(outfile) or (
                                fitem.file_size > 0 and (os.path.getsize(outfile) / fitem.file_size) < 0.99999):
                            log.debug(' (unzipping) {}'.format(fitem.filename))
                            zip_ref.extract(fitem, destination_folder)
                        else:
                            log.debug(' (skipping) {}'.format(fitem.filename))

                    uz_success = True
                except Exception as e:
                    log.debug(e)
                    log.warning('unzipping file failed. waiting 3 seconds and retrying...')
                    time.sleep(3)
                    uz_retry += 1

            if not uz_success:
                raise Exception('Unzipping of file {} failed after {} attempts'.format(fitem.filename, retries))

        zip_ref.close()
        log.info('Done')

    except zipfile.BadZipFile:
        # If the zip file is bad then we have to remove it.
        log.error('BadZipFile. Cleaning up zip file and output folder')
        safe_remove_file(file_path)
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: BadZipFile')
    except Exception:
        log.error('Error unzipping. Cleaning up output folder')
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: file could not be unzipped')
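# Usage sketch for unzip() with hypothetical paths: pull every file out of an
# archive, re-extracting anything that is missing or truncated from an earlier
# interrupted run:
#
#     unzip('/tmp/nhd_data.zip', '/tmp/nhd_data', force_overwrite=False, retries=3)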
def verify_areas(raster_path, boundary_shp):
    """Compute how much of a boundary polygon is covered by the valid cells of a raster.

    Arguments:
        raster_path {str} -- path to a raster
        boundary_shp {str} -- path to a boundary shapefile

    Raises:
        Exception: if the raster area is zero
        Exception: if the shapefile area is zero

    Returns:
        float -- ratio of raster area over shapefile area
    """
    log = Logger('Verify Areas')

    log.info('Verifying raster and shape areas')

    # This comes back in the raster's units
    raster_area = 0
    with rasterio.open(raster_path) as ds:
        cell_count = 0
        gt = ds.get_transform()
        cell_area = math.fabs(gt[1]) * math.fabs(gt[5])

        # Incrementally add the area of each block to the count
        progbar = ProgressBar(len(list(ds.block_windows(1))), 50, "Calculating Area")
        progcount = 0
        for _ji, window in ds.block_windows(1):
            r = ds.read(1, window=window, masked=True)
            progbar.update(progcount)
            cell_count += r.count()
            progcount += 1

        progbar.finish()
        # Multiply the count by the area of a given cell
        raster_area = cell_area * cell_count
        log.debug('raster area {}'.format(raster_area))

    if raster_area == 0:
        raise Exception('Raster has zero area: {}'.format(raster_path))

    # We could just use Rasterio's CRS object but it doesn't seem to play nice with GDAL so....
    raster_ds = gdal.Open(raster_path)
    raster_srs = osr.SpatialReference(wkt=raster_ds.GetProjection())

    # Load and transform ownership polygons by administration agency
    driver = ogr.GetDriverByName("ESRI Shapefile")
    data_source = driver.Open(boundary_shp, 0)
    layer = data_source.GetLayer()
    in_spatial_ref = layer.GetSpatialRef()

    # https://github.com/OSGeo/gdal/issues/1546
    raster_srs.SetAxisMappingStrategy(in_spatial_ref.GetAxisMappingStrategy())
    transform = osr.CoordinateTransformation(in_spatial_ref, raster_srs)

    shape_area = 0
    for polygon in layer:
        geom = polygon.GetGeometryRef()
        geom.Transform(transform)
        shape_area = shape_area + geom.GetArea()

    log.debug('shape file area {}'.format(shape_area))
    if shape_area == 0:
        raise Exception('Shapefile has zero area: {}'.format(boundary_shp))

    area_ratio = raster_area / shape_area

    if 0.9 < area_ratio < 0.99:
        log.warning('Raster area covers only {0:.2f}% of the shapefile'.format(area_ratio * 100))
    elif area_ratio <= 0.9:
        log.error('Raster area covers only {0:.2f}% of the shapefile'.format(area_ratio * 100))
    else:
        log.info('Raster area covers {0:.2f}% of the shapefile'.format(area_ratio * 100))

    return area_ratio
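# Usage sketch with hypothetical inputs. verify_areas() does its own warn/error
# logging, so callers typically just inspect the returned ratio:
#
#     ratio = verify_areas('/data/dem.tif', '/data/huc_boundary.shp')
#     if ratio <= 0.9:
#         raise Exception('DEM does not adequately cover the watershed boundary')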
def process_modis(out_sqlite, modis_folder, nhd_folder, verbose, debug_flag):
    """Generate a land surface temperature sqlite db from NHD+ and MODIS data
    """
    log = Logger("Process LST")

    if os.path.isfile(out_sqlite):
        os.remove(out_sqlite)

    # Create sqlite database
    conn = sqlite3.connect(out_sqlite)
    cursor = conn.cursor()

    # Test whether the table already exists
    cursor.execute("""SELECT COUNT(name) FROM sqlite_master WHERE type='table' AND name='MODIS_LST' """)
    log.info('Creating DB')

    if cursor.fetchone()[0] == 0:
        cursor.execute("""CREATE TABLE MODIS_LST (
            NHDPlusID INTEGER NOT NULL,
            MODIS_Scene DATETIME NOT NULL,
            LST REAL,
            PRIMARY KEY (NHDPlusID, MODIS_Scene)) WITHOUT ROWID;""")
    conn.commit()

    # Populate the list of MODIS files
    modis_files = glob.glob(os.path.join(modis_folder, "*.tif"))

    # Load NHD Layers
    log.info(f"Processing NHD Data: {nhd_folder}")
    in_driver = ogr.GetDriverByName("OpenFileGDB")
    in_datasource = in_driver.Open(nhd_folder, 0)
    layer_hucs = in_datasource.GetLayer(r"WBDHU8_reproject")

    # Process each HUC
    huc_counter = 0
    total_hucs = layer_hucs.GetFeatureCount()
    for huc in layer_hucs:
        huc_counter += 1
        huc_id = huc.GetField(r"HUC8")
        log.info('Processing huc:{} ({}/{})'.format(huc_id, huc_counter, total_hucs))
        log.info(f"HUC: {huc_id}")
        huc_geom = huc.GetGeometryRef()
        layer_catchments = in_datasource.GetLayer(r"NHDPlusCatchment_reproject")
        # layer_catchments.SetSpatialFilter(huc_geom)  # catchments are not perfectly aligned with hucs
        layer_catchments.SetAttributeFilter(f"""HUC8 = {huc_id}""")

        huc_bounds = huc_geom.GetEnvelope()
        bbox = box(huc_bounds[0], huc_bounds[2], huc_bounds[1], huc_bounds[3])

        # Open a single MODIS raster and load its projection and transform based on the current huc
        with rasterio.open(f"{modis_files[0]}") as dataset:
            data, modis_transform = mask(dataset, [bbox], all_touched=True, crop=True)
            # Assuming there is only one band we can drop the first dimension and get (36,78) instead of (1,36,78)
            modis_shape = data.shape[1:]

        # Read all MODIS scenes into a single array
        modis_array_raw = np.ma.array([load_cropped_raster(image, bbox) for image in modis_files])
        # Make sure we mask out the invalid data
        modis_array_sds = np.ma.masked_where(modis_array_raw == 0, modis_array_raw)
        modis_array_K = modis_array_sds * 0.02
        modis_array_C = modis_array_K - 273.15  # K to C

        # Generate list of MODIS scene dates
        modis_dates = np.array([os.path.basename(image).lstrip("A").rstrip(".tif") for image in modis_files])

        # Calculate the average LST per catchment
        progbar = ProgressBar(layer_catchments.GetFeatureCount(), 50, 'Processing HUC: {}'.format(huc_id))
        reach_counter = 0
        progbar.update(reach_counter)
        # loop_timer = LoopTimer("LoopTime", useMs=True)
        for reach in layer_catchments:
            reach_counter += 1
            progbar.update(reach_counter)
            # If the debug flag is set then drop a CSV for every 5000 reaches
            debug_drop = debug_flag is True and reach_counter % 5000 == 1
            # For debugging performance
            # loop_timer.tick()
            # loop_timer.progprint()
            nhd_id = int(reach.GetField("NHDPlusID"))

            # Load the catchment polygon and transform to the raster SRS
            reach_geom = reach.GetGeometryRef()
            catch_poly = loads(reach_geom.ExportToWkb())

            # Catchment polygons are vectorized rasters and they can have invalid geometries
            if not catch_poly.is_valid:
                log.warning('Invalid catchment polygon detected. Trying the buffer technique: {}'.format(nhd_id))
                catch_poly = catch_poly.buffer(0)

            # Generate a mask raster of catchment pixels
            reach_raster = np.ma.masked_invalid(
                rasterio.features.rasterize([catch_poly],
                                            out_shape=modis_shape,
                                            transform=modis_transform,
                                            all_touched=True,
                                            fill=np.nan))
            # Now assign ascending integers to each cell. This is so rasterio.features.shapes gives us a unique shape for every cell
            reach_raster_idx = np.ma.masked_array(
                np.arange(modis_shape[0] * modis_shape[1], dtype=np.int32).reshape(modis_shape),  # pylint: disable=E1101
                reach_raster.mask)

            # Generate a unique shape for each valid pixel
            geoms = [{
                'properties': {'name': 'modis_pixel', 'raster_val': int(v), 'valid': v > 0},
                'geometry': geom
            } for _i, (geom, v) in enumerate(rasterio.features.shapes(reach_raster_idx, transform=modis_transform)) if test_pixel_geom(geom)]

            # Now create our weights array. Start with weights of 0 so we can rule out any weird points
            weights_raster_arr = np.ma.masked_array(
                np.full(modis_shape, 0, dtype=np.float32),  # pylint: disable=E1101
                reach_raster.mask,
            )
            for geom in geoms:
                pxl = shape(geom['geometry'])
                poly_intersect = pxl.intersection(catch_poly)
                idx, idy = find_indeces(geom['properties']['raster_val'], modis_shape)
                weight = poly_intersect.area / catch_poly.area

                # For debugging
                if debug_drop:
                    geom['type'] = "Feature"
                    geom['properties']['weight'] = weight
                    geom['properties']['raster_coords'] = [idx, idy]
                    geom['properties']['world_coords'] = [pxl.centroid.coords[0][0], pxl.centroid.coords[0][1]]

                weights_raster_arr[idx][idy] = weight

            # Calculate the weighted average MODIS LST per scene
            ave = np.ma.average(modis_array_C, axis=(1, 2), weights=np.broadcast_to(weights_raster_arr, modis_array_C.shape))

            # Just some useful debugging stuff
            if debug_drop:
                progbar.erase()
                file_prefix = '{}-{}-debug'.format(huc_id, nhd_id)
                log.debug('Dropping files: {}'.format(file_prefix))
                # PrintArr(reach_raster_idx)

                # Dump some useful shapes to a geojson object
                _debug_shape = DebugGeoJSON(os.path.join(os.path.dirname(out_sqlite), '{}.geojson'.format(file_prefix)))
                _debug_shape.add_shapely(bbox, {"name": "bbox"})
                _debug_shape.add_shapely(catch_poly, {"name": "catch_poly"})
                [_debug_shape.add_geojson(gj) for gj in geoms]
                _debug_shape.write()

                # Now dump a CSV report for fun
                csv_path = os.path.join(os.path.dirname(out_sqlite), '{}.csv'.format(file_prefix))
                with open(csv_path, 'w') as csv_file:
                    csvw = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                    csvw.writerow(['HUC', 'NHDPlusID', 'Area'])
                    csvw.writerow([huc_id, nhd_id, catch_poly.area])
                    csvw.writerow([])

                    # Summary of intersected pixels
                    debug_weights = [(geom['properties']['weight'], geom['properties']['raster_coords']) for geom in geoms]

                    # Dump the weights and cell values so we can use excel to verify them manually,
                    # then write the per-date averages below
                    csvw.writerow(['Intersecting Cells:'] + [' ' for g in geoms])
                    for key, name in {'raster_val': 'cell_id', 'raster_coords': '[row,col]', 'world_coords': '[x,y]', 'weight': 'weight'}.items():
                        csvw.writerow([name] + [g['properties'][key] for g in geoms])

                    csvw.writerow([])
                    csvw.writerow(['Date'] + [' ' for g in geoms] + ['np.ma.average'])
                    for didx, ave_val in enumerate(ave):
                        csvw.writerow([modis_dates[didx]] + [modis_array_sds[didx][w[1][0]][w[1][1]] for w in debug_weights] + [ave_val])

            # Insert the LST values into sqlite
            cursor.executemany("""INSERT INTO MODIS_LST VALUES(?,?,?)""", [
                (nhd_id, datetime.datetime.strptime(modis_date, "%Y%j").date(), float(v) if float(v) != 0 else None)
                for (modis_date, v) in zip(modis_dates, ave.data)
            ])

            # Write data to sqlite after each reach
            conn.commit()

    # Close the database connection
    conn.close()
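# Once process_modis() has run, the MODIS_LST table keys LST values (degrees C,
# NULL where the scene was masked) by NHDPlusID and scene date. A small sketch of
# pulling the time series for one catchment; the NHDPlusID shown is hypothetical:
def example_lst_query(out_sqlite, nhd_id=55000100012345):
    """Print the land surface temperature time series for a single catchment."""
    conn = sqlite3.connect(out_sqlite)
    curs = conn.cursor()
    curs.execute(
        'SELECT MODIS_Scene, LST FROM MODIS_LST WHERE NHDPlusID = ? ORDER BY MODIS_Scene',
        (nhd_id,))
    for scene_date, lst_c in curs.fetchall():
        print(scene_date, lst_c)
    conn.close()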
def get_geometry_unary_union_from_wkt(inpath, to_sr_wkt):
    """
    Load all features from a ShapeFile and union them together into a single geometry
    :param inpath: Path to a ShapeFile
    :param to_sr_wkt: Desired output spatial reference (WKT)
    :return: Single Shapely geometry of all unioned features
    """
    log = Logger('Unary Union')

    driver = ogr.GetDriverByName("ESRI Shapefile")
    data_source = driver.Open(inpath, 0)
    layer = data_source.GetLayer()
    in_spatial_ref = layer.GetSpatialRef()

    _out_spatial_ref, transform = get_transform_from_wkt(in_spatial_ref, to_sr_wkt)

    fcount = layer.GetFeatureCount()
    progbar = ProgressBar(fcount, 50, "Unary Unioning features")
    counter = 0

    def unionize(wkb_lst):
        return unary_union([wkbload(g) for g in wkb_lst]).wkb

    geom_list = []
    for feature in layer:
        counter += 1
        progbar.update(counter)
        new_geom = feature.GetGeometryRef()
        geo_type = new_geom.GetGeometryType()

        # We can't union non-valid shapes but sometimes a buffer by 0 can help
        if not new_geom.IsValid():
            progbar.erase()  # get around the progressbar
            log.warning('Invalid shape with FID={} trying the Buffer0 technique...'.format(feature.GetFID()))
            try:
                new_geom = new_geom.Buffer(0)
                if not new_geom.IsValid():
                    progbar.erase()  # get around the progressbar
                    log.warning('   Still invalid. Skipping this geometry')
                    continue
            except Exception:
                progbar.erase()  # get around the progressbar
                log.warning('Exception raised during buffer 0 technique. Skipping this geometry')
                continue

        if new_geom is None:
            progbar.erase()  # get around the progressbar
            log.warning('Feature with FID={} has no geometry. Skipping'.format(feature.GetFID()))
        # Filter out zero-length lines
        elif geo_type in LINE_TYPES and new_geom.Length() == 0:
            progbar.erase()  # get around the progressbar
            log.warning('Zero Length for shape with FID={}'.format(feature.GetFID()))
        # Filter out zero-area polys
        elif geo_type in POLY_TYPES and new_geom.Area() == 0:
            progbar.erase()  # get around the progressbar
            log.warning('Zero Area for shape with FID={}'.format(feature.GetFID()))
        else:
            new_geom.Transform(transform)
            geom_list.append(new_geom.ExportToWkb())

            # If we get past a certain size then run the union
            if len(geom_list) >= 500:
                geom_list = [unionize(geom_list)]
        new_geom = None

    log.debug('finished iterating with list of size: {}'.format(len(geom_list)))
    progbar.finish()

    if len(geom_list) > 1:
        log.debug('Starting final union of geom_list of size: {}'.format(len(geom_list)))
        # Do a final union to clean up anything that might still be in the list
        geom_union = wkbload(unionize(geom_list))
    elif len(geom_list) == 0:
        log.warning('No geometry found to union')
        return None
    else:
        log.debug('FINAL Unioning geom_list of size {}'.format(len(geom_list)))
        geom_union = wkbload(geom_list[0])
        log.debug('  done')

    print_geom_size(log, geom_union)
    log.debug('Complete')
    data_source = None
    return geom_union
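# Usage sketch: union all features of a (hypothetical) ownership shapefile into
# the spatial reference of an existing raster:
#
#     raster_ds = gdal.Open('/data/dem.tif')
#     ownership = get_geometry_unary_union_from_wkt('/data/ownership.shp', raster_ds.GetProjection())
#     if ownership is not None:
#         print('Unioned area in output units: {}'.format(ownership.area))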
def download_file(s3_url, download_folder, force_download=False):
    """
    Download a file given a HTTPS URL that points to a file on S3
    :param s3_url: HTTPS URL for a file on S3
    :param download_folder: Folder where the file will be downloaded.
    :param force_download: Re-download the file even if it already exists locally.
    :return: Local file path where the file was downloaded
    """
    log = Logger('Download')

    safe_makedirs(download_folder)

    # Retrieve the S3 bucket and path from the HTTPS URL
    result = re.match(r'https://([^.]+)[^/]+/(.*)', s3_url)

    file_path = os.path.join(download_folder, os.path.basename(result.group(2)))
    file_path_pending = os.path.join(download_folder, os.path.basename(result.group(2)) + '.pending')

    # If the file already exists and we are forcing a download then remove it first
    if os.path.isfile(file_path) and force_download:
        safe_remove_file(file_path)

    # If there is a pending path and the pending path is fairly new
    # then wait for it.
    while pending_check(file_path_pending, PENDING_TIMEOUT):
        log.debug('Waiting for .pending file. Another process is working on this.')
        time.sleep(30)
    log.info('Waiting done. Proceeding.')

    # Skip the download if the file exists
    if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
        log.info('Skipping download because file exists.')
    else:
        _file, tmpfilepath = tempfile.mkstemp(suffix=".temp", prefix="rstools_download")

        # Write our pending file. No matter what we must clean this file up!!!
        def refresh_pending():
            with open(file_path_pending, 'w') as f:
                f.write(str(datetime.datetime.now()))

        # Cleaning up the common areas is really important
        def download_cleanup():
            os.close(_file)
            safe_remove_file(tmpfilepath)
            safe_remove_file(file_path_pending)

        refresh_pending()

        pending_timer = Timer()
        log.info('Downloading {}'.format(s3_url))

        # Actual file download. We reuse the same temporary file across retries.
        for download_retries in range(MAX_ATTEMPTS):
            if download_retries > 0:
                log.warning('Download file retry: {}'.format(download_retries))
            try:
                dl = 0
                with requests.get(s3_url, stream=True) as r:
                    r.raise_for_status()
                    byte_total = int(r.headers.get('content-length'))
                    progbar = ProgressBar(byte_total, 50, s3_url, byteFormat=True)

                    # Binary write to file
                    with open(tmpfilepath, 'wb') as tempf:
                        for chunk in r.iter_content(chunk_size=8192):
                            # Periodically refresh our .pending file
                            # so other processes will be aware we are still working on it.
                            if pending_timer.ellapsed() > 10:
                                refresh_pending()
                            if chunk:  # filter out keep-alive new chunks
                                dl += len(chunk)
                                tempf.write(chunk)
                                progbar.update(dl)

                # The 'with' block above has closed the temporary file.
                # It will be removed later in download_cleanup()
                if not os.path.isfile(tmpfilepath):
                    raise Exception('Error writing to temporary file: {}'.format(tmpfilepath))

                progbar.finish()
                break
            except Exception as e:
                log.debug('Error downloading file from s3 {}: \n{}'.format(s3_url, str(e)))
                # If this is our last chance then the function must fail [0,1,2]
                if download_retries == MAX_ATTEMPTS - 1:
                    download_cleanup()  # Always clean up
                    raise e

        # Now copy the temporary file (retry 3 times)
        for copy_retries in range(MAX_ATTEMPTS):
            if copy_retries > 0:
                log.warning('Copy file retry: {}'.format(copy_retries))
            try:
                shutil.copy(tmpfilepath, file_path)

                # Make sure to clean up so the next process doesn't encounter a broken file
                if not file_compare(file_path, tmpfilepath):
                    raise Exception('Error copying temporary download to final path')
                break

            except Exception as e:
                log.debug('Error copying file from temporary location {}: \n{}'.format(tmpfilepath, str(e)))
                # If this is our last chance then the function must fail [0,1,2]
                if copy_retries == MAX_ATTEMPTS - 1:
                    download_cleanup()  # Always clean up
                    raise e

        download_cleanup()  # Always clean up

    return file_path
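# Usage sketch (hypothetical URL and folder). The .pending sentinel file means
# that concurrent processes asking for the same file wait for each other rather
# than downloading it twice:
#
#     local = download_file('https://my-bucket.s3.amazonaws.com/nhd/NHDPLUS_H_1701_HU4_GDB.zip', '/tmp/downloads')
#     unzip(local, '/tmp/downloads/NHDPLUS_H_1701_HU4_GDB')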
def main():
    """ Main BRAT Build routine
    """
    parser = argparse.ArgumentParser(
        description='Build the inputs for an eventual brat_run:',
        # epilog="This is an epilog"
    )
    parser.add_argument('huc', help='huc input', type=str)
    parser.add_argument('dem', help='dem input', type=str)
    parser.add_argument('slope', help='slope input', type=str)
    parser.add_argument('hillshade', help='hillshade input', type=str)
    parser.add_argument('flowlines', help='flowlines input', type=str)
    parser.add_argument('existing_veg', help='existing_veg input', type=str)
    parser.add_argument('historical_veg', help='historical_veg input', type=str)
    parser.add_argument('valley_bottom', help='Valley bottom shapeFile', type=str)
    parser.add_argument('roads', help='Roads shapeFile', type=str)
    parser.add_argument('rail', help='Railways shapefile', type=str)
    parser.add_argument('canals', help='Canals shapefile', type=str)
    parser.add_argument('ownership', help='Ownership shapefile', type=str)
    parser.add_argument('streamside_buffer', help='streamside_buffer input', type=float)
    parser.add_argument('riparian_buffer', help='riparian_buffer input', type=float)
    parser.add_argument('elevation_buffer', help='elevation_buffer input', type=float)
    parser.add_argument('output_folder', help='output_folder input', type=str)
    parser.add_argument('--reach_codes', help='Comma delimited reach codes (FCode) to retain when filtering features. Omitting this option retains all features.', type=str)
    parser.add_argument('--canal_codes', help='Comma delimited reach codes (FCode) representing canals. Omitting this option retains all features.', type=str)
    parser.add_argument('--peren_codes', help='Comma delimited reach codes (FCode) representing perennial features', type=str)
    parser.add_argument('--flow_areas', help='(optional) path to the flow area polygon feature class containing artificial paths', type=str)
    parser.add_argument('--waterbodies', help='(optional) waterbodies input', type=str)
    parser.add_argument('--max_waterbody', help='(optional) maximum size of small waterbody artificial flows to be retained', type=float)
    parser.add_argument('--meta', help='riverscapes project metadata as comma separated key=value pairs', type=str)
    parser.add_argument('--verbose', help='(optional) a little extra logging ', action='store_true', default=False)
    parser.add_argument('--debug', help='(optional) more output about things like memory usage. There is a performance cost', action='store_true', default=False)

    # Substitute patterns for environment variables
    args = dotenv.parse_args_env(parser)

    reach_codes = args.reach_codes.split(',') if args.reach_codes else None
    canal_codes = args.canal_codes.split(',') if args.canal_codes else None
    peren_codes = args.peren_codes.split(',') if args.peren_codes else None

    # Initiate the log file
    log = Logger("BRAT Build")
    log.setup(logPath=os.path.join(args.output_folder, "brat_build.log"), verbose=args.verbose)
    log.title('BRAT Build Tool For HUC: {}'.format(args.huc))

    meta = parse_metadata(args.meta)
    try:
        if args.debug is True:
            from rscommons.debug import ThreadRun
            memfile = os.path.join(args.output_folder, 'brat_build_memusage.log')
            retcode, max_obj = ThreadRun(brat_build, memfile,
                                         args.huc, args.flowlines, args.dem, args.slope, args.hillshade,
                                         args.existing_veg, args.historical_veg, args.output_folder,
                                         args.streamside_buffer, args.riparian_buffer,
                                         reach_codes, canal_codes, peren_codes,
                                         args.flow_areas, args.waterbodies, args.max_waterbody,
                                         args.valley_bottom, args.roads, args.rail, args.canals, args.ownership,
                                         args.elevation_buffer,
                                         meta)
            log.debug('Return code: {}, [Max process usage] {}'.format(retcode, max_obj))
        else:
            brat_build(
                args.huc, args.flowlines, args.dem, args.slope, args.hillshade,
                args.existing_veg, args.historical_veg, args.output_folder,
                args.streamside_buffer, args.riparian_buffer,
                reach_codes, canal_codes, peren_codes,
                args.flow_areas, args.waterbodies, args.max_waterbody,
                args.valley_bottom, args.roads, args.rail, args.canals, args.ownership,
                args.elevation_buffer,
                meta
            )

    except Exception as ex:
        log.error(ex)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)

    sys.exit(0)
def main():
    parser = argparse.ArgumentParser(
        description='Riverscapes VBET Tool',
        # epilog="This is an epilog"
    )
    parser.add_argument('huc', help='HUC identifier', type=str)
    parser.add_argument('flowlines', help='NHD flow line ShapeFile path', type=str)
    parser.add_argument('flowareas', help='NHD flow areas ShapeFile path', type=str)
    parser.add_argument('slope', help='Slope raster path', type=str)
    parser.add_argument('dem', help='DEM raster path', type=str)
    parser.add_argument('hillshade', help='Hillshade raster path', type=str)
    parser.add_argument('output_dir', help='Folder where output VBET project will be created', type=str)
    parser.add_argument('--reach_codes', help='Comma delimited reach codes (FCode) to retain when filtering features. Omitting this option retains all features.', type=str)
    parser.add_argument('--max_slope', help='Maximum slope to be considered', type=float, default=12)
    parser.add_argument('--max_hand', help='Maximum HAND to be considered', type=float, default=50)
    parser.add_argument('--min_hole_area', help='Minimum hole retained in valley bottom (sq m)', type=float, default=50000)
    parser.add_argument('--meta', help='riverscapes project metadata as comma separated key=value pairs', type=str)
    parser.add_argument('--verbose', help='(optional) a little extra logging ', action='store_true', default=False)
    parser.add_argument('--debug', help='Add debug tools for tracing things like memory usage at a performance cost.', action='store_true', default=False)

    args = dotenv.parse_args_env(parser)

    # make sure the output folder exists
    safe_makedirs(args.output_dir)

    # Initiate the log file
    log = Logger('VBET')
    log.setup(logPath=os.path.join(args.output_dir, 'vbet.log'), verbose=args.verbose)
    log.title('Riverscapes VBET For HUC: {}'.format(args.huc))

    meta = parse_metadata(args.meta)

    json_transform = json.dumps({"Slope": 1, "HAND": 2, "Channel": 3, "Flow Areas": 4})
    reach_codes = args.reach_codes.split(',') if args.reach_codes else None

    try:
        if args.debug is True:
            from rscommons.debug import ThreadRun
            memfile = os.path.join(args.output_dir, 'vbet_mem.log')
            retcode, max_obj = ThreadRun(vbet, memfile,
                                         args.huc, args.flowlines, args.flowareas, args.slope, json_transform,
                                         args.dem, args.hillshade, args.max_hand, args.min_hole_area,
                                         args.output_dir, reach_codes, meta)
            log.debug('Return code: {}, [Max process usage] {}'.format(retcode, max_obj))
        else:
            vbet(args.huc, args.flowlines, args.flowareas, args.slope, json_transform,
                 args.dem, args.hillshade, args.max_hand, args.min_hole_area,
                 args.output_dir, reach_codes, meta)

    except Exception as e:
        log.error(e)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)

    sys.exit(0)
def main():
    parser = argparse.ArgumentParser(description='Confinement Tool')
    parser.add_argument('huc', help='HUC identifier', type=str)
    parser.add_argument('flowlines', help="NHD Flowlines (.shp, .gpkg/layer_name)", type=str)
    parser.add_argument('confining_polygon', help='valley bottom or other polygon representing confining boundary (.shp, .gpkg/layer_name)', type=str)
    parser.add_argument('output_folder', help='Output folder', type=str)
    # nargs='?' makes this positional genuinely optional, matching its help text and default
    parser.add_argument('buffer_field', help='(optional) float field in flowlines with buffer values', default=None, nargs='?')
    parser.add_argument('confinement_type', help='type of confinement', default="Unspecified")
    parser.add_argument('--reach_codes', help='Comma delimited reach codes (FCode) to retain when filtering features. Omitting this option retains all features.', type=str)
    parser.add_argument('--meta', help='riverscapes project metadata as comma separated key=value pairs', type=str)
    parser.add_argument('--verbose', help='(optional) a little extra logging ', action='store_true', default=False)
    parser.add_argument('--debug', help="(optional) save intermediate outputs for debugging", action='store_true', default=False)

    args = dotenv.parse_args_env(parser)

    # Initiate the log file
    log = Logger("Confinement")
    log.setup(logPath=os.path.join(args.output_folder, "confinement.log"), verbose=args.verbose)
    log.title('Confinement For HUC: {}'.format(args.huc))

    meta = parse_metadata(args.meta)
    reach_codes = args.reach_codes.split(',') if args.reach_codes else None

    try:
        if args.debug is True:
            from rscommons.debug import ThreadRun
            memfile = os.path.join(args.output_folder, 'confinement_mem.log')
            retcode, max_obj = ThreadRun(confinement, memfile,
                                         args.huc, args.flowlines, args.confining_polygon, args.output_folder,
                                         args.buffer_field, args.confinement_type, reach_codes,
                                         min_buffer=10.0, bankfull_expansion_factor=2.5,
                                         debug=args.debug, meta=meta)
            log.debug('Return code: {}, [Max process usage] {}'.format(retcode, max_obj))
        else:
            confinement(args.huc, args.flowlines, args.confining_polygon, args.output_folder,
                        args.buffer_field, args.confinement_type, reach_codes,
                        min_buffer=10.0, bankfull_expansion_factor=2.5,
                        debug=args.debug, meta=meta)

    except Exception as e:
        log.error(e)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)

    sys.exit(0)
class RSReport():

    def __init__(self, rs_project, filepath):
        self.log = Logger('Report')
        self.template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates')
        self.log.info('Creating report at {}'.format(filepath))

        self.xml_project = rs_project
        self.filepath = filepath
        self.css_files = []
        self.footer = ''

        if os.path.isfile(self.filepath):
            os.remove(self.filepath)

        self.toc = []

        # Add in our common CSS. This can be extended
        self.add_css(os.path.join(self.template_path, 'report.css'))

        self.main_el = ET.Element('main', attrib={'id': 'ReportInner'})

    def write(self):
        css_template = "<style>\n{}\n</style>"
        html_inner = ET.tostring(self.main_el, method="html", encoding='unicode')
        styles = ''.join([css_template.format(css) for css in self.css_files])

        toc = ''
        if len(self.toc) > 0:
            toc = ET.tostring(self._table_of_contents(), method="html", encoding='unicode')

        # Get my HTML template and render it
        with open(os.path.join(self.template_path, 'template.html')) as t:
            template = Template(t.read())

        now = datetime.datetime.now()
        final_render = HTMLBeautifier.beautify(template.render(report={
            'title': self.xml_project.XMLBuilder.find('Name').text,
            'ProjectType': self.xml_project.XMLBuilder.find('ProjectType').text,
            'MetaData': self.xml_project.get_metadata_dict(),
            'date': now.strftime('%B %d, %Y - %I:%M%p'),
            'Warehouse': self.xml_project.get_metadata_dict(tag='Warehouse'),
            'head': styles,
            'toc': toc,
            'body': html_inner,
            'footer': self.footer
        }))
        with open(self.filepath, "w", encoding="utf-8") as f:
            f.write(final_render)

        self.log.debug('Report Writing Completed')

    def add_css(self, filepath):
        with open(filepath) as css_file:
            css = css_file.read()
        beautiful = CSSBeautifier.beautify(css)
        self.css_files.append(beautiful)

    def section(self, sectionid, title, el_parent=None, level=1, attrib=None):
        if attrib is None:
            attrib = {}
        the_id = sectionid if sectionid is not None else str(uuid4())

        if 'class' in attrib:
            attrib['class'] = 'report-section {}'.format(attrib['class'])
        else:
            attrib['class'] = 'report-section'

        section = ET.Element('section', attrib={'id': the_id, **attrib})
        section_inner = ET.Element('div', attrib={'class': 'section-inner'})

        hlevel = level + 1
        if title:
            h_el = RSReport.header(hlevel, title, section)
            a_el = ET.Element('a', attrib={'class': 'nav-top', 'href': '#TOC'})
            a_el.text = 'Top'
            h_el.append(a_el)

        section.append(section_inner)
        self.toc.append({
            'level': level,
            'title': title,
            'sectionid': the_id
        })

        real_parent = self.main_el if el_parent is None else el_parent
        real_parent.append(section)

        return section_inner

    def _table_of_contents(self):
        """Build the table of contents. In general this should only be called
        automatically during the report write process.

        Returns:
            ET.Element: a <nav> element wrapping the nested TOC lists
        """
        wrapper = ET.Element('nav', attrib={'id': 'TOC'})
        RSReport.header(3, 'Table of Contents', wrapper)

        def get_ol(level):
            return ET.Element('ol', attrib={'class': 'level-{}'.format(level)})

        parents = [get_ol(1)]
        wrapper.append(parents[-1])

        for item in self.toc:
            # Nothing without a title gets put into the TOC
            if item['title'] is None:
                continue

            if item['level'] > len(parents):
                for _lidx in range(item['level'] - len(parents)):
                    new_ul = get_ol(item['level'])
                    parents[-1].append(new_ul)
                    parents.append(new_ul)
            elif item['level'] < len(parents):
                for _lidx in range(len(parents) - item['level']):
                    parents.pop()

            # Now create the actual LI
            li_el = ET.Element('li')
            anchor = ET.Element('a', attrib={'href': '#{}'.format(item['sectionid'])})
            anchor.text = item['title']
            li_el.append(anchor)
            parents[-1].append(li_el)

        return wrapper

    @staticmethod
    def html_head(report_title, el_parent):
        head = ET.Element('head')
        title = ET.Element('title')
        title.text = report_title
        head.append(title)
        el_parent.append(head)
        return head

    @staticmethod
    def create_table_from_sql(col_names, sql, database, el_parent, attrib=None, id_cols=None):
        if attrib is None:
            attrib = {}
        table = ET.Element('table', attrib=attrib)

        thead = ET.Element('thead')
        theadrow = ET.Element('tr')
        thead.append(theadrow)
        table.append(thead)

        for col in col_names:
            th = ET.Element('th')
            th.text = col
            theadrow.append(th)

        conn = sqlite3.connect(database)
        conn.row_factory = RSReport._dict_factory
        curs = conn.cursor()
        curs.execute(sql)

        tbody = ET.Element('tbody')
        table.append(tbody)

        for row in curs.fetchall():
            tr = ET.Element('tr')
            tbody.append(tr)

            for col, val in row.items():
                val, class_name = RSReport.format_value(val) if id_cols and col not in id_cols else [str(val), 'idVal']
                td = ET.Element('td', attrib={'class': class_name})
                td.text = val
                tr.append(td)

        el_parent.append(table)

    @staticmethod
    def create_table_from_tuple_list(col_names, data, el_parent, attrib=None):
        if attrib is None:
            attrib = {}
        table = ET.Element('table', attrib=attrib)

        thead = ET.Element('thead')
        theadrow = ET.Element('tr')
        thead.append(theadrow)
        table.append(thead)

        for col in col_names:
            th = ET.Element('th')
            th.text = col
            theadrow.append(th)

        tbody = ET.Element('tbody')
        table.append(tbody)

        for row in data:
            tr = ET.Element('tr')
            tbody.append(tr)

            for col in row:
                val, class_name = RSReport.format_value(col)
                td = ET.Element('td', attrib={'class': class_name})
                td.text = val
                tr.append(td)

        el_parent.append(table)

    @staticmethod
    def create_table_from_dict(values, el_parent, attrib=None):
        """Build a two-column table: keys go in the first column, values in the second.

        Arguments:
            values {dict} -- key/value pairs to render
            el_parent {ET.Element} -- parent element the table is appended to
        """
        if attrib is None:
            attrib = {}
        if 'class' in attrib:
            attrib['class'] = 'dictable {}'.format(attrib['class'])
        else:
            attrib['class'] = 'dictable'

        table = ET.Element('table', attrib=attrib)

        tbody = ET.Element('tbody')
        table.append(tbody)

        for key, val in values.items():
            tr = ET.Element('tr')
            tbody.append(tr)

            th = ET.Element('th')
            th.text = key
            tr.append(th)

            val, class_name = RSReport.format_value(val)
            td = ET.Element('td', attrib={'class': class_name})
            td.text = val
            tr.append(td)

        el_parent.append(table)

    @staticmethod
    def format_value(value, val_type=None):
        """Format a value for display, returning the string and a CSS class name.

        Args:
            value: the raw value
            val_type (type, optional): type to try and force

        Returns:
            tuple: (formatted string, CSS class name)
        """
        formatted = ''
        class_name = ''

        try:
            if val_type == str or isinstance(value, str):
                formatted = value
                class_name = 'text'
            elif val_type == float or isinstance(value, float):
                formatted = '{0:,.2f}'.format(value)
                class_name = 'float num'
            elif val_type == int or isinstance(value, int):
                formatted = '{0:,d}'.format(value)
                class_name = 'int num'

        except Exception as e:
            print(e)
            return value, 'unknown'

        return formatted, class_name

    @staticmethod
    def create_ul(values, el_parent, attrib=None, ordered=False):
        if attrib is None:
            attrib = {}

        tagname = 'ul' if ordered is False else 'ol'
        outer = ET.Element(tagname, attrib=attrib)

        for _key, val in values.items():
            li = ET.Element('li')
            li.text = val
            # Append the <li> element itself (appending the raw value would fail,
            # since ET elements can only contain other elements)
            outer.append(li)

        el_parent.append(outer)

    @staticmethod
    def _dict_factory(cursor, row):
        d = {}
        for idx, col in enumerate(cursor.description):
            d[col[0]] = row[idx]
        return d

    @staticmethod
    def header(level, text, el_parent):
        hEl = ET.Element('h{}'.format(level), attrib={'class': 'report-header', 'id': str(uuid4())})
        hEl.text = text
        el_parent.append(hEl)
        return hEl

    def layerprint(self, lyr_el, parent_el, project_root, level: int = 2):
        """Work in progress for printing Riverscapes layers

        Args:
            lyr_el (ET.Element): the project XML node for the layer
            parent_el (ET.Element): parent report element the section is appended to
            project_root (str): path to the project root folder
        """
        tag = lyr_el.tag
        name = lyr_el.find('Name').text
        # For geopackages
        layers = lyr_el.find('Layers')

        section = self.section(None, '{}: {}'.format(tag, name), parent_el, level=level, attrib={'class': 'rsc-layer'})

        meta = self.xml_project.get_metadata_dict(node=lyr_el)
        if meta is not None:
            self.create_table_from_dict(meta, section, attrib={'class': 'fullwidth'})

        path_el = ET.Element('pre', attrib={'class': 'path'})
        pathstr = lyr_el.find('Path').text
        size = 0
        fpath = os.path.join(project_root, pathstr)
        if os.path.isfile(fpath):
            size = os.path.getsize(fpath)

        if layers is not None:
            layers_container = ET.Element('div', attrib={'class': 'inner-layer-container'})
            RSReport.header(level + 1, 'Layers', layers_container)

            for layer_el in list(layers):
                self.layerprint(layer_el, layers_container, os.path.join(project_root, pathstr), level=level + 1)
            section.append(layers_container)

        footer = ET.Element('div', attrib={'class': 'layer-footer'})
        path_el.text = 'Project path: {} ({})'.format(pathstr, sizeof_fmt(size))
        footer.append(path_el)
        section.append(footer)
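# Usage sketch for the base report class. RSReport is normally subclassed by each
# tool, and this assumes an rs_project object exposing the XMLBuilder and
# get_metadata_dict interface used above; paths and the SQL table are hypothetical:
#
#     report = RSReport(project, '/data/brat_project/report.html')
#     summary = report.section('Summary', 'Project Summary')
#     RSReport.create_table_from_dict({'HUC': '17010101', 'Model': 'BRAT'}, summary)
#     RSReport.create_table_from_sql(
#         ['Reach ID', 'Length (m)'],
#         'SELECT ReachID, iGeo_Len FROM ReachAttributes LIMIT 10',
#         '/data/brat_project/brat.gpkg', summary)
#     report.write()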