def test_rasterise_from_shp(self):
    """
    Read inland water data from shapefile and rasterise it.

    Writes test_water_output_raster.tif into DATA_DIRECTORY and asserts
    the file exists and is non-empty.  Any unexpected exception is logged
    and converted into a test failure.
    """
    self.logger_r.info(
        'Rasteriser with Inland Water MasterMap data from shapefile...')
    output_file = 'test_water_output_raster.tif'
    output_path = '{}/{}'.format(Config.get('DATA_DIRECTORY'), output_file)
    # Remove any output left over from a previous run
    if path.exists(output_path):
        remove(output_path)
    try:
        # Get MasterMap data, requesting classification codes 'General Surface', 'Natural Environment'
        gdf = GeoDataFrame.from_file(
            '{}/inland_water_e08000021.shp'.format(
                Config.get('DATA_DIRECTORY')))
        # Call rasteriser
        self.logger_r.info('Calling rasteriser...')
        Rasteriser(gdf.to_json(),
                   area_codes=['E08000021'],
                   output_filename=output_file,
                   area_threshold=50.0).create()
        self.logger_r.info('Written output to {}/{}'.format(
            Config.get('DATA_DIRECTORY'), output_file))
        self.assertTrue(path.exists(output_path))
        self.assertTrue(path.getsize(output_path) > 0)
        self.logger_r.info('Completed')
    except Exception:
        # Narrowed from a bare 'except:' so that KeyboardInterrupt/SystemExit
        # are not swallowed by the test
        self.logger_r.warning(traceback.format_exc())
        self.fail('Failing test due to unexpected exception')
def setUp(self):
    """ Configure root logging and create the rasteriser test logger """
    log_settings = {
        'level': Config.get('LOG_LEVEL'),
        'format': Config.get('LOG_FORMAT'),
        'datefmt': Config.get('LOG_DATE_FORMAT'),
        'filename': Config.get('LOG_FILE'),
        'filemode': 'w'
    }
    logging.basicConfig(**log_settings)
    self.logger_r = logging.getLogger('TestRasteriser')
def setUp(self):
    """Configure root logging and create the point-set test logger."""
    settings = dict(
        level=Config.get("LOG_LEVEL"),
        format=Config.get("LOG_FORMAT"),
        datefmt=Config.get("LOG_DATE_FORMAT"),
        filename=Config.get("LOG_FILE"),
        filemode="w",
    )
    logging.basicConfig(**settings)
    self.logger = logging.getLogger("TestPointSet")
def debug_dump_geojson_to_file(self, filename, json_data):
    """ Dump the given JSON data to a file for examination """
    target = '{}/{}'.format(Config.get('DATA_DIRECTORY'), filename)
    # Overwrite any previous dump with the same name
    if path.exists(target):
        remove(target)
    serialised = dumps(json_data)
    with open(target, 'w') as handle:
        handle.write(serialised)
def test_rasterise_from_nismod(self):
    """
    Get MasterMap data from NISMOD API as input GeoJSON data and rasterise it.

    Fetches features for four LADs from the NISMOD-DB API, writes
    test_output_raster.tif into DATA_DIRECTORY and asserts it exists and is
    non-empty.  Any unexpected exception is logged and fails the test.
    """
    self.logger_r.info('Rasteriser with API MasterMap data...')
    output_file = 'test_output_raster.tif'
    output_path = '{}/{}'.format(Config.get('DATA_DIRECTORY'), output_file)
    # Remove any output left over from a previous run
    if path.exists(output_path):
        remove(output_path)
    try:
        # Get MasterMap data, requesting classification codes 'General Surface', 'Natural Environment'
        api_parms = {
            'scale': 'lad',
            'area_codes': ['E07000004', 'E07000008', 'E07000009', 'E07000011'],
            'classification_codes': ['10056', '10111'],
            'export_format': 'geojson'
        }
        api_url = '{}/mastermap/areas'.format(
            Config.get('NISMOD_DB_API_URL'))
        auth_username = Config.get('NISMOD_DB_USERNAME')
        auth_password = Config.get('NISMOD_DB_PASSWORD')
        self.logger_r.info('Calling API to extract input GeoJSON data...')
        r = requests.get(api_url,
                         params=api_parms,
                         auth=(auth_username, auth_password))
        input_geojson = r.json()
        # Call rasteriser
        self.logger_r.info('Calling rasteriser...')
        Rasteriser(input_geojson,
                   area_codes=[
                       'E07000004', 'E07000008', 'E07000009', 'E07000011'
                   ],
                   output_filename=output_file).create()
        self.assertTrue(path.exists(output_path))
        self.assertTrue(path.getsize(output_path) > 0)
        self.logger_r.info('Written output to {}/{}'.format(
            Config.get('DATA_DIRECTORY'), output_file))
        self.logger_r.info('Completed')
    except Exception:
        # Narrowed from a bare 'except:' so that KeyboardInterrupt/SystemExit
        # are not swallowed by the test
        self.logger_r.warning(traceback.format_exc())
        self.fail('Failing test due to unexpected exception')
def test_rasterise_from_shp_and_fishnet_file(self):
    """
    Read inland water data from shapefile, using a fishnet generated previously.

    First generates an in-memory GeoJSON fishnet over a fixed bounding box,
    then rasterises the shapefile data against it and asserts the output
    GeoTIFF exists and is non-empty.
    """
    self.logger_r.info(
        'Rasteriser with Inland Water MasterMap data from shapefile, using pre-generated fishnet...'
    )
    try:
        self.logger_r.info(
            'Generate fishnet with GeoJSON string return...')
        fishnet_geojson = FishNet(outfile=None,
                                  outformat='GeoJSON',
                                  bbox=[414650, 563500, 429600,
                                        575875]).create()
        # FishNet.create() returns None on failure
        self.assertFalse(fishnet_geojson is None)
        output_file = 'test_water_output_raster_ex_fishnet.tif'
        output_path = '{}/{}'.format(Config.get('DATA_DIRECTORY'),
                                     output_file)
        # Remove any output left over from a previous run
        if path.exists(output_path):
            remove(output_path)
        # Get MasterMap data, requesting classification codes 'General Surface', 'Natural Environment'
        gdf = GeoDataFrame.from_file(
            '{}/inland_water_e08000021.shp'.format(
                Config.get('DATA_DIRECTORY')))
        # Call rasteriser
        self.logger_r.info('Calling rasteriser...')
        Rasteriser(gdf.to_json(),
                   fishnet=fishnet_geojson,
                   output_filename=output_file,
                   area_threshold=50.0).create()
        self.assertTrue(path.exists(output_path))
        self.assertTrue(path.getsize(output_path) > 0)
        self.logger_r.info('Written output to {}/{}'.format(
            Config.get('DATA_DIRECTORY'), output_file))
        self.logger_r.info('Completed')
    except Exception:
        # Narrowed from a bare 'except:' so that KeyboardInterrupt/SystemExit
        # are not swallowed by the test
        self.logger_r.warning(traceback.format_exc())
        self.fail('Failing test due to unexpected exception')
def test_fishnet_area_codes(self):
    """ Tests fishnet generation with a list of area codes """
    self.logger.info('Fishnet with list of area codes...')
    out_name = '{}.json'.format(uuid.uuid4().hex)
    out_full_path = '{}/{}'.format(Config.get('DATA_DIRECTORY'), out_name)
    net = FishNet(outfile=out_name, outformat='GeoJSON', lad=['E07000004'])
    net.create()
    # The output file should have been written and be non-empty
    self.assertTrue(path.exists(out_full_path))
    self.assertTrue(path.getsize(out_full_path) > 0)
    remove(out_full_path)
    self.logger.info('Completed')
def test_fishnet_shapefile(self):
    """ Tests fishnet generation with a shapefile output """
    self.logger.info('Fishnet with ESRI shapefile output...')
    out_name = '{}.shp'.format(uuid.uuid4().hex)
    out_full_path = '{}/{}'.format(Config.get('DATA_DIRECTORY'), out_name)
    net = FishNet(outfile=out_name,
                  outformat='ESRI Shapefile',
                  lad=['E07000004'])
    net.create()
    # The output file should have been written and be non-empty
    self.assertTrue(path.exists(out_full_path))
    self.assertTrue(path.getsize(out_full_path) > 0)
    remove(out_full_path)
    self.logger.info('Completed')
def test_fishnet_bbox(self):
    """ Tests fishnet generation with a bounding box """
    self.logger.info('Fishnet with bounding box...')
    out_name = '{}.json'.format(uuid.uuid4().hex)
    out_full_path = '{}/{}'.format(Config.get('DATA_DIRECTORY'), out_name)
    net = FishNet(outfile=out_name,
                  outformat='GeoJSON',
                  bbox=[414650, 563500, 429600, 575875])
    net.create()
    # The output file should have been written and be non-empty
    self.assertTrue(path.exists(out_full_path))
    self.assertTrue(path.getsize(out_full_path) > 0)
    remove(out_full_path)
    self.logger.info('Completed')
def test_pointset_extract(self):
    """ Generate a point set with points extracted from an existing dataset """
    sample_count = 500
    out_file = "extracted_points.gpkg"
    out_path = self.absolute_file_output_path(out_file)
    source_dataset = "{}/newcastle_lamp_posts.gpkg".format(
        Config.get("DATA_DIRECTORY"))
    extractor = ExtractGrid(sample_count, ["E08000021"], source_dataset)
    out_gdf = extractor.generate()
    # Exactly the requested number of points should come back
    self.assertTrue(len(out_gdf) == sample_count)
    self.write_geopackage(out_gdf, out_file)
    self.assertTrue(path.exists(out_path) and path.getsize(out_path) > 0)
def create(self):
    """
    | Generate the fishnet dataset, based on the code at
    | https://pcjericks.github.io/py-gdalogr-cookbook/vector_layers.html#create-fishnet-grid

    Bounds come from self.bbox, or (when self.lad is set) from the NISMOD-DB
    boundary API.  Cells of self.netsize are written via OGR in EPSG:27700.

    Returns:
        dict: parsed GeoJSON when self.outfile is None (output streamed to
        an in-memory /vsimem file), otherwise the output file path (str).
        None on any failure (the exception is logged, not re-raised).
    """
    gdal.UseExceptions()
    try:
        # Get bounding values
        aoi = self.bbox
        if self.lad:
            # Get the bounds from a (list of) Local Authority District boundary(s)/all
            self.logger.info('Get boundary from list of LAD codes...')
            kvp = {
                'lad_codes': ','.join(self.lad),
                'export_format': 'geojson',
                'year': 2016
            }
            api = '{}/{}/{}'.format(Config.get('NISMOD_DB_API_URL'),
                                    'boundaries', 'lads')
            auth_username = Config.get('NISMOD_DB_USERNAME')
            auth_password = Config.get('NISMOD_DB_PASSWORD')
            r = requests.get(api,
                             params=kvp,
                             auth=(auth_username, auth_password))
            # Note: should be able to simply read r.json() into a GeoDataFrame, however it throws a ValueError
            # 'Mixing dicts with non-Series may lead to ambiguous ordering' which makes very little sense to me!
            # So we do it a roundabout way via the recipe at
            # https://gis.stackexchange.com/questions/225586/reading-raw-data-into-geopandas
            # (The original 'try/except ValueError: raise' around this was a
            # no-op re-raise and has been removed - behaviour is unchanged.)
            self.logger.info('NISMOD API call completed')
            gdf = geopandas.read_file(BytesIO(r.content))
            aoi = gdf.total_bounds
        xmin, ymin, xmax, ymax = [float(value) for value in aoi]
        self.logger.info(
            'Fishnet bounds : xmin {}, ymin {}, xmax {}, ymax {}'.format(
                xmin, ymin, xmax, ymax))
        grid_width = grid_height = float(self.netsize)
        # Number of rows x columns (ceil so partial cells at the edge are kept)
        rows = ceil((ymax - ymin) / grid_height)
        cols = ceil((xmax - xmin) / grid_width)
        self.logger.info('Fishnet has {} rows and {} columns'.format(
            rows, cols))
        # Start grid cell envelope at the top-left corner
        ring_x_left_origin = xmin
        ring_x_right_origin = xmin + grid_width
        ring_y_top_origin = ymax
        ring_y_bottom_origin = ymax - grid_height
        out_driver = ogr.GetDriverByName(self.outformat)
        output_file = self.outfile
        if output_file is None:
            # Stream the data to memory
            output_file = '/vsimem/{}.geojson'.format(uuid.uuid4().hex)
        else:
            # Create output file
            if not path.isabs(output_file):
                # Relative path => so prepend data directory (does NOT handle making subdirectories here)
                data_dir = Config.get('DATA_DIRECTORY')
                self.logger.info(
                    'Relative path supplied, assume relative to data directory {}'
                    .format(data_dir))
                output_file = path.join(data_dir, output_file)
            else:
                # Absolute path => ensure all directories are present before writing
                try:
                    makedirs(path.dirname(output_file), exist_ok=True)
                except OSError:
                    self.logger.warning(
                        'Failed to create subdirectory for output file')
                    raise
            # Delete any pre-existing version of output file
            if path.exists(output_file):
                remove(output_file)
        out_data_source = out_driver.CreateDataSource(output_file)
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(27700)  # British National Grid
        out_layer = out_data_source.CreateLayer(output_file,
                                                srs=srs,
                                                geom_type=ogr.wkbPolygon)
        # Add a FID field
        id_field = ogr.FieldDefn('FID', ogr.OFTInteger)
        out_layer.CreateField(id_field)
        feature_defn = out_layer.GetLayerDefn()
        # Create grid cells: column by column, top to bottom within a column
        fid = 1
        countcols = 0
        while countcols < cols:
            countcols += 1
            # Reset envelope for rows
            ring_y_top = ring_y_top_origin
            ring_y_bottom = ring_y_bottom_origin
            countrows = 0
            while countrows < rows:
                countrows += 1
                ring = ogr.Geometry(ogr.wkbLinearRing)
                ring.AddPoint(ring_x_left_origin, ring_y_top)
                ring.AddPoint(ring_x_right_origin, ring_y_top)
                ring.AddPoint(ring_x_right_origin, ring_y_bottom)
                ring.AddPoint(ring_x_left_origin, ring_y_bottom)
                ring.AddPoint(ring_x_left_origin, ring_y_top)
                poly = ogr.Geometry(ogr.wkbPolygon)
                poly.AddGeometry(ring)
                # Add new geom to layer
                out_feature = ogr.Feature(feature_defn)
                out_feature.SetGeometry(poly)
                out_feature.SetField('FID', fid)
                out_layer.CreateFeature(out_feature)
                out_feature = None
                fid += 1
                # New envelope for next poly
                ring_y_top = ring_y_top - grid_height
                ring_y_bottom = ring_y_bottom - grid_height
            # New envelope for next poly
            ring_x_left_origin = ring_x_left_origin + grid_width
            ring_x_right_origin = ring_x_right_origin + grid_width
        # Save and close data sources (setting to None flushes/closes in OGR)
        out_data_source = None
        fishnet_output = None
        if self.outfile is None:
            # Read the memory buffer GeoJSON into Python dict structure
            memfile_json = self.read_file(output_file).decode('utf-8')
            fishnet_output = loads(memfile_json)
        else:
            fishnet_output = output_file
        self.logger.info('Finished writing fishnet output')
        return fishnet_output
    except Exception:
        # Narrowed from a bare 'except:' so system-exiting exceptions propagate
        self.logger.warning(traceback.format_exc())
        return None
def create(self):
    """
    | Generate the output raster dataset

    Rasterises the input GeoJSON against a fishnet grid (supplied directly,
    derived from a bounding box, or derived from LAD codes - in that order
    of precedence as coded below) and writes a single-band GeoTIFF in
    EPSG:27700 to DATA_DIRECTORY/self.output_filename.  Cells whose
    intersected area percentage exceeds self.area_threshold are marked for
    inclusion (sense reversed when self.invert is set).

    Raises nothing: errors are logged and swallowed (deliberate best-effort
    behaviour); the temporary shapefile is always cleaned up.
    """
    gdal.UseExceptions()
    # Unique temporary shapefile used as the OGR rasterisation source
    temp_shp = '{}/{}.shp'.format(Config.get('DATA_DIRECTORY'),
                                  uuid.uuid4().hex)
    try:
        # Read the supplied GeoJSON data into a DataFrame
        self.logger.info('Creating GeoDataFrame from input...')
        if isinstance(self.geojson_data, str):
            self.logger.info(
                'Input GeoJSON is a string, not a dict => converting...')
            self.geojson_data = loads(self.geojson_data)
        input_data = GeoDataFrame.from_features(self.geojson_data)
        self.logger.debug(input_data.head(10))
        self.logger.info('Done')
        # Create the fishnet if necessary
        if self.bounding_box is not None:
            # Use the supplied British National Grid bounding box
            self.logger.info(
                'Generate fishnet GeoDataFrame from supplied bounding box...'
            )
            fishnet_geojson = FishNet(bbox=self.bounding_box,
                                      netsize=self.resolution).create()
        elif self.fishnet is not None:
            # Use a supplied fishnet output
            self.logger.info(
                'Generate fishnet GeoDataFrame from supplied GeoJSON...')
            if isinstance(self.fishnet, str):
                self.logger.info(
                    'Input fishnet GeoJSON is a string, not a dict => converting...'
                )
                self.fishnet = loads(self.fishnet)
            fishnet_geojson = self.fishnet
        elif len(self.area_codes) > 0:
            # Use the LAD codes
            self.logger.info(
                'Generate fishnet GeoDataFrame from supplied LAD codes...')
            fishnet_geojson = FishNet(lad=self.area_codes,
                                      netsize=self.resolution).create()
        else:
            raise ValueError(
                'No boundary information supplied - please supply fishnet GeoJSON, bounding box, or list of LAD codes'
            )
        fishnet = GeoDataFrame.from_features(fishnet_geojson)
        # Raster extent is taken from the fishnet, not the input data
        x_min, y_min, x_max, y_max = fishnet.total_bounds
        self.logger.debug(fishnet.head(10))
        self.logger.info('Done')
        # Overlay intersection
        self.logger.info('Overlay data on fishnet using intersection...')
        intersection = overlay(fishnet, input_data, how='intersection')
        self.logger.info('Done')
        # Write area attribute into frame
        self.logger.info('Computing areas...')
        intersection['area'] = intersection.geometry.area
        self.logger.info('Done')
        # Create grid to rasterize via merge and assign an 'include' field based on the threshold
        self.logger.info('Doing merge...')
        self.logger.debug(intersection.head(10))
        # NOTE(review): the / 100.0 appears to convert summed m^2 per cell
        # into a percentage of a 10x10m cell - assumes netsize == 10;
        # confirm for other resolutions
        int_merge = fishnet.merge(
            intersection.groupby(['FID']).area.sum() / 100.0, on='FID')
        for i, row in int_merge.iterrows():
            self.logger.debug('{} has area {}'.format(i, row['area']))
            if row['area'] > self.area_threshold:
                int_merge.at[i, 'include_me'] = int(
                    0) if self.invert else int(1)
            else:
                int_merge.at[i, 'include_me'] = int(
                    1) if self.invert else int(0)
        self.logger.info('Done')
        self.logger.info('Compute bounds of dataset...')
        xdim = int((x_max - x_min) / self.resolution)
        ydim = int((y_max - y_min) / self.resolution)
        self.logger.info(
            'xmin = {}, ymin = {}, xmax = {}, ymax = {}'.format(
                x_min, y_min, x_max, y_max))
        # Save as temporary shapefile (TO DO - understand what information is gained by doing this that is not present in GeoJSON)
        self.logger.info('Write out temporary shapefile...')
        int_merge.to_file(temp_shp)
        self.logger.info('Written to {}'.format(temp_shp))
        # Open OGR dataset
        ogr_source = ogr.Open(temp_shp)
        output_file = '{}/{}'.format(Config.get('DATA_DIRECTORY'),
                                     self.output_filename)
        self.logger.info(
            'Will write output raster to {}'.format(output_file))
        # Create raster dataset and set projection
        driver = gdal.GetDriverByName('GTiff')
        rasterised = driver.Create(output_file, xdim, ydim, 1,
                                   gdal.GDT_Byte)
        rasterised.SetGeoTransform(
            (x_min, self.resolution, 0, y_max, 0, -self.resolution))
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(27700)  # British National Grid
        rasterised.SetProjection(srs.ExportToWkt())
        # Fill band with the nodata value (note: SetNoDataValue is NOT
        # called, so the value is not flagged as nodata in the metadata)
        band = rasterised.GetRasterBand(1)
        band.Fill(self.nodata)
        # Do rasterisation, burning the include_me attribute into band 1
        self.logger.info(
            'Set transform and projection, about to rasterise layer...')
        gdal.RasterizeLayer(rasterised, [1],
                            ogr_source.GetLayer(0),
                            options=["ATTRIBUTE=include_me"])
        self.logger.info('Done')
        rasterised.FlushCache()
        # Setting to None closes the GDAL/OGR handles
        rasterised = None
        ogr_source = None
    except Exception:
        # Narrowed from a bare 'except:' so system-exiting exceptions
        # propagate; errors are otherwise logged and swallowed by design
        self.logger.warning(traceback.format_exc())
    finally:
        # Remove the temporary shapefile and all its sidecar files (.dbf etc.)
        self.logger.info('Removing temporary files...')
        filestem = Path(temp_shp).stem
        for shpf in Path(Config.get('DATA_DIRECTORY')).glob(
                '{}.*'.format(filestem)):
            self.logger.info('Cleaning up {}'.format(shpf))
            shpf.unlink()
def read_geopackage(self, gpkg_path):
    """Load a geopackage from the data directory into a GeoDataFrame."""
    full_path = "{}/{}".format(Config.get("DATA_DIRECTORY"), gpkg_path)
    return gpd.read_file(full_path)
def absolute_file_output_path(self, relpath):
    """Return the full path for a unit-test output file under DATA_DIRECTORY."""
    data_dir = Config.get("DATA_DIRECTORY")
    return "{}/unit_test_outputs/{}".format(data_dir, relpath)