def test_raster_transform_clone(self):
    """transform() to the raster's own SRID and driver must go through clone()."""
    clone_patcher = mock.patch.object(GDALRaster, 'clone')
    with clone_patcher as clone_spy:
        # Back the raster with a real temporary .tif file.
        tmp_tif = tempfile.NamedTemporaryFile(suffix='.tif')
        band_spec = {
            'data': range(25),
            'nodata_value': 99,
        }
        raster_spec = {
            'datatype': 1,
            'driver': 'tif',
            'name': tmp_tif.name,
            'width': 5,
            'height': 5,
            'nr_of_bands': 1,
            'srid': 4326,
            'origin': (-5, 5),
            'scale': (2, -2),
            'skew': (0, 0),
            'bands': [band_spec],
        }
        raster = GDALRaster(raster_spec)
        # The target SRID equals the source SRID and no other driver is
        # requested, so exactly one clone() call is expected.
        raster.transform(4326)
        self.assertEqual(clone_spy.call_count, 1)
def test_raster_transform_clone(self):
    """A same-SRID, same-driver transform() is expected to call clone() once."""
    with mock.patch.object(GDALRaster, "clone") as clone_spy:
        # File-backed raster living in a temporary .tif.
        backing_file = tempfile.NamedTemporaryFile(suffix=".tif")
        definition = {
            "datatype": 1,
            "driver": "tif",
            "name": backing_file.name,
            "width": 5,
            "height": 5,
            "nr_of_bands": 1,
            "srid": 4326,
            "origin": (-5, 5),
            "scale": (2, -2),
            "skew": (0, 0),
            "bands": [
                {"data": range(25), "nodata_value": 99},
            ],
        }
        raster = GDALRaster(definition)
        # Nothing changes between source and target (SRID 4326, same
        # driver), so transform() should delegate to clone().
        raster.transform(4326)
        calls = clone_spy.call_count
        self.assertEqual(calls, 1)
def reprojected_by_gdal(src: str, dst: str, dst_crs: int = 4326) -> None:
    """
    Reproject the raster at ``src`` with GDAL.

    The source is opened in write mode and transformed to ``dst_crs``;
    ``dst`` is passed to ``transform()`` as the name of the resulting raster.
    Nothing is returned.
    """
    GDALRaster(src, write=True).transform(dst_crs, name=dst)
def test_raster_transform(self):
    """transform() accepts the target SRS as an int, a str or a SpatialReference."""
    srs_variants = [3086, '3086', SpatialReference(3086)]
    for srs in srs_variants:
        with self.subTest(srs=srs):
            # Temporary backing file and the nodata marker used below.
            rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
            ndv = 99
            # File-based 5x5 single-band raster in SRID 4326.
            raster_spec = {
                'datatype': 1,
                'driver': 'tif',
                'name': rstfile.name,
                'width': 5,
                'height': 5,
                'nr_of_bands': 1,
                'srid': 4326,
                'origin': (-5, 5),
                'scale': (2, -2),
                'skew': (0, 0),
                'bands': [{
                    'data': range(25),
                    'nodata_value': ndv,
                }],
            }
            source = GDALRaster(raster_spec)
            transformed = source.transform(srs)
            # Read the result back from disk before inspecting it.
            target = GDALRaster(transformed.name)

            self.assertEqual(target.srs.srid, 3086)
            self.assertEqual(target.width, 7)
            self.assertEqual(target.height, 7)
            source_datatype = source.bands[0].datatype()
            self.assertEqual(target.bands[0].datatype(), source_datatype)

            origin_x, origin_y = target.origin[0], target.origin[1]
            self.assertAlmostEqual(origin_x, 9124842.791079799, 3)
            self.assertAlmostEqual(origin_y, 1589911.6476407414, 3)
            scale_x, scale_y = target.scale[0], target.scale[1]
            self.assertAlmostEqual(scale_x, 223824.82664250192, 3)
            self.assertAlmostEqual(scale_y, -223824.82664250192, 3)
            self.assertEqual(target.skew, [0, 0])

            pixels = target.bands[0].data()
            if numpy:
                pixels = pixels.flatten().tolist()

            # Reprojecting a raster that covers a large area skews the data
            # matrix, which may introduce nodata cells.
            expected = [
                ndv, ndv, ndv, ndv, 4, ndv, ndv,
                ndv, ndv, 2, 3, 9, ndv, ndv,
                ndv, 1, 2, 8, 13, 19, ndv,
                0, 6, 6, 12, 18, 18, 24,
                ndv, 10, 11, 16, 22, 23, ndv,
                ndv, ndv, 15, 21, 22, ndv, ndv,
                ndv, ndv, 20, ndv, ndv, ndv, ndv,
            ]
            self.assertEqual(pixels, expected)
def test_raster_transform_clone_name(self):
    """A same-SRID transform() must give its result the requested name."""
    # File-backed raster living in a temporary .tif.
    tmp_tif = tempfile.NamedTemporaryFile(suffix=".tif")
    band_spec = {
        "data": range(25),
        "nodata_value": 99,
    }
    raster_spec = {
        "datatype": 1,
        "driver": "tif",
        "name": tmp_tif.name,
        "width": 5,
        "height": 5,
        "nr_of_bands": 1,
        "srid": 4326,
        "origin": (-5, 5),
        "scale": (2, -2),
        "skew": (0, 0),
        "bands": [band_spec],
    }
    raster = GDALRaster(raster_spec)
    expected_name = tmp_tif.name + "_respect_name.GTiff"
    result = raster.transform(4326, name=expected_name)
    self.assertEqual(result.name, expected_name)
def test_raster_transform_clone_name(self):
    """The name passed to transform() is kept when source and target SRID match."""
    backing_file = tempfile.NamedTemporaryFile(suffix='.tif')
    # 5x5 single-band raster in SRID 4326, stored in the temporary file.
    definition = {
        'datatype': 1,
        'driver': 'tif',
        'name': backing_file.name,
        'width': 5,
        'height': 5,
        'nr_of_bands': 1,
        'srid': 4326,
        'origin': (-5, 5),
        'scale': (2, -2),
        'skew': (0, 0),
        'bands': [
            {'data': range(25), 'nodata_value': 99},
        ],
    }
    source = GDALRaster(definition)
    wanted_name = backing_file.name + '_respect_name.GTiff'
    actual_name = source.transform(4326, name=wanted_name).name
    self.assertEqual(actual_name, wanted_name)
def raster_handler(files, *args, **kwargs):
    """
    Returns merged transformed raster file

    Each uploaded file is saved through ``FileSystemStorage`` under
    ``rasters/``. A single upload is opened directly; several uploads are
    first merged into ``MEDIA_ROOT/rasters/merged.tif`` with
    ``utils/gdal_merge.py -separate`` and the individual uploads are removed
    afterwards. The resulting raster is transformed to SRID 3857.

    :param files: sequence of uploaded file objects (each needs a ``name``).
    :returns: the result of ``GDALRaster.transform(3857)``.
    :raises ValueError: if ``files`` is empty. Previously this case fell
        through both branches and failed on an unbound local variable.
    """
    if not files:
        raise ValueError('At least one raster file is required.')

    storage = FileSystemStorage()

    def _store(upload):
        # Save one upload below rasters/ and return its absolute path.
        saved_name = storage.save('rasters/' + upload.name, upload)
        return os.path.join(settings.MEDIA_ROOT, saved_name)

    if len(files) == 1:
        source = GDALRaster(_store(files[0]), write=True)
    else:
        rasters_dir = os.path.join(settings.MEDIA_ROOT, 'rasters')
        output_raster = os.path.join(rasters_dir, 'merged.tif')
        # A stale merge result must not survive into this run.
        if os.path.isfile(output_raster):
            os.remove(output_raster)

        rasters = [_store(f) for f in files]
        merge_command = [
            "python", "utils/gdal_merge.py", "-o", output_raster, "-separate",
        ] + rasters
        try:
            subprocess.call(merge_command)
        finally:
            # The individual uploads are only merge inputs; drop them even
            # when launching the merge fails.
            for path in rasters:
                os.remove(path)
        source = GDALRaster(output_raster, write=True)

    return source.transform(3857)
def test_raster_transform(self):
    """Reproject a small file-based raster from SRID 4326 to SRID 3086."""
    # Prepare tempfile and nodata value
    rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
    ndv = 99

    # Create in file based raster
    source = GDALRaster({
        'datatype': 1,
        'driver': 'tif',
        'name': rstfile.name,
        'width': 5,
        'height': 5,
        'nr_of_bands': 1,
        'srid': 4326,
        'origin': (-5, 5),
        'scale': (2, -2),
        'skew': (0, 0),
        'bands': [{
            'data': range(25),
            'nodata_value': ndv,
        }],
    })

    # Transform raster from srid 4326 into srid 3086.
    target = source.transform(3086)

    # Reload data from disk
    target = GDALRaster(target.name)

    self.assertEqual(target.srs.srid, 3086)
    self.assertEqual(target.width, 7)
    self.assertEqual(target.height, 7)
    self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())
    # Compare the georeferencing to three decimal places only: the trailing
    # digits of a reprojection are not a stable thing to assert on.
    self.assertAlmostEqual(target.origin[0], 9124842.791079799, 3)
    self.assertAlmostEqual(target.origin[1], 1589911.6476407414, 3)
    self.assertAlmostEqual(target.scale[0], 223824.82664250192, 3)
    self.assertAlmostEqual(target.scale[1], -223824.82664250192, 3)
    self.assertEqual(target.skew, [0, 0])

    result = target.bands[0].data()
    if numpy:
        result = result.flatten().tolist()

    # The reprojection of a raster that spans over a large area
    # skews the data matrix and might introduce nodata values.
    self.assertEqual(
        result,
        [
            ndv, ndv, ndv, ndv, 4, ndv, ndv,
            ndv, ndv, 2, 3, 9, ndv, ndv,
            ndv, 1, 2, 8, 13, 19, ndv,
            0, 6, 6, 12, 18, 18, 24,
            ndv, 10, 11, 16, 22, 23, ndv,
            ndv, ndv, 15, 21, 22, ndv, ndv,
            ndv, ndv, 20, ndv, ndv, ndv, ndv,
        ]
    )
def test_raster_transform(self):
    """Reproject a small file-based raster from SRID 4326 to SRID 3086."""
    if GDAL_VERSION < (1, 8, 1):
        self.skipTest("GDAL >= 1.8.1 is required for this test")

    # Prepare tempfile and nodata value
    rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
    ndv = 99

    # Create in file based raster
    source = GDALRaster({
        'datatype': 1,
        'driver': 'tif',
        'name': rstfile.name,
        'width': 5,
        'height': 5,
        'nr_of_bands': 1,
        'srid': 4326,
        'origin': (-5, 5),
        'scale': (2, -2),
        'skew': (0, 0),
        'bands': [{
            'data': range(25),
            'nodata_value': ndv,
        }],
    })

    # Transform raster from srid 4326 into srid 3086.
    target = source.transform(3086)

    # Reload data from disk
    target = GDALRaster(target.name)

    self.assertEqual(target.srs.srid, 3086)
    self.assertEqual(target.width, 7)
    self.assertEqual(target.height, 7)
    self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())
    # Floating point georeferencing is compared per component with a
    # tolerance instead of exact list equality, so that differences in the
    # last digits of the reprojection do not fail the test.
    self.assertEqual(len(target.origin), 2)
    self.assertAlmostEqual(target.origin[0], 9124842.791079799, 3)
    self.assertAlmostEqual(target.origin[1], 1589911.6476407414, 3)
    self.assertEqual(len(target.scale), 2)
    self.assertAlmostEqual(target.scale[0], 223824.82664250192, 3)
    self.assertAlmostEqual(target.scale[1], -223824.82664250192, 3)
    self.assertEqual(target.skew, [0, 0])

    result = target.bands[0].data()
    if numpy:
        result = result.flatten().tolist()

    # The reprojection of a raster that spans over a large area
    # skews the data matrix and might introduce nodata values.
    self.assertEqual(
        result,
        [
            ndv, ndv, ndv, ndv, 4, ndv, ndv,
            ndv, ndv, 2, 3, 9, ndv, ndv,
            ndv, 1, 2, 8, 13, 19, ndv,
            0, 6, 6, 12, 18, 18, 24,
            ndv, 10, 11, 16, 22, 23, ndv,
            ndv, ndv, 15, 21, 22, ndv, ndv,
            ndv, ndv, 20, ndv, ndv, ndv, ndv,
        ]
    )
def test_db(self):
    """Check that a Task can store an orthophoto raster once it is in SRID 4326."""
    # Make sure we can use PostGIS raster type
    raster = GDALRaster(os.path.realpath(os.path.join("app", "fixtures", "orthophoto.tif")), write=True)

    self.assertEqual(raster.srid, 32615)

    # We cannot store offdb references with SRID different than the one declared (4326).
    # The database error is caught *around* the atomic block so that the
    # block itself sees the exception and rolls back cleanly.
    with self.assertRaises(InternalError):
        with transaction.atomic():
            Task.objects.create(project=Project.objects.latest("created_at"), orthophoto=raster)

    # All OK when we transform to 4326
    task = Task.objects.create(project=Project.objects.latest("created_at"), orthophoto=raster.transform(4326))
    task.refresh_from_db()
    self.assertEqual(task.orthophoto.srid, 4326)
    self.assertEqual(task.orthophoto.width, 252)  # not original size, warp happened
def test_db(self):
    """Check that a Task can store an orthophoto raster once it is in SRID 4326."""
    # Make sure we can use PostGIS raster type
    raster = GDALRaster(os.path.realpath(
        os.path.join("app", "fixtures", "orthophoto.tif")), write=True)

    self.assertEqual(raster.srid, 32615)

    # We cannot store offdb references with SRID different than the one
    # declared (4326). The exception is asserted outside the atomic block so
    # the block can observe the failure and roll back.
    with self.assertRaises(InternalError):
        with transaction.atomic():
            Task.objects.create(
                project=Project.objects.latest("created_at"),
                orthophoto=raster)

    # All OK when we transform to 4326
    task = Task.objects.create(
        project=Project.objects.latest("created_at"),
        orthophoto=raster.transform(4326))
    task.refresh_from_db()
    self.assertEqual(task.orthophoto.srid, 4326)
    # not original size, warp happened
    self.assertEqual(task.orthophoto.width, 252)
class RasterLayerParser(object):
    """
    Class to parse raster layers.

    A parser is bound to one ``RasterLayer``. It fetches the raster source
    into a local working directory, optionally reprojects it to web mercator,
    extracts metadata and writes ``RasterTile`` rows for the requested zoom
    levels, logging progress to the layer's parse status.
    """

    # Number of quadrants handed to process_quadrant so far; only used to
    # number the log messages.
    _quadrant_count = 0

    def __init__(self, rasterlayer_id):
        """Load the raster layer and read tile settings from Django settings."""
        self.rasterlayer = RasterLayer.objects.get(id=rasterlayer_id)

        # Set raster tilesize
        self.tilesize = int(
            getattr(settings, 'RASTER_TILESIZE', WEB_MERCATOR_TILESIZE))
        self.batch_step_size = int(
            getattr(settings, 'RASTER_BATCH_STEP_SIZE', BATCH_STEP_SIZE))
        self.s3_endpoint_url = getattr(settings, 'RASTER_S3_ENDPOINT_URL', None)

    def log(self, msg, status=None, zoom=None):
        """
        Write a message to the parse log of the rasterlayer instance and update
        the parse status object.
        """
        parsestatus = self.rasterlayer.parsestatus
        parsestatus.refresh_from_db()

        if status is not None:
            parsestatus.status = status

        if zoom is not None and zoom not in parsestatus.tile_levels:
            parsestatus.tile_levels.append(zoom)
            parsestatus.tile_levels.sort()

        # Prepare datetime stamp for log
        now = '[{0}] '.format(
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        # Every entry after the first one starts on a new line.
        if parsestatus.log:
            now = '\n' + now

        parsestatus.log += now + msg
        parsestatus.save()

    def open_raster_file(self):
        """
        Get raster source file to extract tiles from.

        This makes a local copy of rasterfile, unzips the raster and reprojects
        it into web mercator if necessary. The reprojected raster is stored for
        reuse such that reprojection does only happen once.

        The local copy of the raster is needed if files are stored on remote
        storages.

        Sets ``self.tmpdir`` and ``self.dataset``. Raises ``RasterException``
        when no usable source exists, the url scheme is unsupported, no file
        can be opened by GDAL, or the srid override needs a write mode the
        driver does not support.
        """
        reproj, _created = RasterLayerReprojected.objects.get_or_create(
            rasterlayer=self.rasterlayer)
        # Check if the raster has already been reprojected
        has_reprojected = reproj.rasterfile.name not in (None, '')

        # Create workdir
        raster_workdir = getattr(settings, 'RASTER_WORKDIR', None)
        self.tmpdir = tempfile.mkdtemp(dir=raster_workdir)

        # Choose source for raster data, use the reprojected version if it exists.
        if self.rasterlayer.source_url and not has_reprojected:
            url = self.rasterlayer.source_url
            if url.lower().startswith('http') or url.startswith('file'):
                url_path = urlparse(self.rasterlayer.source_url).path
                filename = url_path.split('/')[-1]
                filepath = os.path.join(self.tmpdir, filename)
                urlretrieve(self.rasterlayer.source_url, filepath)
            elif url.startswith('s3'):
                # Get the bucket name and file key, assuming the following url
                # strucure: s3://BUCKET_NAME/BUCKET_KEY
                bucket_name = url.split('s3://')[1].split('/')[0]
                bucket_key = '/'.join(url.split('s3://')[1].split('/')[1:])
                # Assume the file name is the last piece of the key.
                filename = bucket_key.split('/')[-1]
                filepath = os.path.join(self.tmpdir, filename)
                # Get file from s3.
                s3 = boto3.resource('s3', endpoint_url=self.s3_endpoint_url)
                bucket = s3.Bucket(bucket_name)
                bucket.download_file(bucket_key, filepath,
                                     ExtraArgs={'RequestPayer': 'requester'})
            else:
                raise RasterException(
                    'Only http(s) and s3 urls are supported.')
        else:
            if has_reprojected:
                rasterfile_source = reproj.rasterfile
            else:
                rasterfile_source = self.rasterlayer.rasterfile

            if not rasterfile_source.name:
                raise RasterException(
                    'No data source found. Provide a rasterfile or a source url.'
                )

            # Copy raster file source to local folder. The context manager
            # closes the local copy even if reading a chunk fails.
            filepath = os.path.join(self.tmpdir,
                                    os.path.basename(rasterfile_source.name))
            with open(filepath, 'wb') as rasterfile:
                for chunk in rasterfile_source.chunks():
                    rasterfile.write(chunk)

        # If the raster file is compressed, decompress it, otherwise try to
        # open the source file directly.
        if os.path.splitext(filepath)[1].lower() == '.zip':
            # Open and extract zipfile; close the archive before deleting it.
            with zipfile.ZipFile(filepath) as zf:
                zf.extractall(self.tmpdir)

            # Remove zipfile
            os.remove(filepath)

            # Get filelist from directory
            matches = []
            for root, _dirnames, filenames in os.walk(self.tmpdir):
                for filename in fnmatch.filter(filenames, '*.*'):
                    matches.append(os.path.join(root, filename))

            # Open the first raster file found in the matched files.
            self.dataset = None
            for match in matches:
                try:
                    self.dataset = GDALRaster(match)
                    break
                except GDALException:
                    # Not a raster GDAL can read; try the next candidate.
                    pass

            # Raise exception if no file could be opened by gdal.
            if not self.dataset:
                raise RasterException('Could not open rasterfile.')
        else:
            self.dataset = GDALRaster(filepath)

        # Override srid if provided
        if self.rasterlayer.srid:
            try:
                self.dataset = GDALRaster(self.dataset.name, write=True)
            except GDALException:
                raise RasterException(
                    'Could not override srid because the driver for this '
                    'type of raster does not support write mode.')
            self.dataset.srs = self.rasterlayer.srid

    def reproject_rasterfile(self):
        """
        Reproject the rasterfile into web mercator.

        Nothing happens if a reprojected file is already stored, or if the
        dataset is already in web mercator and its nodata values need no
        override. If ``store_reprojected`` is set, the result is zipped and
        saved on the layer's reprojected model.
        """
        # Return if reprojected rasterfile already exists.
        if hasattr(self.rasterlayer, 'reprojected'
                   ) and self.rasterlayer.reprojected.rasterfile.name:
            return

        # Return if the raster already has the right projection
        # and nodata value is acceptable.
        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # Nodata value was not manually specified.
            if self.rasterlayer.nodata in ('', None):
                return
            # All bands from dataset already have the same nodata value as the
            # one that was manually specified.
            if all(self.rasterlayer.nodata == band.nodata_value
                   for band in self.dataset.bands):
                return
        else:
            # Log projection change if original raster is not in web mercator.
            self.log(
                'Transforming raster to SRID {0}'.format(WEB_MERCATOR_SRID),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )

        # Reproject the dataset.
        self.dataset = self.dataset.transform(
            WEB_MERCATOR_SRID,
            driver=INTERMEDIATE_RASTER_FORMAT,
        )

        # Manually override nodata value if neccessary
        if self.rasterlayer.nodata not in ('', None):
            self.log(
                'Setting no data values to {0}.'.format(
                    self.rasterlayer.nodata),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )
            for band in self.dataset.bands:
                band.nodata_value = float(self.rasterlayer.nodata)

        # Compress reprojected raster file and store it
        if self.rasterlayer.store_reprojected:
            dest = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.zip')
            # The archive is closed on every exit path, so it is fully
            # written before it is read back below.
            with zipfile.ZipFile(dest.name, 'w', allowZip64=True) as dest_zip:
                dest_zip.write(
                    filename=self.dataset.name,
                    arcname=os.path.basename(self.dataset.name),
                    compress_type=zipfile.ZIP_DEFLATED,
                )

            # Store zip file in reprojected raster model. The handle stays
            # open until save() has finished and is closed afterwards.
            with open(dest.name, 'rb') as zipped:
                self.rasterlayer.reprojected.rasterfile = File(
                    zipped, name=os.path.basename(dest.name))
                self.rasterlayer.reprojected.save()

        self.log('Finished transforming raster.')

    def create_initial_histogram_buckets(self):
        """
        Gets the empty histogram arrays for statistics collection.

        Fills ``self.hist_values`` and ``self.hist_bins`` with one numpy array
        per band, taken from the stored band metadata.
        """
        self.hist_values = []
        self.hist_bins = []

        for i, _band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(
                rasterlayer=self.rasterlayer, band=i).first()
            self.hist_values.append(numpy.array(bandmeta.hist_values))
            self.hist_bins.append(numpy.array(bandmeta.hist_bins))

    def extract_metadata(self):
        """
        Extract and store metadata for the raster and its bands.

        Raises ``RasterException`` if the max zoom level cannot be computed.
        """
        self.log('Extracting metadata from raster.')

        # Try to compute max zoom
        try:
            max_zoom = self.compute_max_zoom()
        except GDALException:
            raise RasterException(
                'Failed to compute max zoom. Check the SRID of the raster.')

        # Extract global raster metadata
        meta = self.rasterlayer.metadata
        meta.uperleftx = self.dataset.origin.x
        meta.uperlefty = self.dataset.origin.y
        meta.width = self.dataset.width
        meta.height = self.dataset.height
        meta.scalex = self.dataset.scale.x
        meta.scaley = self.dataset.scale.y
        meta.skewx = self.dataset.skew.x
        meta.skewy = self.dataset.skew.y
        meta.numbands = len(self.dataset.bands)
        meta.srs_wkt = self.dataset.srs.wkt
        meta.srid = self.dataset.srs.srid
        meta.max_zoom = max_zoom
        meta.save()

        # Extract band metadata
        for i, band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(
                rasterlayer=self.rasterlayer, band=i).first()
            if not bandmeta:
                bandmeta = RasterLayerBandMetadata(
                    rasterlayer=self.rasterlayer, band=i)

            bandmeta.nodata_value = band.nodata_value
            bandmeta.min = band.min
            bandmeta.max = band.max
            # Depending on Django version, the band statistics include std and mean.
            if hasattr(band, 'std'):
                bandmeta.std = band.std
            if hasattr(band, 'mean'):
                bandmeta.mean = band.mean
            bandmeta.save()

        self.log('Finished extracting metadata from raster.')

    def create_tiles(self, zoom_levels):
        """
        Create tiles for input zoom levels, either a list or an integer.
        """
        if isinstance(zoom_levels, int):
            self.populate_tile_level(zoom_levels)
        else:
            for zoom in zoom_levels:
                self.populate_tile_level(zoom)

    def populate_tile_level(self, zoom):
        """
        Create tiles for this raster at the given zoomlevel.

        This routine first snaps the raster to the grid of the zoomlevel,
        then creates the tiles from the snapped raster.
        """
        # Abort if zoom level is above resolution of the raster layer
        if zoom > self.max_zoom:
            return
        elif zoom == self.max_zoom:
            self.create_initial_histogram_buckets()

        # Compute the tile x-y-z index range for the rasterlayer for this zoomlevel
        bbox = self.dataset.extent
        quadrants = utils.quadrants(bbox, zoom)

        self.log('Creating {0} tiles in {1} quadrants at zoom {2}.'.format(
            self.nr_of_tiles(zoom), len(quadrants), zoom))

        # Process the quadrants one after the other.
        for indexrange in quadrants:
            self.process_quadrant(indexrange, zoom)

        # Store histogram data
        if zoom == self.max_zoom:
            bandmetas = RasterLayerBandMetadata.objects.filter(
                rasterlayer=self.rasterlayer)
            for bandmeta in bandmetas:
                bandmeta.hist_values = self.hist_values[bandmeta.band].tolist()
                bandmeta.save()

        self.log('Finished parsing at zoom level {0}.'.format(zoom), zoom=zoom)

    def process_quadrant(self, indexrange, zoom):
        """
        Create raster tiles for a quadrant of tiles defined by a x-y-z index
        range and a zoom level.

        ``indexrange`` is indexed as (xmin, ymin, xmax, ymax) in tile indices.
        """
        # TODO Use a standalone celery task for this method in order to
        # gain speedup from parallelism.
        self._quadrant_count += 1
        self.log(
            'Starting tile creation for quadrant {0} at zoom level {1}'.format(
                self._quadrant_count, zoom),
            status=self.rasterlayer.parsestatus.CREATING_TILES)

        # Compute scale of tiles for this zoomlevel
        tilescale = utils.tile_scale(zoom)

        # Compute quadrant bounds and create destination file
        bounds = utils.tile_bounds(indexrange[0], indexrange[1], zoom)
        dest_file = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.tif')

        # Snap dataset to the quadrant
        snapped_dataset = self.dataset.warp({
            'name': dest_file.name,
            'origin': [bounds[0], bounds[3]],
            'scale': [tilescale, -tilescale],
            'width': (indexrange[2] - indexrange[0] + 1) * self.tilesize,
            'height': (indexrange[3] - indexrange[1] + 1) * self.tilesize,
        })

        # Create all tiles in this quadrant in batches
        batch = []
        for tilex in range(indexrange[0], indexrange[2] + 1):
            for tiley in range(indexrange[1], indexrange[3] + 1):
                # Calculate raster tile origin
                bounds = utils.tile_bounds(tilex, tiley, zoom)

                # Construct band data arrays
                pixeloffset = ((tilex - indexrange[0]) * self.tilesize,
                               (tiley - indexrange[1]) * self.tilesize)

                band_data = [{
                    'data': band.data(offset=pixeloffset,
                                      size=(self.tilesize, self.tilesize)),
                    'nodata_value': band.nodata_value
                } for band in snapped_dataset.bands]

                # Ignore tile if its only nodata.
                if all(numpy.all(dat['data'] == dat['nodata_value'])
                       for dat in band_data):
                    continue

                # Add tile data to histogram
                if zoom == self.max_zoom:
                    self.push_histogram(band_data)

                # Warp source raster into this tile (in memory)
                dest = GDALRaster({
                    'width': self.tilesize,
                    'height': self.tilesize,
                    'origin': [bounds[0], bounds[3]],
                    'scale': [tilescale, -tilescale],
                    'srid': WEB_MERCATOR_SRID,
                    'datatype': snapped_dataset.bands[0].datatype(),
                    'bands': band_data,
                })

                # Store tile in batch array
                batch.append(
                    RasterTile(rast=dest,
                               rasterlayer_id=self.rasterlayer.id,
                               tilex=tilex,
                               tiley=tiley,
                               tilez=zoom))

                # Commit batch to database and reset it
                if len(batch) == self.batch_step_size:
                    RasterTile.objects.bulk_create(batch)
                    batch = []

        # Commit remaining objects
        if len(batch):
            RasterTile.objects.bulk_create(batch)

    def push_histogram(self, data):
        """
        Add data to band level histogram.

        ``data`` holds one dict per band with the pixel values under 'data'.
        """
        # Loop through bands of this tile
        for i, dat in enumerate(data):
            # Create histogram for new data with the same bins
            new_hist = numpy.histogram(dat['data'], bins=self.hist_bins[i])
            # Add counts of this tile to band metadata histogram
            self.hist_values[i] += new_hist[0]

    def drop_all_tiles(self):
        """
        Delete all existing tiles for this parser's rasterlayer.
        """
        self.log('Clearing all existing tiles.')
        self.rasterlayer.rastertile_set.all().delete()
        self.log('Finished clearing existing tiles.')

    def send_success_signal(self):
        """
        Send parser end signal for other dependencies to be handling new tiles.
        """
        self.log('Successfully finished parsing raster',
                 status=self.rasterlayer.parsestatus.FINISHED)
        rasterlayers_parser_ended.send(sender=self.rasterlayer.__class__,
                                       instance=self.rasterlayer)

    def compute_max_zoom(self):
        """
        Return the max zoom level for the dataset.

        A manual ``max_zoom`` on the rasterlayer wins. Otherwise the zoom
        level is derived from the pixel scale of the dataset in web mercator.
        """
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # For rasters in web mercator, use the scale directly
            scale = abs(self.dataset.scale.x)
        else:
            # Create a line from the center of the raster to a point that is
            # one pixel width away from the center.
            xcenter = self.dataset.extent[0] + (self.dataset.extent[2] -
                                                self.dataset.extent[0]) / 2
            ycenter = self.dataset.extent[1] + (self.dataset.extent[3] -
                                                self.dataset.extent[1]) / 2
            linestring = 'LINESTRING({} {}, {} {})'.format(
                xcenter, ycenter, xcenter + self.dataset.scale.x, ycenter)
            line = OGRGeometry(linestring, srs=self.dataset.srs)

            # Transform the line into web mercator.
            line.transform(WEB_MERCATOR_SRID)

            # Use the length of the transformed line as scale.
            scale = line.geos.length

        return utils.closest_zoomlevel(scale)

    @property
    def max_zoom(self):
        """
        Highest zoom level to create tiles for.

        Raises ``RasterException`` if the layer has neither a manual max zoom
        nor metadata.
        """
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        # Get max zoom from metadata
        if not hasattr(self.rasterlayer, 'metadata'):
            raise RasterException('Could not determine max zoom level.')
        max_zoom = self.rasterlayer.metadata.max_zoom

        # Reduce max zoom by one if zoomdown flag was disabled
        if not self.rasterlayer.next_higher:
            max_zoom -= 1

        return max_zoom

    def nr_of_tiles(self, zoom):
        """
        Compute the number of tiles for the rasterlayer on a given zoom level.
        """
        bbox = self.dataset.extent
        indexrange = utils.tile_index_range(bbox, zoom)
        return (indexrange[2] - indexrange[0] + 1) * (indexrange[3] -
                                                      indexrange[1] + 1)
def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from the scheduler. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.

    The steps run in this order on every call:

    1. Optionally auto-assign a processing node, or detach one that has
       gone offline.
    2. Submit the images as a new task when no UUID is set yet.
    3. Execute a pending action (cancel, restart or remove).
    4. Pull status and console output from the processing node and, on
       completion, download and import the assets.

    ``ProcessingError`` is recorded through ``set_failure``; connection
    errors and ``ProcessingTimeout`` are only logged as warnings.
    """
    try:
        if self.auto_processing_node and not self.status in [
                status_codes.FAILED, status_codes.CANCELED
        ]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node(
                )
                if self.processing_node:
                    # Doesn't have to be accurate, it will get overriden later
                    self.processing_node.queue_count += 1
                    self.processing_node.save()

                    logger.info(
                        "Automatically assigned processing node {} to {}".
                        format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online(
            ):
                # Detach processing node, will be processed at the next tick
                logger.info(
                    "Processing node {} went offline, reassigning {}...".
                    format(self.processing_node, self))
                # Clearing the UUID together with the node makes the task
                # look unsubmitted again.
                self.uuid = ''
                self.processing_node = None
                self.save()

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [
                    image.path() for image in self.imageupload_set.all()
                ]

                # This takes a while
                uuid = self.processing_node.process_new_task(
                    images, self.name, self.options)

                # Refresh task object before committing change
                self.refresh_from_db()
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                        # Status is reset so the status update further down
                        # fetches it from the node again.
                        self.status = None
                    except ProcessingException:
                        logger.warning(
                            "Could not cancel {} on processing node. We'll proceed anyway..."
                            .format(self))
                        # The node could not be told, so mark the task as
                        # canceled locally.
                        self.status = status_codes.CANCELED

                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError(
                        "Cannot cancel a task that has no processing node or UUID"
                    )

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:

                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(
                                self.uuid)
                            uuid_still_exists = info['uuid'] == self.uuid
                        except ProcessingException:
                            # Treated the same as "task no longer exists".
                            pass

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid)
                        except ProcessingError as e:
                            # Something went wrong
                            logger.warning(
                                "Could not restart {}, will start a new one"
                                .format(self))
                            self.uuid = ''
                    else:
                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                    # Reset everything that described the previous run.
                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError(
                        "Cannot restart a task that has no processing node"
                    )

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # Are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except ProcessingException:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [
                    None, status_codes.QUEUED, status_codes.RUNNING
            ]:
                # Update task info from processing node
                info = self.processing_node.get_task_info(self.uuid)

                self.processing_time = info["processingTime"]
                self.status = info["status"]["code"]

                # Only the console lines past the ones already stored are
                # requested from the node.
                current_lines_count = len(
                    self.console_output.split("\n")) - 1
                self.console_output += self.processing_node.get_task_console_output(
                    self.uuid, current_lines_count)

                if "errorMessage" in info["status"]:
                    self.last_error = info["status"]["errorMessage"]

                # Has the task just been canceled, failed, or completed?
                if self.status in [
                        status_codes.FAILED, status_codes.COMPLETED,
                        status_codes.CANCELED
                ]:
                    logger.info("Processing status: {} for {}".format(
                        self.status, self))

                    if self.status == status_codes.COMPLETED:
                        assets_dir = self.assets_path("")
                        if not os.path.exists(assets_dir):
                            os.makedirs(assets_dir)

                        logger.info(
                            "Downloading all.zip for {}".format(self))

                        # Download all assets
                        zip_stream = self.processing_node.download_task_asset(
                            self.uuid, "all.zip")
                        zip_path = os.path.join(assets_dir, "all.zip")
                        # Written in 4096 byte chunks.
                        with open(zip_path, 'wb') as fd:
                            for chunk in zip_stream.iter_content(4096):
                                fd.write(chunk)

                        logger.info(
                            "Done downloading all.zip for {}".format(self))

                        # Extract from zip
                        with zipfile.ZipFile(zip_path, "r") as zip_h:
                            zip_h.extractall(assets_dir)

                        logger.info(
                            "Extracted all.zip for {}".format(self))

                        # Add to database orthophoto
                        orthophoto_path = os.path.realpath(
                            self.assets_path("odm_orthophoto",
                                             "odm_orthophoto.tif"))
                        if os.path.exists(orthophoto_path):
                            orthophoto = GDALRaster(orthophoto_path,
                                                    write=True)

                            # We need to transform to 4326 before we can store it
                            # as an offdb raster field
                            orthophoto_4326_path = os.path.realpath(
                                self.assets_path(
                                    "odm_orthophoto",
                                    "odm_orthophoto_4326.tif"))
                            self.orthophoto = orthophoto.transform(
                                4326, 'GTiff', orthophoto_4326_path)

                            logger.info(
                                "Imported orthophoto {} for {}".format(
                                    orthophoto_4326_path, self))

                        # Remove old odm_texturing.zip archive (if any)
                        textured_model_archive = self.assets_path(
                            self.get_textured_model_filename())
                        if os.path.exists(textured_model_archive):
                            os.remove(textured_model_archive)

                        self.save()
                    else:
                        # FAILED, CANCELED
                        self.save()
                else:
                    # Still waiting...
                    self.save()
    except ProcessingError as e:
        # Failure is recorded on the task itself.
        self.set_failure(str(e))
    except (ConnectionRefusedError, ConnectionError) as e:
        # Only logged; the task state is left untouched here.
        logger.warning(
            "{} cannot communicate with processing node: {}".format(
                self, str(e)))
    except ProcessingTimeout as e:
        logger.warning(
            "{} timed out with error: {}. We'll try reprocessing at the next tick."
            .format(self, str(e)))
def handle(self, *args, **options):
    """
    Load a DEM file into the ``mnt`` table by relaying it to raster2pgsql.

    Reads ``verbosity``, ``dem_path`` and ``replace`` from ``options``. The
    DEM is opened with GDAL, reprojected to ``settings.SRID`` when its SRID
    differs, converted to SQL by ``raster2pgsql`` and the generated output
    is executed line by line on the database connection.

    Raises CommandError when raster2pgsql cannot be run or fails, when the
    DEM file is missing, unreadable by GDAL or has no SRS, or when an
    ``mnt`` raster already exists and ``replace`` is not set.
    """
    verbose = options['verbosity'] != 0

    # Check that the raster2pgsql command runs successfully.
    try:
        ret = self.call_command_system('raster2pgsql -G > /dev/null', shell=True)
        if ret != 0:
            raise Exception('raster2pgsql failed with exit code %d' % ret)
    except Exception as e:
        raise CommandError('Caught %s: %s' % (e.__class__.__name__, e))

    if verbose:
        self.stdout.write('-- Checking input DEM ------------------\n')

    # Obtain DEM path
    dem_path = options['dem_path']

    # Open GDAL dataset
    if not os.path.exists(dem_path):
        raise CommandError('DEM file does not exists at: %s' % dem_path)
    try:
        rst = GDALRaster(dem_path, write=False)
    except GDALException:
        raise CommandError('DEM format is not recognized by GDAL.')

    # GDAL dataset check 1: ensure dataset has a known SRS
    if not rst.srs:
        raise CommandError('DEM coordinate system is unknown.')

    # Reproject only when the dataset SRS differs from the project SRID
    if settings.SRID != rst.srs.srid:
        rst = rst.transform(settings.SRID)

    # The cursor is used as a context manager so that it is released even
    # when the query fails.
    with connection.cursor() as cur:
        cur.execute('SELECT * FROM raster_columns WHERE r_table_name = \'mnt\'')
        dem_exists = cur.rowcount != 0

    # Obtain replace mode
    replace = options['replace']

    # What to do with existing DEM (if any)
    if dem_exists and replace:
        # Drop table
        with connection.cursor() as cur:
            cur.execute('DROP TABLE mnt')
    elif dem_exists and not replace:
        raise CommandError('DEM file exists, use --replace to overwrite')

    if verbose:
        self.stdout.write('Everything looks fine, we can start loading DEM\n')

    # The temporary file receives the SQL produced by raster2pgsql; the
    # ``with`` block guarantees it is closed (and so deleted) on every exit
    # path, including a failing SQL statement during loading.
    with tempfile.NamedTemporaryFile() as output:
        # SQL code for raster creation
        # NOTE(review): rst.name is interpolated unquoted into a shell
        # command line; a path containing spaces or shell metacharacters
        # would break it -- consider quoting.
        cmd = 'raster2pgsql -c -C -I -M -t 100x100 %s mnt %s' % (
            rst.name,
            '' if verbose else '2>/dev/null',
        )
        try:
            if verbose:
                self.stdout.write('\n-- Relaying to raster2pgsql ------------\n')
                self.stdout.write(cmd)
            ret = self.call_command_system(
                cmd, shell=True, stdout=output.file, stderr=PIPE)
            if ret != 0:
                raise Exception('raster2pgsql failed with exit code %d' % ret)
        except Exception as e:
            raise CommandError('Caught %s: %s' % (e.__class__.__name__, e))

        if verbose:
            self.stdout.write('DEM successfully converted to SQL.\n')

        # Step 3: Dump SQL code into database
        if verbose:
            self.stdout.write('\n-- Loading DEM into database -----------\n')

        # Rewind the file and execute the generated output one line at a time.
        output.file.seek(0)
        with connection.cursor() as cur:
            for sql_line in output.file:
                cur.execute(sql_line)

    if verbose:
        self.stdout.write('DEM successfully loaded.\n')
    return
class RasterLayerParser(object):
    """
    Class to parse raster layers.

    Holds the rasterlayer being parsed, a local GDAL dataset opened from its
    source, and the histogram accumulators filled while tiles are created.
    """
    def __init__(self, rasterlayer_id):
        """
        Load the rasterlayer with the given id and read the tile size setting.
        """
        self.rasterlayer = RasterLayer.objects.get(id=rasterlayer_id)

        # Set raster tilesize
        self.tilesize = int(getattr(settings, 'RASTER_TILESIZE', WEB_MERCATOR_TILESIZE))

    def log(self, msg, status=None, zoom=None):
        """
        Write a message to the parse log of the rasterlayer instance and update
        the parse status object.

        ``status`` replaces the parse status when given; ``zoom`` is added to
        the sorted list of parsed tile levels when not yet present.
        """
        parsestatus = self.rasterlayer.parsestatus
        parsestatus.refresh_from_db()

        if status is not None:
            parsestatus.status = status

        if zoom is not None and zoom not in parsestatus.tile_levels:
            parsestatus.tile_levels.append(zoom)
            parsestatus.tile_levels.sort()

        # Prepare datetime stamp for log. The time is spelled out as
        # %H:%M:%S because the %T shorthand is not a portable strftime
        # directive.
        now = '[{0}] '.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        # Separate entries with a newline once the log has content.
        if parsestatus.log:
            now = '\n' + now

        parsestatus.log += now + msg
        parsestatus.save()

    def open_raster_file(self):
        """
        Get raster source file to extract tiles from.

        This makes a local copy of rasterfile, unzips the raster and reprojects
        it into web mercator if necessary. The reprojected raster is stored for
        reuse such that reprojection does only happen once.

        The local copy of the raster is needed if files are stored on remote
        storages.

        Raises RasterException when no data source is configured, when no
        extracted file can be opened by GDAL, or when the srid override
        requires write mode and the driver does not support it.
        """
        reproj, created = RasterLayerReprojected.objects.get_or_create(rasterlayer=self.rasterlayer)

        # Check if the raster has already been reprojected
        has_reprojected = reproj.rasterfile.name not in (None, '')

        # Create workdir
        raster_workdir = getattr(settings, 'RASTER_WORKDIR', None)
        self.tmpdir = tempfile.mkdtemp(dir=raster_workdir)

        # Choose source for raster data, use the reprojected version if it exists.
        if self.rasterlayer.source_url and not has_reprojected:
            url_path = urlparse(self.rasterlayer.source_url).path
            filename = url_path.split('/')[-1]
            filepath = os.path.join(self.tmpdir, filename)
            urlretrieve(self.rasterlayer.source_url, filepath)
        else:
            if has_reprojected:
                rasterfile_source = reproj.rasterfile
            else:
                rasterfile_source = self.rasterlayer.rasterfile

            if not rasterfile_source.name:
                raise RasterException('No data source found. Provide a rasterfile or a source url.')

            # Copy raster file source to local folder. The context manager
            # closes the local copy even if reading a chunk fails.
            filepath = os.path.join(self.tmpdir, os.path.basename(rasterfile_source.name))
            with open(filepath, 'wb') as rasterfile:
                for chunk in rasterfile_source.chunks():
                    rasterfile.write(chunk)

        # If the raster file is compressed, decompress it, otherwise try to
        # open the source file directly.
        if os.path.splitext(filepath)[1].lower() == '.zip':
            # Open and extract zipfile. The archive must be closed before it
            # is removed, otherwise the removal fails on platforms that
            # forbid deleting open files.
            with zipfile.ZipFile(filepath) as zf:
                zf.extractall(self.tmpdir)

            # Remove zipfile
            os.remove(filepath)

            # Get filelist from directory
            matches = [
                os.path.join(root, filename)
                for root, dirnames, filenames in os.walk(self.tmpdir)
                for filename in fnmatch.filter(filenames, '*.*')
            ]

            # Open the first raster file found in the matched files.
            self.dataset = None
            for match in matches:
                try:
                    self.dataset = GDALRaster(match)
                    break
                except GDALException:
                    pass

            # Raise exception if no file could be opened by gdal.
            if not self.dataset:
                raise RasterException('Could not open rasterfile.')
        else:
            self.dataset = GDALRaster(filepath)

        # Override srid if provided
        if self.rasterlayer.srid:
            try:
                self.dataset = GDALRaster(self.dataset.name, write=True)
            except GDALException:
                raise RasterException(
                    'Could not override srid because the driver for this '
                    'type of raster does not support write mode.'
                )
            self.dataset.srs = self.rasterlayer.srid

    def reproject_rasterfile(self):
        """
        Reproject the rasterfile into web mercator.

        Nothing is done when a reprojected file is already stored, or when
        the dataset is already in web mercator and no nodata override is
        needed. Otherwise the dataset is transformed, the nodata override is
        applied, and the result is optionally zipped and stored.
        """
        # Return if reprojected rasterfile already exists.
        if hasattr(self.rasterlayer, 'reprojected') and self.rasterlayer.reprojected.rasterfile.name:
            return

        # Return if the raster already has the right projection
        # and nodata value is acceptable.
        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # No nodata value was manually specified.
            if self.rasterlayer.nodata in ('', None):
                return
            # All bands from dataset already have the same nodata value as the
            # one that was manually specified. The configured value is
            # converted with float() -- the same conversion used below when
            # writing it to the bands -- so that a value stored as text
            # compares equal to the numeric band value.
            requested_nodata = float(self.rasterlayer.nodata)
            if all(requested_nodata == band.nodata_value for band in self.dataset.bands):
                return
        else:
            # Log projection change if original raster is not in web mercator.
            self.log(
                'Transforming raster to SRID {0}'.format(WEB_MERCATOR_SRID),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )

        # Reproject the dataset.
        self.dataset = self.dataset.transform(
            WEB_MERCATOR_SRID,
            driver=INTERMEDIATE_RASTER_FORMAT,
        )

        # Manually override nodata value if necessary
        if self.rasterlayer.nodata not in ('', None):
            self.log(
                'Setting no data values to {0}.'.format(self.rasterlayer.nodata),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )
            for band in self.dataset.bands:
                band.nodata_value = float(self.rasterlayer.nodata)

        # Compress reprojected raster file and store it
        if self.rasterlayer.store_reprojected:
            dest = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.zip')
            with zipfile.ZipFile(dest.name, 'w', allowZip64=True) as dest_zip:
                dest_zip.write(
                    filename=self.dataset.name,
                    arcname=os.path.basename(self.dataset.name),
                    compress_type=zipfile.ZIP_DEFLATED,
                )

            # Store zip file in reprojected raster model.
            # NOTE(review): the handle opened here is never closed
            # explicitly; it is left open because the model field keeps a
            # reference to it -- confirm before changing.
            self.rasterlayer.reprojected.rasterfile = File(open(dest_zip.filename, 'rb'))
            self.rasterlayer.reprojected.save()

        self.log('Finished transforming raster.')

    def create_initial_histogram_buckets(self):
        """
        Gets the empty histogram arrays for statistics collection.

        Fills ``self.hist_values`` and ``self.hist_bins`` with one numpy array
        per dataset band, read from the stored band metadata.
        """
        self.hist_values = []
        self.hist_bins = []

        for i, _band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer, band=i).first()
            self.hist_values.append(numpy.array(bandmeta.hist_values))
            self.hist_bins.append(numpy.array(bandmeta.hist_bins))

    def extract_metadata(self):
        """
        Extract and store metadata for the raster and its bands.

        Raises RasterException when the max zoom level cannot be computed.
        """
        self.log('Extracting metadata from raster.')

        # Try to compute max zoom
        try:
            max_zoom = self.compute_max_zoom()
        except GDALException:
            raise RasterException('Failed to compute max zoom. Check the SRID of the raster.')

        # Extract global raster metadata
        meta = self.rasterlayer.metadata
        meta.uperleftx = self.dataset.origin.x
        meta.uperlefty = self.dataset.origin.y
        meta.width = self.dataset.width
        meta.height = self.dataset.height
        meta.scalex = self.dataset.scale.x
        meta.scaley = self.dataset.scale.y
        meta.skewx = self.dataset.skew.x
        meta.skewy = self.dataset.skew.y
        meta.numbands = len(self.dataset.bands)
        meta.srs_wkt = self.dataset.srs.wkt
        meta.srid = self.dataset.srs.srid
        meta.max_zoom = max_zoom
        meta.save()

        # Extract band metadata, creating the metadata row when missing
        for i, band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer, band=i).first()
            if not bandmeta:
                bandmeta = RasterLayerBandMetadata(rasterlayer=self.rasterlayer, band=i)

            bandmeta.nodata_value = band.nodata_value
            bandmeta.min = band.min
            bandmeta.max = band.max
            # Depending on Django version, the band statistics include std and mean.
            if hasattr(band, 'std'):
                bandmeta.std = band.std
            if hasattr(band, 'mean'):
                bandmeta.mean = band.mean
            bandmeta.save()

        self.log('Finished extracting metadata from raster.')

    def create_tiles(self, zoom_levels):
        """
        Create tiles for input zoom levels, either a list or an integer.
        """
        if isinstance(zoom_levels, int):
            self.populate_tile_level(zoom_levels)
        else:
            for zoom in zoom_levels:
                self.populate_tile_level(zoom)

    def populate_tile_level(self, zoom):
        """
        Create tiles for this raster at the given zoomlevel.

        This routine first snaps the raster to the grid of the zoomlevel,
        then creates the tiles from the snapped raster.
        """
        # Abort if zoom level is above resolution of the raster layer
        if zoom > self.max_zoom:
            return
        elif zoom == self.max_zoom:
            self.create_initial_histogram_buckets()

        # Compute the tile x-y-z index range for the rasterlayer for this zoomlevel
        bbox = self.dataset.extent
        quadrants = utils.quadrants(bbox, zoom)

        self.log('Creating {0} tiles in {1} quadrants at zoom {2}.'.format(self.nr_of_tiles(zoom), len(quadrants), zoom))

        # Run one process_quadrant task per quadrant as a task group
        quadrant_task_group = group(self.process_quadrant.si(indexrange, zoom) for indexrange in quadrants)
        quadrant_task_group.apply()

        # Store histogram data
        if zoom == self.max_zoom:
            bandmetas = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer)
            for bandmeta in bandmetas:
                bandmeta.hist_values = self.hist_values[bandmeta.band].tolist()
                bandmeta.save()

        self.log('Finished parsing at zoom level {0}.'.format(zoom), zoom=zoom)

    # Counter used only to number quadrants in the log messages.
    _quadrant_count = 0

    @current_app.task(filter=task_method)
    def process_quadrant(self, indexrange, zoom):
        """
        Create raster tiles for a quadrant of tiles defined by a x-y-z index
        range and a zoom level.

        ``indexrange`` is indexed as (xmin, ymin, xmax, ymax) tile indices.
        Tiles that contain only nodata values are skipped; the others are
        written to the database in batches of BATCH_STEP_SIZE.
        """
        self._quadrant_count += 1

        self.log(
            'Starting tile creation for quadrant {0} at zoom level {1}'.format(self._quadrant_count, zoom),
            status=self.rasterlayer.parsestatus.CREATING_TILES
        )

        # Compute scale of tiles for this zoomlevel
        tilescale = utils.tile_scale(zoom)

        # Compute quadrant bounds and create destination file
        bounds = utils.tile_bounds(indexrange[0], indexrange[1], zoom)
        dest_file = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.tif')

        # Snap dataset to the quadrant
        snapped_dataset = self.dataset.warp({
            'name': dest_file.name,
            'origin': [bounds[0], bounds[3]],
            'scale': [tilescale, -tilescale],
            'width': (indexrange[2] - indexrange[0] + 1) * self.tilesize,
            'height': (indexrange[3] - indexrange[1] + 1) * self.tilesize,
        })

        # Create all tiles in this quadrant in batches
        batch = []
        for tilex in range(indexrange[0], indexrange[2] + 1):
            for tiley in range(indexrange[1], indexrange[3] + 1):
                # Calculate raster tile origin
                bounds = utils.tile_bounds(tilex, tiley, zoom)

                # Construct band data arrays
                pixeloffset = (
                    (tilex - indexrange[0]) * self.tilesize,
                    (tiley - indexrange[1]) * self.tilesize
                )

                band_data = [
                    {
                        'data': band.data(offset=pixeloffset, size=(self.tilesize, self.tilesize)),
                        'nodata_value': band.nodata_value
                    } for band in snapped_dataset.bands
                ]

                # Ignore tile if its only nodata.
                if all(numpy.all(dat['data'] == dat['nodata_value']) for dat in band_data):
                    continue

                # Add tile data to histogram
                if zoom == self.max_zoom:
                    self.push_histogram(band_data)

                # Warp source raster into this tile (in memory)
                dest = GDALRaster({
                    'width': self.tilesize,
                    'height': self.tilesize,
                    'origin': [bounds[0], bounds[3]],
                    'scale': [tilescale, -tilescale],
                    'srid': WEB_MERCATOR_SRID,
                    'datatype': snapped_dataset.bands[0].datatype(),
                    'bands': band_data,
                })

                # Store tile in batch array
                batch.append(
                    RasterTile(
                        rast=dest,
                        rasterlayer_id=self.rasterlayer.id,
                        tilex=tilex,
                        tiley=tiley,
                        tilez=zoom
                    )
                )

                # Commit batch to database and reset it
                if len(batch) == BATCH_STEP_SIZE:
                    RasterTile.objects.bulk_create(batch)
                    batch = []

        # Commit remaining objects
        if len(batch):
            RasterTile.objects.bulk_create(batch)

    def push_histogram(self, data):
        """
        Add data to band level histogram.

        ``data`` is a list with one dict per band holding the pixel values
        under the ``'data'`` key.
        """
        # Loop through bands of this tile
        for i, dat in enumerate(data):
            # Create histogram for new data with the same bins
            new_hist = numpy.histogram(dat['data'], bins=self.hist_bins[i])
            # Add counts of this tile to band metadata histogram
            self.hist_values[i] += new_hist[0]

    def drop_all_tiles(self):
        """
        Delete all existing tiles for this parser's rasterlayer.
        """
        self.log('Clearing all existing tiles.')
        self.rasterlayer.rastertile_set.all().delete()
        self.log('Finished clearing existing tiles.')

    def send_success_signal(self):
        """
        Send parser end signal for other dependencies to be handling new tiles.
        """
        self.log(
            'Successfully finished parsing raster',
            status=self.rasterlayer.parsestatus.FINISHED
        )
        rasterlayers_parser_ended.send(sender=self.rasterlayer.__class__, instance=self.rasterlayer)

    def compute_max_zoom(self):
        """
        Compute the max zoom level for the dataset.

        Returns the manual override from the rasterlayer when one is set,
        otherwise the zoom level closest to the pixel size of the dataset
        expressed in web mercator.
        """
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # For rasters in web mercator, use the scale directly
            scale = abs(self.dataset.scale.x)
        else:
            # Create a line from the center of the raster to a point that is
            # one pixel width away from the center.
            xcenter = self.dataset.extent[0] + (self.dataset.extent[2] - self.dataset.extent[0]) / 2
            ycenter = self.dataset.extent[1] + (self.dataset.extent[3] - self.dataset.extent[1]) / 2
            linestring = 'LINESTRING({} {}, {} {})'.format(
                xcenter, ycenter, xcenter + self.dataset.scale.x, ycenter
            )
            line = OGRGeometry(linestring, srs=self.dataset.srs)

            # Transform the line into web mercator.
            line.transform(WEB_MERCATOR_SRID)

            # Use the length of the transformed line as scale.
            scale = line.geos.length

        return utils.closest_zoomlevel(scale)

    @property
    def max_zoom(self):
        """
        Max zoom level to create tiles for.

        Raises RasterException when the rasterlayer has no metadata and no
        manual override is set.
        """
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        # Get max zoom from metadata
        if not hasattr(self.rasterlayer, 'metadata'):
            raise RasterException('Could not determine max zoom level.')
        max_zoom = self.rasterlayer.metadata.max_zoom

        # Reduce max zoom by one if zoomdown flag was disabled
        if not self.rasterlayer.next_higher:
            max_zoom -= 1

        return max_zoom

    def nr_of_tiles(self, zoom):
        """
        Compute the number of tiles for the rasterlayer on a given zoom level.
        """
        bbox = self.dataset.extent
        indexrange = utils.tile_index_range(bbox, zoom)
        return (indexrange[2] - indexrange[0] + 1) * (indexrange[3] - indexrange[1] + 1)
def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from the scheduler. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.

    The steps are, in order: auto-assign (or detach an offline) processing
    node, submit a new task when no UUID is set, execute a pending
    CANCEL / RESTART / REMOVE action, then refresh the task status from the
    processing node and import the assets once the task has completed.

    ProcessingError is turned into a task failure via ``set_failure``;
    connection errors and ProcessingTimeout are only logged.
    """
    try:
        if self.auto_processing_node and self.status not in [status_codes.FAILED, status_codes.CANCELED]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node()
                if self.processing_node:
                    # Doesn't have to be accurate, it will get overridden later
                    self.processing_node.queue_count += 1
                    self.processing_node.save()

                    logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online():
                # Detach processing node, will be processed at the next tick
                logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                self.uuid = ''
                self.processing_node = None
                self.save()

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [image.path() for image in self.imageupload_set.all()]

                # This takes a while
                uuid = self.processing_node.process_new_task(images, self.name, self.options)

                # Refresh task object before committing change
                self.refresh_from_db()
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                        self.status = None
                    except ProcessingException:
                        logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))
                        self.status = status_codes.CANCELED

                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot cancel a task that has no processing node or UUID")

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:
                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have
                    # been eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(self.uuid)
                            uuid_still_exists = info['uuid'] == self.uuid
                        except ProcessingException:
                            pass

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid)
                        except ProcessingError:
                            # Something went wrong
                            logger.warning("Could not restart {}, will start a new one".format(self))
                            self.uuid = ''
                    else:
                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                    # Reset the bookkeeping so the task looks fresh again.
                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot restart a task that has no processing node")

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except ProcessingException:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                # Update task info from processing node
                info = self.processing_node.get_task_info(self.uuid)

                self.processing_time = info["processingTime"]
                self.status = info["status"]["code"]

                # Only fetch the console lines that were not stored yet.
                current_lines_count = len(self.console_output.split("\n")) - 1
                self.console_output += self.processing_node.get_task_console_output(self.uuid, current_lines_count)

                if "errorMessage" in info["status"]:
                    self.last_error = info["status"]["errorMessage"]

                # Has the task just been canceled, failed, or completed?
                if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                    logger.info("Processing status: {} for {}".format(self.status, self))

                    if self.status == status_codes.COMPLETED:
                        # exist_ok avoids the race between checking for the
                        # directory and creating it.
                        assets_dir = self.assets_path("")
                        os.makedirs(assets_dir, exist_ok=True)

                        logger.info("Downloading all.zip for {}".format(self))

                        # Download all assets
                        zip_stream = self.processing_node.download_task_asset(self.uuid, "all.zip")
                        zip_path = os.path.join(assets_dir, "all.zip")
                        with open(zip_path, 'wb') as fd:
                            for chunk in zip_stream.iter_content(4096):
                                fd.write(chunk)

                        logger.info("Done downloading all.zip for {}".format(self))

                        # Extract from zip
                        with zipfile.ZipFile(zip_path, "r") as zip_h:
                            zip_h.extractall(assets_dir)

                        logger.info("Extracted all.zip for {}".format(self))

                        # Add to database orthophoto
                        orthophoto_path = os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif"))
                        if os.path.exists(orthophoto_path):
                            orthophoto = GDALRaster(orthophoto_path, write=True)

                            # We need to transform to 4326 before we can store it
                            # as an offdb raster field
                            orthophoto_4326_path = os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto_4326.tif"))
                            self.orthophoto = orthophoto.transform(4326, 'GTiff', orthophoto_4326_path)

                            logger.info("Imported orthophoto {} for {}".format(orthophoto_4326_path, self))

                        # Remove old odm_texturing.zip archive (if any)
                        textured_model_archive = self.assets_path(self.get_textured_model_filename())
                        if os.path.exists(textured_model_archive):
                            os.remove(textured_model_archive)

                # Persist the refreshed state whether the task completed,
                # failed, was canceled, or is still waiting.
                self.save()
    except ProcessingError as e:
        self.set_failure(str(e))
    except (ConnectionRefusedError, ConnectionError) as e:
        logger.warning("{} cannot communicate with processing node: {}".format(self, str(e)))
    except ProcessingTimeout as e:
        logger.warning("{} timed out with error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))
class RasterLayerParser(object):
    """
    Class to parse raster layers.

    Copies the rasterfile of a rasterlayer to a local work directory, opens
    it with GDAL, reprojects it to web mercator and stores it as tiles.
    """
    def __init__(self, rasterlayer):
        """
        Keep the rasterlayer and read the tile size and zoomdown settings.
        """
        self.rasterlayer = rasterlayer
        self.rastername = os.path.basename(rasterlayer.rasterfile.name)

        # Set raster tilesize
        self.tilesize = int(getattr(settings, "RASTER_TILESIZE", WEB_MERCATOR_TILESIZE))
        self.zoomdown = getattr(settings, "RASTER_ZOOM_NEXT_HIGHER", True)

    def log(self, msg, reset=False, status=None, zoom=None):
        """
        Write a message to the parse log of the rasterlayer instance and update
        the parse status object.

        With ``reset`` the log is replaced by the message instead of being
        appended to. ``status`` and ``zoom`` update the parse status and the
        current tile level when given.
        """
        if status is not None:
            self.rasterlayer.parsestatus.status = status

        if zoom is not None:
            self.rasterlayer.parsestatus.tile_level = zoom

        # Prepare datetime stamp for log
        now = "[{0}] ".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # Write log, reset if requested
        if reset:
            self.rasterlayer.parsestatus.log = now + msg
        else:
            self.rasterlayer.parsestatus.log += "\n" + now + msg

        self.rasterlayer.save()
        self.rasterlayer.parsestatus.save()

    def get_raster_file(self):
        """
        Make local copy of rasterfile, which is needed if files are stored on
        remote storage, and unzip it if necessary.

        Sets ``self.tmpdir`` and, for zipped input, points ``self.rastername``
        at the first extracted file.
        """
        self.log("Getting raster file from storage")

        raster_workdir = getattr(settings, "RASTER_WORKDIR", None)
        self.tmpdir = tempfile.mkdtemp(dir=raster_workdir)

        # Access rasterfile and store in a temp folder. The context manager
        # closes the local copy even if reading a chunk fails.
        local_path = os.path.join(self.tmpdir, self.rastername)
        with open(local_path, "wb") as rasterfile:
            for chunk in self.rasterlayer.rasterfile.chunks():
                rasterfile.write(chunk)

        # If the raster file is compressed, decompress it. The extension is
        # compared case-insensitively so that ".ZIP" uploads are unpacked too.
        file_extension = os.path.splitext(self.rastername)[1]

        if file_extension.lower() == ".zip":
            # Open and extract zipfile. The archive is closed before it is
            # removed, otherwise the removal fails on platforms that forbid
            # deleting open files.
            with zipfile.ZipFile(local_path) as zf:
                zf.extractall(self.tmpdir)

            # Remove zipfile
            os.remove(local_path)

            # Get filelist from directory
            raster_list = glob.glob(os.path.join(self.tmpdir, "*.*"))

            # Check if only one file is found in zipfile
            if len(raster_list) > 1:
                self.log(
                    "WARNING: Found more than one file in zipfile "
                    "using only first file found. This might lead "
                    "to problems if its not a raster file."
                )

            # Return first one as raster file
            self.rastername = os.path.basename(raster_list[0])

    def open_raster_file(self):
        """
        Open the raster file as GDALRaster and set nodata-values.

        Also creates one band metadata object per band, prepares the
        histogram accumulators and stores the original raster metadata.
        """
        self.log("Opening raster file as GDALRaster.")

        # Open raster file
        self.dataset = GDALRaster(os.path.join(self.tmpdir, self.rastername), write=True)

        # Make sure nodata value is set from input
        self.hist_values = []
        self.hist_bins = []
        for i, band in enumerate(self.dataset.bands):
            if self.rasterlayer.nodata is not None:
                band.nodata_value = float(self.rasterlayer.nodata)

            # Create band metadata object
            bandmeta = RasterLayerBandMetadata.objects.create(
                rasterlayer=self.rasterlayer, band=i, nodata_value=band.nodata_value, min=band.min, max=band.max
            )

            # Prepare numpy hist values and bins
            self.hist_values.append(numpy.array(bandmeta.hist_values))
            self.hist_bins.append(numpy.array(bandmeta.hist_bins))

        # Store original metadata for this raster
        meta = self.rasterlayer.metadata

        meta.uperleftx = self.dataset.origin.x
        meta.uperlefty = self.dataset.origin.y
        meta.width = self.dataset.width
        meta.height = self.dataset.height
        meta.scalex = self.dataset.scale.x
        meta.scaley = self.dataset.scale.y
        meta.skewx = self.dataset.skew.x
        meta.skewy = self.dataset.skew.y
        meta.numbands = len(self.dataset.bands)
        meta.srs_wkt = self.dataset.srs.wkt
        meta.srid = self.dataset.srs.srid

        meta.save()

    def close_raster_file(self):
        """
        On Windows close and release the GDALRaster resources
        """
        try:
            if self.dataset:
                del self.dataset
                self.dataset = None
        except AttributeError:
            # The dataset was never opened; nothing to release.
            pass

    def create_tiles(self, zoom):
        """
        Create tiles for this raster at the given zoomlevel.

        This routine first snaps the raster to the grid of the zoomlevel,
        then creates the tiles from the snapped raster.
        """
        # Compute the tile x-y-z index range for the rasterlayer for this zoomlevel
        bbox = self.rasterlayer.extent()
        indexrange = tiler.tile_index_range(bbox, zoom)

        # Compute scale of tiles for this zoomlevel
        tilescale = tiler.tile_scale(zoom)

        # Count the number of tiles that are required to cover the raster at this zoomlevel
        nr_of_tiles = (indexrange[2] - indexrange[0] + 1) * (indexrange[3] - indexrange[1] + 1)

        # Create destination raster file
        self.log("Snapping dataset to zoom level {0}".format(zoom))

        bounds = tiler.tile_bounds(indexrange[0], indexrange[1], zoom)
        sizex = (indexrange[2] - indexrange[0] + 1) * self.tilesize
        sizey = (indexrange[3] - indexrange[1] + 1) * self.tilesize
        dest_file = os.path.join(self.tmpdir, "djangowarpedraster" + str(zoom) + ".tif")

        snapped_dataset = self.dataset.warp(
            {
                "name": dest_file,
                "origin": [bounds[0], bounds[3]],
                "scale": [tilescale, -tilescale],
                "width": sizex,
                "height": sizey,
            }
        )

        self.log("Creating {0} tiles for zoom {1}.".format(nr_of_tiles, zoom))

        counter = 0
        for tilex in range(indexrange[0], indexrange[2] + 1):
            for tiley in range(indexrange[1], indexrange[3] + 1):
                # Log progress
                counter += 1
                if counter % 250 == 0:
                    self.log("{0} tiles created at zoom {1}".format(counter, zoom))

                # Calculate raster tile origin
                bounds = tiler.tile_bounds(tilex, tiley, zoom)

                # Construct band data arrays
                pixeloffset = ((tilex - indexrange[0]) * self.tilesize, (tiley - indexrange[1]) * self.tilesize)

                band_data = [
                    {
                        "data": band.data(offset=pixeloffset, size=(self.tilesize, self.tilesize)),
                        "nodata_value": band.nodata_value,
                    }
                    for band in snapped_dataset.bands
                ]

                # Add tile data to histogram
                if zoom == self.max_zoom:
                    self.push_histogram(band_data)

                # Warp source raster into this tile (in memory)
                dest = GDALRaster(
                    {
                        "width": self.tilesize,
                        "height": self.tilesize,
                        "origin": [bounds[0], bounds[3]],
                        "scale": [tilescale, -tilescale],
                        "srid": WEB_MERCATOR_SRID,
                        "datatype": snapped_dataset.bands[0].datatype(),
                        "bands": band_data,
                    }
                )

                # Store tile
                RasterTile.objects.create(rast=dest, rasterlayer=self.rasterlayer, tilex=tilex, tiley=tiley, tilez=zoom)

        # Store histogram data
        if zoom == self.max_zoom:
            bandmetas = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer)
            for bandmeta in bandmetas:
                bandmeta.hist_values = self.hist_values[bandmeta.band].tolist()
                bandmeta.save()

        # Remove snapped dataset; the reference is dropped first so the file
        # is no longer held by the raster object when it is deleted.
        self.log("Removing snapped dataset.", zoom=zoom)
        snapped_dataset = None
        os.remove(dest_file)

    def push_histogram(self, data):
        """
        Add data to band level histogram.

        ``data`` is a list with one dict per band holding the pixel values
        under the ``"data"`` key.
        """
        # Loop through bands of this tile
        for i, dat in enumerate(data):
            # Create histogram for new data with the same bins
            new_hist = numpy.histogram(dat["data"], bins=self.hist_bins[i])
            # Add counts of this tile to band metadata histogram
            self.hist_values[i] += new_hist[0]

    def drop_empty_rasters(self):
        """
        Remove rasters that are only no-data from the current rasterlayer.
        """
        self.log("Dropping empty raster tiles.", status=self.rasterlayer.parsestatus.DROPPING_EMPTY_TILES)

        # The layer id is passed as a query parameter instead of being
        # formatted into the statement.
        sql = (
            "DELETE FROM raster_rastertile "
            "WHERE ST_Count(rast)=0 "
            "AND rasterlayer_id=%s"
        )

        # Run SQL to drop empty tiles; the cursor is closed on exit.
        with connection.cursor() as cursor:
            cursor.execute(sql, [self.rasterlayer.id])

    def parse_raster_layer(self):
        """
        This function pushes the raster data from the Raster Layer into the
        RasterTile table.

        Any exception is written to the parse log with status FAILED and
        re-raised. The dataset and the temporary work directory are always
        released at the end.
        """
        try:
            # Clean previous parse log
            self.log("Started parsing raster file", reset=True, status=self.rasterlayer.parsestatus.DOWNLOADING_FILE)

            # Download, unzip and open raster file
            self.get_raster_file()
            self.open_raster_file()

            # Remove existing tiles for this layer before loading new ones
            self.rasterlayer.rastertile_set.all().delete()

            # Transform raster to global srid
            if self.dataset.srs.srid == WEB_MERCATOR_SRID:
                self.log("Dataset already in SRID {0}, skipping transform".format(WEB_MERCATOR_SRID))
            else:
                self.log(
                    "Transforming raster to SRID {0}".format(WEB_MERCATOR_SRID),
                    status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
                )
                self.dataset = self.dataset.transform(WEB_MERCATOR_SRID)

            # Compute max zoom at the web mercator projection
            self.max_zoom = tiler.closest_zoomlevel(abs(self.dataset.scale.x))

            # Store max zoom level in metadata
            self.rasterlayer.metadata.max_zoom = self.max_zoom
            self.rasterlayer.metadata.save()

            # Reduce max zoom by one if zoomdown flag was disabled
            if not self.zoomdown:
                self.max_zoom -= 1

            self.log("Started creating tiles", status=self.rasterlayer.parsestatus.CREATING_TILES)

            # Loop through all lower zoom levels and create tiles to
            # setup TMS aligned tiles in world mercator
            for iz in range(self.max_zoom + 1):
                self.create_tiles(iz)

            self.drop_empty_rasters()

            # Send signal for end of parsing
            rasterlayers_parser_ended.send(sender=self.rasterlayer.__class__, instance=self.rasterlayer)

            # Log success of parsing
            self.log("Successfully finished parsing raster", status=self.rasterlayer.parsestatus.FINISHED)
        except BaseException:
            # Deliberately broad: record the failure in the parse log, then
            # let the exception propagate unchanged.
            self.log(traceback.format_exc(), status=self.rasterlayer.parsestatus.FAILED)
            raise
        finally:
            self.close_raster_file()
            # The work directory only exists once get_raster_file got far
            # enough to create it; guarding here keeps a cleanup error from
            # masking the exception that aborted the parse.
            tmpdir = getattr(self, "tmpdir", None)
            if tmpdir:
                shutil.rmtree(tmpdir)
def test_raster_transform(self):
    """
    Reproject a small file-based raster from SRID 4326 to SRID 3086 and
    check the georeferencing and pixel values of the result after it has
    been reloaded from disk.
    """
    if GDAL_VERSION < (1, 8, 1):
        self.skipTest("GDAL >= 1.8.1 is required for this test")

    # Prepare tempfile and nodata value
    rstfile = tempfile.NamedTemporaryFile(suffix=".tif")
    ndv = 99

    # Create in file based raster
    source = GDALRaster(
        {
            "datatype": 1,
            "driver": "tif",
            "name": rstfile.name,
            "width": 5,
            "height": 5,
            "nr_of_bands": 1,
            "srid": 4326,
            "origin": (-5, 5),
            "scale": (2, -2),
            "skew": (0, 0),
            "bands": [{"data": range(25), "nodata_value": ndv}],
        }
    )

    # Transform raster into srid 3086.
    target = source.transform(3086)

    # Reload data from disk
    target = GDALRaster(target.name)

    self.assertEqual(target.srs.srid, 3086)
    self.assertEqual(target.width, 7)
    self.assertEqual(target.height, 7)
    self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())

    # Origin and scale are results of a floating point reprojection, so
    # they are compared to 3 decimal places rather than for exact equality.
    self.assertAlmostEqual(target.origin[0], 9124842.791079799, 3)
    self.assertAlmostEqual(target.origin[1], 1589911.6476407414, 3)
    self.assertAlmostEqual(target.scale[0], 223824.82664250192, 3)
    self.assertAlmostEqual(target.scale[1], -223824.82664250192, 3)
    self.assertEqual(target.skew, [0, 0])

    result = target.bands[0].data()
    if numpy:
        result = result.flatten().tolist()

    # The reprojection of a raster that spans over a large area
    # skews the data matrix and might introduce nodata values.
    self.assertEqual(
        result,
        [
            ndv, ndv, ndv, ndv, 4, ndv, ndv,
            ndv, ndv, 2, 3, 9, ndv, ndv,
            ndv, 1, 2, 8, 13, 19, ndv,
            0, 6, 6, 12, 18, 18, 24,
            ndv, 10, 11, 16, 22, 23, ndv,
            ndv, ndv, 15, 21, 22, ndv, ndv,
            ndv, ndv, 20, ndv, ndv, ndv, ndv,
        ],
    )