Example #1
    def test_raster_transform_clone(self):
        with mock.patch.object(GDALRaster, 'clone') as mocked_clone:
            # Create a file-based raster.
            rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
            source = GDALRaster({
                'datatype': 1,
                'driver': 'tif',
                'name': rstfile.name,
                'width': 5,
                'height': 5,
                'nr_of_bands': 1,
                'srid': 4326,
                'origin': (-5, 5),
                'scale': (2, -2),
                'skew': (0, 0),
                'bands': [{
                    'data': range(25),
                    'nodata_value': 99,
                }],
            })
            # transform() returns a clone because it is the same SRID and
            # driver.
            source.transform(4326)
            self.assertEqual(mocked_clone.call_count, 1)
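These Django test snippets assume a few imports; a minimal sketch of what they rely on (the module paths follow Django's GIS test suite and are stated here as an assumption):

import tempfile
from unittest import mock

from django.contrib.gis.gdal import GDALRaster  # the raster wrapper under test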
Example #2
    def test_raster_transform_clone(self):
        with mock.patch.object(GDALRaster, "clone") as mocked_clone:
            # Create a file-based raster.
            rstfile = tempfile.NamedTemporaryFile(suffix=".tif")
            source = GDALRaster({
                "datatype": 1,
                "driver": "tif",
                "name": rstfile.name,
                "width": 5,
                "height": 5,
                "nr_of_bands": 1,
                "srid": 4326,
                "origin": (-5, 5),
                "scale": (2, -2),
                "skew": (0, 0),
                "bands": [{
                    "data": range(25),
                    "nodata_value": 99,
                }],
            })
            # transform() returns a clone because it is the same SRID and
            # driver.
            source.transform(4326)
            self.assertEqual(mocked_clone.call_count, 1)
Example #3
from django.contrib.gis.gdal import GDALRaster


def reprojected_by_gdal(src: str, dst: str, dst_crs: int = 4326) -> None:
    """Reproject a raster with GDAL and write the result to ``dst``."""
    # Open the source raster.
    src_gdal = GDALRaster(src, write=True)

    # Reproject into the target CRS, writing the new file to ``dst``.
    src_gdal.transform(dst_crs, name=dst)
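A hypothetical call of the helper above; both file paths are placeholders:

# Reproject a UTM GeoTIFF to WGS84 (EPSG:4326).
reprojected_by_gdal('/data/scene_utm.tif', '/data/scene_wgs84.tif', dst_crs=4326)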
Example #4
    def test_raster_transform(self):
        tests = [
            3086,
            '3086',
            SpatialReference(3086),
        ]
        for srs in tests:
            with self.subTest(srs=srs):
                # Prepare tempfile and nodata value.
                rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
                ndv = 99
                # Create a file-based raster.
                source = GDALRaster({
                    'datatype': 1,
                    'driver': 'tif',
                    'name': rstfile.name,
                    'width': 5,
                    'height': 5,
                    'nr_of_bands': 1,
                    'srid': 4326,
                    'origin': (-5, 5),
                    'scale': (2, -2),
                    'skew': (0, 0),
                    'bands': [{
                        'data': range(25),
                        'nodata_value': ndv,
                    }],
                })

                target = source.transform(srs)

                # Reload data from disk.
                target = GDALRaster(target.name)
                self.assertEqual(target.srs.srid, 3086)
                self.assertEqual(target.width, 7)
                self.assertEqual(target.height, 7)
                self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())
                self.assertAlmostEqual(target.origin[0], 9124842.791079799, 3)
                self.assertAlmostEqual(target.origin[1], 1589911.6476407414, 3)
                self.assertAlmostEqual(target.scale[0], 223824.82664250192, 3)
                self.assertAlmostEqual(target.scale[1], -223824.82664250192, 3)
                self.assertEqual(target.skew, [0, 0])

                result = target.bands[0].data()
                if numpy:
                    result = result.flatten().tolist()
                # The reprojection of a raster that spans over a large area
                # skews the data matrix and might introduce nodata values.
                self.assertEqual(
                    result,
                    [
                        ndv, ndv, ndv, ndv, 4, ndv, ndv,
                        ndv, ndv, 2, 3, 9, ndv, ndv,
                        ndv, 1, 2, 8, 13, 19, ndv,
                        0, 6, 6, 12, 18, 18, 24,
                        ndv, 10, 11, 16, 22, 23, ndv,
                        ndv, ndv, 15, 21, 22, ndv, ndv,
                        ndv, ndv, 20, ndv, ndv, ndv, ndv,
                    ],
                )
Example #5
    def test_raster_transform_clone_name(self):
        # Create a file-based raster.
        rstfile = tempfile.NamedTemporaryFile(suffix=".tif")
        source = GDALRaster({
            "datatype": 1,
            "driver": "tif",
            "name": rstfile.name,
            "width": 5,
            "height": 5,
            "nr_of_bands": 1,
            "srid": 4326,
            "origin": (-5, 5),
            "scale": (2, -2),
            "skew": (0, 0),
            "bands": [{
                "data": range(25),
                "nodata_value": 99,
            }],
        })
        clone_name = rstfile.name + "_respect_name.GTiff"
        target = source.transform(4326, name=clone_name)
        self.assertEqual(target.name, clone_name)
Example #6
    def test_raster_transform_clone_name(self):
        # Create a file-based raster.
        rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
        source = GDALRaster({
            'datatype': 1,
            'driver': 'tif',
            'name': rstfile.name,
            'width': 5,
            'height': 5,
            'nr_of_bands': 1,
            'srid': 4326,
            'origin': (-5, 5),
            'scale': (2, -2),
            'skew': (0, 0),
            'bands': [{
                'data': range(25),
                'nodata_value': 99,
            }],
        })
        clone_name = rstfile.name + '_respect_name.GTiff'
        target = source.transform(4326, name=clone_name)
        self.assertEqual(target.name, clone_name)
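Besides name, GDALRaster.transform() in Django also accepts driver, resampling, and max_error keyword arguments; a minimal sketch reusing source from the example above, with illustrative values:

# Keep the reprojected clone in memory and use bilinear resampling.
target = source.transform(3857, driver='MEM', resampling='Bilinear')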
Example #7
import os
import subprocess

from django.conf import settings
from django.contrib.gis.gdal import GDALRaster
from django.core.files.storage import FileSystemStorage


def raster_handler(files, *args, **kwargs):
    """Merge the uploaded rasters and return them reprojected to EPSG:3857."""
    if not files:
        # Guard against an empty upload; `source` would otherwise be unbound.
        raise ValueError('raster_handler() requires at least one file')

    rasters_dir = os.path.join(settings.MEDIA_ROOT, 'rasters')
    storage = FileSystemStorage()

    if len(files) > 1:
        output_raster = os.path.join(rasters_dir, 'merged.tif')
        if os.path.isfile(output_raster):
            os.remove(output_raster)

        # Stack the inputs as separate bands with gdal_merge.
        merge_command = ["python", "utils/gdal_merge.py", "-o", output_raster, "-separate"]
        rasters = []

        for f in files:
            filename = storage.save('rasters/' + f.name, f)
            rasters.append(os.path.join(settings.MEDIA_ROOT, filename))

        merge_command += rasters
        subprocess.call(merge_command)

        # The individual inputs are no longer needed once merged.
        for f in rasters:
            os.remove(f)

        source = GDALRaster(output_raster, write=True)
    else:
        filename = storage.save('rasters/' + files[0].name, files[0])
        source = GDALRaster(os.path.join(settings.MEDIA_ROOT, filename), write=True)

    return source.transform(3857)
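A hypothetical view wiring up raster_handler(); the view and the 'rasters' form field name are placeholders, not part of the original project:

from django.http import JsonResponse

def upload_rasters(request):
    # Merge and reproject the uploaded files, then report the result.
    merged = raster_handler(request.FILES.getlist('rasters'))
    return JsonResponse({'srid': merged.srs.srid, 'name': merged.name})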
Example #8
    def test_raster_transform(self):
        # Prepare tempfile and nodata value
        rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
        ndv = 99

        # Create a file-based raster
        source = GDALRaster({
            'datatype': 1,
            'driver': 'tif',
            'name': rstfile.name,
            'width': 5,
            'height': 5,
            'nr_of_bands': 1,
            'srid': 4326,
            'origin': (-5, 5),
            'scale': (2, -2),
            'skew': (0, 0),
            'bands': [{
                'data': range(25),
                'nodata_value': ndv,
            }],
        })

        # Transform raster into srid 3086.
        target = source.transform(3086)

        # Reload data from disk
        target = GDALRaster(target.name)

        self.assertEqual(target.srs.srid, 3086)
        self.assertEqual(target.width, 7)
        self.assertEqual(target.height, 7)
        self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())
        self.assertAlmostEqual(target.origin[0], 9124842.791079799)
        self.assertAlmostEqual(target.origin[1], 1589911.6476407414)
        self.assertAlmostEqual(target.scale[0], 223824.82664250192)
        self.assertAlmostEqual(target.scale[1], -223824.82664250192)
        self.assertEqual(target.skew, [0, 0])

        result = target.bands[0].data()
        if numpy:
            result = result.flatten().tolist()

        # The reprojection of a raster that spans over a large area
        # skews the data matrix and might introduce nodata values.
        self.assertEqual(
            result,
            [
                ndv, ndv, ndv, ndv, 4, ndv, ndv,
                ndv, ndv, 2, 3, 9, ndv, ndv,
                ndv, 1, 2, 8, 13, 19, ndv,
                0, 6, 6, 12, 18, 18, 24,
                ndv, 10, 11, 16, 22, 23, ndv,
                ndv, ndv, 15, 21, 22, ndv, ndv,
                ndv, ndv, 20, ndv, ndv, ndv, ndv,
            ]
        )
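The `if numpy:` guard works because Django's GIS tests import numpy through a shortcut module that degrades gracefully when the package is missing; roughly (an assumption based on django.contrib.gis.shortcuts):

# numpy is optional; the name is False when the package is not installed.
from django.contrib.gis.shortcuts import numpy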
Example #9
    def test_raster_transform(self):
        if GDAL_VERSION < (1, 8, 1):
            self.skipTest("GDAL >= 1.8.1 is required for this test")
        # Prepare tempfile and nodata value
        rstfile = tempfile.NamedTemporaryFile(suffix='.tif')
        ndv = 99

        # Create a file-based raster
        source = GDALRaster({
            'datatype': 1,
            'driver': 'tif',
            'name': rstfile.name,
            'width': 5,
            'height': 5,
            'nr_of_bands': 1,
            'srid': 4326,
            'origin': (-5, 5),
            'scale': (2, -2),
            'skew': (0, 0),
            'bands': [{
                'data': range(25),
                'nodata_value': ndv,
            }],
        })

        # Transform raster into srid 3086.
        target = source.transform(3086)

        # Reload data from disk
        target = GDALRaster(target.name)

        self.assertEqual(target.srs.srid, 3086)
        self.assertEqual(target.width, 7)
        self.assertEqual(target.height, 7)
        self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())
        self.assertEqual(target.origin, [9124842.791079799, 1589911.6476407414])
        self.assertEqual(target.scale, [223824.82664250192, -223824.82664250192])
        self.assertEqual(target.skew, [0, 0])

        result = target.bands[0].data()
        if numpy:
            result = result.flatten().tolist()

        # The reprojection of a raster that spans over a large area
        # skews the data matrix and might introduce nodata values.
        self.assertEqual(
            result,
            [
                ndv, ndv, ndv, ndv, 4, ndv, ndv,
                ndv, ndv, 2, 3, 9, ndv, ndv,
                ndv, 1, 2, 8, 13, 19, ndv,
                0, 6, 6, 12, 18, 18, 24,
                ndv, 10, 11, 16, 22, 23, ndv,
                ndv, ndv, 15, 21, 22, ndv, ndv,
                ndv, ndv, 20, ndv, ndv, ndv, ndv,
            ]
        )
Example #10
    def test_db(self):
        # Make sure we can use PostGIS raster type
        raster = GDALRaster(os.path.realpath(os.path.join("app", "fixtures", "orthophoto.tif")), write=True)

        self.assertTrue(raster.srid == 32615)
        with transaction.atomic():
            # We cannot store offdb references with an SRID different from the one declared (4326)
            self.assertRaises(InternalError, Task.objects.create,
                              project=Project.objects.latest("created_at"),
                              orthophoto=raster)

        # All OK when we transform to 4326
        task = Task.objects.create(project=Project.objects.latest("created_at"),
                                   orthophoto=raster.transform(4326))
        task.refresh_from_db()
        self.assertTrue(task.orthophoto.srid == 4326)
        self.assertTrue(task.orthophoto.width == 252) # not original size, warp happened
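The test stores the reprojected raster in a model raster field; a minimal model sketch with such a field (the field and model names mirror the test, the rest is an assumption):

from django.contrib.gis.db import models

class Task(models.Model):
    # PostGIS raster column declared with SRID 4326, matching the test above.
    orthophoto = models.RasterField(srid=4326, null=True, blank=True)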
Example #11
    def test_db(self):
        # Make sure we can use PostGIS raster type
        raster = GDALRaster(os.path.realpath(
            os.path.join("app", "fixtures", "orthophoto.tif")),
                            write=True)

        self.assertTrue(raster.srid == 32615)
        with transaction.atomic():
            # We cannot store offdb references with an SRID different from the one declared (4326)
            self.assertRaises(InternalError,
                              Task.objects.create,
                              project=Project.objects.latest("created_at"),
                              orthophoto=raster)

        # All OK when we transform to 4326
        task = Task.objects.create(
            project=Project.objects.latest("created_at"),
            orthophoto=raster.transform(4326))
        task.refresh_from_db()
        self.assertTrue(task.orthophoto.srid == 4326)
        self.assertTrue(
            task.orthophoto.width == 252)  # not original size, warp happened
Example #12
class RasterLayerParser(object):
    """
    Class to parse raster layers.
    """
    def __init__(self, rasterlayer_id):
        self.rasterlayer = RasterLayer.objects.get(id=rasterlayer_id)

        # Set raster tilesize
        self.tilesize = int(
            getattr(settings, 'RASTER_TILESIZE', WEB_MERCATOR_TILESIZE))
        self.batch_step_size = int(
            getattr(settings, 'RASTER_BATCH_STEP_SIZE', BATCH_STEP_SIZE))
        self.s3_endpoint_url = getattr(settings, 'RASTER_S3_ENDPOINT_URL', None)

    def log(self, msg, status=None, zoom=None):
        """
        Write a message to the parse log of the rasterlayer instance and update
        the parse status object.
        """
        parsestatus = self.rasterlayer.parsestatus
        parsestatus.refresh_from_db()

        if status is not None:
            parsestatus.status = status

        if zoom is not None and zoom not in parsestatus.tile_levels:
            parsestatus.tile_levels.append(zoom)
            parsestatus.tile_levels.sort()

        # Prepare datetime stamp for log
        now = '[{0}] '.format(
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        if parsestatus.log:
            now = '\n' + now

        parsestatus.log += now + msg
        parsestatus.save()

    def open_raster_file(self):
        """
        Get raster source file to extract tiles from.

        This makes a local copy of rasterfile, unzips the raster and reprojects
        it into web mercator if necessary. The reprojected raster is stored for
        reuse such that reprojection only happens once.

        The local copy of the raster is needed if files are stored on remote
        storages.
        """
        reproj, created = RasterLayerReprojected.objects.get_or_create(
            rasterlayer=self.rasterlayer)
        # Check if the raster has already been reprojected
        has_reprojected = reproj.rasterfile.name not in (None, '')

        # Create workdir
        raster_workdir = getattr(settings, 'RASTER_WORKDIR', None)
        self.tmpdir = tempfile.mkdtemp(dir=raster_workdir)

        # Choose source for raster data, use the reprojected version if it exists.
        if self.rasterlayer.source_url and not has_reprojected:
            url = self.rasterlayer.source_url
            if url.lower().startswith('http') or url.startswith('file'):
                url_path = urlparse(self.rasterlayer.source_url).path
                filename = url_path.split('/')[-1]
                filepath = os.path.join(self.tmpdir, filename)
                urlretrieve(self.rasterlayer.source_url, filepath)
            elif url.startswith('s3'):
                # Get the bucket name and file key, assuming the following url
                # structure: s3://BUCKET_NAME/BUCKET_KEY
                bucket_name = url.split('s3://')[1].split('/')[0]
                bucket_key = '/'.join(url.split('s3://')[1].split('/')[1:])
                # Assume the file name is the last piece of the key.
                filename = bucket_key.split('/')[-1]
                filepath = os.path.join(self.tmpdir, filename)
                # Get file from s3.
                s3 = boto3.resource('s3', endpoint_url=self.s3_endpoint_url)
                bucket = s3.Bucket(bucket_name)
                bucket.download_file(bucket_key,
                                     filepath,
                                     ExtraArgs={'RequestPayer': 'requester'})
            else:
                raise RasterException(
                    'Only http(s), file, and s3 urls are supported.')
        else:
            if has_reprojected:
                rasterfile_source = reproj.rasterfile
            else:
                rasterfile_source = self.rasterlayer.rasterfile

            if not rasterfile_source.name:
                raise RasterException(
                    'No data source found. Provide a rasterfile or a source url.'
                )

            # Copy raster file source to local folder
            filepath = os.path.join(self.tmpdir,
                                    os.path.basename(rasterfile_source.name))
            with open(filepath, 'wb') as rasterfile:
                for chunk in rasterfile_source.chunks():
                    rasterfile.write(chunk)

        # If the raster file is compressed, decompress it, otherwise try to
        # open the source file directly.
        if os.path.splitext(filepath)[1].lower() == '.zip':
            # Open and extract zipfile
            zf = zipfile.ZipFile(filepath)
            zf.extractall(self.tmpdir)

            # Remove zipfile
            os.remove(filepath)

            # Get filelist from directory
            matches = []
            for root, dirnames, filenames in os.walk(self.tmpdir):
                for filename in fnmatch.filter(filenames, '*.*'):
                    matches.append(os.path.join(root, filename))

            # Open the first raster file found in the matched files.
            self.dataset = None
            for match in matches:
                try:
                    self.dataset = GDALRaster(match)
                    break
                except GDALException:
                    pass

            # Raise exception if no file could be opened by gdal.
            if not self.dataset:
                raise RasterException('Could not open rasterfile.')
        else:
            self.dataset = GDALRaster(filepath)

        # Override srid if provided
        if self.rasterlayer.srid:
            try:
                self.dataset = GDALRaster(self.dataset.name, write=True)
            except GDALException:
                raise RasterException(
                    'Could not override srid because the driver for this '
                    'type of raster does not support write mode.')
            self.dataset.srs = self.rasterlayer.srid

    def reproject_rasterfile(self):
        """
        Reproject the rasterfile into web mercator.
        """
        # Return if reprojected rasterfile already exists.
        if (hasattr(self.rasterlayer, 'reprojected')
                and self.rasterlayer.reprojected.rasterfile.name):
            return

        # Return if the raster already has the right projection
        # and nodata value is acceptable.
        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # No nodata value was manually specified, nothing to do.
            if self.rasterlayer.nodata in ('', None):
                return
            # All bands from dataset already have the same nodata value as the
            # one that was manually specified.
            if all([
                    self.rasterlayer.nodata == band.nodata_value
                    for band in self.dataset.bands
            ]):
                return
        else:
            # Log projection change if original raster is not in web mercator.
            self.log(
                'Transforming raster to SRID {0}'.format(WEB_MERCATOR_SRID),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )

        # Reproject the dataset.
        self.dataset = self.dataset.transform(
            WEB_MERCATOR_SRID,
            driver=INTERMEDIATE_RASTER_FORMAT,
        )

        # Manually override nodata value if necessary
        if self.rasterlayer.nodata not in ('', None):
            self.log(
                'Setting no data values to {0}.'.format(
                    self.rasterlayer.nodata),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )
            for band in self.dataset.bands:
                band.nodata_value = float(self.rasterlayer.nodata)

        # Compress reprojected raster file and store it
        if self.rasterlayer.store_reprojected:
            dest = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.zip')
            dest_zip = zipfile.ZipFile(dest.name, 'w', allowZip64=True)
            dest_zip.write(
                filename=self.dataset.name,
                arcname=os.path.basename(self.dataset.name),
                compress_type=zipfile.ZIP_DEFLATED,
            )
            dest_zip.close()

            # Store zip file in reprojected raster model
            self.rasterlayer.reprojected.rasterfile = File(
                open(dest_zip.filename, 'rb'),
                name=os.path.basename(dest_zip.filename))
            self.rasterlayer.reprojected.save()

        self.log('Finished transforming raster.')

    def create_initial_histogram_buckets(self):
        """
        Gets the empty histogram arrays for statistics collection.
        """
        self.hist_values = []
        self.hist_bins = []

        for i, band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(
                rasterlayer=self.rasterlayer, band=i).first()
            self.hist_values.append(numpy.array(bandmeta.hist_values))
            self.hist_bins.append(numpy.array(bandmeta.hist_bins))

    def extract_metadata(self):
        """
        Extract and store metadata for the raster and its bands.
        """
        self.log('Extracting metadata from raster.')

        # Try to compute max zoom
        try:
            max_zoom = self.compute_max_zoom()
        except GDALException:
            raise RasterException(
                'Failed to compute max zoom. Check the SRID of the raster.')

        # Extract global raster metadata
        meta = self.rasterlayer.metadata
        meta.uperleftx = self.dataset.origin.x
        meta.uperlefty = self.dataset.origin.y
        meta.width = self.dataset.width
        meta.height = self.dataset.height
        meta.scalex = self.dataset.scale.x
        meta.scaley = self.dataset.scale.y
        meta.skewx = self.dataset.skew.x
        meta.skewy = self.dataset.skew.y
        meta.numbands = len(self.dataset.bands)
        meta.srs_wkt = self.dataset.srs.wkt
        meta.srid = self.dataset.srs.srid
        meta.max_zoom = max_zoom
        meta.save()

        # Extract band metadata
        for i, band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(
                rasterlayer=self.rasterlayer, band=i).first()
            if not bandmeta:
                bandmeta = RasterLayerBandMetadata(
                    rasterlayer=self.rasterlayer, band=i)

            bandmeta.nodata_value = band.nodata_value
            bandmeta.min = band.min
            bandmeta.max = band.max
            # Depending on Django version, the band statistics include std and mean.
            if hasattr(band, 'std'):
                bandmeta.std = band.std
            if hasattr(band, 'mean'):
                bandmeta.mean = band.mean
            bandmeta.save()

        self.log('Finished extracting metadata from raster.')

    def create_tiles(self, zoom_levels):
        """
        Create tiles for input zoom levels, either a list or an integer.
        """
        if isinstance(zoom_levels, int):
            self.populate_tile_level(zoom_levels)
        else:
            for zoom in zoom_levels:
                self.populate_tile_level(zoom)

    def populate_tile_level(self, zoom):
        """
        Create tiles for this raster at the given zoomlevel.

        This routine first snaps the raster to the grid of the zoomlevel,
        then creates the tiles from the snapped raster.
        """
        # Abort if zoom level is above resolution of the raster layer
        if zoom > self.max_zoom:
            return
        elif zoom == self.max_zoom:
            self.create_initial_histogram_buckets()

        # Compute the tile x-y-z index range for the rasterlayer for this zoomlevel
        bbox = self.dataset.extent
        quadrants = utils.quadrants(bbox, zoom)

        self.log('Creating {0} tiles in {1} quadrants at zoom {2}.'.format(
            self.nr_of_tiles(zoom), len(quadrants), zoom))

        # Process quadrants in parallel
        for indexrange in quadrants:
            self.process_quadrant(indexrange, zoom)

        # Store histogram data
        if zoom == self.max_zoom:
            bandmetas = RasterLayerBandMetadata.objects.filter(
                rasterlayer=self.rasterlayer)
            for bandmeta in bandmetas:
                bandmeta.hist_values = self.hist_values[bandmeta.band].tolist()
                bandmeta.save()

        self.log('Finished parsing at zoom level {0}.'.format(zoom), zoom=zoom)

    _quadrant_count = 0

    def process_quadrant(self, indexrange, zoom):
        """
        Create raster tiles for a quadrant of tiles defined by a x-y-z index
        range and a zoom level.
        """
        # TODO Use a standalone celery task for this method in order to
        # gain speedup from parallelism.
        self._quadrant_count += 1
        self.log(
            'Starting tile creation for quadrant {0} at zoom level {1}'.format(
                self._quadrant_count, zoom),
            status=self.rasterlayer.parsestatus.CREATING_TILES)

        # Compute scale of tiles for this zoomlevel
        tilescale = utils.tile_scale(zoom)

        # Compute quadrant bounds and create destination file
        bounds = utils.tile_bounds(indexrange[0], indexrange[1], zoom)
        dest_file = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.tif')

        # Snap dataset to the quadrant
        snapped_dataset = self.dataset.warp({
            'name': dest_file.name,
            'origin': [bounds[0], bounds[3]],
            'scale': [tilescale, -tilescale],
            'width': (indexrange[2] - indexrange[0] + 1) * self.tilesize,
            'height': (indexrange[3] - indexrange[1] + 1) * self.tilesize,
        })

        # Create all tiles in this quadrant in batches
        batch = []
        for tilex in range(indexrange[0], indexrange[2] + 1):
            for tiley in range(indexrange[1], indexrange[3] + 1):
                # Calculate raster tile origin
                bounds = utils.tile_bounds(tilex, tiley, zoom)

                # Construct band data arrays
                pixeloffset = ((tilex - indexrange[0]) * self.tilesize,
                               (tiley - indexrange[1]) * self.tilesize)

                band_data = [{
                    'data': band.data(offset=pixeloffset,
                                      size=(self.tilesize, self.tilesize)),
                    'nodata_value': band.nodata_value,
                } for band in snapped_dataset.bands]

                # Ignore tile if it's only nodata.
                if all(numpy.all(dat['data'] == dat['nodata_value'])
                       for dat in band_data):
                    continue

                # Add tile data to histogram
                if zoom == self.max_zoom:
                    self.push_histogram(band_data)

                # Warp source raster into this tile (in memory)
                dest = GDALRaster({
                    'width': self.tilesize,
                    'height': self.tilesize,
                    'origin': [bounds[0], bounds[3]],
                    'scale': [tilescale, -tilescale],
                    'srid': WEB_MERCATOR_SRID,
                    'datatype': snapped_dataset.bands[0].datatype(),
                    'bands': band_data,
                })

                # Store tile in batch array
                batch.append(
                    RasterTile(rast=dest,
                               rasterlayer_id=self.rasterlayer.id,
                               tilex=tilex,
                               tiley=tiley,
                               tilez=zoom))

                # Commit batch to database and reset it
                if len(batch) == self.batch_step_size:
                    RasterTile.objects.bulk_create(batch)
                    batch = []

        # Commit remaining objects
        if len(batch):
            RasterTile.objects.bulk_create(batch)

    def push_histogram(self, data):
        """
        Add data to band level histogram.
        """
        # Loop through bands of this tile
        for i, dat in enumerate(data):
            # Create histogram for new data with the same bins
            new_hist = numpy.histogram(dat['data'], bins=self.hist_bins[i])
            # Add counts of this tile to band metadata histogram
            self.hist_values[i] += new_hist[0]

    def drop_all_tiles(self):
        """
        Delete all existing tiles for this parser's rasterlayer.
        """
        self.log('Clearing all existing tiles.')
        self.rasterlayer.rastertile_set.all().delete()
        self.log('Finished clearing existing tiles.')

    def send_success_signal(self):
        """
        Send parser end signal for other dependencies to be handling new tiles.
        """
        self.log('Successfully finished parsing raster',
                 status=self.rasterlayer.parsestatus.FINISHED)
        rasterlayers_parser_ended.send(sender=self.rasterlayer.__class__,
                                       instance=self.rasterlayer)

    def compute_max_zoom(self):
        """
        Set max zoom property based on rasterlayer metadata.
        """
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # For rasters in web mercator, use the scale directly
            scale = abs(self.dataset.scale.x)
        else:
            # Create a line from the center of the raster to a point that is
            # one pixel width away from the center.
            xcenter = self.dataset.extent[0] + (self.dataset.extent[2] -
                                                self.dataset.extent[0]) / 2
            ycenter = self.dataset.extent[1] + (self.dataset.extent[3] -
                                                self.dataset.extent[1]) / 2
            linestring = 'LINESTRING({} {}, {} {})'.format(
                xcenter, ycenter, xcenter + self.dataset.scale.x, ycenter)
            line = OGRGeometry(linestring, srs=self.dataset.srs)

            # Transform the line into web mercator.
            line.transform(WEB_MERCATOR_SRID)

            # Use the length of the transformed line as scale.
            scale = line.geos.length

        return utils.closest_zoomlevel(scale)

    @property
    def max_zoom(self):
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        # Get max zoom from metadata
        if not hasattr(self.rasterlayer, 'metadata'):
            raise RasterException('Could not determine max zoom level.')
        max_zoom = self.rasterlayer.metadata.max_zoom

        # Reduce max zoom by one if zoomdown flag was disabled
        if not self.rasterlayer.next_higher:
            max_zoom -= 1

        return max_zoom

    def nr_of_tiles(self, zoom):
        """
        Compute the number of tiles for the rasterlayer on a given zoom level.
        """
        bbox = self.dataset.extent
        indexrange = utils.tile_index_range(bbox, zoom)
        return (indexrange[2] - indexrange[0] + 1) * (indexrange[3] -
                                                      indexrange[1] + 1)
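reproject_rasterfile() above passes a driver to transform() for the intermediate raster; the same keyword works standalone, for example with GDAL's in-memory driver (a sketch with a placeholder path; 'MEM' is an illustrative choice, not necessarily INTERMEDIATE_RASTER_FORMAT):

from django.contrib.gis.gdal import GDALRaster

rst = GDALRaster('/data/input.tif')
# Reproject to web mercator without writing an intermediate file to disk.
web_mercator = rst.transform(3857, driver='MEM')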
Example #13
    def process(self):
        """
        This method contains the logic for processing tasks asynchronously
        from a background thread or from the scheduler. Here tasks that are
        ready to be processed execute some logic. This could be communicating
        with a processing node or executing a pending action.
        """

        try:
            if self.auto_processing_node and self.status not in [
                    status_codes.FAILED, status_codes.CANCELED]:
                # No processing node assigned and need to auto assign
                if self.processing_node is None:
                    # Assign first online node with lowest queue count
                    self.processing_node = ProcessingNode.find_best_available_node()
                    if self.processing_node:
                        # Doesn't have to be accurate, it will get overridden later
                        self.processing_node.queue_count += 1
                        self.processing_node.save()

                        logger.info("Automatically assigned processing node {} to {}".format(
                            self.processing_node, self))
                        self.save()

                # Processing node assigned, but is offline and no errors
                if self.processing_node and not self.processing_node.is_online():
                    # Detach processing node, will be processed at the next tick
                    logger.info("Processing node {} went offline, reassigning {}...".format(
                        self.processing_node, self))
                    self.uuid = ''
                    self.processing_node = None
                    self.save()

            if self.processing_node:
                # Need to process some images (UUID not yet set and task doesn't have pending actions)?
                if not self.uuid and self.pending_action is None and self.status is None:
                    logger.info("Processing... {}".format(self))

                    images = [
                        image.path() for image in self.imageupload_set.all()
                    ]

                    # This takes a while
                    uuid = self.processing_node.process_new_task(
                        images, self.name, self.options)

                    # Refresh task object before committing change
                    self.refresh_from_db()
                    self.uuid = uuid
                    self.save()

                    # TODO: log process has started processing

            if self.pending_action is not None:
                if self.pending_action == pending_actions.CANCEL:
                    # Do we need to cancel the task on the processing node?
                    logger.info("Canceling {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to cancel the task on the processing node
                        # We don't care if this fails (we tried)
                        try:
                            self.processing_node.cancel_task(self.uuid)
                            self.status = None
                        except ProcessingException:
                            logger.warning(
                                "Could not cancel {} on processing node. We'll proceed anyway..."
                                .format(self))
                            self.status = status_codes.CANCELED

                        self.pending_action = None
                        self.save()
                    else:
                        raise ProcessingError(
                            "Cannot cancel a task that has no processing node or UUID"
                        )

                elif self.pending_action == pending_actions.RESTART:
                    logger.info("Restarting {}".format(self))
                    if self.processing_node:

                        # Check if the UUID is still valid; processing nodes purge
                        # results after a set amount of time, so the UUID might have been eliminated.
                        uuid_still_exists = False

                        if self.uuid:
                            try:
                                info = self.processing_node.get_task_info(
                                    self.uuid)
                                uuid_still_exists = info['uuid'] == self.uuid
                            except ProcessingException:
                                pass

                        if uuid_still_exists:
                            # Good to go
                            try:
                                self.processing_node.restart_task(self.uuid)
                            except ProcessingError as e:
                                # Something went wrong
                                logger.warning(
                                    "Could not restart {}, will start a new one"
                                    .format(self))
                                self.uuid = ''
                        else:
                            # Task has been purged (or processing node is offline)
                            # Process this as a new task
                            # Removing its UUID will cause the scheduler
                            # to process this the next tick
                            self.uuid = ''

                        self.console_output = ""
                        self.processing_time = -1
                        self.status = None
                        self.last_error = None
                        self.pending_action = None
                        self.save()
                    else:
                        raise ProcessingError(
                            "Cannot restart a task that has no processing node"
                        )

                elif self.pending_action == pending_actions.REMOVE:
                    logger.info("Removing {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to delete the resources on the processing node.
                        # We don't care if this fails, as resources on processing nodes
                        # are expected to be purged on their own after a set amount of time anyway.
                        try:
                            self.processing_node.remove_task(self.uuid)
                        except ProcessingException:
                            pass

                    # What's more important is that we delete our task properly here
                    self.delete()

                    # Stop right here!
                    return

            if self.processing_node:
                # Need to update status (first time, queued or running?)
                if self.uuid and self.status in [
                        None, status_codes.QUEUED, status_codes.RUNNING
                ]:
                    # Update task info from processing node
                    info = self.processing_node.get_task_info(self.uuid)

                    self.processing_time = info["processingTime"]
                    self.status = info["status"]["code"]

                    current_lines_count = len(
                        self.console_output.split("\n")) - 1
                    self.console_output += self.processing_node.get_task_console_output(
                        self.uuid, current_lines_count)

                    if "errorMessage" in info["status"]:
                        self.last_error = info["status"]["errorMessage"]

                    # Has the task just been canceled, failed, or completed?
                    if self.status in [
                            status_codes.FAILED, status_codes.COMPLETED,
                            status_codes.CANCELED
                    ]:
                        logger.info("Processing status: {} for {}".format(
                            self.status, self))

                        if self.status == status_codes.COMPLETED:
                            assets_dir = self.assets_path("")
                            if not os.path.exists(assets_dir):
                                os.makedirs(assets_dir)

                            logger.info(
                                "Downloading all.zip for {}".format(self))

                            # Download all assets
                            zip_stream = self.processing_node.download_task_asset(
                                self.uuid, "all.zip")
                            zip_path = os.path.join(assets_dir, "all.zip")
                            with open(zip_path, 'wb') as fd:
                                for chunk in zip_stream.iter_content(4096):
                                    fd.write(chunk)

                            logger.info(
                                "Done downloading all.zip for {}".format(self))

                            # Extract from zip
                            with zipfile.ZipFile(zip_path, "r") as zip_h:
                                zip_h.extractall(assets_dir)

                            logger.info(
                                "Extracted all.zip for {}".format(self))

                            # Add to database orthophoto
                            orthophoto_path = os.path.realpath(
                                self.assets_path("odm_orthophoto",
                                                 "odm_orthophoto.tif"))
                            if os.path.exists(orthophoto_path):
                                orthophoto = GDALRaster(orthophoto_path,
                                                        write=True)

                                # We need to transform to 4326 before we can store it
                                # as an offdb raster field
                                orthophoto_4326_path = os.path.realpath(
                                    self.assets_path(
                                        "odm_orthophoto",
                                        "odm_orthophoto_4326.tif"))
                                self.orthophoto = orthophoto.transform(
                                    4326, 'GTiff', orthophoto_4326_path)

                                logger.info(
                                    "Imported orthophoto {} for {}".format(
                                        orthophoto_4326_path, self))

                            # Remove old odm_texturing.zip archive (if any)
                            textured_model_archive = self.assets_path(
                                self.get_textured_model_filename())
                            if os.path.exists(textured_model_archive):
                                os.remove(textured_model_archive)

                            self.save()
                        else:
                            # FAILED, CANCELED
                            self.save()
                    else:
                        # Still waiting...
                        self.save()

        except ProcessingError as e:
            self.set_failure(str(e))
        except (ConnectionRefusedError, ConnectionError) as e:
            logger.warning(
                "{} cannot communicate with processing node: {}".format(
                    self, str(e)))
        except ProcessingTimeout as e:
            logger.warning(
                "{} timed out with error: {}. We'll try reprocessing at the next tick."
                .format(self, str(e)))
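The orthophoto.transform(4326, 'GTiff', orthophoto_4326_path) call above passes the driver and output name positionally; the equivalent keyword form (the path is a placeholder):

orthophoto_4326 = orthophoto.transform(4326, driver='GTiff',
                                       name='/tmp/odm_orthophoto_4326.tif')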
Example #14
    def handle(self, *args, **options):
        verbose = options['verbosity'] != 0

        try:
            cmd = 'raster2pgsql -G > /dev/null'
            kwargs_raster = {'shell': True}
            ret = self.call_command_system(cmd, **kwargs_raster)
            if ret != 0:
                raise Exception('raster2pgsql failed with exit code %d' % ret)
        except Exception as e:
            msg = 'Caught %s: %s' % (
                e.__class__.__name__,
                e,
            )
            raise CommandError(msg)
        if verbose:
            self.stdout.write('-- Checking input DEM ------------------\n')
        # Obtain DEM path
        dem_path = options['dem_path']

        # Open GDAL dataset
        if not os.path.exists(dem_path):
            raise CommandError('DEM file does not exist at: %s' % dem_path)
        try:
            rst = GDALRaster(dem_path, write=False)
        except GDALException:
            raise CommandError('DEM format is not recognized by GDAL.')

        # GDAL dataset check 1: ensure dataset has a known SRS
        if not rst.srs:
            raise CommandError('DEM coordinate system is unknown.')
        # Obtain dataset SRS
        if settings.SRID != rst.srs.srid:
            rst = rst.transform(settings.SRID)
        cur = connection.cursor()
        sql = 'SELECT * FROM raster_columns WHERE r_table_name = \'mnt\''
        cur.execute(sql)
        dem_exists = cur.rowcount != 0
        cur.close()

        # Obtain replace mode
        replace = options['replace']

        # What to do with existing DEM (if any)
        if dem_exists and replace:
            # Drop table
            cur = connection.cursor()
            sql = 'DROP TABLE mnt'
            cur.execute(sql)
            cur.close()
        elif dem_exists and not replace:
            raise CommandError('DEM file exists, use --replace to overwrite')

        if verbose:
            self.stdout.write(
                'Everything looks fine, we can start loading DEM\n')

        output = tempfile.NamedTemporaryFile()  # SQL code for raster creation
        cmd = 'raster2pgsql -c -C -I -M -t 100x100 %s mnt %s' % (
            rst.name, '' if verbose else '2>/dev/null')
        try:
            if verbose:
                self.stdout.write(
                    '\n-- Relaying to raster2pgsql ------------\n')
                self.stdout.write(cmd)
            kwargs_raster2 = {
                'shell': True,
                'stdout': output.file,
                'stderr': PIPE
            }
            ret = self.call_command_system(cmd, **kwargs_raster2)
            if ret != 0:
                raise Exception('raster2pgsql failed with exit code %d' % ret)
        except Exception as e:
            output.close()
            msg = 'Caught %s: %s' % (
                e.__class__.__name__,
                e,
            )
            raise CommandError(msg)

        if verbose:
            self.stdout.write('DEM successfully converted to SQL.\n')

        # Step 3: Dump SQL code into database
        if verbose:
            self.stdout.write('\n-- Loading DEM into database -----------\n')
        cur = connection.cursor()
        output.file.seek(0)
        for sql_line in output.file:
            cur.execute(sql_line)
        cur.close()
        output.close()
        if verbose:
            self.stdout.write('DEM successfully loaded.\n')
        return
Example #15
class RasterLayerParser(object):
    """
    Class to parse raster layers.
    """
    def __init__(self, rasterlayer_id):
        self.rasterlayer = RasterLayer.objects.get(id=rasterlayer_id)

        # Set raster tilesize
        self.tilesize = int(getattr(settings, 'RASTER_TILESIZE', WEB_MERCATOR_TILESIZE))

    def log(self, msg, status=None, zoom=None):
        """
        Write a message to the parse log of the rasterlayer instance and update
        the parse status object.
        """
        parsestatus = self.rasterlayer.parsestatus
        parsestatus.refresh_from_db()

        if status is not None:
            parsestatus.status = status

        if zoom is not None and zoom not in parsestatus.tile_levels:
            parsestatus.tile_levels.append(zoom)
            parsestatus.tile_levels.sort()

        # Prepare datetime stamp for log
        now = '[{0}] '.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        if parsestatus.log:
            now = '\n' + now

        parsestatus.log += now + msg
        parsestatus.save()

    def open_raster_file(self):
        """
        Get raster source file to extract tiles from.

        This makes a local copy of rasterfile, unzips the raster and reprojects
        it into web mercator if necessary. The reprojected raster is stored for
        reuse such that reprojection only happens once.

        The local copy of the raster is needed if files are stored on remote
        storages.
        """
        reproj, created = RasterLayerReprojected.objects.get_or_create(rasterlayer=self.rasterlayer)
        # Check if the raster has already been reprojected
        has_reprojected = reproj.rasterfile.name not in (None, '')

        # Create workdir
        raster_workdir = getattr(settings, 'RASTER_WORKDIR', None)
        self.tmpdir = tempfile.mkdtemp(dir=raster_workdir)

        # Choose source for raster data, use the reprojected version if it exists.
        if self.rasterlayer.source_url and not has_reprojected:
            url_path = urlparse(self.rasterlayer.source_url).path
            filename = url_path.split('/')[-1]
            filepath = os.path.join(self.tmpdir, filename)
            urlretrieve(self.rasterlayer.source_url, filepath)
        else:
            if has_reprojected:
                rasterfile_source = reproj.rasterfile
            else:
                rasterfile_source = self.rasterlayer.rasterfile

            if not rasterfile_source.name:
                raise RasterException('No data source found. Provide a rasterfile or a source url.')

            # Copy raster file source to local folder
            filepath = os.path.join(self.tmpdir, os.path.basename(rasterfile_source.name))
            with open(filepath, 'wb') as rasterfile:
                for chunk in rasterfile_source.chunks():
                    rasterfile.write(chunk)

        # If the raster file is compressed, decompress it, otherwise try to
        # open the source file directly.
        if os.path.splitext(filepath)[1].lower() == '.zip':
            # Open and extract zipfile
            zf = zipfile.ZipFile(filepath)
            zf.extractall(self.tmpdir)

            # Remove zipfile
            os.remove(filepath)

            # Get filelist from directory
            matches = []
            for root, dirnames, filenames in os.walk(self.tmpdir):
                for filename in fnmatch.filter(filenames, '*.*'):
                    matches.append(os.path.join(root, filename))

            # Open the first raster file found in the matched files.
            self.dataset = None
            for match in matches:
                try:
                    self.dataset = GDALRaster(match)
                    break
                except GDALException:
                    pass

            # Raise exception if no file could be opened by gdal.
            if not self.dataset:
                raise RasterException('Could not open rasterfile.')
        else:
            self.dataset = GDALRaster(filepath)

        # Override srid if provided
        if self.rasterlayer.srid:
            try:
                self.dataset = GDALRaster(self.dataset.name, write=True)
            except GDALException:
                raise RasterException(
                    'Could not override srid because the driver for this '
                    'type of raster does not support write mode.'
                )
            self.dataset.srs = self.rasterlayer.srid

    def reproject_rasterfile(self):
        """
        Reproject the rasterfile into web mercator.
        """
        # Return if reprojected rasterfile already exists.
        if hasattr(self.rasterlayer, 'reprojected') and self.rasterlayer.reprojected.rasterfile.name:
            return

        # Return if the raster already has the right projection
        # and nodata value is acceptable.
        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # No nodata value was manually specified, nothing to do.
            if self.rasterlayer.nodata in ('', None):
                return
            # All bands from dataset already have the same nodata value as the
            # one that was manually specified.
            if all([self.rasterlayer.nodata == band.nodata_value
                    for band in self.dataset.bands]):
                return
        else:
            # Log projection change if original raster is not in web mercator.
            self.log(
                'Transforming raster to SRID {0}'.format(WEB_MERCATOR_SRID),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )

        # Reproject the dataset.
        self.dataset = self.dataset.transform(
            WEB_MERCATOR_SRID,
            driver=INTERMEDIATE_RASTER_FORMAT,
        )

        # Manually override nodata value if necessary
        if self.rasterlayer.nodata not in ('', None):
            self.log(
                'Setting no data values to {0}.'.format(self.rasterlayer.nodata),
                status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
            )
            for band in self.dataset.bands:
                band.nodata_value = float(self.rasterlayer.nodata)

        # Compress reprojected raster file and store it
        if self.rasterlayer.store_reprojected:
            dest = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.zip')
            dest_zip = zipfile.ZipFile(dest.name, 'w', allowZip64=True)
            dest_zip.write(
                filename=self.dataset.name,
                arcname=os.path.basename(self.dataset.name),
                compress_type=zipfile.ZIP_DEFLATED,
            )
            dest_zip.close()

            # Store zip file in reprojected raster model
            self.rasterlayer.reprojected.rasterfile = File(open(dest_zip.filename, 'rb'))
            self.rasterlayer.reprojected.save()

        self.log('Finished transforming raster.')

    def create_initial_histogram_buckets(self):
        """
        Gets the empty histogram arrays for statistics collection.
        """
        self.hist_values = []
        self.hist_bins = []

        for i, band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer, band=i).first()
            self.hist_values.append(numpy.array(bandmeta.hist_values))
            self.hist_bins.append(numpy.array(bandmeta.hist_bins))

    def extract_metadata(self):
        """
        Extract and store metadata for the raster and its bands.
        """
        self.log('Extracting metadata from raster.')

        # Try to compute max zoom
        try:
            max_zoom = self.compute_max_zoom()
        except GDALException:
            raise RasterException('Failed to compute max zoom. Check the SRID of the raster.')

        # Extract global raster metadata
        meta = self.rasterlayer.metadata
        meta.uperleftx = self.dataset.origin.x
        meta.uperlefty = self.dataset.origin.y
        meta.width = self.dataset.width
        meta.height = self.dataset.height
        meta.scalex = self.dataset.scale.x
        meta.scaley = self.dataset.scale.y
        meta.skewx = self.dataset.skew.x
        meta.skewy = self.dataset.skew.y
        meta.numbands = len(self.dataset.bands)
        meta.srs_wkt = self.dataset.srs.wkt
        meta.srid = self.dataset.srs.srid
        meta.max_zoom = max_zoom
        meta.save()

        # Extract band metadata
        for i, band in enumerate(self.dataset.bands):
            bandmeta = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer, band=i).first()
            if not bandmeta:
                bandmeta = RasterLayerBandMetadata(rasterlayer=self.rasterlayer, band=i)

            bandmeta.nodata_value = band.nodata_value
            bandmeta.min = band.min
            bandmeta.max = band.max
            # Depending on Django version, the band statistics include std and mean.
            if hasattr(band, 'std'):
                bandmeta.std = band.std
            if hasattr(band, 'mean'):
                bandmeta.mean = band.mean
            bandmeta.save()

        self.log('Finished extracting metadata from raster.')

    def create_tiles(self, zoom_levels):
        """
        Create tiles for input zoom levels, either a list or an integer.
        """
        if isinstance(zoom_levels, int):
            self.populate_tile_level(zoom_levels)
        else:
            for zoom in zoom_levels:
                self.populate_tile_level(zoom)

    def populate_tile_level(self, zoom):
        """
        Create tiles for this raster at the given zoomlevel.

        This routine first snaps the raster to the grid of the zoomlevel,
        then creates the tiles from the snapped raster.
        """
        # Abort if the zoom level is above the resolution of the raster layer
        if zoom > self.max_zoom:
            return
        elif zoom == self.max_zoom:
            self.create_initial_histogram_buckets()

        # Compute the tile x-y-z index range for the rasterlayer for this zoomlevel
        bbox = self.dataset.extent
        quadrants = utils.quadrants(bbox, zoom)

        self.log('Creating {0} tiles in {1} quadrants at zoom {2}.'.format(self.nr_of_tiles(zoom), len(quadrants), zoom))

        # Process quadrants in parallel
        quadrant_task_group = group(self.process_quadrant.si(indexrange, zoom) for indexrange in quadrants)
        quadrant_task_group.apply()

        # Store histogram data
        if zoom == self.max_zoom:
            bandmetas = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer)
            for bandmeta in bandmetas:
                bandmeta.hist_values = self.hist_values[bandmeta.band].tolist()
                bandmeta.save()

        self.log('Finished parsing at zoom level {0}.'.format(zoom), zoom=zoom)

    _quadrant_count = 0

    @current_app.task(filter=task_method)
    def process_quadrant(self, indexrange, zoom):
        """
        Create raster tiles for a quadrant of tiles defined by an x-y-z index
        range and a zoom level.
        """
        self._quadrant_count += 1
        self.log(
            'Starting tile creation for quadrant {0} at zoom level {1}'.format(self._quadrant_count, zoom),
            status=self.rasterlayer.parsestatus.CREATING_TILES
        )

        # Compute scale of tiles for this zoomlevel
        tilescale = utils.tile_scale(zoom)

        # Compute quadrant bounds and create destination file
        bounds = utils.tile_bounds(indexrange[0], indexrange[1], zoom)
        dest_file = tempfile.NamedTemporaryFile(dir=self.tmpdir, suffix='.tif')

        # Snap dataset to the quadrant
        snapped_dataset = self.dataset.warp({
            'name': dest_file.name,
            'origin': [bounds[0], bounds[3]],
            'scale': [tilescale, -tilescale],
            'width': (indexrange[2] - indexrange[0] + 1) * self.tilesize,
            'height': (indexrange[3] - indexrange[1] + 1) * self.tilesize,
        })

        # Create all tiles in this quadrant in batches
        batch = []
        for tilex in range(indexrange[0], indexrange[2] + 1):
            for tiley in range(indexrange[1], indexrange[3] + 1):
                # Calculate raster tile origin
                bounds = utils.tile_bounds(tilex, tiley, zoom)

                # Construct band data arrays
                pixeloffset = (
                    (tilex - indexrange[0]) * self.tilesize,
                    (tiley - indexrange[1]) * self.tilesize
                )

                band_data = [
                    {
                        'data': band.data(offset=pixeloffset, size=(self.tilesize, self.tilesize)),
                        'nodata_value': band.nodata_value
                    } for band in snapped_dataset.bands
                ]

                # Ignore the tile if it's only nodata.
                if all([numpy.all(dat['data'] == dat['nodata_value']) for dat in band_data]):
                    continue

                # Add tile data to histogram
                if zoom == self.max_zoom:
                    self.push_histogram(band_data)

                # Warp source raster into this tile (in memory)
                dest = GDALRaster({
                    'width': self.tilesize,
                    'height': self.tilesize,
                    'origin': [bounds[0], bounds[3]],
                    'scale': [tilescale, -tilescale],
                    'srid': WEB_MERCATOR_SRID,
                    'datatype': snapped_dataset.bands[0].datatype(),
                    'bands': band_data,
                })

                # Store tile in batch array
                batch.append(
                    RasterTile(
                        rast=dest,
                        rasterlayer_id=self.rasterlayer.id,
                        tilex=tilex,
                        tiley=tiley,
                        tilez=zoom
                    )
                )

                # Commit batch to database and reset it
                if len(batch) == BATCH_STEP_SIZE:
                    RasterTile.objects.bulk_create(batch)
                    batch = []

        # Commit remaining objects
        if batch:
            RasterTile.objects.bulk_create(batch)

    def push_histogram(self, data):
        """
        Add data to band level histogram.
        """
        # Loop through bands of this tile
        for i, dat in enumerate(data):
            # Create histogram for new data with the same bins
            new_hist = numpy.histogram(dat['data'], bins=self.hist_bins[i])
            # Add counts of this tile to band metadata histogram
            self.hist_values[i] += new_hist[0]

    def drop_all_tiles(self):
        """
        Delete all existing tiles for this parser's rasterlayer.
        """
        self.log('Clearing all existing tiles.')
        self.rasterlayer.rastertile_set.all().delete()
        self.log('Finished clearing existing tiles.')

    def send_success_signal(self):
        """
        Send the parser-ended signal so that other dependencies can handle the new tiles.
        """
        self.log(
            'Successfully finished parsing raster',
            status=self.rasterlayer.parsestatus.FINISHED
        )
        rasterlayers_parser_ended.send(sender=self.rasterlayer.__class__, instance=self.rasterlayer)

    def compute_max_zoom(self):
        """
        Set max zoom property based on rasterlayer metadata.
        """
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        if self.dataset.srs.srid == WEB_MERCATOR_SRID:
            # For rasters in web mercator, use the scale directly
            scale = abs(self.dataset.scale.x)
        else:
            # Create a line from the center of the raster to a point that is
            # one pixel width away from the center.
            xcenter = self.dataset.extent[0] + (self.dataset.extent[2] - self.dataset.extent[0]) / 2
            ycenter = self.dataset.extent[1] + (self.dataset.extent[3] - self.dataset.extent[1]) / 2
            linestring = 'LINESTRING({} {}, {} {})'.format(
                xcenter, ycenter, xcenter + self.dataset.scale.x, ycenter
            )
            line = OGRGeometry(linestring, srs=self.dataset.srs)

            # Transform the line into web mercator.
            line.transform(WEB_MERCATOR_SRID)

            # Use the length of the transformed line as the scale.
            scale = line.geos.length

        return utils.closest_zoomlevel(scale)

    @property
    def max_zoom(self):
        # Return manual override value if provided
        if self.rasterlayer.max_zoom is not None:
            return self.rasterlayer.max_zoom

        # Get max zoom from metadata
        if not hasattr(self.rasterlayer, 'metadata'):
            raise RasterException('Could not determine max zoom level.')
        max_zoom = self.rasterlayer.metadata.max_zoom

        # Reduce max zoom by one if zoomdown flag was disabled
        if not self.rasterlayer.next_higher:
            max_zoom -= 1

        return max_zoom

    def nr_of_tiles(self, zoom):
        """
        Compute the number of tiles for the rasterlayer on a given zoom level.
        """
        bbox = self.dataset.extent
        indexrange = utils.tile_index_range(bbox, zoom)
        return (indexrange[2] - indexrange[0] + 1) * (indexrange[3] - indexrange[1] + 1)
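
# --- Illustrative sketch (not part of the example above) ---
# nr_of_tiles() derives the tile count from an x-y-z tile index range. The
# helper below is a minimal, self-contained stand-in for
# utils.tile_index_range, assuming the standard XYZ web mercator tiling
# scheme; the package's actual implementation may differ.
import math

WORLDSIZE = 2 * math.pi * 6378137  # web mercator extent along one axis

def tile_index_range_sketch(bbox, zoom):
    origin = -WORLDSIZE / 2
    tile_size = WORLDSIZE / 2 ** zoom
    return [
        int((bbox[0] - origin) / tile_size),   # xmin tile index
        int((-origin - bbox[3]) / tile_size),  # ymin tile index (y grows downward)
        int((bbox[2] - origin) / tile_size),   # xmax tile index
        int((-origin - bbox[1]) / tile_size),  # ymax tile index
    ]

# The whole world fits in one tile at zoom 0 and four tiles at zoom 1.
world = (-20037508.34, -20037508.34, 20037508.34, 20037508.34)
for zoom in (0, 1):
    ir = tile_index_range_sketch(world, zoom)
    print((ir[2] - ir[0] + 1) * (ir[3] - ir[1] + 1))  # -> 1, then 4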
Example No. 16
    def process(self):
        """
        This method contains the logic for processing tasks asynchronously
        from a background thread or from the scheduler. Here tasks that are
        ready to be processed execute some logic. This could be communication
        with a processing node or executing a pending action.
        """

        try:
            if self.auto_processing_node and self.status not in [status_codes.FAILED, status_codes.CANCELED]:
                # No processing node assigned and need to auto assign
                if self.processing_node is None:
                    # Assign first online node with lowest queue count
                    self.processing_node = ProcessingNode.find_best_available_node()
                    if self.processing_node:
                        self.processing_node.queue_count += 1  # Doesn't have to be accurate; it will get overridden later
                        self.processing_node.save()

                        logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                        self.save()

                # A processing node is assigned, but it is offline and there are no errors
                if self.processing_node and not self.processing_node.is_online():
                    # Detach processing node, will be processed at the next tick
                    logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                    self.uuid = ''
                    self.processing_node = None
                    self.save()

            if self.processing_node:
                # Need to process some images (UUID not yet set and task doesn't have pending actions)?
                if not self.uuid and self.pending_action is None and self.status is None:
                    logger.info("Processing... {}".format(self))

                    images = [image.path() for image in self.imageupload_set.all()]

                    # This takes a while
                    uuid = self.processing_node.process_new_task(images, self.name, self.options)

                    # Refresh task object before committing change
                    self.refresh_from_db()
                    self.uuid = uuid
                    self.save()

                    # TODO: log process has started processing

            if self.pending_action is not None:
                if self.pending_action == pending_actions.CANCEL:
                    # Do we need to cancel the task on the processing node?
                    logger.info("Canceling {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to cancel the task on the processing node
                        # We don't care if this fails (we tried)
                        try:
                            self.processing_node.cancel_task(self.uuid)
                            self.status = None
                        except ProcessingException:
                            logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))
                            self.status = status_codes.CANCELED

                        self.pending_action = None
                        self.save()
                    else:
                        raise ProcessingError("Cannot cancel a task that has no processing node or UUID")

                elif self.pending_action == pending_actions.RESTART:
                    logger.info("Restarting {}".format(self))
                    if self.processing_node:

                        # Check if the UUID is still valid; processing nodes purge
                        # results after a set amount of time, so the UUID might have been eliminated.
                        uuid_still_exists = False

                        if self.uuid:
                            try:
                                info = self.processing_node.get_task_info(self.uuid)
                                uuid_still_exists = info['uuid'] == self.uuid
                            except ProcessingException:
                                pass

                        if uuid_still_exists:
                            # Good to go
                            try:
                                self.processing_node.restart_task(self.uuid)
                            except ProcessingError as e:
                                # Something went wrong
                                logger.warning("Could not restart {}, will start a new one".format(self))
                                self.uuid = ''
                        else:
                            # Task has been purged (or processing node is offline)
                            # Process this as a new task
                            # Removing its UUID will cause the scheduler
                            # to process this on the next tick
                            self.uuid = ''

                        self.console_output = ""
                        self.processing_time = -1
                        self.status = None
                        self.last_error = None
                        self.pending_action = None
                        self.save()
                    else:
                        raise ProcessingError("Cannot restart a task that has no processing node")

                elif self.pending_action == pending_actions.REMOVE:
                    logger.info("Removing {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to delete the resources on the processing node
                        # We don't care if this fails, as resources on processing nodes
                        # are expected to be purged on their own after a set amount of time anyway
                        try:
                            self.processing_node.remove_task(self.uuid)
                        except ProcessingException:
                            pass

                    # What's more important is that we delete our task properly here
                    self.delete()

                    # Stop right here!
                    return

            if self.processing_node:
                # Need to update status (first time, queued or running?)
                if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                    # Update task info from processing node
                    info = self.processing_node.get_task_info(self.uuid)

                    self.processing_time = info["processingTime"]
                    self.status = info["status"]["code"]

                    current_lines_count = len(self.console_output.split("\n")) - 1
                    self.console_output += self.processing_node.get_task_console_output(self.uuid, current_lines_count)

                    if "errorMessage" in info["status"]:
                        self.last_error = info["status"]["errorMessage"]

                    # Has the task just been canceled, failed, or completed?
                    if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                        logger.info("Processing status: {} for {}".format(self.status, self))

                        if self.status == status_codes.COMPLETED:
                            assets_dir = self.assets_path("")
                            if not os.path.exists(assets_dir):
                                os.makedirs(assets_dir)

                            logger.info("Downloading all.zip for {}".format(self))

                            # Download all assets
                            zip_stream = self.processing_node.download_task_asset(self.uuid, "all.zip")
                            zip_path = os.path.join(assets_dir, "all.zip")
                            with open(zip_path, 'wb') as fd:
                                for chunk in zip_stream.iter_content(4096):
                                    fd.write(chunk)

                            logger.info("Done downloading all.zip for {}".format(self))

                            # Extract from zip
                            with zipfile.ZipFile(zip_path, "r") as zip_h:
                                zip_h.extractall(assets_dir)

                            logger.info("Extracted all.zip for {}".format(self))

                            # Add to database orthophoto
                            orthophoto_path = os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif"))
                            if os.path.exists(orthophoto_path):
                                orthophoto = GDALRaster(orthophoto_path, write=True)

                                # We need to transform to 4326 before we can store it
                                # as an offdb raster field
                                orthophoto_4326_path = os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto_4326.tif"))
                                self.orthophoto = orthophoto.transform(4326, 'GTiff', orthophoto_4326_path)

                                logger.info("Imported orthophoto {} for {}".format(orthophoto_4326_path, self))

                            # Remove old odm_texturing.zip archive (if any)
                            textured_model_archive = self.assets_path(self.get_textured_model_filename())
                            if os.path.exists(textured_model_archive):
                                os.remove(textured_model_archive)

                            self.save()
                        else:
                            # FAILED, CANCELED
                            self.save()
                    else:
                        # Still waiting...
                        self.save()

        except ProcessingError as e:
            self.set_failure(str(e))
        except (ConnectionRefusedError, ConnectionError) as e:
            logger.warning("{} cannot communicate with processing node: {}".format(self, str(e)))
        except ProcessingTimeout as e:
            logger.warning("{} timed out with error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))
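
# --- Illustrative sketch (not part of the example above) ---
# The COMPLETED branch above streams "all.zip" to disk in chunks and then
# extracts it into the task's assets directory. This standalone sketch shows
# the same pattern; `stream` stands in for any object exposing iter_content()
# (as requests responses do), and the names are illustrative only.
import os
import zipfile

def download_and_extract(stream, assets_dir, chunk_size=4096):
    os.makedirs(assets_dir, exist_ok=True)
    zip_path = os.path.join(assets_dir, "all.zip")
    # Write the archive chunk by chunk to keep memory usage flat.
    with open(zip_path, "wb") as fd:
        for chunk in stream.iter_content(chunk_size):
            fd.write(chunk)
    # Unpack everything next to the archive.
    with zipfile.ZipFile(zip_path, "r") as zip_h:
        zip_h.extractall(assets_dir)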
Example No. 17
class RasterLayerParser(object):
    """
    Class to parse raster layers.
    """

    def __init__(self, rasterlayer):
        self.rasterlayer = rasterlayer
        self.rastername = os.path.basename(rasterlayer.rasterfile.name)

        # Set raster tilesize
        self.tilesize = int(getattr(settings, "RASTER_TILESIZE", WEB_MERCATOR_TILESIZE))
        self.zoomdown = getattr(settings, "RASTER_ZOOM_NEXT_HIGHER", True)

    def log(self, msg, reset=False, status=None, zoom=None):
        """
        Write a message to the parse log of the rasterlayer instance and update
        the parse status object.
        """
        if status is not None:
            self.rasterlayer.parsestatus.status = status

        if zoom is not None:
            self.rasterlayer.parsestatus.tile_level = zoom

        # Prepare datetime stamp for log
        now = "[{0}] ".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # Write log, reset if requested
        if reset:
            self.rasterlayer.parsestatus.log = now + msg
        else:
            self.rasterlayer.parsestatus.log += "\n" + now + msg

        self.rasterlayer.save()
        self.rasterlayer.parsestatus.save()

    def get_raster_file(self):
        """
        Make local copy of rasterfile, which is needed if files are stored on
        remote storage, and unzip it if necessary.
        """
        self.log("Getting raster file from storage")

        raster_workdir = getattr(settings, "RASTER_WORKDIR", None)
        self.tmpdir = tempfile.mkdtemp(dir=raster_workdir)

        # Access rasterfile and store in a temp folder
        rasterfile = open(os.path.join(self.tmpdir, self.rastername), "wb")
        for chunk in self.rasterlayer.rasterfile.chunks():
            rasterfile.write(chunk)
        rasterfile.close()

        # If the raster file is compressed, decompress it
        fileName, fileExtension = os.path.splitext(self.rastername)

        if fileExtension == ".zip":

            # Open and extract zipfile
            zf = zipfile.ZipFile(os.path.join(self.tmpdir, self.rastername))
            zf.extractall(self.tmpdir)

            # Remove zipfile
            os.remove(os.path.join(self.tmpdir, self.rastername))

            # Get filelist from directory
            raster_list = glob.glob(os.path.join(self.tmpdir, "*.*"))

            # Check if only one file is found in zipfile
            if len(raster_list) > 1:
            self.log(
                "WARNING: Found more than one file in zipfile, "
                "using only the first file found. This might lead "
                "to problems if it's not a raster file."
            )

            # Return first one as raster file
            self.rastername = os.path.basename(raster_list[0])

    def open_raster_file(self):
        """
        Open the raster file as GDALRaster and set nodata-values.
        """
        self.log("Opening raster file as GDALRaster.")

        # Open raster file
        self.dataset = GDALRaster(os.path.join(self.tmpdir, self.rastername), write=True)

        # Make sure nodata value is set from input
        self.hist_values = []
        self.hist_bins = []
        for i, band in enumerate(self.dataset.bands):
            if self.rasterlayer.nodata is not None:
                band.nodata_value = float(self.rasterlayer.nodata)

            # Create band metadata object
            bandmeta = RasterLayerBandMetadata.objects.create(
                rasterlayer=self.rasterlayer, band=i, nodata_value=band.nodata_value, min=band.min, max=band.max
            )

            # Prepare numpy hist values and bins
            self.hist_values.append(numpy.array(bandmeta.hist_values))
            self.hist_bins.append(numpy.array(bandmeta.hist_bins))

        # Store original metadata for this raster
        meta = self.rasterlayer.metadata

        meta.uperleftx = self.dataset.origin.x
        meta.uperlefty = self.dataset.origin.y
        meta.width = self.dataset.width
        meta.height = self.dataset.height
        meta.scalex = self.dataset.scale.x
        meta.scaley = self.dataset.scale.y
        meta.skewx = self.dataset.skew.x
        meta.skewy = self.dataset.skew.y
        meta.numbands = len(self.dataset.bands)
        meta.srs_wkt = self.dataset.srs.wkt
        meta.srid = self.dataset.srs.srid

        meta.save()

    def close_raster_file(self):
        """
        On Windows, close and release the GDALRaster resources.
        """
        try:
            if self.dataset:
                del self.dataset
                self.dataset = None
        except AttributeError:
            pass

    def create_tiles(self, zoom):
        """
        Create tiles for this raster at the given zoomlevel.

        This routine first snaps the raster to the grid of the zoomlevel,
        then creates the tiles from the snapped raster.
        """
        # Compute the tile x-y-z index range for the rasterlayer for this zoomlevel
        bbox = self.rasterlayer.extent()
        indexrange = tiler.tile_index_range(bbox, zoom)

        # Compute scale of tiles for this zoomlevel
        tilescale = tiler.tile_scale(zoom)

        # Count the number of tiles that are required to cover the raster at this zoomlevel
        nr_of_tiles = (indexrange[2] - indexrange[0] + 1) * (indexrange[3] - indexrange[1] + 1)

        # Create destination raster file
        self.log("Snapping dataset to zoom level {0}".format(zoom))

        bounds = tiler.tile_bounds(indexrange[0], indexrange[1], zoom)
        sizex = (indexrange[2] - indexrange[0] + 1) * self.tilesize
        sizey = (indexrange[3] - indexrange[1] + 1) * self.tilesize
        dest_file = os.path.join(self.tmpdir, "djangowarpedraster" + str(zoom) + ".tif")

        snapped_dataset = self.dataset.warp(
            {
                "name": dest_file,
                "origin": [bounds[0], bounds[3]],
                "scale": [tilescale, -tilescale],
                "width": sizex,
                "height": sizey,
            }
        )

        self.log("Creating {0} tiles for zoom {1}.".format(nr_of_tiles, zoom))

        counter = 0
        for tilex in range(indexrange[0], indexrange[2] + 1):
            for tiley in range(indexrange[1], indexrange[3] + 1):
                # Log progress
                counter += 1
                if counter % 250 == 0:
                    self.log("{0} tiles created at zoom {1}".format(counter, zoom))

                # Calculate raster tile origin
                bounds = tiler.tile_bounds(tilex, tiley, zoom)

                # Construct band data arrays
                pixeloffset = ((tilex - indexrange[0]) * self.tilesize, (tiley - indexrange[1]) * self.tilesize)

                band_data = [
                    {
                        "data": band.data(offset=pixeloffset, size=(self.tilesize, self.tilesize)),
                        "nodata_value": band.nodata_value,
                    }
                    for band in snapped_dataset.bands
                ]

                # Add tile data to histogram
                if zoom == self.max_zoom:
                    self.push_histogram(band_data)

                # Warp source raster into this tile (in memory)
                dest = GDALRaster(
                    {
                        "width": self.tilesize,
                        "height": self.tilesize,
                        "origin": [bounds[0], bounds[3]],
                        "scale": [tilescale, -tilescale],
                        "srid": WEB_MERCATOR_SRID,
                        "datatype": snapped_dataset.bands[0].datatype(),
                        "bands": band_data,
                    }
                )

                # Store tile
                RasterTile.objects.create(rast=dest, rasterlayer=self.rasterlayer, tilex=tilex, tiley=tiley, tilez=zoom)

        # Store histogram data
        if zoom == self.max_zoom:
            bandmetas = RasterLayerBandMetadata.objects.filter(rasterlayer=self.rasterlayer)
            for bandmeta in bandmetas:
                bandmeta.hist_values = self.hist_values[bandmeta.band].tolist()
                bandmeta.save()

        # Remove snapped dataset
        self.log("Removing snapped dataset.", zoom=zoom)
        snapped_dataset = None
        os.remove(dest_file)

    def push_histogram(self, data):
        """
        Add data to the band level histogram.
        """
        # Loop through bands of this tile
        for i, dat in enumerate(data):
            # Create histogram for new data with the same bins
            new_hist = numpy.histogram(dat["data"], bins=self.hist_bins[i])
            # Add counts of this tile to band metadata histogram
            self.hist_values[i] += new_hist[0]

    def drop_empty_rasters(self):
        """
        Remove rasters that are only no-data from the current rasterlayer.
        """
        self.log("Dropping empty raster tiles.", status=self.rasterlayer.parsestatus.DROPPING_EMPTY_TILES)

        # Setup SQL command
        sql = ("DELETE FROM raster_rastertile " "WHERE ST_Count(rast)=0 " "AND rasterlayer_id={0}").format(
            self.rasterlayer.id
        )

        # Run SQL to drop empty tiles
        cursor = connection.cursor()
        cursor.execute(sql)

    def parse_raster_layer(self):
        """
        This function pushes the raster data from the Raster Layer into the
        RasterTile table.
        """
        try:
            # Clean previous parse log
            self.log("Started parsing raster file", reset=True, status=self.rasterlayer.parsestatus.DOWNLOADING_FILE)

            # Download, unzip and open raster file
            self.get_raster_file()
            self.open_raster_file()

            # Remove existing tiles for this layer before loading new ones
            self.rasterlayer.rastertile_set.all().delete()

            # Transform raster to global srid
            if self.dataset.srs.srid == WEB_MERCATOR_SRID:
                self.log("Dataset already in SRID {0}, skipping transform".format(WEB_MERCATOR_SRID))
            else:
                self.log(
                    "Transforming raster to SRID {0}".format(WEB_MERCATOR_SRID),
                    status=self.rasterlayer.parsestatus.REPROJECTING_RASTER,
                )
                self.dataset = self.dataset.transform(WEB_MERCATOR_SRID)

            # Compute max zoom at the web mercator projection
            self.max_zoom = tiler.closest_zoomlevel(abs(self.dataset.scale.x))

            # Store max zoom level in metadata
            self.rasterlayer.metadata.max_zoom = self.max_zoom
            self.rasterlayer.metadata.save()

            # Reduce max zoom by one if zoomdown flag was disabled
            if not self.zoomdown:
                self.max_zoom -= 1

            self.log("Started creating tiles", status=self.rasterlayer.parsestatus.CREATING_TILES)

            # Loop through all lower zoom levels and create tiles to
            # set up TMS-aligned tiles in web mercator
            for iz in range(self.max_zoom + 1):
                self.create_tiles(iz)

            self.drop_empty_rasters()

            # Send signal for end of parsing
            rasterlayers_parser_ended.send(sender=self.rasterlayer.__class__, instance=self.rasterlayer)

            # Log success of parsing
            self.log("Successfully finished parsing raster", status=self.rasterlayer.parsestatus.FINISHED)
        except:
            self.log(traceback.format_exc(), status=self.rasterlayer.parsestatus.FAILED)
            raise
        finally:
            self.close_raster_file()
            shutil.rmtree(self.tmpdir)
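
# --- Illustrative sketch (not part of the example above) ---
# parse_raster_layer() maps the web mercator pixel scale to a maximum zoom via
# tiler.closest_zoomlevel. A plausible stand-in is sketched below, assuming
# 256-pixel tiles and the standard web mercator resolution pyramid; the
# package's actual rounding behaviour may differ.
import math

WORLDSIZE = 2 * math.pi * 6378137

def closest_zoomlevel_sketch(scale, tilesize=256, max_zoom=18):
    for zoom in range(max_zoom + 1):
        resolution = WORLDSIZE / (tilesize * 2 ** zoom)
        # Stop at the first zoom whose resolution is at least as fine
        # as the raster's pixel size.
        if resolution <= scale:
            return zoom
    return max_zoom

print(closest_zoomlevel_sketch(150))  # ~150 m pixels -> zoom 11 under these assumptions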
Example No. 18
    def test_raster_transform(self):
        if GDAL_VERSION < (1, 8, 1):
            self.skipTest("GDAL >= 1.8.1 is required for this test")
        # Prepare tempfile and nodata value
        rstfile = tempfile.NamedTemporaryFile(suffix=".tif")
        ndv = 99

        # Create in file based raster
        source = GDALRaster(
            {
                "datatype": 1,
                "driver": "tif",
                "name": rstfile.name,
                "width": 5,
                "height": 5,
                "nr_of_bands": 1,
                "srid": 4326,
                "origin": (-5, 5),
                "scale": (2, -2),
                "skew": (0, 0),
                "bands": [{"data": range(25), "nodata_value": ndv}],
            }
        )

        # Transform raster into srid 3086.
        target = source.transform(3086)

        # Reload data from disk
        target = GDALRaster(target.name)

        self.assertEqual(target.srs.srid, 3086)
        self.assertEqual(target.width, 7)
        self.assertEqual(target.height, 7)
        self.assertEqual(target.bands[0].datatype(), source.bands[0].datatype())
        self.assertEqual(target.origin, [9124842.791079799, 1589911.6476407414])
        self.assertEqual(target.scale, [223824.82664250192, -223824.82664250192])
        self.assertEqual(target.skew, [0, 0])

        result = target.bands[0].data()
        if numpy:
            result = result.flatten().tolist()

        # The reprojection of a raster that spans a large area
        # skews the data matrix and might introduce nodata values.
        self.assertEqual(
            result,
            [
                ndv,
                ndv,
                ndv,
                ndv,
                4,
                ndv,
                ndv,
                ndv,
                ndv,
                2,
                3,
                9,
                ndv,
                ndv,
                ndv,
                1,
                2,
                8,
                13,
                19,
                ndv,
                0,
                6,
                6,
                12,
                18,
                18,
                24,
                ndv,
                10,
                11,
                16,
                22,
                23,
                ndv,
                ndv,
                ndv,
                15,
                21,
                22,
                ndv,
                ndv,
                ndv,
                ndv,
                20,
                ndv,
                ndv,
                ndv,
                ndv,
            ],
        )
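
# --- Illustrative sketch (not part of the examples above) ---
# The push_histogram() methods above rely on numpy.histogram reusing fixed bin
# edges, so per-tile counts can simply be summed into a running total. A
# minimal demonstration with made-up data:
import numpy

bins = numpy.linspace(0, 255, 11)   # assumed bin edges, 10 buckets
totals = numpy.zeros(len(bins) - 1)

for tile_data in (numpy.arange(25), numpy.arange(100, 125)):
    counts, _ = numpy.histogram(tile_data, bins=bins)
    totals += counts                # accumulate counts across tiles

print(totals.tolist())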