Example #1
0
def test_read_raster_window_input_list(cleantopo_br):
    """
    Compare reading a list of tile files with manually mosaicking them.

    All zoom 5 output tiles are written, mosaicked and resampled to a tile of
    the next lower zoom. The same tile is then read directly from the list of
    file paths and both results are compared.
    """
    process_zoom = 5
    conf = dict(**cleantopo_br.dict)
    conf["output"].update(metatiling=1)
    with mapchete.open(conf) as mp:
        mp.batch_process(process_zoom)
        output = mp.config.output
        # collect every output tile which was actually written
        tiles = []
        for tile in mp.config.output_pyramid.tiles_from_bounds(
                mp.config.bounds, process_zoom):
            tile_path = output.get_path(tile)
            if path_exists(tile_path):
                tiles.append((tile, tile_path))
        upper_tile = next(mp.get_process_tiles(process_zoom - 1))
        assert len(tiles) > 1
        mosaic = create_mosaic(
            [(tile, read_raster_window(path, tile)) for tile, path in tiles])
        resampled = resample_from_array(in_raster=mosaic, out_tile=upper_tile)
    tile_paths = [path for _, path in tiles]
    resampled2 = read_raster_window(
        tile_paths, upper_tile, src_nodata=0, dst_nodata=0)
    assert resampled.dtype == resampled2.dtype
    assert resampled.shape == resampled2.shape
    assert np.array_equal(resampled.mask, resampled2.mask)
    # TODO slight rounding errors occur
    assert np.allclose(resampled, resampled2, rtol=0.01)
Example #2
0
 def __init__(self, out_path=None, output=None, out_pyramid=None):
     """
     Set up the VRT writer and load entries of an already existing VRT.

     Parameters
     ----------
     out_path : str
         Path of the VRT file. Paths starting with "s3://" are read through
         the boto3 bucket resource, everything else through ``open()``.
         Must be a string, string methods are called on it right away.
     output : object
         Stored as ``self._output``.
     out_pyramid : object
         Stored as ``self._tp``.
     """
     # see if lxml is installed before checking all output tiles
     from lxml.builder import ElementMaker  # noqa: F401
     self.path = out_path
     self._tp = out_pyramid
     self._output = output
     is_s3 = self.path.startswith("s3://")
     # "s3://bucket/key" split by "/" has the bucket name at index 2
     self._bucket = self.path.split("/")[2] if is_s3 else None
     self.bucket_resource = get_boto3_bucket(
         self._bucket) if self._bucket else None
     logger.debug("initialize VRT writer for %s", self.path)
     # Start out empty so the attribute is always defined, even if the bucket
     # listing below does not contain an object with exactly this key.
     self._existing = {}
     if path_exists(self.path):
         if self._bucket:
             key = "/".join(self.path.split("/")[3:])
             # Prefix filtering may also return longer keys, so look for the
             # exact match only.
             for obj in self.bucket_resource.objects.filter(Prefix=key):
                 if obj.key == key:
                     self._existing = dict(
                         self._xml_to_entries(
                             obj.get()['Body'].read().decode()))
                     break
         else:
             with open(self.path) as src:
                 self._existing = dict(self._xml_to_entries(src.read()))
     logger.debug("%s existing entries", len(self._existing))
     self.new_entries = 0
     self._new = {}
Example #3
0
def read_raster_no_crs(input_file, indexes=None, gdal_opts=None):
    """
    Wrapper function around rasterio.open().read().

    All warnings emitted while reading are suppressed.

    Parameters
    ----------
    input_file : str
        Path to file
    indexes : int or list
        Band index or list of band indexes to be read.
    gdal_opts : dict
        GDAL options passed on to rasterio.Env()

    Returns
    -------
    MaskedArray

    Raises
    ------
    FileNotFoundError if file cannot be found.
    RasterioIOError if the file exists but cannot be read, or if the
    existence check itself fails.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            with rasterio.Env(**get_gdal_options(
                    gdal_opts, is_remote=path_is_remote(input_file, s3=True))):
                with rasterio.open(input_file, "r") as src:
                    return src.read(indexes=indexes, masked=True)
        except RasterioIOError as e:
            # Find out whether the read failed because the file is missing.
            try:
                file_exists = path_exists(input_file)
            except Exception:
                # The existence check failed as well, so report the original
                # read error instead of the secondary one.
                raise e
            if file_exists:
                # file is there but unreadable: keep the original error
                raise
            raise FileNotFoundError("%s not found" % input_file) from e
Example #4
0
def driver_from_file(input_file):
    """
    Guess driver from file by opening it.

    Files with a ".mapchete" extension are recognized by name only. Everything
    else is opened with rasterio first and, if that fails, with fiona.

    Parameters
    ----------
    input_file : str
        Path to file

    Returns
    -------
    driver : string
        driver name

    Raises
    ------
    MapcheteDriverError if the file exists but neither rasterio nor fiona can
    open it.
    FileNotFoundError if the file does not exist.
    """
    # splitext() returns either "" or ".ext"; dropping the first character
    # also works for paths without any extension, where indexing the result of
    # split(".") raised an IndexError.
    file_ext = os.path.splitext(input_file)[1][1:]
    if file_ext == "mapchete":
        return "Mapchete"
    try:
        with rasterio.open(input_file):
            return "raster_file"
    except Exception as rio_exception:
        try:
            with fiona.open(input_file):
                return "vector_file"
        except Exception as fio_exception:
            if path_exists(input_file):
                logger.exception("fiona error: %s", fio_exception)
                logger.exception("rasterio error: %s", rio_exception)
                raise MapcheteDriverError(
                    "%s has an unknown file extension or could not be opened by neither "
                    "rasterio nor fiona." % input_file) from fio_exception
            else:
                raise FileNotFoundError(
                    "%s does not exist" % input_file) from fio_exception
Example #5
0
def test_output_single_gtiff_cog_s3(output_single_gtiff_cog, mp_s3_tmpdir):
    """
    Write a single GTiff output as COG below the S3 temp dir and validate it.
    """
    tile_id = (5, 3, 7)
    # same configuration as the fixture, only the output path is redirected
    output_conf = dict(
        output_single_gtiff_cog.dict["output"],
        path=os.path.join(mp_s3_tmpdir, "cog.tif"))
    conf = dict(output_single_gtiff_cog.dict, output=output_conf)
    with mapchete.open(conf) as mp:
        process_tile = mp.config.process_pyramid.tile(*tile_id)
        # basic functions
        assert mp.config.output.profile()
        assert mp.config.output.empty(process_tile).mask.all()
        assert mp.config.output.get_path(process_tile)
        # check if tile exists
        assert not mp.config.output.tiles_exist(process_tile)
        # write
        mp.batch_process(multi=2)
        # check if tile exists
        assert mp.config.output.tiles_exist(process_tile)
        # read again, this time with data
        data = mp.config.output.read(process_tile)
        assert isinstance(data, np.ndarray)
        assert not data[0].mask.all()
        # write empty array
        tile_shape = process_tile.shape
        data = ma.masked_array(data=np.ones(tile_shape),
                               mask=np.ones(tile_shape))
        mp.config.output.write(process_tile, data)
    out_path = mp.config.output.path
    assert path_exists(out_path)
    assert cog_validate(mp.config.output.path, strict=True)
Example #6
0
 def __init__(self, out_path=None):
     """
     Set up the TXT writer and carry over lines of an already existing file.

     Parameters
     ----------
     out_path : str
         Path of the text file. Paths starting with "s3://" are read through
         the boto3 bucket resource and collected in a string sink; other
         paths are opened for writing as a local file. Must be a string,
         string methods are called on it right away.
     """
     self.path = out_path
     is_s3 = self.path.startswith("s3://")
     # "s3://bucket/key" split by "/" has the bucket name at index 2
     self._bucket = self.path.split("/")[2] if is_s3 else None
     self.bucket_resource = get_boto3_bucket(
         self._bucket) if self._bucket else None
     logger.debug("initialize TXT writer")
     # Always a set of lines (each ending with a newline). Initializing it up
     # front keeps the attribute defined if the bucket listing below does not
     # contain an object with exactly this key.
     self._existing = set()
     if path_exists(self.path):
         if self._bucket:
             key = "/".join(self.path.split("/")[3:])
             # Prefix filtering may also return longer keys, so look for the
             # exact match only.
             for obj in self.bucket_resource.objects.filter(Prefix=key):
                 if obj.key == key:
                     content = obj.get()['Body'].read().decode()
                     self._existing = {
                         line + '\n' for line in content.split('\n') if line
                     }
                     break
         else:
             # read before the file gets truncated by opening it for writing
             with open(self.path) as src:
                 self._existing = set(src)
     self.new_entries = 0
     if self._bucket:
         self.sink = ""
     else:
         self.sink = open(self.path, "w")
     # re-emit the previously existing lines into the fresh sink
     for line in self._existing:
         self._write_line(line)
Example #7
0
    def open(self, tile, **kwargs):
        """
        Return InputTile object.

        Only tiles of the tile directory pyramid which touch the bounds of
        the given tile and whose file exists are handed over.

        Parameters
        ----------
        tile : ``Tile``

        Returns
        -------
        input tile : ``InputTile``
            tile view of input data
        """
        # candidate files follow the <path>/<zoom>/<row>/<col>.<ext> layout
        candidates = [
            (
                td_tile,
                os.path.join(
                    self.path,
                    str(td_tile.zoom),
                    str(td_tile.row),
                    str(td_tile.col),
                ) + "." + self._ext,
            )
            for td_tile in self.td_pyramid.tiles_from_bounds(
                tile.bounds, tile.zoom)
        ]
        existing = [
            (td_tile, td_path)
            for td_tile, td_path in candidates
            if path_exists(td_path)
        ]
        return InputTile(
            tile,
            tiles_paths=existing,
            file_type=self._file_type,
            profile=self._profile,
            **kwargs)
Example #8
0
 def prepare(self, process_area=None, **kwargs):
     """
     Derive the output raster profile and open the destination dataset.

     Parameters
     ----------
     process_area : geometry, optional
         If given (and truthy), the output bounds are its intersection with
         the configured effective bounds, snapped to the pyramid at the
         output zoom. Otherwise the effective bounds are used as they are.
         Presumably a shapely geometry -- ``intersection()`` and ``bounds``
         are used.
     **kwargs
         Accepted but not used.

     Raises
     ------
     MapcheteConfigError
         If the output file already exists and mode is not "overwrite".
     """
     effective_bounds = self.output_params["delimiters"]["effective_bounds"]
     bounds = snap_bounds(
         bounds=Bounds(
             *process_area.intersection(box(*effective_bounds)).bounds),
         pyramid=self.pyramid,
         zoom=self.zoom) if process_area else effective_bounds
     pixel_x_size = self.pyramid.pixel_x_size(self.zoom)
     pixel_y_size = self.pyramid.pixel_y_size(self.zoom)
     # Rows are spaced by the y pixel size (as in the transform below), so
     # the height has to be derived from it and not from the x pixel size.
     height = math.ceil((bounds.top - bounds.bottom) / pixel_y_size)
     width = math.ceil((bounds.right - bounds.left) / pixel_x_size)
     logger.debug("output raster bounds: %s", bounds)
     logger.debug("output raster shape: %s, %s", height, width)
     self._profile = dict(
         GTIFF_DEFAULT_PROFILE,
         driver="GTiff",
         transform=Affine(pixel_x_size, 0, bounds.left, 0, -pixel_y_size,
                          bounds.top),
         height=height,
         width=width,
         count=self.output_params["bands"],
         crs=self.pyramid.crs,
         # user provided values override the defaults key by key
         **{
             k: self.output_params.get(k, GTIFF_DEFAULT_PROFILE[k])
             for k in GTIFF_DEFAULT_PROFILE.keys()
         },
         bigtiff=self.output_params.get("bigtiff", "NO"))
     logger.debug("single GTiff profile: %s", self._profile)
     # an explicit False is kept, otherwise the raster size decides
     self.in_memory = (self.in_memory if self.in_memory is False else
                       height * width < IN_MEMORY_THRESHOLD)
     # set up rasterio
     if path_exists(self.path):
         if self.output_params["mode"] != "overwrite":
             raise MapcheteConfigError(
                 "single GTiff file already exists, use overwrite mode to replace"
             )
         else:
             logger.debug("remove existing file: %s", self.path)
             os.remove(self.path)
     # create output directory if necessary
     makedirs(os.path.dirname(self.path))
     logger.debug("open output file: %s", self.path)
     self._ctx = ExitStack()
     try:
         # (1) use memfile if output is remote or COG
         if self.cog or path_is_remote(self.path):
             if self.in_memory:
                 self._memfile = self._ctx.enter_context(MemoryFile())
                 self.dst = self._ctx.enter_context(
                     self._memfile.open(**self._profile))
             else:
                 # in case output raster is too big, use tempfile on disk
                 self._tempfile = self._ctx.enter_context(
                     NamedTemporaryFile())
                 self.dst = self._ctx.enter_context(
                     rasterio.open(self._tempfile.name, "w+",
                                   **self._profile))
         else:
             self.dst = self._ctx.enter_context(
                 rasterio.open(self.path, "w+", **self._profile))
     except Exception:
         # do not leak a memfile/tempfile which was entered before the
         # dataset failed to open
         self._ctx.close()
         raise
Example #9
0
def _get_tiles_paths(basepath=None,
                     ext=None,
                     pyramid=None,
                     bounds=None,
                     zoom=None):
    """
    Return (tile, path) pairs of all existing tile files within bounds.

    Paths follow the <basepath>/<zoom>/<row>/<col>.<ext> layout; pairs whose
    path does not exist are left out.
    """
    candidates = []
    for tile in pyramid.tiles_from_bounds(bounds, zoom):
        stem = os.path.join(
            basepath, str(tile.zoom), str(tile.row), str(tile.col))
        candidates.append((tile, "%s.%s" % (stem, ext)))
    return [(tile, path) for tile, path in candidates if path_exists(path)]
Example #10
0
def _rasterio_read(
    input_file=None,
    indexes=None,
    dst_bounds=None,
    dst_shape=None,
    dst_crs=None,
    resampling=None,
    src_nodata=None,
    dst_nodata=None,
):
    """
    Read a window of a raster, warped on the fly to the destination grid.

    Parameters
    ----------
    input_file : str or dataset object
        File path, or an object which is used as an opened dataset (not
        recommended, a warning is emitted).
    indexes : int or list, optional
        Band index or indexes; all bands of the source if None.
    dst_bounds : tuple
        Destination bounds as (left, bottom, right, top).
    dst_shape : tuple
        Destination shape; the last two entries are height and width.
    dst_crs : CRS
        Destination CRS handed over to the WarpedVRT.
    resampling : str
        Name of a member of the ``Resampling`` enum.
    src_nodata : number, optional
        Source nodata value; defaults to the nodata value of the source.
    dst_nodata : number, optional
        Destination nodata value; defaults to the nodata value of the source.

    Returns
    -------
    Result of ``WarpedVRT.read()`` called with ``masked=True``.

    Raises
    ------
    FileNotFoundError if a file path was given and the file cannot be found.
    RasterioIOError if the file exists but cannot be read, or if the
    existence check itself fails.
    """
    def _read(src, indexes, dst_bounds, dst_shape, dst_crs, resampling,
              src_nodata, dst_nodata):
        height, width = dst_shape[-2:]
        if indexes is None:
            # no band selection: read all bands of the source
            dst_shape = (len(src.indexes), height, width)
            indexes = list(src.indexes)
        src_nodata = src.nodata if src_nodata is None else src_nodata
        dst_nodata = src.nodata if dst_nodata is None else dst_nodata
        dst_left, dst_bottom, dst_right, dst_top = dst_bounds
        with WarpedVRT(src,
                       crs=dst_crs,
                       src_nodata=src_nodata,
                       nodata=dst_nodata,
                       width=width,
                       height=height,
                       transform=affine_from_bounds(dst_left, dst_bottom,
                                                    dst_right, dst_top, width,
                                                    height),
                       resampling=Resampling[resampling]) as vrt:
            return vrt.read(window=vrt.window(*dst_bounds),
                            out_shape=dst_shape,
                            indexes=indexes,
                            masked=True)

    if isinstance(input_file, str):
        logger.debug("got file path %s", input_file)
        try:
            with rasterio.open(input_file, "r") as src:
                return _read(src, indexes, dst_bounds, dst_shape, dst_crs,
                             resampling, src_nodata, dst_nodata)
        except RasterioIOError as e:
            # Find out whether the read failed because the file is missing.
            try:
                file_exists = path_exists(input_file)
            except Exception:
                # The existence check failed as well, so report the original
                # read error instead of the secondary one.
                raise e
            if file_exists:
                # file is there but unreadable: keep the original error
                raise
            raise FileNotFoundError("%s not found" % input_file) from e
    else:  # pragma: no cover
        logger.debug("assuming file object %s", input_file)
        warnings.warn(
            "passing on a rasterio dataset object is not recommended, see "
            "https://github.com/mapbox/rasterio/issues/1309")
        return _read(input_file, indexes, dst_bounds, dst_shape, dst_crs,
                     resampling, src_nodata, dst_nodata)
Example #11
0
    def tiles_exist(self, process_tile=None, output_tile=None):
        """
        Check whether output tiles of a tile (either process or output) exists.

        For a process tile, it is sufficient that any of the intersecting
        output tiles exists.

        Parameters
        ----------
        process_tile : ``BufferedTile``
            must be member of process ``TilePyramid``
        output_tile : ``BufferedTile``
            must be member of output ``TilePyramid``

        Returns
        -------
        exists : bool
            False if neither a process tile nor an output tile is given.

        Raises
        ------
        ValueError if both a process tile and an output tile are given.
        """
        if process_tile and output_tile:
            raise ValueError(
                "just one of 'process_tile' and 'output_tile' allowed")
        if process_tile:
            return any(
                path_exists(self.get_path(tile))
                for tile in self.pyramid.intersecting(process_tile))
        if output_tile:
            return path_exists(self.get_path(output_tile))
        # nothing to check; return a real bool instead of an implicit None
        return False
Example #12
0
 def prepare(self, process_area=None, **kwargs):
     """
     Derive the output raster profile and open the output file for writing.

     Parameters
     ----------
     process_area : geometry, optional
         If given (and truthy), the output bounds are its intersection with
         the configured effective bounds, snapped to the pyramid at the
         output zoom. Otherwise the effective bounds are used as they are.
         Presumably a shapely geometry -- ``intersection()`` and ``bounds``
         are used.
     **kwargs
         Accepted but not used.

     Raises
     ------
     ValueError
         If the output raster has more than 20000 * 20000 pixels.
     MapcheteConfigError
         If the output file already exists and mode is not "overwrite".
     """
     effective_bounds = self.output_params["delimiters"]["effective_bounds"]
     bounds = snap_bounds(
         bounds=Bounds(
             *process_area.intersection(box(*effective_bounds)).bounds),
         pyramid=self.pyramid,
         zoom=self.zoom) if process_area else effective_bounds
     pixel_x_size = self.pyramid.pixel_x_size(self.zoom)
     pixel_y_size = self.pyramid.pixel_y_size(self.zoom)
     # Rows are spaced by the y pixel size (as in the transform below), so
     # the height has to be derived from it and not from the x pixel size.
     height = math.ceil((bounds.top - bounds.bottom) / pixel_y_size)
     width = math.ceil((bounds.right - bounds.left) / pixel_x_size)
     logger.debug("output raster bounds: %s", bounds)
     logger.debug("output raster shape: %s, %s", height, width)
     self._profile = dict(
         GTIFF_DEFAULT_PROFILE,
         driver="GTiff",
         transform=Affine(pixel_x_size, 0, bounds.left, 0, -pixel_y_size,
                          bounds.top),
         height=height,
         width=width,
         count=self.output_params["bands"],
         crs=self.pyramid.crs,
         # user provided values override the defaults key by key
         **{
             k: self.output_params.get(k, GTIFF_DEFAULT_PROFILE[k])
             for k in GTIFF_DEFAULT_PROFILE.keys()
         })
     logger.debug("single GTiff profile: %s", self._profile)
     if height * width > 20000 * 20000:
         raise ValueError("output GeoTIFF too big")
     # set up rasterio
     if path_exists(self.path):
         if self.output_params["mode"] != "overwrite":
             raise MapcheteConfigError(
                 "single GTiff file already exists, use overwrite mode to replace"
             )
         else:
             logger.debug("remove existing file: %s", self.path)
             os.remove(self.path)
     logger.debug("open output file: %s", self.path)
     self.rio_file = rasterio.open(self.path, "w+", **self._profile)
Example #13
0
def test_remote_path_exists(http_raster):
    """Existence check over HTTP finds the fixture but not an invalid URL."""
    missing_url = "http://ungarj.github.io/invalid_file.tif"
    assert path_exists(http_raster)
    assert not path_exists(missing_url)
Example #14
0
def test_s3_path_exists(s2_band_remote):
    """Existence check finds the remote band provided by the fixture."""
    remote_band_path = s2_band_remote
    assert path_exists(remote_band_path)