Esempio n. 1
0
    def _open(self) -> Union[SelfCleaningDatasetReader, WarpedVRT]:
        """Open a fresh dataset (and, if configured, a VRT) and cache them thread-locally.

        The dataset is opened from ``self._url`` using ``self._driver`` and
        ``self._open_options``. When ``self._vrt_params`` is truthy, a
        ``WarpedVRT`` is built on top of that dataset. Both handles are then
        stored on ``self._threadlocal`` (``vrt`` is ``None`` when no VRT was
        built).

        Returns:
            The ``WarpedVRT`` if one was created, otherwise the dataset itself.
        """
        with self._env.open:
            with time(f"Reopen {self._url!r} in {_curthread()}: {{t}}"):
                ds = SelfCleaningDatasetReader(
                    rio.parse_path(self._url),
                    sharing=False,
                    driver=self._driver,
                    **self._open_options,
                )
                log_event("open_dataset", dict(url=self._url))

            vrt = None
            if self._vrt_params:
                with self._env.open_vrt:
                    vrt = WarpedVRT(ds, sharing=False, **self._vrt_params)
                    log_event("open_vrt", dict(url=self._url))

        with self._lock:
            self._threadlocal.ds = ds
            self._threadlocal.vrt = vrt

        # Log when the handles are garbage-collected. `weakref.finalize` is used
        # instead of a bare `weakref.ref(obj, callback)` because:
        #   * a `weakref.ref` that is not kept alive never fires its callback;
        #   * a `weakref.ref` callback receives the weakref as an extra argument,
        #     which `log_event(topic, payload)` is not called with elsewhere.
        # Passing the function and its arguments separately (rather than a
        # lambda) avoids taking a closure over `self`.
        weakref.finalize(ds, log_event, "close_dataset", dict(url=self._url))
        if vrt is not None:
            # `None` cannot be weakly referenced, so only register when a VRT exists.
            weakref.finalize(vrt, log_event, "close_vrt", dict(url=self._url))

        return ds if vrt is None else vrt
Esempio n. 2
0
    def _open(self) -> ThreadsafeRioDataset:
        """Perform the initial open of ``self.url`` and wrap it in a thread-safe reader.

        Opens the dataset, checks that it has exactly one band, and builds a
        ``WarpedVRT`` only when the dataset's CRS/transform/shape differ from
        ``self.spec.vrt_params``.

        Returns:
            A ``ThreadLocalRioDataset`` when the dataset's driver is in
            ``MULTITHREADED_DRIVER_ALLOWLIST``, otherwise a
            ``SingleThreadedRioDataset``.

        Raises:
            RuntimeError: If the dataset does not have exactly one band. The
                dataset is closed before raising.
        """
        with self.env.open:
            with time(
                    f"Initial read for {self.url!r} on {_curthread()}: {{t}}"):
                ds = SelfCleaningDatasetReader(rio.parse_path(self.url),
                                               sharing=False)

            # Read the band count once, *before* a possible close(), so the
            # error message never has to query a closed dataset.
            band_count = ds.count
            if band_count != 1:
                ds.close()
                raise RuntimeError(
                    f"Assets must have exactly 1 band, but file {self.url!r} has {band_count}. "
                    "We can't currently handle multi-band rasters (each band has to be "
                    "a separate STAC asset), so you'll need to exclude this asset from your analysis."
                )

            log_event("open_dataset_initial", dict(url=self.url))
            # `weakref.finalize` keeps itself alive until `ds` is collected; a
            # discarded `weakref.ref(ds, callback)` would never run its callback.
            weakref.finalize(ds, log_event, "close_dataset_initial",
                             dict(url=self.url))

            # Only make a VRT if the dataset doesn't match the spatial spec we want
            native_params = {
                "crs": ds.crs.to_epsg(),
                "transform": ds.transform,
                "height": ds.height,
                "width": ds.width,
            }
            if self.spec.vrt_params != native_params:
                with self.env.open_vrt:
                    vrt = WarpedVRT(
                        ds,
                        sharing=False,
                        resampling=self.resampling,
                        **self.spec.vrt_params,
                    )
                    log_event("open_vrt_initial", dict(url=self.url))
                    weakref.finalize(vrt, log_event, "close_vrt_initial",
                                     dict(url=self.url))
            else:
                logger.info(f"Skipping VRT for {self.url!r}")
                vrt = None

        if ds.driver in MULTITHREADED_DRIVER_ALLOWLIST:
            # NOTE: this forces all threads to wait for the `open()` we just did before they can open their
            # thread-local datasets. In principle, this would double the wall-clock open time, but if the above `open()`
            # is cached, it can actually be faster than all threads duplicating the same request in parallel.
            # This is worth profiling eventually for cases when STAC tells us the media type is a GeoTIFF.
            return ThreadLocalRioDataset(self.env, ds, vrt=vrt)

        # logger.warning(
        #     f"Falling back on single-threaded reader for {self.url!r} (driver: {ds.driver!r}). "
        #     "This will be slow!"
        # )
        return SingleThreadedRioDataset(self.env, ds, vrt=vrt)
Esempio n. 3
0
 def action(url, *args):
     """Open ``url`` as an unshared dataset inside ``local_env()`` and apply ``fn``.

     ``fn`` is called with the open dataset followed by ``*args`` and the
     enclosing scope's ``**kwargs``; its result is returned.
     """
     parsed_path = rasterio.parse_path(url)
     with local_env(), rasterio.DatasetReader(parsed_path, sharing=False) as dataset:
         return fn(dataset, *args, **kwargs)
Esempio n. 4
0
 def read_from_url(url):
     """Open ``url`` as an unshared dataset and return ``extractor``'s result.

     ``extractor`` is called with the open dataset and the enclosing scope's
     ``coord`` as a keyword argument.
     """
     parsed_path = rasterio.parse_path(url)
     with rasterio.DatasetReader(parsed_path, sharing=False) as dataset:
         extracted = extractor(dataset, coord=coord)
     return extracted