def _open(self) -> Union[SelfCleaningDatasetReader, WarpedVRT]:
    """Open the dataset (and VRT, if configured) and store them thread-locally.

    Opens ``self._url`` with ``self._driver`` and ``self._open_options``. When
    ``self._vrt_params`` is truthy, the dataset is additionally wrapped in a
    ``WarpedVRT`` built from those parameters. Both handles are stored on
    ``self._threadlocal`` while holding ``self._lock``.

    "open_*" events are logged as each handle is created, and matching
    "close_*" events are registered to be logged once the handle is
    garbage-collected.

    Returns:
        The ``WarpedVRT`` if one was created, otherwise the dataset itself.
    """
    with self._env.open:
        with time(f"Reopen {self._url!r} in {_curthread()}: {{t}}"):
            result = ds = SelfCleaningDatasetReader(
                rio.parse_path(self._url),
                sharing=False,
                driver=self._driver,
                **self._open_options,
            )
        log_event("open_dataset", dict(url=self._url))

        if self._vrt_params:
            with self._env.open_vrt:
                result = vrt = WarpedVRT(ds, sharing=False, **self._vrt_params)
            log_event("open_vrt", dict(url=self._url))
        else:
            vrt = None

    with self._lock:
        self._threadlocal.ds = ds
        self._threadlocal.vrt = vrt

    # NOTE: `weakref.finalize` rather than a bare `weakref.ref(obj, callback)`:
    # a ref's callback only fires while the ref object itself is still alive,
    # so a ref that is discarded immediately never logs anything. `finalize`
    # keeps itself alive until the referent dies (it also runs at interpreter
    # exit by default) and calls `log_event` with exactly the arguments given.
    # The event dict is built eagerly, so no reference to `self` is retained.
    weakref.finalize(ds, log_event, "close_dataset", dict(url=self._url))
    if vrt is not None:
        # `None` cannot be weakly referenced; only register when a VRT exists.
        weakref.finalize(vrt, log_event, "close_vrt", dict(url=self._url))

    return result
def _open(self) -> ThreadsafeRioDataset:
    """Perform the initial open of ``self.url`` and pick a reader implementation.

    The dataset is opened once and must have exactly one band. A ``WarpedVRT``
    is created only when the dataset's CRS (as EPSG), transform, height and
    width differ from ``self.spec.vrt_params``.

    Returns:
        A ``ThreadLocalRioDataset`` when the dataset's driver is in
        ``MULTITHREADED_DRIVER_ALLOWLIST``, otherwise a
        ``SingleThreadedRioDataset``.

    Raises:
        RuntimeError: If the file does not have exactly one band. The dataset
            is closed before raising.
    """
    with self.env.open:
        with time(f"Initial read for {self.url!r} on {_curthread()}: {{t}}"):
            ds = SelfCleaningDatasetReader(rio.parse_path(self.url), sharing=False)

        # Read the band count once, while the dataset is still open, so the
        # error message below never has to query a closed dataset.
        band_count = ds.count
        if band_count != 1:
            ds.close()
            raise RuntimeError(
                f"Assets must have exactly 1 band, but file {self.url!r} has {band_count}. "
                "We can't currently handle multi-band rasters (each band has to be "
                "a separate STAC asset), so you'll need to exclude this asset from your analysis."
            )

        log_event("open_dataset_initial", dict(url=self.url))
        # NOTE: `weakref.finalize` rather than a bare `weakref.ref(obj, callback)`:
        # a ref's callback only fires while the ref object itself is alive, so
        # a ref that is discarded immediately never logs the close event.
        # `finalize` keeps itself alive until the referent is collected (and
        # also runs at interpreter exit by default).
        weakref.finalize(
            ds, log_event, "close_dataset_initial", dict(url=self.url)
        )

        # Only make a VRT if the dataset doesn't match the spatial spec we want
        if self.spec.vrt_params != {
            "crs": ds.crs.to_epsg(),
            "transform": ds.transform,
            "height": ds.height,
            "width": ds.width,
        }:
            with self.env.open_vrt:
                vrt = WarpedVRT(
                    ds,
                    sharing=False,
                    resampling=self.resampling,
                    **self.spec.vrt_params,
                )
            log_event("open_vrt_initial", dict(url=self.url))
            weakref.finalize(
                vrt, log_event, "close_vrt_initial", dict(url=self.url)
            )
        else:
            logger.info(f"Skipping VRT for {self.url!r}")
            vrt = None

    if ds.driver in MULTITHREADED_DRIVER_ALLOWLIST:
        return ThreadLocalRioDataset(self.env, ds, vrt=vrt)
        # ^ NOTE: this forces all threads to wait for the `open()` we just did before they can open their
        # thread-local datasets. In principle, this would double the wall-clock open time, but if the above `open()`
        # is cached, it can actually be faster than all threads duplicating the same request in parallel.
        # This is worth profiling eventually for cases when STAC tells us the media type is a GeoTIFF.
    else:
        # logger.warning(
        #     f"Falling back on single-threaded reader for {self.url!r} (driver: {ds.driver!r}). "
        #     "This will be slow!"
        # )
        return SingleThreadedRioDataset(self.env, ds, vrt=vrt)
def action(url, *args):
    """Open ``url`` with rasterio inside ``local_env()`` and apply ``fn`` to it.

    ``fn``, ``kwargs`` and ``local_env`` are not defined in this function;
    presumably they come from an enclosing scope -- confirm against the
    surrounding code.

    Returns:
        Whatever ``fn(src, *args, **kwargs)`` returns, where ``src`` is the
        opened dataset.
    """
    # The path is parsed before the environment is entered.
    dataset_path = rasterio.parse_path(url)
    with local_env(), rasterio.DatasetReader(dataset_path, sharing=False) as src:
        return fn(src, *args, **kwargs)
def read_from_url(url):
    """Open ``url`` with rasterio and run ``extractor`` on the dataset.

    ``extractor`` and ``coord`` are not defined in this function; presumably
    they come from an enclosing scope -- confirm against the surrounding code.

    Returns:
        Whatever ``extractor(src, coord=coord)`` returns, where ``src`` is the
        opened dataset.
    """
    parsed_path = rasterio.parse_path(url)
    reader = rasterio.DatasetReader(parsed_path, sharing=False)
    # The reader is used as a context manager for the duration of the call.
    with reader as src:
        return extractor(src, coord=coord)