def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
    """
    Return the paths of required bands.

    .. WARNING:: This function orthorectifies SAR bands if not existing !

    .. code-block:: python

        >>> from eoreader.reader import Reader
        >>> from eoreader.bands.alias import *
        >>> path = r"S1A_IW_GRDH_1SDV_20191215T060906_20191215T060931_030355_0378F7_3696.zip"
        >>> prod = Reader().open(path)
        >>> prod.get_band_paths([VV, HH])
        {
            <SarBandNames.VV: 'VV'>: '20191215T060906_S1_IW_GRD\\20191215T060906_S1_IW_GRD_VV.tif'  # HH doesn't exist
        }

    Args:
        band_list (list): List of the wanted bands
        resolution (float): Band resolution

    Returns:
        dict: Dictionary containing the path of each queried band

    Raises:
        InvalidProductError: If a queried band does not exist for this product
    """
    band_paths = {}
    for band in band_list:
        bname = self.band_names[band]
        if bname is None:
            raise InvalidProductError(
                f"Non existing band ({band.name}) for {self.name}")
        try:
            # Try to load orthorectified bands
            band_paths[band] = files.get_file_in_dir(
                self._get_band_folder(),
                f"{self.condensed_name}_{bname}.tif",
                exact_name=True,
            )
        except FileNotFoundError:
            speckle_band = sbn.corresponding_speckle(band)
            if speckle_band in self.pol_channels:
                if sbn.is_despeckle(band):
                    # Despeckle the noisy band
                    band_paths[band] = self._despeckle_sar(speckle_band)
                else:
                    # Orthorectify the missing bands
                    all_band_paths = self._pre_process_sar(resolution)
                    # BUGFIX: merge instead of reassigning `band_paths`.
                    # Reassigning dropped bands already collected in previous
                    # iterations (e.g. despeckled bands that are not part of
                    # the pre-processed outputs).
                    band_paths.update({
                        band: path
                        for band, path in all_band_paths.items()
                        if band in band_list
                    })

    return band_paths
def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
    """
    Return the paths of required bands.

    .. WARNING:: If not existing, this function will orthorectify your bands !

    .. code-block:: python

        >>> from eoreader.reader import Reader
        >>> from eoreader.bands.alias import *
        >>> path = "S3B_SL_1_RBT____20191115T233722_20191115T234022_20191117T031722_0179_032_144_3420_LN2_O_NT_003.SEN3"
        >>> prod = Reader().open(path)
        >>> prod.get_band_paths([GREEN, RED])
        Executing processing graph
        ...11%...21%...31%...42%...52%...62%...73%...83%... done.
        {
            <OpticalBandNames.GREEN: 'GREEN'>: '20191115T233722_S3_SLSTR_RBT\\S1_reflectance.tif',
            <OpticalBandNames.RED: 'RED'>: '20191115T233722_S3_SLSTR_RBT\\S2_reflectance.tif',
        }

    Args:
        band_list (list): List of the wanted bands
        resolution (float): Useless here

    Returns:
        dict: Dictionary containing the path of each queried band
    """
    band_paths = {}

    # Set when at least one converted band (or its folder) is missing on disk
    missing_band = False
    for band in band_list:
        # Standard on-disk file name of the band
        filename = self._get_band_filename(band)
        try:
            # Look for an already converted image
            band_paths[band] = files.get_file_in_dir(
                self._get_band_folder(), f"{filename}.tif")
        except (FileNotFoundError, TypeError):
            missing_band = True

    # If anything is missing (file or output), run the SNAP conversion
    if missing_band:
        converted_paths = self._preprocess_s3(resolution)
        band_paths = {band: converted_paths[band] for band in band_list}

    return band_paths
def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
    """
    Return the paths of required bands.

    .. code-block:: python

        >>> from eoreader.reader import Reader
        >>> from eoreader.bands.alias import *
        >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip"
        >>> prod = Reader().open(path)
        >>> prod.get_band_paths([GREEN, RED])
        {
            <OpticalBandNames.GREEN: 'GREEN'>: 'zip+file://S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip!/S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE/GRANULE/L1C_T30TTK_A027018_20200824T111345/IMG_DATA/T30TTK_20200824T110631_B03.jp2',
            <OpticalBandNames.RED: 'RED'>: 'zip+file://S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip!/S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE/GRANULE/L1C_T30TTK_A027018_20200824T111345/IMG_DATA/T30TTK_20200824T110631_B04.jp2'
        }

    Args:
        band_list (list): List of the wanted bands
        resolution (float): Band resolution

    Returns:
        dict: Dictionary containing the path of each queried band

    Raises:
        InvalidProductError: If a queried band cannot be found in the product
    """
    # Folder of each band, depending on the wanted resolution
    band_folders = self._get_res_band_folder(band_list, resolution)

    band_paths = {}
    for band in band_list:
        band_nb = self.band_names[band]
        try:
            if self.is_archived:
                jp2_path = files.get_archived_rio_path(
                    self.path,
                    f".*{band_folders[band]}.*_B{band_nb}.*.jp2",
                )
            else:
                jp2_path = files.get_file_in_dir(
                    band_folders[band],
                    "_B" + band_nb,
                    extension="jp2",
                )
        except (FileNotFoundError, IndexError) as err:
            raise InvalidProductError(
                f"Non existing {band} ({self.band_names[band]}) band for {self.path}"
            ) from err
        band_paths[band] = jp2_path

    return band_paths
def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
    """
    Return the paths of required bands.

    .. code-block:: python

        >>> from eoreader.reader import Reader
        >>> from eoreader.bands.alias import *
        >>> path = r"SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2"
        >>> prod = Reader().open(path)
        >>> prod.get_band_paths([GREEN, RED])
        {
            <OpticalBandNames.GREEN: 'GREEN'>: 'SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2\\SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2_FRE_B3.tif',
            <OpticalBandNames.RED: 'RED'>: 'SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2\\SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2_FRE_B4.tif'
        }

    Args:
        band_list (list): List of the wanted bands
        resolution (float): Band resolution

    Returns:
        dict: Dictionary containing the path of each queried band

    Raises:
        InvalidProductError: If a queried band cannot be found in the product
    """
    band_paths = {}
    for band in band_list:
        try:
            if self.is_archived:
                # BUGFIX: raw f-string. "\." inside a regular string literal
                # is an invalid escape sequence (DeprecationWarning, and a
                # SyntaxWarning since Python 3.12); the runtime regex is
                # unchanged.
                band_paths[band] = files.get_archived_rio_path(
                    self.path, rf".*FRE_B{self.band_names[band]}\.tif"
                )
            else:
                band_paths[band] = files.get_file_in_dir(
                    self.path, f"FRE_B{self.band_names[band]}.tif"
                )
        except (FileNotFoundError, IndexError) as ex:
            raise InvalidProductError(
                f"Non existing {band} ({self.band_names[band]}) band for {self.path}"
            ) from ex

    return band_paths
def _get_raw_band_paths(self) -> dict:
    """
    Return the existing band paths (as they come with the archived products).

    Returns:
        dict: Dictionary containing the path of every band existing in the raw products
    """
    extended_fmt = _ExtendedFormatter()
    raw_paths = {}
    for band, band_name in self.band_names.items():
        # Build the regex of the raw band file from the band name
        band_regex = extended_fmt.format(self._raw_band_regex, band_name)

        if self.is_archived:
            if not self.path.endswith(".zip"):
                raise InvalidProductError(
                    f"Only zipped products can be processed without extraction: {self.path}"
                )

            # Browse the zip file and keep the first entry matching the regex
            with zipfile.ZipFile(self.path, "r") as zip_ds:
                names = [f.filename for f in zip_ds.filelist]
                pattern = re.compile(band_regex.replace("*", ".*"))
                matches = [name for name in names if pattern.match(name)]
                if not matches:
                    # Band not present in this product: skip it
                    continue
                raw_paths[band] = matches[0]
        else:
            try:
                raw_paths[band] = files.get_file_in_dir(self._band_folder,
                                                        band_regex,
                                                        exact_name=True,
                                                        get_list=True)
            except FileNotFoundError:
                # Band not present in this product: skip it
                continue

    return raw_paths
def _get_path(self, band_id: str) -> str:
    """
    Get either the archived path or the normal path of a tif file

    Args:
        band_id (str): Band ID

    Returns:
        str: band path
    """
    if self.is_archived:
        # L7 (ETM) products also ship gap_mask files with the same name
        # structure: the 'RT' marker filters them out
        if self.product_type == LandsatProductType.L1_ETM:
            regex = f".*RT{band_id}.*"
        else:
            regex = f".*{band_id}.*"
        return files.get_archived_rio_path(self.path, regex)

    return files.get_file_in_dir(self.path, band_id, extension="TIF")
def get_mask_path(self, mask_id: str, res_id: str) -> str:
    """
    Get mask path from its id and file_id (`R1` for 10m resolution, `R2` for 20m resolution)

    Accepted mask IDs:

    - `DFP`: Defective pixels (do not always exist ! Will raise `InvalidProductError` if not)
    - `EDG`: Nodata pixels mask
    - `SAT`: Saturated pixels mask
    - `MG2`: Geophysical mask (classification)
    - `IAB`: Mask where water vapor and TOA pixels have been interpolated
    - `CLM`: Cloud mask

    Args:
        mask_id (str): Mask ID
        res_id (str): Resolution ID (`R1` or `R2`)

    Returns:
        str: Mask path

    Raises:
        InvalidProductError: If the mask does not exist in the product
    """
    assert res_id in ["R1", "R2"]

    # Filename pattern of the mask
    mask_regex = f"*{mask_id}_{res_id}.tif"
    try:
        if self.is_archived:
            return files.get_archived_rio_path(
                self.path, mask_regex.replace("*", ".*")
            )
        return files.get_file_in_dir(
            os.path.join(self.path, "MASKS"), mask_regex, exact_name=True
        )
    except (FileNotFoundError, IndexError) as err:
        raise InvalidProductError(
            f"Non existing mask {mask_regex} in {self.name}"
        ) from err
def _test_core(pattern: str, prod_dir: str, possible_bands: list, debug: bool = False) -> None:
    """
    Core function testing all data.

    Opens every product matching `pattern` in `prod_dir`, then checks reader
    consistency, extent, footprint, band stacking and CRS against reference
    files stored in the CI data directory.

    Args:
        pattern (str): Pattern of the satellite
        prod_dir (str): Product directory
        possible_bands(list): Possible bands
        debug (bool): Debug option
    """
    with xr.set_options(warn_for_unclosed_files=debug):
        # Init logger
        logs.init_logger(LOGGER)

        # DATA paths
        pattern_paths = files.get_file_in_dir(prod_dir, pattern,
                                              exact_name=True,
                                              get_list=True)

        for path in pattern_paths:
            LOGGER.info(os.path.basename(path))

            # Open product and set output:
            # the three check methods must all resolve to the same product
            prod: Product = READER.open(path, method=CheckMethod.MTD)
            prod_name = READER.open(path, method=CheckMethod.NAME)
            prod_both = READER.open(path, method=CheckMethod.BOTH)
            assert prod is not None
            assert prod == prod_name
            assert prod == prod_both

            # Discard the case where an invalid file/directory is in the CI folder
            if prod is not None:
                with tempfile.TemporaryDirectory() as tmp_dir:
                    # tmp_dir = os.path.join(get_ci_data_dir(), "OUTPUT")
                    prod.output = tmp_dir

                    # S3 and SAR bands are too slow to recompute in CI:
                    # point the band folder env var at the pre-computed data
                    if (prod.platform == Platform.S3
                            or prod.sensor_type == SensorType.SAR):
                        os.environ[CI_EOREADER_BAND_FOLDER] = os.path.join(
                            get_ci_data_dir(), prod.condensed_name)
                    else:
                        if CI_EOREADER_BAND_FOLDER in os.environ:
                            os.environ.pop(CI_EOREADER_BAND_FOLDER)

                    # Extent
                    LOGGER.info("Checking extent")
                    extent = prod.extent()
                    extent_path = os.path.join(get_ci_data_dir(),
                                               prod.condensed_name,
                                               "extent.geojson")
                    # First run: write the reference extent
                    if not os.path.isfile(extent_path):
                        extent.to_file(extent_path, driver="GeoJSON")

                    try:
                        ci.assert_geom_equal(extent, gpd.read_file(extent_path))
                    except AssertionError:
                        # Fall back to an approximate comparison
                        assert_geom_almost_equal(
                            extent, gpd.read_file(extent_path))  # TODO: WHY ???

                    # Footprint
                    LOGGER.info("Checking footprint")
                    footprint = prod.footprint()
                    footprint_path = os.path.join(get_ci_data_dir(),
                                                  prod.condensed_name,
                                                  "footprint.geojson")
                    # First run: write the reference footprint
                    if not os.path.isfile(footprint_path):
                        footprint.to_file(footprint_path, driver="GeoJSON")

                    try:
                        ci.assert_geom_equal(footprint,
                                             gpd.read_file(footprint_path))
                    except AssertionError:
                        assert_geom_almost_equal(
                            footprint,
                            gpd.read_file(
                                footprint_path))  # Has not happen for now

                    # Remove DEM tifs if existing
                    remove_dem(prod)

                    # Get stack bands
                    LOGGER.info("Checking load and stack")
                    # DO NOT RECOMPUTE BANDS WITH SNAP --> WAY TOO SLOW
                    stack_bands = [
                        band for band in possible_bands if prod.has_band(band)
                    ]

                    # Manage S3 resolution to speed up processes
                    # (coarse resolutions, exported through env vars)
                    if prod.sensor_type == SensorType.SAR:
                        res = 1000.0
                        os.environ[SAR_DEF_RES] = str(res)
                    else:
                        res = prod.resolution * 50
                        os.environ[S3_DEF_RES] = str(res)

                    # Stack data (in debug mode, write next to the CI reference)
                    ci_data = os.path.join(get_ci_data_dir(),
                                           prod.condensed_name, "stack.tif")
                    if debug:
                        curr_path = os.path.join(get_ci_data_dir(),
                                                 prod.condensed_name,
                                                 "stack.tif")
                    else:
                        curr_path = os.path.join(
                            tmp_dir, f"{prod.condensed_name}_stack.tif")
                    prod.stack(stack_bands, resolution=res,
                               stack_path=curr_path)

                    # Test the stack against the CI reference raster
                    ci.assert_raster_almost_equal(curr_path, ci_data,
                                                  decimal=4)

                    # CRS
                    LOGGER.info("Checking CRS")
                    assert prod.crs().is_projected
def open_mask(self, mask_str: str, band: Union[obn, str]) -> gpd.GeoDataFrame: """ Open S2 mask (GML files stored in QI_DATA) as `gpd.GeoDataFrame`. Masks than can be called that way are: - `TECQUA`: Technical quality mask - `SATURA`: Saturated Pixels - `NODATA`: Pixel nodata (inside the detectors) - `DETFOO`: Detectors footprint -> used to process nodata outside the detectors - `DEFECT`: Defective pixels - `CLOUDS`, **only with `00` as a band !** .. code-block:: python >>> from eoreader.reader import Reader >>> from eoreader.bands.alias import * >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip" >>> prod.open_mask("NODATA", GREEN) Empty GeoDataFrame Columns: [geometry] Index: [] >>> prod.open_mask("SATURA", GREEN) Empty GeoDataFrame Columns: [geometry] Index: [] >>> prod.open_mask("DETFOO", GREEN) gml_id ... geometry 0 detector_footprint-B03-02-0 ... POLYGON Z ((199980.000 4500000.000 0.000, 1999... 1 detector_footprint-B03-03-1 ... POLYGON Z ((222570.000 4500000.000 0.000, 2225... 2 detector_footprint-B03-05-2 ... POLYGON Z ((273050.000 4500000.000 0.000, 2730... 3 detector_footprint-B03-07-3 ... POLYGON Z ((309770.000 4453710.000 0.000, 3097... 4 detector_footprint-B03-04-4 ... POLYGON Z ((248080.000 4500000.000 0.000, 2480... 5 detector_footprint-B03-06-5 ... POLYGON Z ((297980.000 4500000.000 0.000, 2979... [6 rows x 3 columns] Args: mask_str (str): Mask name, such as DEFECT, NODATA, SATURA... 
band (Union[obn, str]): Band number as an OpticalBandNames or str (for clouds: 00) Returns: gpd.GeoDataFrame: Mask as a vector """ # Check inputs assert mask_str in [ "DEFECT", "DETFOO", "NODATA", "SATURA", "TECQUA", "CLOUDS" ] if mask_str == "CLOUDS": band = "00" def _open_mask(fct, *args, **kwargs): # Read the GML file try: # Discard some weird error concerning a NULL pointer that outputs a ValueError (as we already except it) fiona_logger = logging.getLogger("fiona._env") fiona_logger.setLevel(logging.CRITICAL) # Read mask mask = fct(*args, **kwargs) # Set fiona logger back to what it was fiona_logger.setLevel(logging.INFO) except (ValueError, UnsupportedGeometryTypeError): mask = gpd.GeoDataFrame(geometry=[], crs=self.crs()) return mask # Get QI_DATA path if isinstance(band, obn): band_name = self.band_names[band] else: band_name = band if self.is_archived: # Open the zip file # WE DON'T KNOW WHY BUT DO NOT USE files.read_archived_vector HERE !!! with zipfile.ZipFile(self.path, "r") as zip_ds: filenames = [f.filename for f in zip_ds.filelist] regex = re.compile( f".*GRANULE.*QI_DATA.*MSK_{mask_str}_B{band_name}.gml") with zip_ds.open(list(filter(regex.match, filenames))[0]) as mask_path: mask = _open_mask(gpd.read_file, mask_path) # mask = _open_mask(files.read_archived_vector, # self.path, # f".*GRANULE.*QI_DATA.*MSK_{mask_str}_B{band_name}\.gml") else: qi_data_path = os.path.join(self.path, "GRANULE", "*", "QI_DATA") # Get mask path mask_path = files.get_file_in_dir( qi_data_path, f"MSK_{mask_str}_B{band_name}.gml", exact_name=True) mask = _open_mask(gpd.read_file, mask_path) return mask
def _load_clouds(self, bands: list, resolution: float = None, size: Union[list, tuple] = None) -> dict:
    """
    Load cloud files as xarrays.

    Read S3 SLSTR clouds from the flags file:cloud netcdf file.
    https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-3-slstr/level-1/cloud-identification

    bit_id  flag_masks (ushort)  flag_meanings
    ===     ===                  ===
    0       1US                  visible
    1       2US                  1.37_threshold
    2       4US                  1.6_small_histogram
    3       8US                  1.6_large_histogram
    4       16US                 2.25_small_histogram
    5       32US                 2.25_large_histogram
    6       64US                 11_spatial_coherence
    7       128US                gross_cloud
    8       256US                thin_cirrus
    9       512US                medium_high
    10      1024US               fog_low_stratus
    11      2048US               11_12_view_difference
    12      4096US               3.7_11_view_difference
    13      8192US               thermal_histogram
    14      16384US              spare
    15      32768US              spare

    Args:
        bands (list): List of the wanted bands
        resolution (int): Band resolution in meters
        size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided.

    Returns:
        dict: Dictionary {band_name, band_xarray}
    """
    band_dict = {}

    if bands:
        # Fail fast: OLCI products have no cloud file at all
        if self._instrument_name == S3Instrument.OLCI:
            raise InvalidTypeError(
                "Sentinel-3 OLCI sensor does not provide any cloud file.")

        # Bit ids of the usable flags (bits 14 and 15 are spare, see table)
        all_flag_ids = list(np.arange(0, 14))
        cirrus_id = 8  # thin_cirrus bit
        cloud_flag_ids = [fid for fid in all_flag_ids if fid != cirrus_id]

        # Reuse an already pre-processed cloud raster when available,
        # otherwise run the S3 pre-processing first
        try:
            cloud_path = files.get_file_in_dir(self._get_band_folder(),
                                               "cloud_RAD.tif")
        except FileNotFoundError:
            self._preprocess_s3(resolution)
            cloud_path = files.get_file_in_dir(self.output, "cloud_RAD.tif")

        if not cloud_path:
            raise FileNotFoundError(
                f"Unable to find the cloud mask for {self.path}")

        # Open cloud file (bit field -> unsigned 16-bit cast)
        clouds_array = rasters.read(
            cloud_path,
            resolution=resolution,
            size=size,
            resampling=Resampling.nearest,
            masked=False,
        ).astype(np.uint16)

        # Nodata mask: pixels whose raw flag value is 65535 (all bits set)
        nodata = np.where(clouds_array == 65535, 1, 0)

        for band in bands:
            if band == ALL_CLOUDS:
                band_dict[band] = self._create_mask(clouds_array,
                                                    all_flag_ids,
                                                    nodata)
            elif band == CLOUDS:
                band_dict[band] = self._create_mask(clouds_array,
                                                    cloud_flag_ids,
                                                    nodata)
            elif band == CIRRUS:
                band_dict[band] = self._create_mask(clouds_array,
                                                    cirrus_id,
                                                    nodata)
            elif band == RAW_CLOUDS:
                band_dict[band] = clouds_array
            else:
                raise InvalidTypeError(
                    f"Non existing cloud band for Sentinel-3 SLSTR: {band}")

    return band_dict