Beispiel #1
0
    def get_band_paths(self,
                       band_list: list,
                       resolution: float = None) -> dict:
        """
        Return the paths of required bands.

        .. WARNING:: This functions orthorectifies SAR bands if not existing !

        .. code-block:: python

            >>> from eoreader.reader import Reader
            >>> from eoreader.bands.alias import *
            >>> path = r"S1A_IW_GRDH_1SDV_20191215T060906_20191215T060931_030355_0378F7_3696.zip"
            >>> prod = Reader().open(path)
            >>> prod.get_band_paths([VV, HH])
            {
                <SarBandNames.VV: 'VV'>: '20191215T060906_S1_IW_GRD\\20191215T060906_S1_IW_GRD_VV.tif'  # HH doesn't exist
            }

        Args:
            band_list (list): List of the wanted bands
            resolution (float): Band resolution

        Returns:
            dict: Dictionary containing the path of each queried band
        """
        band_paths = {}
        for band in band_list:
            bname = self.band_names[band]
            if bname is None:
                raise InvalidProductError(
                    f"Non existing band ({band.name}) for {self.name}")
            try:
                # Try to load orthorectified bands
                band_paths[band] = files.get_file_in_dir(
                    self._get_band_folder(),
                    f"{self.condensed_name}_{bname}.tif",
                    exact_name=True,
                )
            except FileNotFoundError:
                speckle_band = sbn.corresponding_speckle(band)
                if speckle_band in self.pol_channels:
                    if sbn.is_despeckle(band):
                        # Despeckle the noisy band
                        band_paths[band] = self._despeckle_sar(speckle_band)
                    else:
                        all_band_paths = self._pre_process_sar(resolution)
                        band_paths = {
                            band: path
                            for band, path in all_band_paths.items()
                            if band in band_list
                        }

        return band_paths
Beispiel #2
0
    def get_band_paths(self,
                       band_list: list,
                       resolution: float = None) -> dict:
        """
        Return the paths of required bands.

        .. WARNING:: If not existing, this function will orthorectify your bands !

        .. code-block:: python

            >>> from eoreader.reader import Reader
            >>> from eoreader.bands.alias import *
            >>> path = "S3B_SL_1_RBT____20191115T233722_20191115T234022_20191117T031722_0179_032_144_3420_LN2_O_NT_003.SEN3"
            >>> prod = Reader().open(path)
            >>> prod.get_band_paths([GREEN, RED])
            Executing processing graph
            ...11%...21%...31%...42%...52%...62%...73%...83%... done.
            {
                <OpticalBandNames.GREEN: 'GREEN'>: '20191115T233722_S3_SLSTR_RBT\\S1_reflectance.tif',
                <OpticalBandNames.RED: 'RED'>: '20191115T233722_S3_SLSTR_RBT\\S2_reflectance.tif',
            }

        Args:
            band_list (list): List of the wanted bands
            resolution (float): Useless here

        Returns:
            dict: Dictionary containing the path of each queried band
        """
        band_paths = {}
        use_snap = False
        for band in band_list:
            # Get standard band names
            band_name = self._get_band_filename(band)

            try:
                # Try to open converted images
                band_paths[band] = files.get_file_in_dir(
                    self._get_band_folder(), band_name + ".tif")
            except (FileNotFoundError, TypeError):
                use_snap = True

        # If not existing (file or output), convert them
        if use_snap:
            all_band_paths = self._preprocess_s3(resolution)
            band_paths = {band: all_band_paths[band] for band in band_list}

        return band_paths
Beispiel #3
0
    def get_band_paths(self,
                       band_list: list,
                       resolution: float = None) -> dict:
        """
        Return the paths of required bands.

        .. code-block:: python

            >>> from eoreader.reader import Reader
            >>> from eoreader.bands.alias import *
            >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip"
            >>> prod = Reader().open(path)
            >>> prod.get_band_paths([GREEN, RED])
            {
                <OpticalBandNames.GREEN: 'GREEN'>: 'zip+file://S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip!/S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE/GRANULE/L1C_T30TTK_A027018_20200824T111345/IMG_DATA/T30TTK_20200824T110631_B03.jp2',
                <OpticalBandNames.RED: 'RED'>: 'zip+file://S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip!/S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE/GRANULE/L1C_T30TTK_A027018_20200824T111345/IMG_DATA/T30TTK_20200824T110631_B04.jp2'
            }

        Args:
            band_list (list): List of the wanted bands
            resolution (float): Band resolution

        Returns:
            dict: Dictionary containing the path of each queried band
        """
        band_folders = self._get_res_band_folder(band_list, resolution)
        band_paths = {}
        for band in band_list:
            try:
                if self.is_archived:
                    band_paths[band] = files.get_archived_rio_path(
                        self.path,
                        f".*{band_folders[band]}.*_B{self.band_names[band]}.*.jp2",
                    )
                else:
                    band_paths[band] = files.get_file_in_dir(
                        band_folders[band],
                        "_B" + self.band_names[band],
                        extension="jp2",
                    )
            except (FileNotFoundError, IndexError) as ex:
                raise InvalidProductError(
                    f"Non existing {band} ({self.band_names[band]}) band for {self.path}"
                ) from ex

        return band_paths
Beispiel #4
0
    def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
        """
        Return the paths of required bands.

        .. code-block:: python

            >>> from eoreader.reader import Reader
            >>> from eoreader.bands.alias import *
            >>> path = r"SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2"
            >>> prod = Reader().open(path)
            >>> prod.get_band_paths([GREEN, RED])
            {
                <OpticalBandNames.GREEN: 'GREEN'>: 'SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2\\SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2_FRE_B3.tif',
                <OpticalBandNames.RED: 'RED'>: 'SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2\\SENTINEL2A_20190625-105728-756_L2A_T31UEQ_C_V2-2_FRE_B4.tif'
            }

        Args:
            band_list (list): List of the wanted bands
            resolution (float): Band resolution

        Returns:
            dict: Dictionary containing the path of each queried band
        """
        band_paths = {}
        for band in band_list:
            try:
                if self.is_archived:
                    band_paths[band] = files.get_archived_rio_path(
                        self.path, f".*FRE_B{self.band_names[band]}\.tif"
                    )
                else:
                    band_paths[band] = files.get_file_in_dir(
                        self.path, f"FRE_B{self.band_names[band]}.tif"
                    )
            except (FileNotFoundError, IndexError) as ex:
                raise InvalidProductError(
                    f"Non existing {band} ({self.band_names[band]}) band for {self.path}"
                ) from ex

        return band_paths
Beispiel #5
0
    def _get_raw_band_paths(self) -> dict:
        """
        Return the existing band paths (as they come with th archived products).

        Returns:
            dict: Dictionary containing the path of every band existing in the raw products
        """
        extended_fmt = _ExtendedFormatter()
        band_paths = {}
        for band, band_name in self.band_names.items():
            band_regex = extended_fmt.format(self._raw_band_regex, band_name)

            if self.is_archived:
                if self.path.endswith(".zip"):
                    # Open the zip file
                    with zipfile.ZipFile(self.path, "r") as zip_ds:
                        # Get the correct band path
                        regex = re.compile(band_regex.replace("*", ".*"))
                        try:
                            band_paths[band] = list(
                                filter(regex.match,
                                       [f.filename
                                        for f in zip_ds.filelist]))[0]
                        except IndexError:
                            continue
                else:
                    raise InvalidProductError(
                        f"Only zipped products can be processed without extraction: {self.path}"
                    )
            else:
                try:
                    band_paths[band] = files.get_file_in_dir(self._band_folder,
                                                             band_regex,
                                                             exact_name=True,
                                                             get_list=True)
                except FileNotFoundError:
                    continue

        return band_paths
Beispiel #6
0
    def _get_path(self, band_id: str) -> str:
        """
        Get either the archived path of the normal path of a tif file

        Args:
            band_id (str): Band ID

        Returns:
            str: band path

        """
        if self.is_archived:
            # Because of gap_mask files that have the same name structure and exists only for L7
            if self.product_type == LandsatProductType.L1_ETM:
                regex = f".*RT{band_id}.*"
            else:
                regex = f".*{band_id}.*"
            path = files.get_archived_rio_path(self.path, regex)
        else:
            path = files.get_file_in_dir(self.path, band_id, extension="TIF")

        return path
Beispiel #7
0
    def get_mask_path(self, mask_id: str, res_id: str) -> str:
        """
        Get mask path from its id and file_id (`R1` for 10m resolution, `R2` for 20m resolution)

        Accepted mask IDs:

        - `DFP`: Defective pixels (do not always exist ! Will raise `InvalidProductError` if not)
        - `EDG`: Nodata pixels mask
        - `SAT`: Saturated pixels mask
        - `MG2`: Geophysical mask (classification)
        - `IAB`: Mask where water vapor and TOA pixels have been interpolated
        - `CLM`: Cloud mask

        Args:
            mask_id (str): Mask ID
            res_id (str): Resolution ID (`R1` or `R2`)

        Returns:
            str: Mask path
        """
        assert res_id in ["R1", "R2"]

        mask_regex = f"*{mask_id}_{res_id}.tif"
        try:
            if self.is_archived:
                mask_path = files.get_archived_rio_path(
                    self.path, mask_regex.replace("*", ".*")
                )
            else:
                mask_path = files.get_file_in_dir(
                    os.path.join(self.path, "MASKS"), mask_regex, exact_name=True
                )
        except (FileNotFoundError, IndexError) as ex:
            raise InvalidProductError(
                f"Non existing mask {mask_regex} in {self.name}"
            ) from ex

        return mask_path
Beispiel #8
0
def _test_core(pattern: str, prod_dir: str, possible_bands: list, debug=False):
    """
    Core function testing all data
    Args:
        pattern (str): Pattern of the satellite
        prod_dir (str): Product directory
        possible_bands(list): Possible bands
        debug (bool): Debug option
    """
    with xr.set_options(warn_for_unclosed_files=debug):

        # Init logger
        logs.init_logger(LOGGER)

        # DATA paths
        pattern_paths = files.get_file_in_dir(prod_dir,
                                              pattern,
                                              exact_name=True,
                                              get_list=True)

        for path in pattern_paths:
            LOGGER.info(os.path.basename(path))

            # Open product and set output
            prod: Product = READER.open(path, method=CheckMethod.MTD)
            prod_name = READER.open(path, method=CheckMethod.NAME)
            prod_both = READER.open(path, method=CheckMethod.BOTH)
            assert prod is not None
            assert prod == prod_name
            assert prod == prod_both

            # Discard the case where an invalid file/directory is in the CI folder
            if prod is not None:
                with tempfile.TemporaryDirectory() as tmp_dir:
                    # tmp_dir = os.path.join(get_ci_data_dir(), "OUTPUT")
                    prod.output = tmp_dir
                    if (prod.platform == Platform.S3
                            or prod.sensor_type == SensorType.SAR):
                        os.environ[CI_EOREADER_BAND_FOLDER] = os.path.join(
                            get_ci_data_dir(), prod.condensed_name)
                    else:
                        if CI_EOREADER_BAND_FOLDER in os.environ:
                            os.environ.pop(CI_EOREADER_BAND_FOLDER)

                    # Extent
                    LOGGER.info("Checking extent")
                    extent = prod.extent()
                    extent_path = os.path.join(get_ci_data_dir(),
                                               prod.condensed_name,
                                               "extent.geojson")
                    if not os.path.isfile(extent_path):
                        extent.to_file(extent_path, driver="GeoJSON")

                    try:
                        ci.assert_geom_equal(extent,
                                             gpd.read_file(extent_path))
                    except AssertionError:
                        assert_geom_almost_equal(
                            extent,
                            gpd.read_file(extent_path))  # TODO: WHY ???

                    # Footprint
                    LOGGER.info("Checking footprint")
                    footprint = prod.footprint()
                    footprint_path = os.path.join(get_ci_data_dir(),
                                                  prod.condensed_name,
                                                  "footprint.geojson")
                    if not os.path.isfile(footprint_path):
                        footprint.to_file(footprint_path, driver="GeoJSON")

                    try:
                        ci.assert_geom_equal(footprint,
                                             gpd.read_file(footprint_path))
                    except AssertionError:
                        assert_geom_almost_equal(
                            footprint, gpd.read_file(
                                footprint_path))  # Has not happen for now

                    # Remove DEM tifs if existing
                    remove_dem(prod)

                    # Get stack bands
                    LOGGER.info("Checking load and stack")
                    # DO NOT RECOMPUTE BANDS WITH SNAP --> WAY TOO SLOW
                    stack_bands = [
                        band for band in possible_bands if prod.has_band(band)
                    ]

                    # Manage S3 resolution to speed up processes
                    if prod.sensor_type == SensorType.SAR:
                        res = 1000.0
                        os.environ[SAR_DEF_RES] = str(res)
                    else:
                        res = prod.resolution * 50
                        os.environ[S3_DEF_RES] = str(res)

                    # Stack data
                    ci_data = os.path.join(get_ci_data_dir(),
                                           prod.condensed_name, "stack.tif")
                    if debug:
                        curr_path = os.path.join(get_ci_data_dir(),
                                                 prod.condensed_name,
                                                 "stack.tif")
                    else:
                        curr_path = os.path.join(
                            tmp_dir, f"{prod.condensed_name}_stack.tif")
                    prod.stack(stack_bands,
                               resolution=res,
                               stack_path=curr_path)

                    # Test
                    ci.assert_raster_almost_equal(curr_path,
                                                  ci_data,
                                                  decimal=4)

                # CRS
                LOGGER.info("Checking CRS")
                assert prod.crs().is_projected
Beispiel #9
0
    def open_mask(self, mask_str: str, band: Union[obn,
                                                   str]) -> gpd.GeoDataFrame:
        """
        Open S2 mask (GML files stored in QI_DATA) as `gpd.GeoDataFrame`.

        Masks than can be called that way are:

        - `TECQUA`: Technical quality mask
        - `SATURA`: Saturated Pixels
        - `NODATA`: Pixel nodata (inside the detectors)
        - `DETFOO`: Detectors footprint -> used to process nodata outside the detectors
        - `DEFECT`: Defective pixels
        - `CLOUDS`, **only with `00` as a band !**

        .. code-block:: python

            >>> from eoreader.reader import Reader
            >>> from eoreader.bands.alias import *
            >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip"
            >>> prod.open_mask("NODATA", GREEN)
            Empty GeoDataFrame
            Columns: [geometry]
            Index: []
            >>> prod.open_mask("SATURA", GREEN)
            Empty GeoDataFrame
            Columns: [geometry]
            Index: []
            >>> prod.open_mask("DETFOO", GREEN)
                                    gml_id  ...                                           geometry
            0  detector_footprint-B03-02-0  ...  POLYGON Z ((199980.000 4500000.000 0.000, 1999...
            1  detector_footprint-B03-03-1  ...  POLYGON Z ((222570.000 4500000.000 0.000, 2225...
            2  detector_footprint-B03-05-2  ...  POLYGON Z ((273050.000 4500000.000 0.000, 2730...
            3  detector_footprint-B03-07-3  ...  POLYGON Z ((309770.000 4453710.000 0.000, 3097...
            4  detector_footprint-B03-04-4  ...  POLYGON Z ((248080.000 4500000.000 0.000, 2480...
            5  detector_footprint-B03-06-5  ...  POLYGON Z ((297980.000 4500000.000 0.000, 2979...
            [6 rows x 3 columns]

        Args:
            mask_str (str): Mask name, such as DEFECT, NODATA, SATURA...
            band (Union[obn, str]): Band number as an OpticalBandNames or str (for clouds: 00)

        Returns:
            gpd.GeoDataFrame: Mask as a vector
        """
        # Check inputs
        assert mask_str in [
            "DEFECT", "DETFOO", "NODATA", "SATURA", "TECQUA", "CLOUDS"
        ]
        if mask_str == "CLOUDS":
            band = "00"

        def _open_mask(fct, *args, **kwargs):
            # Read the GML file
            try:
                # Discard some weird error concerning a NULL pointer that outputs a ValueError (as we already except it)
                fiona_logger = logging.getLogger("fiona._env")
                fiona_logger.setLevel(logging.CRITICAL)

                # Read mask
                mask = fct(*args, **kwargs)

                # Set fiona logger back to what it was
                fiona_logger.setLevel(logging.INFO)
            except (ValueError, UnsupportedGeometryTypeError):
                mask = gpd.GeoDataFrame(geometry=[], crs=self.crs())

            return mask

        # Get QI_DATA path
        if isinstance(band, obn):
            band_name = self.band_names[band]
        else:
            band_name = band

        if self.is_archived:
            # Open the zip file
            # WE DON'T KNOW WHY BUT DO NOT USE files.read_archived_vector HERE !!!
            with zipfile.ZipFile(self.path, "r") as zip_ds:
                filenames = [f.filename for f in zip_ds.filelist]
                regex = re.compile(
                    f".*GRANULE.*QI_DATA.*MSK_{mask_str}_B{band_name}.gml")
                with zip_ds.open(list(filter(regex.match,
                                             filenames))[0]) as mask_path:
                    mask = _open_mask(gpd.read_file, mask_path)

            # mask = _open_mask(files.read_archived_vector,
            #                   self.path,
            #                   f".*GRANULE.*QI_DATA.*MSK_{mask_str}_B{band_name}\.gml")
        else:
            qi_data_path = os.path.join(self.path, "GRANULE", "*", "QI_DATA")

            # Get mask path
            mask_path = files.get_file_in_dir(
                qi_data_path,
                f"MSK_{mask_str}_B{band_name}.gml",
                exact_name=True)

            mask = _open_mask(gpd.read_file, mask_path)

        return mask
Beispiel #10
0
    def _load_clouds(self,
                     bands: list,
                     resolution: float = None,
                     size: Union[list, tuple] = None) -> dict:
        """
        Load cloud files as xarrays.

        Read S3 SLSTR clouds from the flags file:cloud netcdf file.
        https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-3-slstr/level-1/cloud-identification

        bit_id  flag_masks (ushort)     flag_meanings
        ===     ===                     ===
        0       1US                     visible
        1       2US                     1.37_threshold
        2       4US                     1.6_small_histogram
        3       8US                     1.6_large_histogram
        4       16US                    2.25_small_histogram
        5       32US                    2.25_large_histogram
        6       64US                    11_spatial_coherence
        7       128US                   gross_cloud
        8       256US                   thin_cirrus
        9       512US                   medium_high
        10      1024US                  fog_low_stratus
        11      2048US                  11_12_view_difference
        12      4096US                  3.7_11_view_difference
        13      8192US                  thermal_histogram
        14      16384US                 spare
        15      32768US                 spare

        Args:
            bands (list): List of the wanted bands
            resolution (int): Band resolution in meters
            size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided.
        Returns:
            dict: Dictionary {band_name, band_xarray}
        """
        band_dict = {}

        if bands:
            if self._instrument_name == S3Instrument.OLCI:
                raise InvalidTypeError(
                    "Sentinel-3 OLCI sensor does not provide any cloud file.")

            all_ids = list(np.arange(0, 14))
            cir_id = 8
            cloud_ids = [id for id in all_ids if id != cir_id]

            try:
                cloud_path = files.get_file_in_dir(self._get_band_folder(),
                                                   "cloud_RAD.tif")
            except FileNotFoundError:
                self._preprocess_s3(resolution)
                cloud_path = files.get_file_in_dir(self.output,
                                                   "cloud_RAD.tif")

            if not cloud_path:
                raise FileNotFoundError(
                    f"Unable to find the cloud mask for {self.path}")

            # Open cloud file
            clouds_array = rasters.read(
                cloud_path,
                resolution=resolution,
                size=size,
                resampling=Resampling.nearest,
                masked=False,
            ).astype(np.uint16)

            # Get nodata mask
            # nodata = np.where(np.isnan(clouds_array), 1, 0)
            nodata = np.where(clouds_array == 65535, 1, 0)

            for band in bands:
                if band == ALL_CLOUDS:
                    band_dict[band] = self._create_mask(
                        clouds_array, all_ids, nodata)
                elif band == CLOUDS:
                    band_dict[band] = self._create_mask(
                        clouds_array, cloud_ids, nodata)
                elif band == CIRRUS:
                    band_dict[band] = self._create_mask(
                        clouds_array, cir_id, nodata)
                elif band == RAW_CLOUDS:
                    band_dict[band] = clouds_array
                else:
                    raise InvalidTypeError(
                        f"Non existing cloud band for Sentinel-3 SLSTR: {band}"
                    )

        return band_dict