Example #1
0
    def create_water_tile(self, tile):
        """
        Write a Tassel Cap wetness raster for the given tile.

        The ARG25 and PQ25 datasets are taken from ``tile.datasets``, the band data
        is read through ``get_dataset_data_with_pq`` and the resulting wetness index
        is written as a single band Float32 raster with NaN as its no-data value.

        The output lands in ``self.output_directory`` and is named after the ARG25
        file, with "NBAR" replaced by "WETNESS" and ".vrt" replaced by ".tif".

        :param tile: tile whose ``datasets`` holds both an ARG25 and a PQ25 entry
        """
        nbar_dataset = tile.datasets[DatasetType.ARG25]
        pq_dataset = tile.datasets[DatasetType.PQ25]

        _log.info("ARG tile [%s]", nbar_dataset)
        _log.info("PQ tile [%s]", pq_dataset)

        # Derive the output name from the input name
        output_name = os.path.basename(nbar_dataset.path).replace("NBAR", "WETNESS").replace(".vrt", ".tif")
        output_path = os.path.join(self.output_directory, output_name)

        tile_metadata = get_dataset_metadata(nbar_dataset)

        band_data = get_dataset_data_with_pq(nbar_dataset, Ls57Arg25Bands, pq_dataset)

        # The wetness coefficients are specific to the satellite that acquired the tile
        wetness_coefficients = TCI_COEFFICIENTS[nbar_dataset.satellite][TasselCapIndex.WETNESS]
        wetness = calculate_tassel_cap_index(band_data, coefficients=wetness_coefficients)

        _log.info("TCI shape is %s | min = %s | max = %s", numpy.shape(wetness), wetness.min(), wetness.max())

        raster_create(output_path, [wetness],
                      tile_metadata.transform, tile_metadata.projection, numpy.nan, gdal.GDT_Float32)
Example #2
0
    def run(self):
        """
        Produce the Tassel Cap wetness raster for ``self.tile``.

        The ARG25 dataset is read with ``NDV`` as its no-data value.  When
        ``self.mask_pqa_apply`` is set and the tile carries a PQ25 dataset, a PQA
        mask built from ``self.mask_pqa_mask`` is applied while reading.  The
        wetness index is written to ``self.output().path`` as a single band
        Float32 raster with NaN as its no-data value.
        """
        nbar = self.tile.datasets[DatasetType.ARG25]

        _log.info("Processing tile [%s]", nbar.path)

        # Only pick up the PQ dataset when masking was requested and it is available
        pq_dataset = None

        if self.mask_pqa_apply:
            if DatasetType.PQ25 in self.tile.datasets:
                pq_dataset = self.tile.datasets[DatasetType.PQ25]

        log_mem("Before get PQA mask")

        pq_mask = get_mask_pqa(pq_dataset, self.mask_pqa_mask) if pq_dataset else None

        band_data = get_dataset_data_masked(nbar, mask=pq_mask, ndv=NDV)

        log_mem("After get data (masked)")

        tile_metadata = get_dataset_metadata(nbar)

        wetness_coefficients = TCI_COEFFICIENTS[nbar.satellite][TasselCapIndex.WETNESS]
        wetness = calculate_tassel_cap_index(band_data, coefficients=wetness_coefficients)

        raster_create(self.output().path, [wetness],
                      tile_metadata.transform, tile_metadata.projection, numpy.nan, gdal.GDT_Float32)
Example #3
0
    def doit(self):
        """
        Count, per pixel, how many observations exist across all tiles.

        For every tile returned by ``self.get_tiles()`` the PQ band of the PQ25
        dataset is read and two kinds of counts are accumulated:

        * the number of tiles in which the pixel is not ``NDV``
        * for each pixel quality mask in ``masks``, the number of tiles in which
          the pixel has all the bits of that mask set

        The counts are written to ``self.output()[0].path`` as an Int16 raster
        whose first band is the overall observation count, followed by one band
        per mask in the order of ``masks``.

        The arrays are allocated with a hard-coded shape of 4000 x 4000.
        """

        shape = (4000, 4000)

        masks = [PQ_MASK_CLEAR,
                 PQ_MASK_SATURATION_OPTICAL,
                 PQ_MASK_SATURATION_THERMAL,
                 PQ_MASK_CONTIGUITY,
                 PQ_MASK_LAND,
                 PQ_MASK_CLOUD_ACCA,
                 PQ_MASK_CLOUD_FMASK,
                 PQ_MASK_CLOUD_SHADOW_ACCA,
                 PQ_MASK_CLOUD_SHADOW_FMASK]

        observation_count = empty_array(shape=shape, dtype=numpy.int16, ndv=0)

        observation_count_clear = dict()

        for mask in masks:
            observation_count_clear[mask] = empty_array(shape=shape, dtype=numpy.int16, ndv=0)

        metadata = None

        for tile in self.get_tiles():

            # Get the PQ data

            pq = tile.datasets[DatasetType.PQ25]
            data = get_dataset_data(pq, [Pq25Bands.PQ])[Pq25Bands.PQ]

            #
            # Count any pixels that are not NDV - don't think we should actually have any but anyway
            #

            # Mask out any no data pixels.  This must NOT be bound to ``pq``: the
            # dataset is still needed below to read the metadata.
            data_valid = numpy.ma.masked_equal(data, NDV)

            # Count the data pixels - i.e. pixels that were NOT masked out.
            # getmaskarray always yields a full boolean array, even if nothing was masked.
            observation_count += numpy.where(numpy.ma.getmaskarray(data_valid), 0, 1)

            #
            # Count the pixels that are not masked due to pixel quality
            #

            for mask in masks:
                # Apply the particular pixel mask - a pixel passes when all bits of the mask are set
                pqm = numpy.ma.masked_where(numpy.bitwise_and(data, mask) != mask, data)

                # Count the pixels that were not masked out
                observation_count_clear[mask] += numpy.where(numpy.ma.getmaskarray(pqm), 0, 1)

            # The metadata of the first tile is used for the output raster
            if metadata is None:
                metadata = get_dataset_metadata(pq)

        # Create the output dataset: observation count followed by one band per mask

        raster_create(self.output()[0].path, [observation_count] + [observation_count_clear[mask] for mask in masks],
                      metadata.transform, metadata.projection, NDV, GDT_Int16)
def retrieve_pixel_value(dataset, pqa, pqa_masks, wofs, wofs_masks, latitude, longitude, ndv=NDV):
    """
    Read the value(s) of the single pixel at the given latitude/longitude.

    The location is converted to pixel coordinates using the transform of the
    dataset metadata.  A 1 x 1 window is then read, optionally masked by the PQA
    and/or WOFS datasets.

    :param dataset: dataset to read from
    :param pqa: PQA dataset used for masking, or a false value to skip PQA masking
    :param pqa_masks: masks passed to ``get_mask_pqa``
    :param wofs: WOFS dataset used for masking, or a false value to skip WOFS masking
    :param wofs_masks: masks passed to ``get_mask_wofs``
    :param latitude: latitude of the pixel
    :param longitude: longitude of the pixel
    :param ndv: no-data value passed to ``get_dataset_data_masked``
    :return: whatever ``get_dataset_data_masked`` returns for the 1 x 1 window
    """
    pqa_path = pqa and pqa.path or ""
    pqa_masks_text = pqa and pqa_masks or ""
    wofs_path = wofs and wofs.path or ""
    wofs_masks_text = wofs and wofs_masks or ""

    _log.debug(
        "Retrieving pixel value(s) at lat=[%f] lon=[%f] from [%s] with pqa [%s] and paq mask [%s] and wofs [%s] and wofs mask [%s]",
        latitude, longitude, dataset.path, pqa_path, pqa_masks_text, wofs_path, wofs_masks_text)

    x, y = latlon_to_xy(latitude, longitude, get_dataset_metadata(dataset).transform)

    _log.info("Retrieving value at x=[%d] y=[%d] from %s", x, y, dataset.path)

    # Every read below is restricted to the one pixel of interest
    window = dict(x=x, y=y, x_size=1, y_size=1)

    mask = None

    if pqa:
        mask = get_mask_pqa(pqa, pqa_masks, **window)

    if wofs:
        # The WOFS mask is combined with whatever the PQA step produced
        mask = get_mask_wofs(wofs, wofs_masks, mask=mask, **window)

    data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv, **window)

    _log.debug("data is [%s]", data)

    return data
Example #5
0
    def generate_derived_nbar(self, dataset_types, nbar, pqa, pqa_masks, overwrite=False):
        """
        Write one derived raster (NDVI, EVI or NBR) per requested dataset type.

        For every entry of ``dataset_types`` the NBAR band data is read - through
        ``get_dataset_data_with_pq`` when a PQA dataset is given, otherwise through
        ``get_dataset_data`` - and the matching index is written as a single band
        Float32 raster with ``NDV`` as no-data value.  Dataset types other than
        NDVI, EVI and NBR produce no output.

        :param dataset_types: dataset types to generate
        :param nbar: NBAR dataset the indices are derived from
        :param pqa: PQA dataset, or a false value to read the data unmasked
        :param pqa_masks: masks passed on as ``pq_masks``
        :param overwrite: not used by this method
        """
        for dataset_type in dataset_types:
            output_path = self.get_output_filename_derived_nbar(nbar, dataset_type)

            _log.info("Generating data from [%s] with pq [%s] and pq mask [%s] to [%s]",
                      nbar.path, pqa and pqa.path or "", pqa and pqa_masks or "", output_path)

            nbar_metadata = get_dataset_metadata(nbar)

            if pqa:
                band_data = get_dataset_data_with_pq(nbar, pqa, pq_masks=pqa_masks)
            else:
                band_data = get_dataset_data(nbar)

            _log.debug("data is [%s]", band_data)

            # Work out the index first, then write it in one place
            if dataset_type == DatasetType.NDVI:
                derived = calculate_ndvi(band_data[nbar.bands.RED], band_data[nbar.bands.NEAR_INFRARED])

            elif dataset_type == DatasetType.EVI:
                derived = calculate_evi(band_data[nbar.bands.RED], band_data[nbar.bands.BLUE],
                                        band_data[nbar.bands.NEAR_INFRARED])

            elif dataset_type == DatasetType.NBR:
                derived = calculate_nbr(band_data[nbar.bands.NEAR_INFRARED],
                                        band_data[nbar.bands.SHORT_WAVE_INFRARED_2])

            else:
                # Nothing is written for any other dataset type
                continue

            raster_create(output_path, [derived],
                          nbar_metadata.transform, nbar_metadata.projection, NDV, gdal.GDT_Float32)
Example #6
0
    def run(self):

        print "****", self.output().path

        dataset = self.tile.datasets[self.dataset_type]

        metadata = get_dataset_metadata(dataset)

        mask = None

        # If doing PQA masking then get PQA mask

        if self.mask_pqa_apply and DatasetType.PQ25 in self.tile.datasets:
            mask = get_mask_pqa(self.tile.datasets[DatasetType.PQ25], self.mask_pqa_mask, mask=mask)

        # If doing WOFS masking then get WOFS mask

        if self.mask_wofs_apply and DatasetType.WATER in self.tile.datasets:
            mask = get_mask_wofs(self.tile.datasets[DatasetType.WATER], self.mask_wofs_mask, mask=mask)

        # TODO - no data value and data type
        ndv = get_dataset_ndv(dataset)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

        raster_create(self.output().path, [data[b] for b in dataset.bands],
                      metadata.transform, metadata.projection, ndv, gdal.GDT_Int16,
                      dataset_metadata=self.generate_raster_metadata(dataset),
                      band_ids=[b.name for b in dataset.bands])
Example #7
0
def retrieve_data(dataset, pq, pq_masks, path, x, y, overwrite=False, stack=False):
    """
    Write all bands of a dataset to an Int16 raster at ``path``.

    :param dataset: dataset to read
    :param pq: PQ dataset; when given the data is read via ``get_dataset_data_with_pq``
    :param pq_masks: masks passed on as ``pq_masks``
    :param path: output file
    :param x: cell x, only used to name the file list when ``stack`` is set
    :param y: cell y, only used to name the file list when ``stack`` is set
    :param overwrite: allow an existing output file to be replaced
    :param stack: also append ``path`` to a file list next to the output file
    :raises Exception: if ``path`` exists and ``overwrite`` is not set
    """
    _log.info("Retrieving data from [%s] with pq [%s] and pq mask [%s] to [%s]",
              dataset.path, pq and pq.path or "", pq and pq_masks or "", path)

    if not overwrite and os.path.exists(path):
        _log.error("Output file [%s] exists", path)
        raise Exception("Output file [%s] already exists" % path)

    dataset_metadata = get_dataset_metadata(dataset)

    band_data = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks) if pq else get_dataset_data(dataset)

    _log.debug("data is [%s]", band_data)

    raster_create(path, [band_data[b] for b in dataset.bands],
                  dataset_metadata.transform, dataset_metadata.projection, NDV, gdal.GDT_Int16)

    if not stack:
        return

    # Creating a stack - so also record the output in the file list that sits next to it
    file_list_name = get_filename_file_list(dataset.satellite, dataset.dataset_type, x, y)
    path_file_list = os.path.join(os.path.dirname(path), file_list_name)

    _log.info("Also going to write file list to [%s]", path_file_list)

    with open(path_file_list, "ab") as f:
        print >>f, path
Example #8
0
def test_calculate_ndvi():
    """
    Calculate NDVI for a fixed LS5 NBAR tile and write it out as a Float32 raster.

    Masked values are written as NaN, which is also the raster's no-data value.
    The test makes no assertions; it relies on hard-coded paths under /data/tmp.
    """
    source_path = "/data/tmp/cube/data/from.calum/LS5_TM_NBAR_150_-034_2004-01-13T23-22-17.088044.tif"
    target_path = "/data/tmp/cube/data/unit_test/LS5_TM_NDVI_150_-034_2004-01-13T23-22-17.088044.tif"

    nbar = DatasetTile(satellite_id="LS5", type_id="ARG25", path=source_path)

    nbar_metadata = get_dataset_metadata(nbar)

    ndvi = calculate_ndvi(nbar)

    raster_create(target_path, [ndvi.filled(numpy.NaN)],
                  nbar_metadata.transform, nbar_metadata.projection, numpy.NaN, gdal.GDT_Float32)
Example #9
0
def test_apply_pq():
    """
    Read a fixed LS5 NBAR tile with its PQ tile applied and write the result.

    Every band in ``Ls57Arg25Bands`` is written to an Int16 raster with masked
    values filled with -999, which is also the raster's no-data value.  The test
    makes no assertions; it relies on hard-coded paths under /data/tmp.
    """
    no_data_value = -999

    nbar_path = "/data/tmp/cube/data/from.calum/LS5_TM_NBAR_150_-034_2004-01-13T23-22-17.088044.tif"
    pq_path = "/data/tmp/cube/data/from.calum/LS5_TM_PQA_150_-034_2004-01-13T23-22-17.088044.tif"
    target_path = "/data/tmp/cube/data/from.calum/LS5_TM_NBAR_PQD_150_-034_2004-01-13T23-22-17.088044.tif"

    nbar = DatasetTile(satellite_id="LS5", type_id="ARG25", path=nbar_path)
    pq = DatasetTile(satellite_id="LS5", type_id="PQ25", path=pq_path)

    nbar_metadata = get_dataset_metadata(nbar)

    masked_bands = raster_get_band_data_with_pq(nbar, Ls57Arg25Bands, pq)

    raster_create(target_path,
                  [masked_bands[b].filled(no_data_value) for b in Ls57Arg25Bands],
                  nbar_metadata.transform, nbar_metadata.projection, no_data_value, gdal.GDT_Int16)
Example #10
0
def retrieve_data(x, y, acq_dt, dataset, band_names, pqa, pqa_masks, wofs, wofs_masks, path, output_format,
                  overwrite=False, data_type=None, ndv=None, mask=None):
    """
    Write the selected bands of a dataset to ``path`` as GeoTIFF or ENVI.

    The data is read with an optional PQA and/or WOFS mask applied on top of
    any ``mask`` passed in.  Nothing is written for an ``output_format`` other
    than ``OutputFormat.GEOTIFF`` or ``OutputFormat.ENVI``.

    :param x: cell x, recorded in the raster metadata
    :param y: cell y, recorded in the raster metadata
    :param acq_dt: acquisition date/time, recorded in the raster metadata
    :param dataset: dataset to read
    :param band_names: names of the bands of ``dataset`` to write
    :param pqa: PQA dataset used for masking, or a false value to skip it
    :param pqa_masks: masks passed to ``get_mask_pqa``
    :param wofs: WOFS dataset used for masking, or a false value to skip it
    :param wofs_masks: masks passed to ``get_mask_wofs``
    :param path: output file
    :param output_format: ``OutputFormat`` member selecting the writer
    :param overwrite: allow an existing output file to be replaced
    :param data_type: output data type; defaults to the dataset's own data type
    :param ndv: no-data value; defaults to the dataset's own no-data value
    :param mask: initial mask that the PQA/WOFS masks are combined with
    :raises Exception: if ``path`` exists and ``overwrite`` is not set
    """

    _log.info("Retrieving data from [%s] bands [%s] with pq [%s] and pq mask [%s] and wofs [%s] and wofs mask [%s] to [%s] file [%s]",
              dataset.path,
              band_names,
              pqa and pqa.path or "",
              pqa and pqa_masks or "",
              wofs and wofs.path or "", wofs and wofs_masks or "",
              output_format.name, path)

    if os.path.exists(path) and not overwrite:
        _log.error("Output file [%s] exists", path)
        raise Exception("Output file [%s] already exists" % path)

    metadata = get_dataset_metadata(dataset)

    # Start from the caller supplied mask (if any) and add the PQA / WOFS masks to it

    if pqa:
        mask = get_mask_pqa(pqa, pqa_masks, mask=mask)

    if wofs:
        mask = get_mask_wofs(wofs, wofs_masks, mask=mask)

    # Keep the dataset's band order, restricted to the requested names
    bands = [b for b in dataset.bands if b.name in band_names]

    # 0 is a perfectly valid no-data value, so only fall back to the dataset's
    # own value when the caller did not supply one at all
    if ndv is None:
        ndv = get_dataset_ndv(dataset)

    data = get_dataset_data_masked(dataset, bands=bands, mask=mask, ndv=ndv)

    _log.debug("data is [%s]", data)

    data_type = data_type or get_dataset_datatype(dataset)

    dataset_info = generate_raster_metadata(x, y, acq_dt, dataset, bands,
                                            pqa is not None, pqa_masks,
                                            wofs is not None, wofs_masks)

    band_info = [b.name for b in bands]

    if output_format == OutputFormat.GEOTIFF:
        raster_create_geotiff(path, [data[b] for b in bands], metadata.transform, metadata.projection, ndv,
                              data_type, dataset_metadata=dataset_info, band_ids=band_info)

    elif output_format == OutputFormat.ENVI:
        raster_create_envi(path, [data[b] for b in bands], metadata.transform, metadata.projection, ndv,
                           data_type, dataset_metadata=dataset_info, band_ids=band_info)
    def create(
        self, x, y, satellites, acq_min, acq_max, dataset_types, bands, months=None, exclude=None, sort=SortType.ASC
    ):
        """
        Build per-band stacks of flattened ARG25 data for one cell.

        The query parameters are stored on the instance, the stacks are reset and
        every tile returned by ``list_tiles`` is processed.  A tile is skipped
        when it has no ARG25 dataset, no metadata or no PQ25 dataset.  For every
        other tile:

        * its PQ25 dataset is added to ``self.pqa_stack``
        * each band is flattened and stacked row-wise onto ``self.stack[band]``
        * the shape of its first band goes onto ``self.shape_stack``
        * its start date and metadata go onto ``self.acq_stack`` / ``self.meta_stack``

        ``self.pqa_stack`` is not reset here; it is expected to exist already.
        """
        self.x = x
        self.y = y
        self.satellites = satellites
        self.acq_min = acq_min
        self.acq_max = acq_max
        self.stack = {}
        self.acq_stack = []
        self.meta_stack = []
        self.bands = bands
        self.tile_shape = None
        self.shape_stack = []

        for tile in list_tiles([x], [y], satellites, acq_min, acq_max, dataset_types, months, exclude, sort):

            # All three of NBAR dataset, metadata and PQ dataset are required
            nbar = DatasetType.ARG25 in tile.datasets and tile.datasets[DatasetType.ARG25] or None
            if nbar is None:
                continue

            nbar_metadata = get_dataset_metadata(nbar)
            if nbar_metadata is None:
                continue

            pq = DatasetType.PQ25 in tile.datasets and tile.datasets[DatasetType.PQ25] or None
            if pq is None:
                continue

            if self.pqa_stack is None:
                self.pqa_stack = [pq]
            else:
                self.pqa_stack.append(pq)

            band_data = get_dataset_data(nbar, bands)

            for position, band in enumerate(band_data):
                pixels = np.array(band_data[band])

                # The tile's shape is recorded once, taken from its first band
                if position == 0:
                    self.shape_stack.append(pixels.shape)

                flattened = pixels.ravel()

                if band in self.stack:
                    # Add this tile as a further row of the existing stack
                    self.stack[band] = np.vstack((self.stack[band], flattened))
                else:
                    self.stack[band] = flattened

            self.acq_stack.append(tile.start_datetime)
            self.meta_stack.append(nbar_metadata)
def retrieve_pixel_value(dataset, pq, pq_masks, latitude, longitude, ndv=NDV):
    """
    Read the value(s) of the single pixel at the given latitude/longitude.

    The location is converted to pixel coordinates using the transform of the
    dataset metadata and a 1 x 1 window is read at that position.

    :param dataset: dataset to read from
    :param pq: PQ dataset; when given the data is read via ``get_dataset_data_with_pq``
    :param pq_masks: masks passed on as ``pq_masks``
    :param latitude: latitude of the pixel
    :param longitude: longitude of the pixel
    :param ndv: no-data value, only used when ``pq`` is given
    :return: whatever the data reader returns for the 1 x 1 window
    """
    _log.debug("Retrieving pixel value(s) at lat=[%f] lon=[%f] from [%s] with pq [%s] and pq mask [%s]",
               latitude, longitude, dataset.path, pq and pq.path or "", pq and pq_masks or "")

    x, y = latlon_to_xy(latitude, longitude, get_dataset_metadata(dataset).transform)

    _log.debug("Retrieving value at x=[%d] y=[%d]", x, y)

    # Both readers are restricted to the one pixel of interest
    window = dict(x=x, y=y, x_size=1, y_size=1)

    if pq:
        data = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks, ndv=ndv, **window)
    else:
        data = get_dataset_data(dataset, **window)

    _log.debug("data is [%s]", data)

    return data
    def run(self):
        """
        Stack ``self.band`` of every matching tile into one multi-band raster.

        Tiles are listed for the cell ``self.x`` / ``self.y`` within the season
        adjusted acquisition range.  The output raster (``self.output().path``,
        GeoTIFF or ENVI depending on ``self.output_format``) is created lazily
        from the first usable tile with one band per listed tile.  Each tile's
        band data - PQA masked when ``self.mask_pqa_apply`` is set and the tile
        has a PQ25 dataset - is written to the raster band matching the tile's
        position in the list.
        """

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=dataset_types, include=criteria)

        for index, tile in enumerate(tiles, start=1):

            # Check for the dataset BEFORE looking it up - otherwise a tile without
            # it raises KeyError instead of being skipped.
            # NOTE: a skipped tile leaves raster band [index] unwritten.
            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping", self.dataset_type.name, tile.end_datetime)
                continue

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            # band = dataset.bands[self.band]
            # assert band
            band = self.band

            pqa = (self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets) and tile.datasets[DatasetType.PQ25] or None

            filename = self.output().path

            # Metadata, driver and raster are all set up once, from the first usable tile

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")

                assert driver

            if not raster:

                if self.output_format == OutputFormat.GEOTIFF:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])

                elif self.output_format == OutputFormat.ENVI:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type, options=["INTERLEAVE=BSQ"])

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                      band.name, dataset.path,
                      pqa and pqa.path or "", pqa and self.mask_pqa_mask or "",
                      filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

            stack_band.FlushCache()
            del stack_band

        if raster:
            # Flush and drop the reference to the raster
            raster.FlushCache()
            del raster
            raster = None
            raster = None
Example #14
0
    def go(self):
        """
        Create one multi-band stack raster per band name in ``self.bands``.

        For every band name the tiles whose dataset of type ``self.dataset_type``
        contains that band are selected.  The output raster (GeoTIFF or ENVI
        depending on ``self.output_format``) is created lazily from the first
        such tile, with one raster band per selected tile.  Each tile's data is
        read with the optional vector, PQA and WOFS masks applied and written to
        the raster band matching the tile's position.

        With ``self.list_only`` set nothing is written; the datasets that would
        be stacked are only logged.
        """

        # If we are applying a vector mask then calculate it now (once, as it is the same for all tiles)

        mask_vector = None

        if self.mask_vector_apply:
            mask_vector = get_mask_vector_for_cell(self.x, self.y, self.mask_vector_file, self.mask_vector_layer, self.mask_vector_feature)

        # TODO move the band handling into utils?

        import gdal

        # Start from None so that an unsupported output format fails the assert
        # below rather than raising NameError
        driver = None

        if self.output_format == OutputFormat.GEOTIFF:
            driver = gdal.GetDriverByName("GTiff")
        elif self.output_format == OutputFormat.ENVI:
            driver = gdal.GetDriverByName("ENVI")

        assert driver

        tiles = self.get_tiles()
        _log.info("Total tiles found [%d]", len(tiles))

        for band_name in self.bands:
            _log.info("Creating stack for band [%s]", band_name)

            relevant_tiles = []

            for tile in tiles:

                dataset = self.dataset_type in tile.datasets and tile.datasets[self.dataset_type] or None

                if not dataset:
                    _log.info("No applicable [%s] dataset for [%s]", self.dataset_type.name, tile.end_datetime)
                    continue

                if band_name in [b.name for b in dataset.bands]:
                    relevant_tiles.append(tile)

            _log.info("Total tiles for band [%s] is [%d]", band_name, len(relevant_tiles))

            filename = None

            raster = None
            metadata = None
            data_type = ndv = None

            for index, tile in enumerate(relevant_tiles, start=1):

                # Every relevant tile is known to have the dataset (see the filter above)
                dataset = tile.datasets[self.dataset_type]
                assert dataset

                band = dataset.bands[band_name]
                assert band

                if self.list_only:
                    _log.info("Would stack band [%s] from dataset [%s]", band.name, dataset.path)
                    continue

                pqa = (self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets) and tile.datasets[DatasetType.PQ25] or None
                wofs = (self.mask_wofs_apply and DatasetType.WATER in tile.datasets) and tile.datasets[DatasetType.WATER] or None

                if not filename:
                    filename = os.path.join(self.output_directory,
                                            get_dataset_band_stack_filename(satellites=self.satellites,
                                                                            dataset_type=self.dataset_type,
                                                                            band=band,
                                                                            x=self.x, y=self.y,
                                                                            acq_min=self.acq_min, acq_max=self.acq_max,
                                                                            season=self.season,
                                                                            output_format=self.output_format,
                                                                            mask_pqa_apply=self.mask_pqa_apply,
                                                                            mask_wofs_apply=self.mask_wofs_apply,
                                                                            mask_vector_apply=self.mask_vector_apply))

                _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] and WOFS [%s] and WOFS mask [%s] to band [%d] of [%s]",
                          band.name, dataset.path,
                          pqa and pqa.path or "",
                          pqa and self.mask_pqa_mask or "",
                          wofs and wofs.path or "", wofs and self.mask_wofs_mask or "",
                          index,
                          filename)

                # Metadata, data type, no-data value and raster are set up once per
                # band stack, from the first tile

                if not metadata:
                    metadata = get_dataset_metadata(dataset)
                    assert metadata

                if data_type is None:
                    data_type = get_dataset_datatype(dataset)
                    assert data_type

                # 0 is a valid no-data value, so test against None rather than truthiness
                if ndv is None:
                    ndv = get_dataset_ndv(dataset)
                    assert ndv is not None

                if not raster:

                    if self.output_format == OutputFormat.GEOTIFF:
                        raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(relevant_tiles), data_type, options=["TILED=YES", "BIGTIFF=YES", "COMPRESS=LZW", "INTERLEAVE=BAND"])
                    elif self.output_format == OutputFormat.ENVI:
                        raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(relevant_tiles), data_type, options=["INTERLEAVE=BSQ"])

                    assert raster

                    # NOTE: could do this without the metadata!!
                    raster.SetGeoTransform(metadata.transform)
                    raster.SetProjection(metadata.projection)

                raster.SetMetadata(self.generate_raster_metadata())

                # Start from the vector mask (if any) and add the PQA / WOFS masks to it
                mask = mask_vector

                if pqa:
                    mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

                if wofs:
                    mask = get_mask_wofs(wofs, self.mask_wofs_mask, mask=mask)

                data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

                _log.debug("data is [%s]", data)

                stack_band = raster.GetRasterBand(index)

                stack_band.SetDescription(os.path.basename(dataset.path))
                stack_band.SetNoDataValue(ndv)
                stack_band.WriteArray(data[band])
                stack_band.ComputeStatistics(True)
                stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

                stack_band.FlushCache()
                del stack_band

            if raster:
                # Flush and drop the reference to the raster before starting the next band
                raster.FlushCache()
                raster = None
    def go(self):
        """
        Write per-tile band statistics for the area of interest to CSV.

        The cells to process are those that both intersect the vector and have
        data in the database.  For every such cell the tiles of that cell are
        listed and, per tile, one CSV row is written with the acquisition date,
        the instrument, the total and in-AOI pixel counts and - for every band in
        ``self.bands`` - the data pixel counts after each masking step (no-data,
        PQA, WOFS, AOI) plus the minimum, maximum and mean of what remains.

        Bands that are requested but absent from a dataset get zero counts and
        masked statistics.  A tile with no unmasked pixel in any band is skipped
        unless ``self.output_no_data`` is set.  With ``self.list_only`` set the
        tiles are only logged.
        """

        import numpy
        from datacube.api.query import list_cells_as_list, list_tiles_as_list
        from datacube.config import Config

        x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
        _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

        cells_vector = self.extract_cells_from_vector()
        _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

        config = Config()
        _log.debug(config.to_str())

        x_list = range(x_min, x_max + 1)
        y_list = range(y_min, y_max + 1)

        _log.debug("x = [%s] y=[%s]", x_list, y_list)

        cells_db = list()

        for cell in list_cells_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=[satellite for satellite in self.satellites],
                                       dataset_types=[self.dataset_type]):
            cells_db.append((cell.x, cell.y))

        _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

        cells = intersection(cells_vector, cells_db)
        _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

        for (x, y) in cells:
            _log.info("Processing cell [%3d/%4d]", x, y)

            # Only the tiles of THIS cell - the AOI mask calculated below is cell
            # specific, so it must not be applied to tiles of any other cell
            tiles = list_tiles_as_list(x=[x], y=[y], acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=[satellite for satellite in self.satellites],
                                       dataset_types=[self.dataset_type])

            _log.info("There are [%d] tiles", len(tiles))

            if self.list_only:
                for tile in tiles:
                    _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
                continue

            # Calculate the mask for the cell

            mask_aoi = self.get_mask_aoi_cell(x, y)

            pixel_count = 4000 * 4000

            # Pixels inside the AOI are the ones that are NOT masked
            pixel_count_aoi = (mask_aoi == False).sum()

            _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

            metadata = None

            with self.get_output_file() as csv_file:

                csv_writer = csv.writer(csv_file)

                import operator

                # Four fixed columns followed by seven columns per requested band
                header = reduce(operator.add, [["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]] + [
                    ["%s - # DATA PIXELS" % band_name,
                     "%s - # DATA PIXELS AFTER PQA" % band_name,
                     "%s - # DATA PIXELS AFTER PQA WOFS" % band_name,
                     "%s - # DATA PIXELS AFTER PQA WOFS AOI" % band_name,
                     "%s - MIN" % band_name, "%s - MAX" % band_name, "%s - MEAN" % band_name] for band_name in self.bands])

                csv_writer.writerow(header)

                for tile in tiles:

                    _log.info("Processing tile [%s]", tile.datasets[self.dataset_type].path)

                    if self.list_only:
                        continue

                    if not metadata:
                        metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

                    # Apply PQA if specified

                    pqa = None
                    mask_pqa = None

                    if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                        pqa = tile.datasets[DatasetType.PQ25]
                        mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                    _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                    # Apply WOFS if specified

                    wofs = None
                    mask_wofs = None

                    if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                        wofs = tile.datasets[DatasetType.WATER]
                        mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                    _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                    dataset = tile.datasets[self.dataset_type]

                    bands = []

                    dataset_band_names = [b.name for b in dataset.bands]

                    # Only read the requested bands that this dataset actually has
                    for b in self.bands:
                        if b in dataset_band_names:
                            bands.append(dataset.bands[b])

                    data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands)
                    _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                    pixel_count_data = dict()
                    pixel_count_data_pqa = dict()
                    pixel_count_data_pqa_wofs = dict()
                    pixel_count_data_pqa_wofs_aoi = dict()
                    mmin = dict()
                    mmax = dict()
                    mmean = dict()

                    for band_name in self.bands:

                        # Add "zeroed" entries for non-present bands - should only be if outputs for those bands have been explicitly requested

                        if band_name not in dataset_band_names:
                            pixel_count_data[band_name] = 0
                            pixel_count_data_pqa[band_name] = 0
                            pixel_count_data_pqa_wofs[band_name] = 0
                            pixel_count_data_pqa_wofs_aoi[band_name] = 0
                            mmin[band_name] = numpy.ma.masked
                            mmax[band_name] = numpy.ma.masked
                            mmean[band_name] = numpy.ma.masked
                            continue

                        band = dataset.bands[band_name]

                        # The masks are applied cumulatively, with the remaining data
                        # pixels counted after each step: no-data -> PQA -> WOFS -> AOI

                        data[band] = numpy.ma.masked_equal(data[band], NDV)
                        _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data), numpy.ma.count(data), data)

                        pixel_count_data[band_name] = numpy.ma.count(data[band])

                        if pqa:
                            data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                            _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa[band_name] = numpy.ma.count(data[band])

                        if wofs:
                            data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                            _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa_wofs[band_name] = numpy.ma.count(data[band])

                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                        _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa_wofs_aoi[band_name] = numpy.ma.count(data[band])

                        mmin[band_name] = numpy.ma.min(data[band])
                        mmax[band_name] = numpy.ma.max(data[band])
                        mmean[band_name] = numpy.ma.mean(data[band])

                        # Convert the mean to an int...taking into account masking....

                        if not numpy.ma.is_masked(mmean[band_name]):
                            mmean[band_name] = mmean[band_name].astype(numpy.int16)

                    pixel_count_data_pqa_wofs_aoi_all_bands = reduce(operator.add, pixel_count_data_pqa_wofs_aoi.itervalues())

                    if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                        _log.info("Skipping dataset with no non-masked data values in ANY band")
                        continue

                    # Same layout as the header: four fixed values then seven per band
                    row = reduce(
                        operator.add,
                            [[tile.end_datetime,
                              self.decode_satellite_as_instrument(tile.datasets[self.dataset_type].satellite),
                              pixel_count, pixel_count_aoi]] +

                            [[pixel_count_data[band_name], pixel_count_data_pqa[band_name],
                              pixel_count_data_pqa_wofs[band_name], pixel_count_data_pqa_wofs_aoi[band_name],
                              mmin[band_name], mmax[band_name], mmean[band_name]] for band_name in self.bands])

                    csv_writer.writerow(row)
Example #16
0
def classifier(arg25_dataset, pq25_dataset):
    """
    Apply the wetness/NDVI decision tree designed by SF to one ARG25 tile.

    The reflectance bands are read through get_dataset_data_with_pq (which
    is handed the PQ dataset), then tasselled cap wetness and NDVI are
    derived and every pixel receives one of the following codes:

        0 - null (NDVI is not finite)
        1 - water
        2 - non-veg
        3 - saltmarsh
        4 - mangrove/saltmarsh
        5 - mangrove

    Returns a uint8 array shaped (rows, cols) as reported by the dataset
    metadata.
    """
    # Image dimensions come from the dataset metadata
    metadata = get_dataset_metadata(arg25_dataset)
    cols, rows = metadata.shape

    # Reflectance bands, read together with the PQ dataset
    reflectance = get_dataset_data_with_pq(arg25_dataset, pq25_dataset)

    # Tasselled cap wetness using the coefficients of this satellite
    wetness_coefficients = TCI_COEFFICIENTS[arg25_dataset.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(reflectance, wetness_coefficients)

    ndvi = calculate_ndvi(reflectance[arg25_dataset.bands.RED],
                          reflectance[arg25_dataset.bands.NEAR_INFRARED],
                          output_ndv=numpy.nan)

    # Only wetness and ndvi are needed from here on
    del reflectance

    result = numpy.zeros((rows, cols), dtype='uint8')

    # NDVI thresholds shared by both branches of the tree
    ndvi_le_030 = ndvi <= 0.3
    ndvi_le_045 = ndvi <= 0.45
    ndvi_le_060 = ndvi <= 0.6

    def label_by_ndvi(region):
        # Walk the NDVI ladder inside `region`:
        # non-veg -> saltmarsh -> mangrove/saltmarsh -> mangrove
        result[region & ndvi_le_030] = 2
        remaining = region & ~ndvi_le_030
        result[remaining & ndvi_le_045] = 3
        remaining = remaining & ~ndvi_le_045
        result[remaining & ndvi_le_060] = 4
        result[remaining & ~ndvi_le_060] = 5

    # Water: positive wetness
    water = wetness > 0
    result[water] = 1
    not_water = ~water

    # Wetness in [-250, 0): classify purely on NDVI
    wetness_250_to_0 = (wetness >= -250) & (wetness < 0)
    label_by_ndvi(not_water & wetness_250_to_0)

    # Everything else that is not water
    other = not_water & ~wetness_250_to_0
    wetness_below_750 = wetness < -750

    # Wetness below -750: saltmarsh when NDVI >= 0.3, otherwise non-veg
    driest = other & wetness_below_750
    ndvi_ge_030 = ndvi >= 0.3
    result[driest & ndvi_ge_030] = 3
    result[driest & ~ndvi_ge_030] = 2

    # Remaining pixels go through the same NDVI ladder
    label_by_ndvi(other & ~wetness_below_750)

    # Pixels without a finite NDVI are reset to null
    result[~numpy.isfinite(ndvi)] = 0

    return result
Example #17
0
def obtain_water_statistics(x, y, start, end, satellite, months=None):
    """
    Build a colourised 3-band GeoTIFF from per-pixel PQ mask counts.

    For every tile of cell (x, y) returned by list_tiles, two PQ masks are
    derived with get_mask_pqa: one from PQ_MASK_LAND (``wet``) and the
    inverse of one from the cloud, contiguity and saturation flags
    (``clear``).  Per pixel, ``total_count`` is incremented where ``clear``
    is set and ``wet_count`` where both ``wet`` and ``clear`` are set.  The
    ratio wet_count / total_count is then binned into colours:

        [0.01, 0.05) red
        [0.05, 0.20) yellow
        [0.20, 0.50) green
        [0.50, 0.80) light blue
        >= 0.80      blue

    :param x: cell x index passed to list_tiles
    :param y: cell y index passed to list_tiles
    :param start: passed to list_tiles as acq_min
    :param end: passed to list_tiles as acq_max
    :param satellite: passed to list_tiles as satellites
    :param months: optional iterable of months passed to list_tiles
    :return: path of the GeoTIFF written under /tilestore/tile_cache/, or
        the string 'None' when no pixel was counted
    """
    import logging

    # The output name is a hash of the request parameters
    name_parts = [str(x), str(y), str(start), str(end), str(satellite)]
    if months is not None:
        name_parts.append("+".join([str(m) for m in months]))
    cache_key = "_".join(name_parts)
    for char in '[]<>':
        cache_key = cache_key.replace(char, '')
    # Encoding is a no-op for ASCII byte strings and keeps hashlib happy
    # when the key is a text string.
    digest = hashlib.sha512(cache_key.encode('utf-8')).hexdigest()[0:32]
    t_name = '/tilestore/tile_cache/' + digest + '.tif'

    total_count = None
    wet_count = None
    tile_metadata = None

    tiles = list_tiles(x=[x], y=[y], acq_min=start, acq_max=end,
                       satellites=satellite,
                       dataset_types=[DatasetType.ARG25, DatasetType.PQ25],
                       sort=SortType.ASC, months=months)

    for tile in tiles:
        dataset = tile.datasets[DatasetType.ARG25] if DatasetType.ARG25 in tile.datasets else None
        if dataset is None:
            continue

        # Without a PQ dataset there is nothing to derive the masks from
        pqa = tile.datasets[DatasetType.PQ25] if DatasetType.PQ25 in tile.datasets else None
        if pqa is None:
            continue

        tile_metadata = get_dataset_metadata(dataset)

        wet = get_mask_pqa(pqa, [PqaMask.PQ_MASK_LAND], mask=None)
        clear = ~get_mask_pqa(pqa,
                              [PqaMask.PQ_MASK_CLOUD,
                               PqaMask.PQ_MASK_CONTIGUITY,
                               PqaMask.PQ_MASK_SATURATION],
                              mask=None)
        wet_mask = wet & clear

        # The accumulators take their shape from the first usable tile
        if total_count is None:
            total_count = numpy.zeros(clear.shape)
        if wet_count is None:
            wet_count = numpy.zeros(wet.shape)

        try:
            total_count[clear] += 1
            wet_count[wet_mask] += 1
        except (IndexError, ValueError):
            # A tile whose masks do not match the accumulator shape is
            # skipped (best effort), but no longer silently.
            logging.getLogger(__name__).warning(
                "Skipping tile with mismatching mask shape %s", clear.shape)

    if total_count is None or wet_count is None:
        return 'None'
    if not numpy.any(total_count):
        return 'None'

    # Pixels that were never counted give 0/0 = NaN; NaN fails every
    # comparison below and therefore stays black.
    with numpy.errstate(divide='ignore', invalid='ignore'):
        wetper = wet_count / total_count
        colour_bins = [
            ((wetper >= 0.01) & (wetper < 0.05), (255, 0, 0)),    # red
            ((wetper >= 0.05) & (wetper < 0.2), (255, 255, 0)),   # yellow
            ((wetper >= 0.2) & (wetper < 0.5), (0, 255, 0)),      # green
            ((wetper >= 0.5) & (wetper < 0.8), (0, 150, 150)),    # light blue
            (wetper >= 0.8, (0, 0, 255)),                         # blue
        ]

    height, width = wet_count.shape[0], wet_count.shape[1]
    rgb = numpy.zeros((height, width, 3), 'uint8')
    for bin_mask, colour in colour_bins:
        rgb[bin_mask] = colour

    # Produce output
    driver = gdal.GetDriverByName("GTiff")
    raster = driver.Create(t_name, width, height, 3, gdal.gdalconst.GDT_Int16,
                           options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
    raster.SetGeoTransform(tile_metadata.transform)
    raster.SetProjection(tile_metadata.projection)
    for band_number in range(1, 4):
        out_band = raster.GetRasterBand(band_number)
        out_band.SetNoDataValue(0)
        out_band.WriteArray(rgb[..., band_number - 1])
        out_band.ComputeStatistics(True)
        out_band.FlushCache()
        del out_band
    raster.FlushCache()
    del raster

    return t_name
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0, high_off=0,
                out_fnames=None):
    """
    Stack the NBAR time series chunk by chunk and write the observation
    count and the low/high offsets to a tiled GeoTIFF.

    For every spatial chunk each tile's ARG25 bands are read, the pixels
    selected by get_mask_pqa are set to the no-data value and the bands are
    stacked.  A running count of time slices that contain at least one
    pixel value >= 1 is kept.  For the members of TidalProd with value 1, 2
    and 3 the count, low offset and high offset of the last time slice are
    written to the output.

    :param tiles: non-empty sequence of tiles exposing
        ``datasets[DatasetType.ARG25]`` and ``datasets[DatasetType.PQ25]``
    :param percentile: not used by this workflow; kept for compatibility
    :param xtile: not used; chunks always span the full image width
    :param ytile: chunk height in lines (defaults to the image height)
    :param low_off: low offset, stored as int(float(low_off) * 100)
    :param high_off: high offset, stored as int(float(high_off) * 100)
    :param out_fnames: optional sequence whose first entry is the output
        file name; 'nbar_best_pixel' is used when it is None
    :return: None
    """
    # Get some basic image info
    dataset = tiles[0].datasets[DatasetType.ARG25]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return
    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # The parent directory of the first output file is only used to label
    # log messages, so a missing or short path must not abort the run.
    lat_lon = ""
    if out_fnames:
        path_parts = out_fnames[0].split("/")
        if len(path_parts) >= 2:
            lat_lon = path_parts[-2]

    # Initialise the tiling scheme for processing
    # NOTE(review): xtile is ignored, chunks always cover the full image
    # width - confirm this is intended.
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV

    # Define the output file
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    nbar_outnb = len(extraInfo)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    try:
        # Loop over each spatial tile/chunk and build up the time series
        for chunk in chunks:
            count = 0
            ys, ye = chunk[0]
            xs, xe = chunk[1]
            ysize = ye - ys
            xsize = xe - xs
            dims = (time_slices, ysize, xsize)

            # Initialise the per-chunk stacks
            stack_lowOff = numpy.zeros(dims, dtype='int16')
            stack_highOff = numpy.zeros(dims, dtype='int16')
            stack_count = numpy.zeros(dims, dtype='int16')
            stack_nbar = {}
            for band in Ls57Arg25Bands:
                stack_nbar[band] = numpy.zeros(dims, dtype='int16')

            for idx, ds in enumerate(tiles):

                pqa = ds.datasets[DatasetType.PQ25]
                nbar = ds.datasets[DatasetType.ARG25]
                mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

                # NBAR
                nbar_data = get_dataset_data(nbar, x=xs, y=ys,
                                             x_size=xsize,
                                             y_size=ysize)

                # Apply the mask to each band and insert it into the 3D array
                failed = False
                if satellite_code[nbar.satellite] == 8:
                    # LS8 bands are matched to the LS5/7 bands by name
                    for band in Ls57Arg25Bands:
                        for oband in Ls8Arg25Bands:
                            try:
                                if oband.name == band.name:
                                    nbar_data[oband][mask] = no_data_value
                                    stack_nbar[band][idx] = nbar_data[oband]
                                    break
                            except ValueError:
                                failed = True
                                _log.info("Data converting error LS8")
                            except IOError:
                                failed = True
                                _log.info("reading error LS8")
                            except KeyError:
                                failed = True
                                _log.info("Key error LS8")
                            except Exception:
                                failed = True
                                _log.info("Unexpected error for LS8: %s", sys.exc_info()[0])
                else:
                    for band in Ls57Arg25Bands:
                        try:
                            nbar_data[band][mask] = no_data_value
                            stack_nbar[band][idx] = nbar_data[band]
                        except ValueError:
                            failed = True
                            _log.info("Data converting error LS57")
                        except IOError:
                            failed = True
                            _log.info("NBAR reading error LS57")
                        except KeyError:
                            failed = True
                            _log.info("Key error LS57")
                        except Exception:
                            failed = True
                            _log.info("Unexpected error LS57: %s", sys.exc_info()[0])

                if failed:
                    _log.info("nbar tile has problem  %s", nbar.path)
                    continue

                # Add the offsets and the running observation count
                try:
                    low = int(float(low_off) * 100)
                    high = int(float(high_off) * 100)
                    stack_lowOff[idx][:] = low
                    stack_highOff[idx][:] = high

                    # A time slice only counts when at least one band holds
                    # a pixel value >= 1 after masking.
                    has_data = any((stack_nbar[band][idx] >= 1).any()
                                   for band in Ls57Arg25Bands)
                    if has_data:
                        count = count + 1
                    else:
                        _log.info("no data present in %s for tile %s, not counting this time slice",
                                  nbar.path, lat_lon)
                    stack_count[idx][:] = count

                except Exception:
                    _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])

            # Write the values of the last time slice for this chunk
            _log.info("checking - flow path: ")
            ndv = get_dataset_type_ndv(DatasetType.ARG25)
            try:
                _log.info("ndv is %s", ndv)
                # NOTE(review): when the last tile failed to read, its slice
                # is still all zeros - confirm this is acceptable.
                median_count = stack_count[-1]
                median_lowOff = stack_lowOff[-1]
                median_highOff = stack_highOff[-1]
                _log.info("ccccc_data  ")
                for band in TidalProd:
                    bn = band.value
                    if bn == 1:
                        nbar_outds.write_tile(median_count, chunk, raster_band=bn)
                    elif bn == 2:
                        nbar_outds.write_tile(median_lowOff, chunk, raster_band=bn)
                    elif bn == 3:
                        nbar_outds.write_tile(median_highOff, chunk, raster_band=bn)
            except ValueError:
                _log.info("Data converting final error")
            except IOError:
                _log.info("writing error LS57")
            except KeyError:
                _log.info("Key error final")
            except Exception:
                _log.info("Final Unexpected error: %s", sys.exc_info()[0])
            _log.info("total dataset counts for each chunk is %d for tile %s", count, lat_lon)
    finally:
        # Close the output file even when a chunk raised
        nbar_outds.close()
    def go(self):
        """
        Write per-tile, per-band pixel statistics for the area of interest.

        The cells intersecting the vector (self.extract_cells_from_vector)
        are combined with the cells the database holds for the requested
        bounds, period, satellites and dataset type.  For each resulting
        cell the tiles of THAT cell are listed and, unless self.list_only is
        set, one CSV row per tile is written to the file returned by
        self.get_output_file().  A row holds the date, the instrument, the
        total and AOI pixel counts and, per band, the count of data pixels
        after each masking stage (no-data, PQA, WOFS, AOI) followed by the
        min, max and mean of the remaining pixels.

        Raises Exception when the requested satellites do not share the
        same band combination.
        """
        import numpy
        from datacube.api.query import list_cells_as_list, list_tiles_as_list
        from datacube.config import Config

        # Verify that all the requested satellites have the same band combinations

        dataset_bands = get_bands(self.dataset_type, self.satellites[0])

        _log.info("dataset bands is [%s]", " ".join([b.name for b in dataset_bands]))

        for satellite in self.satellites:
            if dataset_bands != get_bands(self.dataset_type, satellite):
                _log.error("Satellites [%s] have differing bands", " ".join([s.name for s in self.satellites]))
                raise Exception("Satellites with different band combinations selected")

        # self.bands holds 1-based band numbers; empty means "all bands"

        dataset_bands_list = list(dataset_bands)

        if not self.bands:
            bands = dataset_bands_list
        else:
            bands = [dataset_bands_list[b - 1] for b in self.bands]

        _log.info("Using bands [%s]", " ".join(band.name for band in bands))

        # NOTE(review): the unpack order (y_max before y_min) is kept as
        # found - confirm against extract_bounds_from_vector.
        x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
        _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

        cells_vector = self.extract_cells_from_vector()
        _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

        config = Config(os.path.expanduser("~/.datacube/config"))
        _log.debug(config.to_str())

        x_list = list(range(x_min, x_max + 1))
        y_list = list(range(y_min, y_max + 1))

        _log.debug("x = [%s] y=[%s]", x_list, y_list)

        satellites = list(self.satellites)

        cells_db = [(cell.x, cell.y)
                    for cell in list_cells_as_list(x=x_list, y=y_list,
                                                   acq_min=self.acq_min, acq_max=self.acq_max,
                                                   satellites=satellites,
                                                   dataset_types=[self.dataset_type])]

        _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

        cells = intersection(cells_vector, cells_db)
        _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

        for (x, y) in cells:
            _log.info("Processing cell [%3d/%4d]", x, y)

            # Only the tiles of this cell: the AOI mask computed below is
            # specific to cell (x, y).
            tiles = list_tiles_as_list(x=[x], y=[y], acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=satellites,
                                       dataset_types=[self.dataset_type])

            _log.info("There are [%d] tiles", len(tiles))

            if self.list_only:
                for tile in tiles:
                    _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
                continue

            # Calculate the mask for the cell

            mask_aoi = self.get_mask_aoi_cell(x, y)

            pixel_count = 4000 * 4000

            # Element-wise comparison: counts the pixels NOT masked by the AOI
            pixel_count_aoi = (mask_aoi == False).sum()

            _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

            with self.get_output_file() as csv_file:

                csv_writer = csv.writer(csv_file)

                header = ["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]

                for b in bands:
                    header.extend(["%s - # DATA PIXELS" % b.name,
                                   "%s - # DATA PIXELS AFTER PQA" % b.name,
                                   "%s - # DATA PIXELS AFTER PQA WOFS" % b.name,
                                   "%s - # DATA PIXELS AFTER PQA WOFS AOI" % b.name,
                                   "%s - MIN" % b.name, "%s - MAX" % b.name, "%s - MEAN" % b.name])

                csv_writer.writerow(header)

                for tile in tiles:

                    dataset = tile.datasets[self.dataset_type]

                    _log.info("Processing tile [%s]", dataset.path)

                    # Apply PQA if specified

                    mask_pqa = None

                    if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                        mask_pqa = get_mask_pqa(tile.datasets[DatasetType.PQ25], self.mask_pqa_mask)

                    _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                    # Apply WOFS if specified

                    mask_wofs = None

                    if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                        mask_wofs = get_mask_wofs(tile.datasets[DatasetType.WATER], self.mask_wofs_mask)

                    _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                    data = get_dataset_data(dataset, bands=bands)
                    _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                    band_columns = []
                    pixels_remaining_all_bands = 0

                    for band in bands:

                        # Stage 1: mask the no-data value

                        band_data = numpy.ma.masked_equal(data[band], NDV)
                        count_data = numpy.ma.count(band_data)
                        _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(band_data), count_data, band_data)

                        # Stage 2: PQA

                        if mask_pqa is not None:
                            band_data.mask = numpy.ma.mask_or(band_data.mask, mask_pqa)
                            _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(band_data), numpy.ma.count(band_data), band_data)

                        count_pqa = numpy.ma.count(band_data)

                        # Stage 3: WOFS

                        if mask_wofs is not None:
                            band_data.mask = numpy.ma.mask_or(band_data.mask, mask_wofs)
                            _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(band_data), numpy.ma.count(band_data), band_data)

                        count_pqa_wofs = numpy.ma.count(band_data)

                        # Stage 4: area of interest

                        band_data.mask = numpy.ma.mask_or(band_data.mask, mask_aoi)
                        count_pqa_wofs_aoi = numpy.ma.count(band_data)
                        _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(band_data), count_pqa_wofs_aoi, band_data)

                        band_min = numpy.ma.min(band_data)
                        band_max = numpy.ma.max(band_data)
                        band_mean = numpy.ma.mean(band_data)

                        # Convert the mean to an int - only possible when it
                        # is not the masked constant (i.e. some pixel survived)

                        if numpy.ma.count(band_mean) != 0:
                            band_mean = band_mean.astype(numpy.int16)

                        pixels_remaining_all_bands += count_pqa_wofs_aoi

                        band_columns.extend([count_data, count_pqa,
                                             count_pqa_wofs, count_pqa_wofs_aoi,
                                             band_min, band_max, band_mean])

                    # Should we output if no data values found?
                    if pixels_remaining_all_bands == 0 and not self.output_no_data:
                        _log.info("Skipping dataset with no non-masked data values in ANY band")
                        continue

                    row = [tile.end_datetime,
                           self.decode_satellite_as_instrument(dataset.satellite),
                           pixel_count, pixel_count_aoi] + band_columns

                    csv_writer.writerow(row)
Example #20
0
    def run(self):
        """
        Build the bare soil "best pixel" composites and write them out.

        For every tile from self.get_tiles() a mask is accumulated from the
        optional PQA and WOFS masks, an NDVI range mask (0.0 - 0.3) and a
        bare soil range mask (0 - 8000).  The element-wise maximum
        (numpy.fmax) of the masked bare soil band over all tiles is kept as
        the best pixel; the remaining FC bands, the NBAR bands, the
        satellite code and the acquisition date are carried along through
        propagate_using_selected_pixel.

        Four rasters are created: FC, NBAR, SAT and DATE.

        Raises Exception when self.get_tiles() yields no tile, because the
        output geo-referencing is taken from the input datasets.
        """
        shape = (4000, 4000)
        no_data_value = NDV

        best_pixel_fc = dict()

        for band in Fc25Bands:
            best_pixel_fc[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)

        best_pixel_nbar = dict()

        for band in Ls57Arg25Bands:
            best_pixel_nbar[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)

        best_pixel_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
        best_pixel_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

        # Scratch arrays refilled with the satellite code / date of each tile
        current_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
        current_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

        SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

        # FC bands that follow the bare soil selection
        other_fc_bands = [Fc25Bands.PHOTOSYNTHETIC_VEGETATION,
                          Fc25Bands.NON_PHOTOSYNTHETIC_VEGETATION,
                          Fc25Bands.UNMIXING_ERROR]

        metadata_nbar = None
        metadata_fc = None

        for tile in self.get_tiles():

            pqa = tile.datasets[DatasetType.PQ25]
            nbar = tile.datasets[DatasetType.ARG25]
            fc = tile.datasets[DatasetType.FC25]
            wofs = tile.datasets[DatasetType.WATER] if DatasetType.WATER in tile.datasets else None

            _log.info("Processing [%s]", fc.path)

            data = dict()

            # Create an initial "no mask" mask

            mask = numpy.ma.make_mask_none(shape)

            # Add the PQA mask if we are doing PQA masking

            if self.mask_pqa_apply:
                mask = get_mask_pqa(pqa, pqa_masks=self.mask_pqa_mask, mask=mask)

            # Add the WOFS mask if we are doing WOFS masking

            if self.mask_wofs_apply and wofs:
                mask = get_mask_wofs(wofs, wofs_masks=self.mask_wofs_mask, mask=mask)

            # Get NBAR dataset

            data[DatasetType.ARG25] = get_dataset_data_masked(nbar, mask=mask)

            # Get the NDVI dataset

            data[DatasetType.NDVI] = calculate_ndvi(data[DatasetType.ARG25][Ls57Arg25Bands.RED],
                                                    data[DatasetType.ARG25][Ls57Arg25Bands.NEAR_INFRARED])

            # Add the NDVI value range mask (to the existing mask)

            mask = self.get_mask_range(data[DatasetType.NDVI], min_val=0.0, max_val=0.3, mask=mask)

            # Get FC25 dataset

            data[DatasetType.FC25] = get_dataset_data_masked(fc, mask=mask)

            # Add the bare soil value range mask (to the existing mask)

            mask = self.get_mask_range(data[DatasetType.FC25][Fc25Bands.BARE_SOIL], min_val=0, max_val=8000, mask=mask)

            # Apply the final mask to the FC25 bare soil data

            data_bare_soil = numpy.ma.MaskedArray(data=data[DatasetType.FC25][Fc25Bands.BARE_SOIL], mask=mask).filled(NDV)

            # Compare the bare soil value from this dataset to the current "best" value

            best_pixel_fc[Fc25Bands.BARE_SOIL] = numpy.fmax(best_pixel_fc[Fc25Bands.BARE_SOIL], data_bare_soil)

            best_bare_soil = best_pixel_fc[Fc25Bands.BARE_SOIL]

            # Now update the other best pixel datasets/bands to grab the pixels we just selected

            for band in Ls57Arg25Bands:
                best_pixel_nbar[band] = propagate_using_selected_pixel(best_bare_soil,
                                                                       data_bare_soil,
                                                                       data[DatasetType.ARG25][band],
                                                                       best_pixel_nbar[band])

            for band in other_fc_bands:
                best_pixel_fc[band] = propagate_using_selected_pixel(best_bare_soil,
                                                                     data_bare_soil,
                                                                     data[DatasetType.FC25][band],
                                                                     best_pixel_fc[band])

            # And now the other "provenance" data

            # Satellite "provenance" data

            current_satellite.fill(SATELLITE_DATA_VALUES[fc.satellite])

            best_pixel_satellite = propagate_using_selected_pixel(best_bare_soil,
                                                                  data_bare_soil,
                                                                  current_satellite,
                                                                  best_pixel_satellite)

            # Date "provenance" data

            current_date.fill(date_to_integer(tile.end_datetime))

            best_pixel_date = propagate_using_selected_pixel(best_bare_soil,
                                                             data_bare_soil,
                                                             current_date,
                                                             best_pixel_date)

            # Grab the metadata from the input datasets for use later when creating the output datasets

            if metadata_nbar is None:
                metadata_nbar = get_dataset_metadata(nbar)

            if metadata_fc is None:
                metadata_fc = get_dataset_metadata(fc)

        # Without any tile there is no metadata to geo-reference the outputs

        if metadata_nbar is None or metadata_fc is None:
            _log.error("No tiles were processed - cannot create the composite outputs")
            raise Exception("No tiles were processed - cannot create the composite outputs")

        # Create the output datasets

        # FC composite

        raster_create(self.get_dataset_filename("FC"),
                      [best_pixel_fc[b] for b in Fc25Bands],
                      metadata_fc.transform, metadata_fc.projection,
                      metadata_fc.bands[Fc25Bands.BARE_SOIL].no_data_value,
                      metadata_fc.bands[Fc25Bands.BARE_SOIL].data_type)

        # NBAR composite

        raster_create(self.get_dataset_filename("NBAR"),
                      [best_pixel_nbar[b] for b in Ls57Arg25Bands],
                      metadata_nbar.transform, metadata_nbar.projection,
                      metadata_nbar.bands[Ls57Arg25Bands.BLUE].no_data_value,
                      metadata_nbar.bands[Ls57Arg25Bands.BLUE].data_type)

        # Satellite "provenance" composites

        raster_create(self.get_dataset_filename("SAT"),
                      [best_pixel_satellite],
                      metadata_nbar.transform, metadata_nbar.projection, no_data_value,
                      gdal.GDT_Int16)

        # Date "provenance" composites

        raster_create(self.get_dataset_filename("DATE"),
                      [best_pixel_date],
                      metadata_nbar.transform, metadata_nbar.projection, no_data_value,
                      gdal.GDT_Int32)
    def run(self):
        """
        Summarise a time series of datasets for one cell into a single raster.

        The cell is processed in chunks of ``chunk_size_x`` by ``chunk_size_y``
        pixels.  For each chunk every matching dataset is read (PQA masked when
        ``apply_pqa_filter`` is set), stacked per band, reduced along the time
        axis with ``summary_method`` and written into the output GeoTIFF at the
        chunk offset.

        When ``list_only`` is set the matching datasets are only logged; no
        data is read and no output is written or removed.

        Raises:
            Exception: if the output file exists and ``overwrite`` is not set,
                if no datasets match the query, or if ``summary_method`` is
                not one of the methods handled below.
        """
        import itertools

        def log_max_rss():
            _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

        self.parse_arguments()

        config = Config()
        _log.debug(config.to_str())

        path = self.get_output_filename(self.dataset_type)
        _log.info("Output file is [%s]", path)

        if os.path.exists(path):
            if not self.overwrite:
                _log.error("Output file [%s] exists", path)
                raise Exception("Output file [%s] already exists" % path)

            # A listing-only run must not destroy an existing product
            if not self.list_only:
                _log.info("Removing existing output file [%s]", path)
                os.remove(path)

        # TODO
        bands = get_bands(self.dataset_type, self.satellites[0])

        # TODO once WOFS is in the cube

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=list(self.satellites),
                                   dataset_types=[self.dataset_type],
                                   database=config.get_db_database(),
                                   user=config.get_db_username(),
                                   password=config.get_db_password(),
                                   host=config.get_db_host(), port=config.get_db_port())

        # Listing is done once, up front.  Previously it was repeated for every
        # chunk and then fell through to the summary code with an empty stack.
        if self.list_only:
            for tile in tiles:
                _log.info("Would summarise dataset [%s]", tile.datasets[self.dataset_type].path)
            return

        if not tiles:
            raise Exception("No datasets found to summarise for output file [%s]" % path)

        raster = None
        metadata = None

        # TODO - PQ is UNIT16 (others are INT16) and so -999 NDV doesn't work
        ndv = UINT16_MAX if self.dataset_type == DatasetType.PQ25 else NDV

        method = self.summary_method

        log_max_rss()

        for x, y in itertools.product(range(0, 4000, self.chunk_size_x), range(0, 4000, self.chunk_size_y)):

            _log.info("About to read data chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d})".format(xmin=x, ymin=y, xmax=x+self.chunk_size_x-1, ymax=y+self.chunk_size_y-1))
            log_max_rss()

            # band -> list of 2D chunk arrays, oldest acquisition first
            stack = dict((band, []) for band in bands)

            for tile in tiles:

                dataset = tile.datasets[self.dataset_type]

                _log.debug("Reading dataset [%s]", dataset.path)

                if not metadata:
                    metadata = get_dataset_metadata(dataset)

                # Apply PQA if specified

                if self.apply_pqa_filter:
                    data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25], bands=bands, x=x, y=y, x_size=self.chunk_size_x, y_size=self.chunk_size_y, pq_masks=self.pqa_mask, ndv=ndv)

                else:
                    data = get_dataset_data(dataset, bands=bands, x=x, y=y, x_size=self.chunk_size_x, y_size=self.chunk_size_y)

                for band in bands:
                    stack[band].append(data[band])

                    _log.debug("data[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(data[band]), data[band].nbytes/1000/1000)
                    _log.debug("stack[%s] has [%s] elements", band.name, len(stack[band]))

            # Apply summary method

            _log.info("Finished reading {count} datasets for chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d}) - about to summarise them".format(count=len(tiles), xmin=x, ymin=y, xmax=x+self.chunk_size_x-1, ymax=y+self.chunk_size_y-1))
            log_max_rss()

            for band in bands:
                masked = numpy.ma.masked_equal(stack[band], ndv)
                _log.debug("masked_stack[%s] is %s", band.name, masked)
                _log.debug("masked stack[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(masked), masked.nbytes/1000/1000)
                log_max_rss()

                if method == TimeSeriesSummaryMethod.MIN:
                    masked_summary = numpy.min(masked, axis=0)

                elif method == TimeSeriesSummaryMethod.MAX:
                    masked_summary = numpy.max(masked, axis=0)

                elif method == TimeSeriesSummaryMethod.MEAN:
                    masked_summary = numpy.mean(masked, axis=0)

                elif method == TimeSeriesSummaryMethod.MEDIAN:
                    # numpy.median is not mask aware; the masked variant
                    # ignores the no data observations
                    masked_summary = numpy.ma.median(masked, axis=0)

                elif method in (TimeSeriesSummaryMethod.MEDIAN_NON_INTERPOLATED,
                                TimeSeriesSummaryMethod.PERCENTILE):

                    # MEDIAN_NON_INTERPOLATED is the 50th percentile (it used to
                    # reuse the 0.95 of PERCENTILE); PERCENTILE is currently the
                    # 95th percentile
                    if method == TimeSeriesSummaryMethod.MEDIAN_NON_INTERPOLATED:
                        fraction = 0.5
                    else:
                        fraction = 0.95

                    # Sorting pushes the masked observations to the end, so the
                    # first count entries along axis 0 are the valid ones
                    masked_sorted = numpy.ma.sort(masked, axis=0)
                    valid_count = numpy.ma.count(masked_sorted, axis=0)
                    rank = numpy.floor(valid_count * fraction).astype(numpy.int16)

                    # Pick the rank-th observation per pixel by integer indexing.
                    # numpy.ma.choose cannot be used for long time series as it
                    # only accepts a limited number of choice arrays.
                    rows, cols = numpy.ogrid[:rank.shape[0], :rank.shape[1]]
                    masked_summary = masked_sorted[rank, rows, cols]

                    masked_sorted = None

                elif method == TimeSeriesSummaryMethod.COUNT:
                    # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                    masked_summary = numpy.ma.masked_equal(masked.count(axis=0), ndv)

                elif method == TimeSeriesSummaryMethod.SUM:
                    masked_summary = numpy.sum(masked, axis=0)

                elif method == TimeSeriesSummaryMethod.STANDARD_DEVIATION:
                    masked_summary = numpy.std(masked, axis=0)

                elif method == TimeSeriesSummaryMethod.VARIANCE:
                    masked_summary = numpy.var(masked, axis=0)

                elif method in (TimeSeriesSummaryMethod.YOUNGEST_PIXEL,
                                TimeSeriesSummaryMethod.OLDEST_PIXEL):

                    # TODO the fact that this is band at a time might be problematic.  We really should be considering
                    # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                    # it's probably all good but if not then we might get odd results....

                    # The stack is created oldest first, so walk it backwards
                    # for the youngest pixel and forwards for the oldest
                    if method == TimeSeriesSummaryMethod.YOUNGEST_PIXEL:
                        layers = reversed(stack[band])
                    else:
                        layers = iter(stack[band])

                    # Size the accumulator from the data actually read rather
                    # than from the chunk settings, so non-square chunks work
                    best = empty_array(shape=numpy.shape(stack[band][0]), dtype=numpy.int16, ndv=ndv)

                    for layer in layers:
                        best = numpy.where(best == ndv, layer, best)

                        # If the summary doesn't contain any no data values then we can stop
                        if not numpy.any(best == ndv):
                            break

                    # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                    masked_summary = numpy.ma.masked_equal(best, ndv)

                else:
                    raise Exception("Unsupported summary method [%s]" % method)

                # Drop the reference so the stack can be reclaimed before writing
                masked = None
                _log.debug("NONE-ing masked stack[%s]", band.name)
                log_max_rss()

                _log.debug("masked summary is [%s]", masked_summary)
                log_max_rss()

                # Create the output file (once, on the first band of the first chunk)

                if raster is None:
                    _log.info("Creating raster [%s]", path)

                    driver = gdal.GetDriverByName("GTiff")
                    assert driver

                    raster = driver.Create(path, metadata.shape[0], metadata.shape[1], len(bands), gdal.GDT_Int16)
                    assert raster

                    raster.SetGeoTransform(metadata.transform)
                    raster.SetProjection(metadata.projection)

                    for b in bands:
                        raster.GetRasterBand(b.value).SetNoDataValue(ndv)

                _log.info("Writing band [%s] data to raster [%s]", band.name, path)
                log_max_rss()

                raster.GetRasterBand(band.value).WriteArray(masked_summary.filled(ndv), xoff=x, yoff=y)
                raster.GetRasterBand(band.value).ComputeStatistics(True)

                raster.FlushCache()

                masked_summary = None
                _log.debug("NONE-ing the masked summary")
                log_max_rss()

            stack = None
            _log.debug("Just NONE-ed the stack")
            log_max_rss()

        # Dropping the last reference lets GDAL close the dataset
        raster = None

        _log.debug("Just NONE'd the raster")
        log_max_rss()

        _log.info("Memory usage was [%d MB]", resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)
        _log.info("CPU time used [%s]", timedelta(seconds=int(resource.getrusage(resource.RUSAGE_SELF).ru_utime)))
Example #22
0
    def doit(self):
        """
        Build the bare soil "best pixel" composites for a cell and write them.

        For every tile the FC bare soil band is masked (-999 values, values
        outside 0 to 8000, the PQ mask and NDVI outside 0 to 0.3) and merged
        into the running per-pixel maximum.  The NBAR bands, the other FC bands
        and the provenance layers (satellite, year, month, epoch) are then
        updated through propagate_using_selected_pixel.  Six rasters are
        written at the end: FC, NBAR, SAT, YEAR, MONTH and EPOCH.
        """
        _log.debug("Bare Soil Cell Task - doit()")

        cell_shape = (4000, 4000)
        no_data_value = NDV

        # Running "best" layers: FC starts at INT16_MIN, NBAR at NDV
        best_pixel_fc = {}
        for band in Fc25Bands:
            best_pixel_fc[band] = empty_array(shape=cell_shape, dtype=numpy.int16, ndv=INT16_MIN)

        best_pixel_nbar = {}
        for band in Ls57Arg25Bands:
            best_pixel_nbar[band] = empty_array(shape=cell_shape, dtype=numpy.int16, ndv=NDV)

        # Provenance layers: (output name, array dtype, raster data type)
        provenance_layers = [("SAT", numpy.int16, gdal.GDT_Int16),
                             ("YEAR", numpy.int16, gdal.GDT_Int16),
                             ("MONTH", numpy.int16, gdal.GDT_Int16),
                             ("EPOCH", numpy.int32, gdal.GDT_Int32)]

        best_provenance = {}
        current_provenance = {}

        for name, dtype, _ in provenance_layers:
            best_provenance[name] = empty_array(shape=cell_shape, dtype=dtype, ndv=NDV)

        for name, dtype, _ in provenance_layers:
            current_provenance[name] = empty_array(shape=cell_shape, dtype=dtype, ndv=NDV)

        satellite_codes = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

        metadata_nbar = None
        metadata_fc = None

        for tile in self.get_tiles():

            # PQ mask

            pq_dataset = tile.datasets[DatasetType.PQ25]
            pq_values = get_dataset_data(pq_dataset, [Pq25Bands.PQ])[Pq25Bands.PQ]
            pq_mask = get_pq_mask(pq_values)

            # NBAR dataset

            nbar_dataset = tile.datasets[DatasetType.ARG25]
            _log.info("Processing NBAR tile [%s]", nbar_dataset.path)

            if not metadata_nbar:
                metadata_nbar = get_dataset_metadata(nbar_dataset)

            nbar_values = get_dataset_data_with_pq(nbar_dataset, Ls57Arg25Bands, tile.datasets[DatasetType.PQ25])

            # NDVI mask - everything that is no data or outside 0 to 0.3

            ndvi = calculate_ndvi(nbar_values[Ls57Arg25Bands.RED],
                                  nbar_values[Ls57Arg25Bands.NEAR_INFRARED])
            ndvi = numpy.ma.masked_equal(ndvi, NDV)
            ndvi = numpy.ma.masked_outside(ndvi, 0, 0.3, copy=False)
            ndvi_mask = ndvi.mask

            # FC25 dataset

            fc_dataset = tile.datasets[DatasetType.FC25]
            _log.info("Processing FC tile [%s]", fc_dataset.path)

            if not metadata_fc:
                metadata_fc = get_dataset_metadata(fc_dataset)

            _log.debug("metadata fc is %s", metadata_fc)

            fc_values = get_dataset_data(fc_dataset, Fc25Bands)

            bare_soil = numpy.ma.masked_equal(fc_values[Fc25Bands.BS], -999)
            bare_soil = numpy.ma.masked_outside(bare_soil, 0, 8000)
            bare_soil.mask = (bare_soil.mask | pq_mask | ndvi_mask)
            bare_soil = bare_soil.filled(NDV)

            # Merge this tile's bare soil into the running per-pixel maximum
            best_pixel_fc[Fc25Bands.BS] = numpy.fmax(best_pixel_fc[Fc25Bands.BS], bare_soil)
            selected = best_pixel_fc[Fc25Bands.BS]

            # Update the companion layers for the pixels that were just selected

            for band in Ls57Arg25Bands:
                best_pixel_nbar[band] = propagate_using_selected_pixel(selected, bare_soil,
                                                                       nbar_values[band],
                                                                       best_pixel_nbar[band])

            for band in [Fc25Bands.PV, Fc25Bands.NPV, Fc25Bands.ERROR]:
                best_pixel_fc[band] = propagate_using_selected_pixel(selected, bare_soil,
                                                                     fc_values[band],
                                                                     best_pixel_fc[band])

            # And the "provenance" layers - each is a constant for the whole tile

            tile_provenance = [("SAT", satellite_codes[fc_dataset.satellite]),
                               ("YEAR", tile.end_datetime_year),
                               ("MONTH", tile.end_datetime_month),
                               ("EPOCH", calendar.timegm(tile.end_datetime.timetuple()))]

            for name, value in tile_provenance:
                current_provenance[name].fill(value)

                best_provenance[name] = propagate_using_selected_pixel(selected, bare_soil,
                                                                       current_provenance[name],
                                                                       best_provenance[name])

        # Write the outputs: FC composite first

        raster_create(self.get_dataset_filename("FC"),
                      [best_pixel_fc[b] for b in Fc25Bands],
                      metadata_fc.transform, metadata_fc.projection, metadata_fc.bands[Fc25Bands.BS].no_data_value,
                      metadata_fc.bands[Fc25Bands.BS].data_type)

        # then the NBAR composite

        raster_create(self.get_dataset_filename("NBAR"),
                      [best_pixel_nbar[b] for b in Ls57Arg25Bands],
                      metadata_nbar.transform, metadata_nbar.projection,
                      metadata_nbar.bands[Ls57Arg25Bands.BLUE].no_data_value,
                      metadata_nbar.bands[Ls57Arg25Bands.BLUE].data_type)

        # and finally one single band raster per "provenance" layer

        for name, _, raster_type in provenance_layers:
            raster_create(self.get_dataset_filename(name),
                          [best_provenance[name]],
                          metadata_nbar.transform, metadata_nbar.projection, no_data_value,
                          raster_type)
Example #23
0
def obtain_cloudfree_mosaic(x,y,start,end, bands, satellite,iterations=0,xsize=4000,ysize=4000,file_format="GTiff",data_type=gdal.GDT_CInt16,months=None):
    """
    Merge the tiles of one cell into a single mosaic raster and cache its path.

    Tiles are requested in ascending order.  The first usable tile seeds the
    mosaic; for every later tile only the pixels that are still -999 in the
    mosaic are overwritten with that tile's values.  Tiles are masked with the
    PQ "clear" mask when a PQ dataset is present.  One statistics line per
    merged tile is written to a CSV file next to the output raster.

    Args:
        x, y: cell indices passed to list_tiles.
        start, end: acquisition date range passed to list_tiles.
        bands: bands to merge; also defines the band order of the output.
        satellite: iterable of satellites passed to list_tiles.
        iterations: when greater than 0, stop once more than this many tiles
            have been merged.
        xsize, ysize: only used as part of the cache key.
        file_format: GDAL driver name for the output.
        data_type: GDAL data type of the output raster.
            NOTE(review): the default is the complex GDT_CInt16 - confirm that
            GDT_Int16 was not intended.
        months: optional collection of month numbers; tiles whose start month
            is not in it are skipped.

    Returns:
        The path of the mosaic file as a string, or the string "None" when no
        mosaic could be produced.
    """
    # Nothing can be merged or written without at least one band
    if not bands:
        return "None"

    best_data = {}
    band_str = "+".join([band.name for band in bands])
    sat_str = "+".join([sat.name for sat in satellite])
    cache_id = [str(x),str(y),str(start),str(end),band_str,sat_str,str(xsize),str(ysize),file_format,str(iterations)]
    c_name = "_".join(cache_id).replace(" ","_")
    cached_res = cache.get(c_name)
    if cached_res:
        return str(cached_res)
    f_name = os.path.join("/tilestore/tile_cache",c_name)
    tiles = list_tiles(x=[x], y=[y],acq_min=start,acq_max=end,satellites=satellite,dataset_types=[DatasetType.ARG25,DatasetType.PQ25], sort=SortType.ASC)
    tile_metadata = None
    tile_count = 0
    total_ins = 0

    # The context manager closes the statistics file on every exit path;
    # previously it stayed open whenever the function returned "None" early.
    with open(f_name+'.csv','w+') as stats_file:
        for tile in tiles:
            if months:
                print(tile.start_datetime.month)
                if tile.start_datetime.month not in months:
                    continue
            dataset = tile.datasets[DatasetType.ARG25] if DatasetType.ARG25 in tile.datasets else None
            if not dataset:
                print("No dataset availible")
                continue
            metadata = get_dataset_metadata(dataset)
            if metadata is None:
                print("NO METADATA")
                continue
            # Only remember metadata that is usable, so a bad trailing tile
            # cannot wipe out what is needed to georeference the output
            tile_metadata = metadata
            tile_count+=1

            pqa = tile.datasets[DatasetType.PQ25] if DatasetType.PQ25 in tile.datasets else None
            mask = None
            if pqa:
                mask = get_mask_pqa(pqa,[PqaMask.PQ_MASK_CLEAR],mask=mask)
            band_data = get_dataset_data_masked(dataset, mask=mask,bands=bands)

            swap_arr = None
            best = None
            good_ins = 0

            for band in band_data:
                if band not in best_data:
                    best_data[band]=band_data[band]
                    best = numpy.array(best_data[band])
                    swap_arr = (best == -999)
                    # NOTE(review): swap_arr selects exactly the -999 pixels, so
                    # this count is always 0 for the seeding tile - confirm
                    # whether the number of valid pixels was intended instead
                    good_ins = int(numpy.count_nonzero(best[swap_arr]!=-999))
                else:
                    best = numpy.array(best_data[band])
                    # Pixels still missing in the mosaic are taken from this tile
                    swap_arr = (best == -999)
                    b_data = numpy.array(band_data[band])
                    best[swap_arr]=b_data[swap_arr]
                    best_data[band]=best
                    good_ins = int(numpy.count_nonzero(b_data[swap_arr]!=-999))
                    del b_data

            # The statistics describe the last band that was merged
            filled = len(best[swap_arr]) if best is not None else 0
            total_ins+=good_ins
            stats_file.write(str(tile.x)+','+str(tile.y)+','+str(tile.start_datetime.year)+','+str(tile.start_datetime.month)+','+str(filled)+','+str(good_ins)+','+str(total_ins)+','+str(tile.dataset)+"\n")
            del swap_arr
            del best

            if iterations > 0 and tile_count>iterations:
                print("Exiting after "+str(iterations)+" iterations")
                break

    if bands[0] not in best_data:
        print("No data was merged for "+str(x)+", "+str(y))
        return "None"
    # The output is square, sized by the larger of the two array dimensions
    first_dim=len(best_data[bands[0]])
    print(first_dim)
    second_dim=len(best_data[bands[0]][0])
    print(second_dim)
    pixels = max(first_dim, second_dim)
    if tile_count <1:
        print("No tiles found for "+str(x)+", "+str(y))
        return "None"
    driver = gdal.GetDriverByName(file_format)
    if driver is None:
        print("No driver found for "+file_format)
        return "None"
    raster = driver.Create(f_name+'.tif', pixels, pixels, len(bands), data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
    if raster is None:
        print("Could not create "+f_name+'.tif')
        return "None"
    raster.SetGeoTransform(tile_metadata.transform)
    raster.SetProjection(tile_metadata.projection)
    for index, band in enumerate(bands, 1):
        stack_band = raster.GetRasterBand(index)
        stack_band.SetNoDataValue(-999)
        stack_band.WriteArray(best_data[band])
        stack_band.ComputeStatistics(True)
        stack_band.FlushCache()
        del stack_band
    raster.FlushCache()
    # Dropping the reference lets GDAL close the file before it is published
    del raster
    cache.set(c_name,f_name+".tif")
    return f_name+".tif"
    def run(self):
        """
        Stack one band from every applicable tile into a multi-band raster.

        Tiles that have a ``dataset_type`` dataset containing ``self.band`` are
        written, in the order returned by ``get_tiles``, as consecutive bands
        of the output raster.  Each band is optionally masked with PQA and/or
        WOFS and is tagged with its acquisition date and satellite.

        Geo-referencing, data type and no data value are taken from the first
        applicable dataset.  Nothing is written when no tile is applicable.
        """

        # TODO move the dicking around with bands stuff into utils?

        import gdal

        raster = None
        ndv = None

        tiles = self.get_tiles()
        _log.info("Total tiles found [%d]", len(tiles))

        _log.info("Creating stack for band [%s]", self.band)

        relevant_tiles = []

        for tile in tiles:

            dataset = tile.datasets[self.dataset_type] if self.dataset_type in tile.datasets else None

            if not dataset:
                _log.info("No applicable [%s] dataset for [%s]",
                          self.dataset_type.name, tile.end_datetime)
                continue

            if any(b.name == self.band for b in dataset.bands):
                relevant_tiles.append(tile)

        _log.info("Total tiles for band [%s] is [%d]", self.band,
                  len(relevant_tiles))

        for index, tile in enumerate(relevant_tiles, start=1):

            # Every relevant tile is known to carry this dataset type
            dataset = tile.datasets[self.dataset_type]
            assert dataset

            band = dataset.bands[self.band]
            assert band

            pqa = None
            if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                pqa = tile.datasets[DatasetType.PQ25]

            wofs = None
            if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                wofs = tile.datasets[DatasetType.WATER]

            filename = self.output().path

            # The output raster is created from the first relevant dataset
            if raster is None:

                metadata = get_dataset_metadata(dataset)
                assert metadata

                data_type = get_dataset_datatype(dataset)
                assert data_type

                # 0 is a legitimate no data value so only reject a missing one
                ndv = get_dataset_ndv(dataset)
                assert ndv is not None

                driver = None
                options = None

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")
                    options = ["BIGTIFF=YES", "INTERLEAVE=BAND"]
                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")
                    options = ["INTERLEAVE=BSQ"]

                assert driver

                # One output band per tile that is actually stacked.  Sizing by
                # the unfiltered tile list left trailing bands that were never
                # written.
                raster = driver.Create(filename,
                                       metadata.shape[0],
                                       metadata.shape[1],
                                       len(relevant_tiles),
                                       data_type,
                                       options=options)

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

                # Raster level metadata only needs to be set once
                raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            if wofs:
                mask = get_mask_wofs(wofs, self.mask_wofs_mask, mask=mask)

            _log.info(
                "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] and WOFS [%s] and WOFS mask [%s] to [%s]",
                band.name, dataset.path, pqa and pqa.path or "",
                pqa and self.mask_pqa_mask or "", wofs and wofs.path or "",
                wofs and self.mask_wofs_mask or "", filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({
                "ACQ_DATE": format_date(tile.end_datetime),
                "SATELLITE": dataset.satellite.name
            })

            stack_band.FlushCache()
            del stack_band

        if raster is not None:
            raster.FlushCache()
            # Dropping the last reference lets GDAL close the dataset
            raster = None
Example #25
0
def bs_workflow(tiles, percentile=90, xtile=None, ytile=None,
                out_fnames=None):
    """
    Build bare-soil percentile "best pixel" mosaics from a tile time series.

    For each spatial chunk the FC bare-soil band of every time slice is
    stacked (masked pixels become NaN), the requested percentile is taken
    along the time axis, and every pixel whose bare-soil value in a slice
    equals that percentile contributes that slice's NBAR, FC and provenance
    values (satellite code, year, month-day, running observation count) to
    the output mosaics.

    :param tiles: sequence of tiles; each exposes ``datasets`` (keyed by
        ``DatasetType``) and ``end_datetime``.  The FC25 dataset of the
        first tile supplies the image shape and georeferencing.
    :param percentile: percentile handed to ``numpy.nanpercentile``.
    :param xtile: accepted for interface compatibility only; chunks are
        always generated at full image width.
    :param ytile: chunk height in lines; defaults to the full image height.
    :param out_fnames: sequence whose first entry is the NBAR mosaic path
        and second entry the combined ("all") mosaic path.  When ``None``
        the names ``nbar_best_pixel`` and ``all_best_pixel`` are used.
    :return: ``None``.  Returns early (after logging) when no metadata can
        be read for the first tile.
    """
    # Get some basic image info
    ds_type = DatasetType.FC25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    if md is None:
        _log.info("Tile path not exists %s",dataset.path)
        return
    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # The parent directory of the first output path labels the log messages.
    # out_fnames may be None (the default), so it must not be iterated
    # unconditionally.
    lat_lon = ""
    if out_fnames:
        path_parts = out_fnames[0].split("/")
        if len(path_parts) > 1:
            lat_lon = path_parts[-2]

    # Initialise the tiling scheme for processing.
    # NOTE(review): chunks always span the full width; ``xtile`` is not
    # consulted here - confirm whether that is intended.
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan) # for the FC dtype no need for float64

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
        all_outfname = 'all_best_pixel'
    else:
        nbar_outfname = out_fnames[0]
        all_outfname = out_fnames[1]

    nbar_outnb = len(Ls57Arg25Bands)
    all_outnb = len(BareSoil)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")
    all_outds = TiledOutput(all_outfname, samples=samples, lines=lines,
                            bands=all_outnb, dtype=out_dtype,
                            nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    fc_bands_subset = [Fc25Bands.PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.NON_PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.UNMIXING_ERROR]

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)
        plane = (ysize, xsize)

        # Initialise the intermediate (time, y, x) stacks
        stack_bare_soil = numpy.zeros(dims, dtype='float32')
        stack_sat = numpy.zeros(dims, dtype='int16')
        stack_year = numpy.zeros(dims, dtype='int16')
        stack_md = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')

        # Initialise the best_pixel (y, x) output planes.
        # NOTE: the year and month-day planes start at zero rather than
        # no-data.
        best_pixel_satellite = numpy.zeros(plane, dtype='int16')
        best_pixel_year = numpy.zeros(plane, dtype='int16')
        best_pixel_md = numpy.zeros(plane, dtype='int16')
        best_pixel_count = numpy.zeros(plane, dtype='int16')
        best_pixel_satellite.fill(no_data_value)
        best_pixel_count.fill(no_data_value)

        stack_nbar = {}
        best_pixel_nbar = {}
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            best_pixel_nbar[band] = numpy.zeros(plane, dtype='int16')
            best_pixel_nbar[band].fill(no_data_value)

        stack_fc = {}
        best_pixel_fc = {}
        for band in fc_bands_subset:
            stack_fc[band] = numpy.zeros(dims, dtype='int16')
            best_pixel_fc[band] = numpy.zeros(plane, dtype='int16')
            best_pixel_fc[band].fill(no_data_value)

        for idx, ds in enumerate(tiles):

            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            fc = ds.datasets[DatasetType.FC25]
            try:
                wofs = ds.datasets[DatasetType.WATER]
            except KeyError:
                _log.info("Missing water for:\n %s", ds.end_datetime)
                wofs = None

            # TODO update to use the api's version of extract_pq
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # WOfS
            if wofs is not None:
                mask = get_mask_wofs(wofs, x=xs, y=ys, x_size=xsize,
                                     y_size=ysize, mask=mask)

            # NBAR
            nbar_data = get_dataset_data(nbar, x=xs, y=ys,
                                         x_size=xsize, y_size=ysize)

            # FC
            fc_data = get_dataset_data(fc, x=xs, y=ys,
                                       x_size=xsize, y_size=ysize)
            bare_soil = fc_data[Fc25Bands.BARE_SOIL]

            # errcnt: 0 = ok, 1 = NBAR problem, 2 = FC problem
            errcnt = 0

            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[fc.satellite] == 8:
                # LS8 data is keyed by its own band enum; store each band
                # under the LS5/7 band that carries the same name.
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                nbar_data[oband][mask] = no_data_value
                                stack_nbar[band][idx] = nbar_data[oband]
                                break
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt = 1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS8")
                        except Exception:
                            errcnt = 1
                            _log.info("Unexpected error for LS8: %s",sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        nbar_data[band][mask] = no_data_value
                        stack_nbar[band][idx] = nbar_data[band]
                    except ValueError:
                        errcnt = 1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt = 1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt = 1
                        _log.info("Key error LS57")
                    except Exception:
                        errcnt = 1
                        _log.info("Unexpected error LS57: %s",sys.exc_info()[0])

            for band in fc_bands_subset:
                try:
                    fc_data[band][mask] = no_data_value
                    stack_fc[band][idx] = fc_data[band]
                except ValueError:
                    errcnt = 2
                    _log.info("FC Data converting error")
                except IOError:
                    errcnt = 2
                    _log.info("FC reading error LS57")
                except KeyError:
                    errcnt = 2
                    _log.info("FC Key error")
                except Exception:
                    errcnt = 2
                    _log.info("FC Unexpected error: %s",sys.exc_info()[0])

            if errcnt != 0:
                if errcnt == 1:
                    _log.info("nbar tile has problem  %s",nbar.path)
                else:
                    _log.info("fc tile has problem  %s",fc.path)
                # Exclude the skipped slice from the percentile; otherwise
                # its zero-initialised bare-soil plane would be treated as
                # real observations.
                stack_bare_soil[idx] = nan
                continue

            # Add bare soil, satellite and date to the 3D arrays
            try:
                stack_bare_soil[idx] = bare_soil
                stack_bare_soil[idx][mask] = nan
                stack_sat[idx][:] = satellite_code[fc.satellite]
                dtime = int(ds.end_datetime.strftime('%Y'))
                stack_year[idx][:] = dtime
                mtime = int(ds.end_datetime.strftime('%m%d'))
                stack_md[idx][:] = mtime
                count = count + 1
                # A slice with no bare-soil value >= 1 does not count as an
                # observation.
                count1 = int(numpy.ma.count(numpy.ma.masked_less(bare_soil, 1)))
                if count1 < 1:
                    _log.info("no data present on %d and year %d for tile %s reducing count by one", mtime, dtime, lat_lon )
                    count = count - 1
                stack_count[idx][:] = count
            except Exception:
                _log.info("stacking - Unexpected error: %s",sys.exc_info()[0])

        # Calculate the percentile along the time axis
        pct_fc = numpy.nanpercentile(stack_bare_soil, percentile,
                                     axis=0, interpolation='nearest')

        # Loop over each time slice and generate a mosaic for each dataset_type
        try:
            for idx in range(time_slices):
                # Pixels whose bare soil in this slice is the percentile value
                pct_idx = pct_fc == stack_bare_soil[idx]
                for band in Ls57Arg25Bands:
                    band_data = stack_nbar[band]
                    best_pixel_nbar[band][pct_idx] = band_data[idx][pct_idx]
                for band in fc_bands_subset:
                    band_data = stack_fc[band]
                    best_pixel_fc[band][pct_idx] = band_data[idx][pct_idx]

                best_pixel_satellite[pct_idx] = stack_sat[idx][pct_idx]
                best_pixel_year[pct_idx] = stack_year[idx][pct_idx]
                best_pixel_md[pct_idx] = stack_md[idx][pct_idx]
                best_pixel_count[pct_idx] = stack_count[idx][pct_idx]

            # Output the current spatial chunk for each dataset
            for band in Ls57Arg25Bands:
                bn = band.value
                band_data = best_pixel_nbar[band]
                nbar_outds.write_tile(band_data, chunk, raster_band=bn)

            # The combined product is laid out by BareSoil band number:
            # below 5 the percentile/FC bands, 5-10 the NBAR bands matched
            # by name, then satellite, year, month-day and count.
            for band in BareSoil:
                bn = band.value
                if bn < 5:
                    if bn == 1:
                        all_outds.write_tile(pct_fc, chunk,raster_band=BareSoil.BARE_SOIL.value)
                    for oband in fc_bands_subset:
                        if oband.name == band.name:
                            band_data = best_pixel_fc[oband]
                            all_outds.write_tile(band_data, chunk, raster_band=bn)
                            break
                elif bn < 11:
                    for oband in Ls57Arg25Bands:
                        if oband.name == band.name:
                            band_data = best_pixel_nbar[oband]
                            all_outds.write_tile(band_data, chunk, raster_band=bn)
                            break
                elif bn == 11:
                    all_outds.write_tile(best_pixel_satellite, chunk, raster_band=bn)
                elif bn == 12:
                    all_outds.write_tile(best_pixel_year, chunk, raster_band=bn)
                elif bn == 13:
                    all_outds.write_tile(best_pixel_md, chunk, raster_band=bn)
                elif bn == 14:
                    all_outds.write_tile(best_pixel_count, chunk, raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except Exception:
            _log.info("Final Unexpected error: %s",sys.exc_info()[0])
        _log.info("total dataset counts for each chunk is %d for tile %s", count, lat_lon)

    # Close the output files
    nbar_outds.close()
    all_outds.close()
Example #26
0
def classifier(arg25_dataset, pq25_dataset):
    """
    Classify a PQ-masked ARG25 dataset from tasselled-cap wetness and NDVI.

    Class codes written to the returned ``uint8`` array:
    0 = null (NDVI not finite), 1 = water, 2 = non-veg, 3 = saltmarsh,
    4 = mangrove/saltmarsh, 5 = mangrove.

    :param arg25_dataset: dataset providing ``satellite`` and ``bands``;
        its pixel data is read through ``get_dataset_data_with_pq``.
    :param pq25_dataset: pixel-quality dataset used to mask the read.
    :return: ``numpy`` array of shape ``(rows, cols)`` holding class codes.
    """
    # Image dimensions come from the dataset metadata
    metadata = get_dataset_metadata(arg25_dataset)
    cols, rows = metadata.shape

    # Read the reflectance data with PQ masking applied
    data = get_dataset_data_with_pq(arg25_dataset, pq25_dataset)

    # Tasselled-cap wetness for this satellite
    wetness = calculate_tassel_cap_index(
        data,
        TCI_COEFFICIENTS[arg25_dataset.satellite][TasselCapIndex.WETNESS])

    # NDVI, with NaN as its no-data marker
    ndvi = calculate_ndvi(data[arg25_dataset.bands.RED],
                          data[arg25_dataset.bands.NEAR_INFRARED],
                          output_ndv=numpy.nan)

    # Only wetness and NDVI are needed from here on
    del data

    classified = numpy.zeros((rows, cols), dtype='uint8')

    def grade_by_ndvi(region):
        # NDVI ladder shared by both wetness branches:
        # <= 0.3 non-veg, <= 0.45 saltmarsh, <= 0.6 mangrove/saltmarsh,
        # anything else mangrove.
        sparse = ndvi <= 0.3
        classified[region & sparse] = 2
        remaining = region & ~sparse

        moderate = ndvi <= 0.45
        classified[remaining & moderate] = 3
        remaining = remaining & ~moderate

        dense = ndvi <= 0.6
        classified[remaining & dense] = 4
        classified[remaining & ~dense] = 5

    # Water: positive wetness
    is_water = wetness > 0
    classified[is_water] = 1
    not_water = ~is_water

    # Wetness in [-250, 0): graded purely by NDVI
    slightly_dry = (wetness >= -250) & (wetness < 0)
    grade_by_ndvi(not_water & slightly_dry)

    # Everything else that is not water
    drier = not_water & ~slightly_dry
    very_dry = wetness < -750

    # Very dry pixels: saltmarsh when NDVI >= 0.3, otherwise non-veg
    vegetated = ndvi >= 0.3
    very_dry_region = drier & very_dry
    classified[very_dry_region & vegetated] = 3
    classified[very_dry_region & ~vegetated] = 2

    # The remainder uses the same NDVI ladder
    grade_by_ndvi(drier & ~very_dry)

    # Null out pixels without a finite NDVI
    classified[~numpy.isfinite(ndvi)] = 0

    return classified
def tidal_workflow(tiles,
                   percentile=10,
                   xtile=None,
                   ytile=None,
                   low_off=0,
                   high_off=0,
                   out_fnames=None):
    """
    Write per-chunk observation counts and low/high offsets to a raster.

    For each spatial chunk every time slice's NBAR data is read, PQ-masked
    and stacked.  Each slice records the running number of slices that
    contained valid data together with ``low_off``/``high_off`` scaled by
    100 and truncated to ``int``.  The planes of the final time slice are
    then written to the output, one per ``TidalProd`` band (1 = count,
    2 = low offset, 3 = high offset).

    :param tiles: sequence of tiles; each exposes ``datasets`` (keyed by
        ``DatasetType``) and ``end_datetime``.  The ARG25 dataset of the
        first tile supplies the image shape and georeferencing.
    :param percentile: accepted for interface compatibility; not used.
    :param xtile: accepted for interface compatibility only; chunks are
        always generated at full image width.
    :param ytile: chunk height in lines; defaults to the full image height.
    :param low_off: low offset; anything ``float()`` accepts.
    :param high_off: high offset; anything ``float()`` accepts.
    :param out_fnames: sequence whose first entry is the output path.  When
        ``None`` the name ``nbar_best_pixel`` is used.
    :return: ``None``.  Returns early (after logging) when no metadata can
        be read for the first tile.
    """
    # Get some basic image info
    ds_type = DatasetType.ARG25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return
    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # The parent directory of the first output path labels the log messages.
    # out_fnames may be None (the default), so it must not be iterated
    # unconditionally.
    lat_lon = ""
    if out_fnames:
        path_parts = out_fnames[0].split("/")
        if len(path_parts) > 1:
            lat_lon = path_parts[-2]

    # Initialise the tiling scheme for processing.
    # NOTE(review): chunks always span the full width; ``xtile`` is not
    # consulted here - confirm whether that is intended.
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples,
                            lines,
                            xtile=samples,
                            ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV

    # Define the output file
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    # NOTE(review): the band count comes from ``extraInfo`` while the
    # bands written below are enumerated from ``TidalProd`` - confirm the
    # two agree.
    nbar_outnb = len(extraInfo)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname,
                             samples=samples,
                             lines=lines,
                             bands=nbar_outnb,
                             dtype=out_dtype,
                             nodata=no_data_value,
                             geobox=geobox,
                             fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        # Initialise the intermediate (time, y, x) stacks
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')
        stack_nbar = {}
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')

        for idx, ds in enumerate(tiles):

            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # NBAR
            nbar_data = get_dataset_data(nbar,
                                         x=xs,
                                         y=ys,
                                         x_size=xsize,
                                         y_size=ysize)
            errcnt = 0
            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[nbar.satellite] == 8:
                # LS8 data is keyed by its own band enum; store each band
                # under the LS5/7 band that carries the same name.
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                nbar_data[oband][mask] = no_data_value
                                stack_nbar[band][idx] = nbar_data[oband]
                                break
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt = 1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS8")
                        except Exception:
                            errcnt = 1
                            _log.info("Unexpected error for LS8: %s",
                                      sys.exc_info()[0])

            else:
                for band in Ls57Arg25Bands:
                    try:
                        nbar_data[band][mask] = no_data_value
                        stack_nbar[band][idx] = nbar_data[band]
                    except ValueError:
                        errcnt = 1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt = 1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt = 1
                        _log.info("Key error LS57")
                    except Exception:
                        errcnt = 1
                        _log.info("Unexpected error LS57: %s",
                                  sys.exc_info()[0])

            if errcnt != 0:
                _log.info("nbar tile has problem  %s", nbar.path)
                continue

            # Record the offsets and the running observation count
            try:
                low = int(float(low_off) * 100)
                high = int(float(high_off) * 100)
                stack_lowOff[idx][:] = low
                stack_highOff[idx][:] = high

                # Count the pixels of THIS slice that hold a value >= 1 in
                # any band; a slice without such pixels is not counted as
                # an observation.
                valid_pixels = sum(
                    int(numpy.ma.count(
                        numpy.ma.masked_less(stack_nbar[band][idx], 1)))
                    for band in Ls57Arg25Bands)
                if valid_pixels < 1:
                    dtime = int(ds.end_datetime.strftime('%Y'))
                    mtime = int(ds.end_datetime.strftime('%m%d'))
                    _log.info(
                        "no data present on %d and year %d for tile %s reducing count by one",
                        mtime, dtime, lat_lon)
                else:
                    count = count + 1
                stack_count[idx][:] = count

            except Exception:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])

        # Write the planes of the final time slice for this chunk
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            # time_slices >= 1 here because tiles[0] was read above
            last = time_slices - 1
            final_count = stack_count[last]
            final_lowOff = stack_lowOff[last]
            final_highOff = stack_highOff[last]
            _log.info("ccccc_data  ")
            for band in TidalProd:
                bn = band.value
                if bn == 1:
                    nbar_outds.write_tile(final_count, chunk, raster_band=bn)
                elif bn == 2:
                    nbar_outds.write_tile(final_lowOff, chunk, raster_band=bn)
                elif bn == 3:
                    nbar_outds.write_tile(final_highOff,
                                          chunk,
                                          raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except Exception:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])
        _log.info("total dataset counts for each chunk is %d for tile %s",
                  count, lat_lon)

    # Close the output files
    nbar_outds.close()
Example #28
0
    def doit(self):
        """
        Composite ARG25 tiles into a best-pixel mosaic with provenance rasters.

        Tiles are taken from ``self.get_tiles(sort=SortType.DESC)``.  For
        every band, pixels that are still no-data take the value offered by
        the current tile, and iteration stops once no band holds a no-data
        pixel.  Three rasters are written through ``raster_create``: the
        band composite ("NBAR"), the satellite code of the contributing
        tile ("SAT") and its acquisition date as YYYYMMDD ("DATE").
        """
        shape = (4000, 4000)
        no_data_value = NDV

        # TODO
        bands = Ls57Arg25Bands
        if Satellite.LS8.value in self.satellites:
            bands = Ls8Arg25Bands

        # One no-data-filled plane per band
        best_pixel_data = {
            band: empty_array(shape=shape, dtype=numpy.int16, ndv=no_data_value)
            for band in bands
        }

        # Provenance planes, plus scratch planes refilled for every tile
        best_pixel_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
        best_pixel_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)
        current_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
        current_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

        SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

        # NOTE: band values are propagated "as a job lot", so one band is
        # enough to tell which pixels are still empty.
        # TODO better way than just saying....RED....?
        reference_band = bands.RED

        metadata = None

        for tile in self.get_tiles(sort=SortType.DESC):
            dataset = tile.datasets[DatasetType.ARG25]
            _log.info("Processing ARG tile [%s]", dataset.path)

            # Georeferencing is taken from the first tile processed
            if not metadata:
                metadata = get_dataset_metadata(dataset)

            if self.apply_pq_filter:
                band_data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25])
            else:
                band_data = get_dataset_data(dataset)

            # Provenance must be updated BEFORE the band data, because it
            # relies on the selected pixel not having a value yet.
            unfilled = best_pixel_data[reference_band] == no_data_value

            # Satellite code
            current_satellite.fill(SATELLITE_DATA_VALUES[dataset.satellite])
            best_pixel_satellite = numpy.where(unfilled, current_satellite, best_pixel_satellite)

            # Acquisition date encoded as YYYYMMDD (e.g. 20150101)
            acquired = tile.end_datetime
            current_date.fill(acquired.year * 10000 + acquired.month * 100 + acquired.day)
            best_pixel_date = numpy.where(unfilled, current_date, best_pixel_date)

            # Replace any NO DATA best pixels with this tile's pixels
            # TODO should I explicitly do the AND data is not NO DATA VALUE?
            for band in bands:
                so_far = best_pixel_data[band]
                best_pixel_data[band] = numpy.where(so_far == no_data_value, band_data[band], so_far)

            # Stop as soon as no band has a no-data pixel left
            if not any((best_pixel_data[b] == no_data_value).any() for b in bands):
                break

        # Blank the provenance wherever the composite never got a value
        mask = numpy.ma.masked_equal(best_pixel_data[reference_band], NDV).mask
        best_pixel_satellite = numpy.ma.array(best_pixel_satellite, mask=mask).filled(NDV)
        best_pixel_date = numpy.ma.array(best_pixel_date, mask=mask).filled(NDV)

        # Composite NBAR dataset
        raster_create(self.get_output_path("NBAR"), [best_pixel_data[b] for b in bands],
                      metadata.transform, metadata.projection, NDV, gdal.GDT_Int16)

        # Provenance (satellite) dataset
        raster_create(self.get_output_path("SAT"),
                      [best_pixel_satellite],
                      metadata.transform, metadata.projection, no_data_value,
                      gdal.GDT_Int16)

        # Provenance (acquisition date, YYYYMMDD) dataset
        raster_create(self.get_output_path("DATE"),
                      [best_pixel_date],
                      metadata.transform, metadata.projection, no_data_value,
                      gdal.GDT_Int32)
Example #29
0
def preview_cloudfree_mosaic(x,y,start,end, bands, satellite,iterations=0,xsize=2000,ysize=2000,file_format="GTiff",data_type=gdal.GDT_CInt16):
    """Build (or fetch from cache) a reduced-resolution preview mosaic for one cell.

    Tiles for cell (x, y) acquired between start and end are listed in
    ascending order.  Each tile's bands are read through the PQ_MASK_CLEAR
    pixel-quality mask, resampled to (xsize, ysize) with nearest-neighbour,
    and merged: the first tile seeds every band and later tiles only fill
    pixels that still hold the no-data value (-999).

    The result is written under /tilestore/tile_cache and its path is stored
    in ``cache`` under a key built from all the request parameters.

    :param x, y: cell indices passed to list_tiles
    :param start, end: acquisition range passed to list_tiles
    :param bands: sequence of band enum members (``.name`` is used in the key)
    :param satellite: sequence of satellite enum members
    :param iterations: if > 0, merge at most this many tiles
    :param xsize, ysize: width and height of the preview raster in pixels
    :param file_format: GDAL driver short name
    :param data_type: GDAL data type of the output raster
    :return: path of the preview file, or the string "None" if nothing
             could be produced
    """
    # NOTE(review): -999 is assumed to be the fill value produced by
    # get_dataset_data_masked for masked pixels -- confirm against its default.
    no_data = -999

    def resize_array(arr, size):
        # PIL takes size as (width, height); the array that comes back is
        # indexed (row, column), i.e. shaped (height, width).
        image = Image.fromarray(numpy.array(arr).astype(numpy.int16))
        return numpy.array(image.resize(size, Image.NEAREST))

    preview_size = (xsize, ysize)

    band_str = "+".join([band.name for band in bands])
    sat_str = "+".join([sat.name for sat in satellite])
    cache_id = ["preview",str(x),str(y),str(start),str(end),band_str,sat_str,str(xsize),str(ysize),file_format,str(iterations)]
    c_name = "_".join(cache_id).replace(" ","_")
    cached_res = cache.get(c_name)
    if cached_res:
        return str(cached_res)
    out_path = os.path.join("/tilestore/tile_cache",c_name)+'.tif'

    tiles = list_tiles(x=[x], y=[y],acq_min=start,acq_max=end,satellites=satellite,dataset_types=[DatasetType.ARG25,DatasetType.PQ25], sort=SortType.ASC)

    best_data = {}
    tile_metadata = None   # metadata of the last tile that was actually merged
    source_shape = None    # (rows, cols) of the full-resolution band arrays
    tile_count = 0         # number of tiles actually merged

    for tile in tiles:
        print("merging on tile "+str(tile.x)+", "+str(tile.y))

        dataset = tile.datasets[DatasetType.ARG25] if DatasetType.ARG25 in tile.datasets else None
        if dataset is None:
            print("No dataset availible")
            continue

        metadata = get_dataset_metadata(dataset)
        if metadata is None:
            print("NO METADATA")
            continue
        # Only remember metadata from usable tiles so that a later tile without
        # metadata cannot wipe out what the output raster needs.
        tile_metadata = metadata
        tile_count += 1

        # Without a PQ dataset there is nothing to build a mask from.
        mask = None
        if DatasetType.PQ25 in tile.datasets and tile.datasets[DatasetType.PQ25] is not None:
            mask = get_mask_pqa(tile.datasets[DatasetType.PQ25],[PqaMask.PQ_MASK_CLEAR],mask=mask)

        band_data = get_dataset_data_masked(dataset, mask=mask,bands=bands)

        for band in band_data:
            if source_shape is None:
                # assumes every tile of the cell has the same pixel dimensions
                source_shape = numpy.shape(band_data[band])

            # Bring the incoming tile to preview resolution first so that it can
            # be indexed with the same boolean mask as the running mosaic.
            incoming = resize_array(band_data[band], preview_size)

            if band not in best_data:
                print("Adding "+band.name)
                print(incoming)
                best_data[band] = incoming
            else:
                best = best_data[band]
                holes = best == no_data
                best[holes] = incoming[holes]

        if iterations > 0 and tile_count >= iterations:
            print("Exiting after "+str(iterations)+" iterations")
            break

        # Later tiles can only fill no-data pixels, so stop reading tiles as
        # soon as every requested band is completely filled.
        if all(band in best_data and not numpy.any(best_data[band] == no_data) for band in bands):
            break

    numberOfBands = len(bands)
    if numberOfBands == 0:
        return "None"
    if tile_count < 1:
        print("No tiles found for "+str(x)+", "+str(y))
        return "None"
    if any(band not in best_data for band in bands):
        print("No data was merged for "+str(x)+", "+str(y))
        return "None"

    driver = gdal.GetDriverByName(file_format)
    if driver is None:
        print("No driver found for "+file_format)
        return "None"

    print(out_path)
    # Arrays are (rows, cols) while GDAL's Create() wants (xsize=cols, ysize=rows).
    rows, cols = best_data[bands[0]].shape
    raster = driver.Create(out_path, cols, rows, numberOfBands, data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
    if raster is None:
        print("Could not create "+out_path)
        return "None"

    # The preview covers the same ground extent with fewer pixels, so the pixel
    # width/height of the geotransform grow by the down-sampling ratio
    # (2.0 for a 4000x4000 tile shown at the default 2000x2000).
    gt = tile_metadata.transform
    x_scale = float(source_shape[1]) / cols
    y_scale = float(source_shape[0]) / rows
    preview_transform = (gt[0],gt[1]*x_scale,gt[2],gt[3],gt[4],gt[5]*y_scale)
    raster.SetGeoTransform(preview_transform)
    print(preview_transform)
    raster.SetProjection(tile_metadata.projection)

    for index, band in enumerate(bands, start=1):
        stack_band = raster.GetRasterBand(index)
        stack_band.SetNoDataValue(no_data)
        stack_band.WriteArray(best_data[band])
        stack_band.ComputeStatistics(True)
        stack_band.FlushCache()
        del stack_band

    # Dropping the last reference makes GDAL close the file.
    raster.FlushCache()
    del raster

    cache.set(c_name,out_path)
    return out_path
    def run(self):
        """Stack one band from every applicable tile into a multi-band raster.

        Tiles from self.get_tiles() are kept when they carry a dataset of
        self.dataset_type that has a band named self.band.  Each kept tile
        becomes one band of the output raster (self.output().path), written in
        tile order after optional PQA and WOFS masking.  Geo-referencing, data
        type and no-data value are taken from the first kept tile's dataset.
        Nothing is created when no tile qualifies.
        """

        # TODO move the dicking around with bands stuff into utils?

        import gdal

        tiles = self.get_tiles()
        _log.info("Total tiles found [%d]", len(tiles))

        _log.info("Creating stack for band [%s]", self.band)

        relevant_tiles = []

        for tile in tiles:

            dataset = tile.datasets[self.dataset_type] if self.dataset_type in tile.datasets else None

            if not dataset:
                _log.info("No applicable [%s] dataset for [%s]", self.dataset_type.name, tile.end_datetime)
                continue

            if self.band in [b.name for b in dataset.bands]:
                relevant_tiles.append(tile)

        _log.info("Total tiles for band [%s] is [%d]", self.band, len(relevant_tiles))

        raster = None
        filename = None
        ndv = None

        for index, tile in enumerate(relevant_tiles, start=1):

            # relevant_tiles was filtered above, so the dataset is always present
            dataset = tile.datasets[self.dataset_type]
            assert dataset

            band = dataset.bands[self.band]
            assert band

            pqa = None
            if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                pqa = tile.datasets[DatasetType.PQ25]

            wofs = None
            if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                wofs = tile.datasets[DatasetType.WATER]

            if raster is None:

                # One-off setup of the output raster, driven by the first tile

                filename = self.output().path

                metadata = get_dataset_metadata(dataset)
                assert metadata

                data_type = get_dataset_datatype(dataset)
                assert data_type

                # 0 is a legitimate no-data value, so only reject a missing one
                ndv = get_dataset_ndv(dataset)
                assert ndv is not None

                driver = None
                options = None

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")
                    options = ["BIGTIFF=YES", "INTERLEAVE=BAND"]
                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")
                    options = ["INTERLEAVE=BSQ"]

                assert driver

                # One band per tile that will actually be written - sizing by
                # len(tiles) would leave empty trailing bands in the output
                raster = driver.Create(filename, metadata.shape[0], metadata.shape[1],
                                       len(relevant_tiles), data_type, options=options)
                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

                raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            if wofs:
                mask = get_mask_wofs(wofs, self.mask_wofs_mask, mask=mask)

            _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] and WOFS [%s] and WOFS mask [%s] to [%s]",
                      band.name, dataset.path,
                      pqa.path if pqa else "",
                      self.mask_pqa_mask if pqa else "",
                      wofs.path if wofs else "",
                      self.mask_wofs_mask if wofs else "",
                      filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

            stack_band.FlushCache()
            del stack_band

        if raster is not None:
            # Dropping the last reference makes GDAL close the file
            raster.FlushCache()
            raster = None
    def run(self):
        """Stack self.band from every seasonal tile into a multi-band raster.

        The acquisition range and include-criteria come from
        build_season_date_criteria(); tiles for cell (self.x, self.y) are then
        listed for self.dataset_type (plus PQ25 when PQA masking is enabled).
        Each tile that carries the dataset becomes one band of the output
        raster (self.output().path), written in listing order.  Data type and
        no-data value come from the dataset type; geo-referencing comes from
        the first stacked tile.  Nothing is created when no tile qualifies.
        """

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)

        acq_min, acq_max, criteria = build_season_date_criteria(
            self.acq_min,
            self.acq_max,
            self.season,
            seasons=SEASONS,
            extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x],
                                   y=[self.y],
                                   satellites=self.satellites,
                                   acq_min=acq_min,
                                   acq_max=acq_max,
                                   dataset_types=dataset_types,
                                   include=criteria)

        # Drop tiles without the wanted dataset *before* numbering the bands:
        # the lookup below would otherwise fail, and skipping inside the
        # numbered loop would leave empty bands in the stack.
        stackable_tiles = []

        for tile in tiles:

            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping",
                           self.dataset_type.name, tile.end_datetime)
                continue

            stackable_tiles.append(tile)

        band = self.band
        raster = None
        filename = None

        for index, tile in enumerate(stackable_tiles, start=1):

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            pqa = None
            if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                pqa = tile.datasets[DatasetType.PQ25]

            if raster is None:

                # One-off setup of the output raster, driven by the first tile

                filename = self.output().path

                metadata = get_dataset_metadata(dataset)
                assert metadata

                driver = None
                options = None

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")
                    options = ["BIGTIFF=YES", "INTERLEAVE=BAND"]

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")
                    options = ["INTERLEAVE=BSQ"]

                assert driver

                raster = driver.Create(filename,
                                       metadata.shape[0],
                                       metadata.shape[1],
                                       len(stackable_tiles),
                                       data_type,
                                       options=options)
                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

                raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info(
                "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                band.name, dataset.path,
                pqa.path if pqa else "",
                self.mask_pqa_mask if pqa else "",
                filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({
                "ACQ_DATE": format_date(tile.end_datetime),
                "SATELLITE": dataset.satellite.name
            })

            stack_band.FlushCache()
            del stack_band

        if raster is not None:
            # Dropping the last reference makes GDAL close the file
            raster.FlushCache()
            raster = None