def retrieve_data(dataset, pq, pq_masks, path, x, y, overwrite=False, stack=False):
    """
    Read a dataset (optionally masked by pixel quality) and write it to a raster.

    :param dataset:
        The dataset to read. Its ``path``, ``bands``, ``satellite`` and
        ``dataset_type`` attributes are used.

    :param pq:
        The pixel quality dataset to mask with. When falsy the dataset is
        read unmasked via ``get_dataset_data``.

    :param pq_masks:
        Passed through as ``pq_masks`` to ``get_dataset_data_with_pq``.
        Only used when ``pq`` is given.

    :param path:
        Full path of the output raster.

    :param x, y:
        Only used to build the name of the file-list file when ``stack``
        is true.

    :param overwrite:
        When false, an existing file at ``path`` is treated as an error.

    :param stack:
        When true, ``path`` is also appended (one path per line) to a
        file-list file located in the same directory as ``path``.

    :raises Exception:
        If ``path`` already exists and ``overwrite`` is false.
    """
    # Same values the original `a and b or ""` expressions produced, spelled out.
    pq_path = (pq.path or "") if pq else ""
    pq_mask_text = (pq_masks or "") if pq else ""
    _log.info("Retrieving data from [%s] with pq [%s] and pq mask [%s] to [%s]", dataset.path, pq_path, pq_mask_text, path)

    if os.path.exists(path) and not overwrite:
        _log.error("Output file [%s] exists", path)
        raise Exception("Output file [%s] already exists" % path)

    metadata = get_dataset_metadata(dataset)

    if pq:
        data = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks)
    else:
        data = get_dataset_data(dataset)

    _log.debug("data is [%s]", data)

    raster_create(path, [data[b] for b in dataset.bands],
              metadata.transform, metadata.projection, NDV, gdal.GDT_Int16)

    # If we are creating a stack then also add to a file list file...
    if stack:
        path_file_list = os.path.join(os.path.dirname(path), get_filename_file_list(dataset.satellite, dataset.dataset_type, x, y))
        _log.info("Also going to write file list to [%s]", path_file_list)
        # Text-mode append + write() behaves the same on Python 2 and 3; the
        # former `print >>f` chevron form only works on Python 2.
        with open(path_file_list, "a") as f:
            f.write("%s\n" % path)
Exemple #2
0
    def create_water_tile(self, tile):
        """
        Compute the tassel cap wetness index for a tile and write it to a raster.

        The ARG25 dataset of the tile is read with the PQ25 dataset applied,
        the wetness index is calculated using the coefficients registered
        for the dataset's satellite, and the result is written as a single
        band Float32 raster (no data value NaN) in ``self.output_directory``.

        :param tile:
            A tile whose ``datasets`` mapping contains both
            ``DatasetType.ARG25`` and ``DatasetType.PQ25``.
        """
        nbar_dataset = tile.datasets[DatasetType.ARG25]
        pq_dataset = tile.datasets[DatasetType.PQ25]

        _log.info("ARG tile [%s]", nbar_dataset)
        _log.info("PQ tile [%s]", pq_dataset)

        # Output name = input base name with NBAR -> WETNESS and .vrt -> .tif
        output_name = os.path.basename(nbar_dataset.path).replace("NBAR", "WETNESS").replace(".vrt", ".tif")
        output_path = os.path.join(self.output_directory, output_name)

        metadata = get_dataset_metadata(nbar_dataset)
        band_data = get_dataset_data_with_pq(nbar_dataset, Ls57Arg25Bands, pq_dataset)

        # Calculate TCI Wetness
        wetness_coefficients = TCI_COEFFICIENTS[nbar_dataset.satellite][TasselCapIndex.WETNESS]
        wetness = calculate_tassel_cap_index(band_data, coefficients=wetness_coefficients)

        _log.info("TCI shape is %s | min = %s | max = %s", numpy.shape(wetness), wetness.min(), wetness.max())

        raster_create(output_path, [wetness], metadata.transform, metadata.projection, numpy.nan, gdal.GDT_Float32)
    def generate_derived_nbar(self, dataset_types, nbar, pqa, pqa_masks, overwrite=False):
        """
        Write one derived index raster per requested dataset type.

        For every entry of ``dataset_types`` the NBAR dataset is read
        (masked by ``pqa`` when one is given) and, for NDVI, EVI and NBR,
        the index is calculated and written as a single band Float32 raster.
        Any other dataset type is read but produces no output.

        :param dataset_types:
            Iterable of ``DatasetType`` values to generate.

        :param nbar:
            The NBAR dataset to derive from; its ``path`` and ``bands``
            attributes are used.

        :param pqa:
            Pixel quality dataset, or a falsy value to read unmasked.

        :param pqa_masks:
            Passed as ``pq_masks`` to ``get_dataset_data_with_pq``.

        :param overwrite:
            Accepted but not used by this method.
        """
        for derived_type in dataset_types:
            output_path = self.get_output_filename_derived_nbar(nbar, derived_type)
            _log.info("Generating data from [%s] with pq [%s] and pq mask [%s] to [%s]", nbar.path, pqa and pqa.path or "", pqa and pqa_masks or "", output_path)

            metadata = get_dataset_metadata(nbar)

            if not pqa:
                band_data = get_dataset_data(nbar)
            else:
                band_data = get_dataset_data_with_pq(nbar, pqa, pq_masks=pqa_masks)

            _log.debug("data is [%s]", band_data)

            # Work out the index first; every supported type is written the same way.
            if derived_type == DatasetType.NDVI:
                index = calculate_ndvi(band_data[nbar.bands.RED], band_data[nbar.bands.NEAR_INFRARED])

            elif derived_type == DatasetType.EVI:
                index = calculate_evi(band_data[nbar.bands.RED], band_data[nbar.bands.BLUE], band_data[nbar.bands.NEAR_INFRARED])

            elif derived_type == DatasetType.NBR:
                index = calculate_nbr(band_data[nbar.bands.NEAR_INFRARED], band_data[nbar.bands.SHORT_WAVE_INFRARED_2])

            else:
                # Nothing is written for any other dataset type
                continue

            raster_create(output_path, [index], metadata.transform, metadata.projection, NDV, gdal.GDT_Float32)
def retrieve_pixel_value(dataset, pq, pq_masks, latitude, longitude, ndv=NDV):
    """
    Read the value(s) of the single pixel at a latitude/longitude.

    The position is converted to pixel coordinates with ``latlon_to_xy``
    using the dataset's transform, then a 1x1 window is read.

    :param dataset:
        The dataset to read from.

    :param pq:
        Pixel quality dataset, or a falsy value to read unmasked.

    :param pq_masks:
        Passed as ``pq_masks`` to ``get_dataset_data_with_pq``.

    :param latitude, longitude:
        Position of the pixel of interest.

    :param ndv:
        No data value; only forwarded when ``pq`` is given.

    :return:
        Whatever the underlying read function returns for the 1x1 window.
    """
    pq_path = pq and pq.path or ""
    pq_mask_text = pq and pq_masks or ""
    _log.debug("Retrieving pixel value(s) at lat=[%f] lon=[%f] from [%s] with pq [%s] and pq mask [%s]", latitude, longitude, dataset.path, pq_path, pq_mask_text)

    transform = get_dataset_metadata(dataset).transform
    x, y = latlon_to_xy(latitude, longitude, transform)

    _log.debug("Retrieving value at x=[%d] y=[%d]", x, y)

    # Both read paths use the same one-pixel window
    window = dict(x=x, y=y, x_size=1, y_size=1)

    if pq:
        pixel = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks, ndv=ndv, **window)
    else:
        pixel = get_dataset_data(dataset, **window)

    _log.debug("data is [%s]", pixel)

    return pixel
Exemple #5
0
def test_retrieve_data_ls5_arg_with_pqa(config=None):
    """
    Read an LS5 ARG window with PQA applied, write it as a GeoTIFF and
    compare the file with the stored reference copy of the same name.

    :param config:
        Passed through to ``list_tiles_as_list``.
    """
    filename = "LS5_TM_NBAR_WITH_PQA_{x:03d}_{y:04d}_{date}.{x_offset:04d}_{y_offset:04d}.{x_size:04d}x{y_size:04d}.tif".format(x=CELL_X, y=CELL_Y, date=DATE, x_offset=X_OFFSET, y_offset=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)

    matching_tiles = list_tiles_as_list(x=[CELL_X], y=[CELL_Y],
                                        acq_min=ACQ_LS5, acq_max=ACQ_LS5,
                                        satellites=[Satellite.LS5],
                                        dataset_types=[ARG_DATASET_TYPE, PQ_DATASET_TYPE],
                                        config=config)

    # Exactly one tile is expected and it must carry both dataset types
    assert len(matching_tiles) == 1
    datasets = matching_tiles[0].datasets

    assert ARG_DATASET_TYPE in datasets
    assert PQ_DATASET_TYPE in datasets

    dataset = datasets[ARG_DATASET_TYPE]
    pqa = datasets[PQ_DATASET_TYPE]

    data = get_dataset_data_with_pq(dataset=dataset, dataset_pqa=pqa, x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)

    assert data
    _log.info("data is [%s]\n%s", numpy.shape(data), data)

    ndv = get_dataset_ndv(dataset)
    assert is_ndv(ndv, ARG_NDV)

    data_type = get_dataset_datatype(dataset)
    assert data_type == ARG_DATA_TYPE

    metadata = generate_dataset_metadata(x=CELL_X, y=CELL_Y, acq_dt=ACQ_LS5,
                                         dataset=dataset, bands=None,
                                         mask_pqa_apply=False, mask_pqa_mask=None,
                                         mask_wofs_apply=False, mask_wofs_mask=None)

    band_arrays = [data[b] for b in dataset.bands]
    band_names = [b.name for b in dataset.bands]

    raster_create_geotiff(filename, band_arrays, CELL_GEO_TRANSFORM, CELL_PROJECTION, ndv, data_type,
                          dataset_metadata=metadata, band_ids=band_names)

    # The freshly written file must match the reference file
    assert filecmp.cmp(filename, get_test_data_path(filename))
    def run(self):
        """
        Summarise a time series of tiles, chunk by chunk, into one GeoTIFF.

        Steps:

        * parse the arguments and work out the output file name; an existing
          output file is removed when ``self.overwrite`` is set, otherwise it
          is an error;
        * list the tiles for the cell/period/satellites from the database
          described by ``Config``;
        * for each chunk origin of a (hard-coded) 4000 x 4000 pixel extent,
          read that window from every tile (masked by PQ25 when
          ``self.apply_pqa_filter`` is set), stack the reads per band, mask
          the no data value and reduce the stack along the time axis with
          ``self.summary_method``;
        * write the per band summary into an Int16 GeoTIFF that is created on
          first use.

        When ``self.list_only`` is set the datasets are only logged, and when
        no tiles are found a warning is logged; in both cases nothing is
        summarised or written.

        :raises Exception:
            If the output file exists and ``self.overwrite`` is false, or if
            ``self.summary_method`` is not one of the handled methods.
        """
        import itertools

        def log_max_rss():
            # ru_maxrss as reported by getrusage, divided by 1024
            _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

        def select_by_rank(masked, fraction):
            # Sort along the time axis, then pick per pixel the element at
            # index floor(number_of_unmasked_values * fraction).
            masked_sorted = numpy.ma.sort(masked, axis=0)
            masked_index = numpy.ma.floor(numpy.ma.count(masked_sorted, axis=0) * fraction).astype(numpy.int16)
            return numpy.ma.choose(masked_index, masked_sorted)

        def first_valid_pixel(layers, shape, no_data):
            # Fill a no-data array from the layers in the given order; a pixel
            # keeps the first value it receives that is not no-data.
            summary = empty_array(shape=shape, dtype=numpy.int16, ndv=no_data)

            for layer in layers:
                summary = numpy.where(summary == no_data, layer, summary)

                # If the summary doesn't contain any no data values then we can stop
                if not numpy.any(summary == no_data):
                    break

            # TODO Need to artificially create masked array here since it is being expected/filled below!!!
            return numpy.ma.masked_equal(summary, no_data)

        self.parse_arguments()

        config = Config()
        _log.debug(config.to_str())

        path = self.get_output_filename(self.dataset_type)
        _log.info("Output file is [%s]", path)

        if os.path.exists(path):
            if self.overwrite:
                _log.info("Removing existing output file [%s]", path)
                os.remove(path)
            else:
                _log.error("Output file [%s] exists", path)
                raise Exception("Output file [%s] already exists" % path)

        # TODO
        bands = get_bands(self.dataset_type, self.satellites[0])

        # TODO once WOFS is in the cube

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=list(self.satellites),
                                   dataset_types=[self.dataset_type],
                                   database=config.get_db_database(),
                                   user=config.get_db_username(),
                                   password=config.get_db_password(),
                                   host=config.get_db_host(), port=config.get_db_port())

        # Listing is a dry run: report each dataset once and stop.  (Previously
        # the listing was repeated for every chunk and the summarising code then
        # failed on the empty stack.)
        if self.list_only:
            for tile in tiles:
                _log.info("Would summarise dataset [%s]", tile.datasets[self.dataset_type].path)
            return

        # Without any tiles there is no metadata and nothing to stack
        if not tiles:
            _log.warning("No datasets found - nothing to summarise")
            return

        raster = None
        metadata = None

        # TODO - PQ is UNIT16 (others are INT16) and so -999 NDV doesn't work
        ndv = UINT16_MAX if self.dataset_type == DatasetType.PQ25 else NDV

        # Arrays are indexed (row, column), i.e. (y, x)
        chunk_shape = (self.chunk_size_y, self.chunk_size_x)

        log_max_rss()

        for x, y in itertools.product(range(0, 4000, self.chunk_size_x), range(0, 4000, self.chunk_size_y)):

            _log.info("About to read data chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d})".format(xmin=x, ymin=y, xmax=x+self.chunk_size_x-1, ymax=y+self.chunk_size_y-1))
            log_max_rss()

            # band -> list of chunk arrays, in tile order
            stack = dict()

            for tile in tiles:

                dataset = tile.datasets[self.dataset_type]

                _log.debug("Reading dataset [%s]", dataset.path)

                if not metadata:
                    metadata = get_dataset_metadata(dataset)

                # Apply PQA if specified

                if self.apply_pqa_filter:
                    data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25], bands=bands, x=x, y=y, x_size=self.chunk_size_x, y_size=self.chunk_size_y, pq_masks=self.pqa_mask, ndv=ndv)

                else:
                    data = get_dataset_data(dataset, bands=bands, x=x, y=y, x_size=self.chunk_size_x, y_size=self.chunk_size_y)

                for band in bands:
                    stack.setdefault(band, []).append(data[band])

                    _log.debug("data[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(data[band]), data[band].nbytes/1000/1000)
                    _log.debug("stack[%s] has [%s] elements", band.name, len(stack[band]))

            # Apply summary method

            _log.info("Finished reading {count} datasets for chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d}) - about to summarise them".format(count=len(tiles), xmin=x, ymin=y, xmax=x+self.chunk_size_x-1, ymax=y+self.chunk_size_y-1))
            log_max_rss()

            masked_stack = dict()

            for band in bands:
                masked_stack[band] = numpy.ma.masked_equal(stack[band], ndv)
                _log.debug("masked_stack[%s] is %s", band.name, masked_stack[band])
                _log.debug("masked stack[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(masked_stack[band]), masked_stack[band].nbytes/1000/1000)
                log_max_rss()

                if self.summary_method == TimeSeriesSummaryMethod.MIN:
                    masked_summary = numpy.min(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.MAX:
                    masked_summary = numpy.max(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.MEAN:
                    masked_summary = numpy.mean(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.MEDIAN:
                    masked_summary = numpy.median(masked_stack[band], axis=0)

                # aka 50th percentile

                elif self.summary_method == TimeSeriesSummaryMethod.MEDIAN_NON_INTERPOLATED:
                    # 0.5, not the 0.95 used for PERCENTILE below
                    masked_summary = select_by_rank(masked_stack[band], 0.5)

                elif self.summary_method == TimeSeriesSummaryMethod.COUNT:
                    # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                    masked_summary = numpy.ma.masked_equal(masked_stack[band].count(axis=0), ndv)

                elif self.summary_method == TimeSeriesSummaryMethod.SUM:
                    masked_summary = numpy.sum(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.STANDARD_DEVIATION:
                    masked_summary = numpy.std(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.VARIANCE:
                    masked_summary = numpy.var(masked_stack[band], axis=0)

                # currently 95th percentile

                elif self.summary_method == TimeSeriesSummaryMethod.PERCENTILE:
                    masked_summary = select_by_rank(masked_stack[band], 0.95)

                elif self.summary_method == TimeSeriesSummaryMethod.YOUNGEST_PIXEL:

                    # TODO the fact that this is band at a time might be problematic.  We really should be considering
                    # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                    # it's probably all good but if not then we might get odd results....

                    # Note the reversed as the stack is created oldest first
                    masked_summary = first_valid_pixel(reversed(stack[band]), chunk_shape, ndv)

                elif self.summary_method == TimeSeriesSummaryMethod.OLDEST_PIXEL:

                    # TODO the fact that this is band at a time might be problematic.  We really should be considering
                    # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                    # it's probably all good but if not then we might get odd results....

                    # Note the NOT reversed as the stack is created oldest first
                    masked_summary = first_valid_pixel(stack[band], chunk_shape, ndv)

                else:
                    # Fail clearly instead of tripping over an unbound summary below
                    raise Exception("Unsupported summary method [%s]" % self.summary_method)

                masked_stack[band] = None
                _log.debug("NONE-ing masked stack[%s]", band.name)
                log_max_rss()

                _log.debug("masked summary is [%s]", masked_summary)
                log_max_rss()

                # Create the output file (once, on the first band of the first chunk)

                if raster is None:
                    _log.info("Creating raster [%s]", path)

                    driver = gdal.GetDriverByName("GTiff")
                    assert driver

                    raster = driver.Create(path, metadata.shape[0], metadata.shape[1], len(bands), gdal.GDT_Int16)
                    assert raster

                    raster.SetGeoTransform(metadata.transform)
                    raster.SetProjection(metadata.projection)

                    for b in bands:
                        raster.GetRasterBand(b.value).SetNoDataValue(ndv)

                _log.info("Writing band [%s] data to raster [%s]", band.name, path)
                log_max_rss()

                raster.GetRasterBand(band.value).WriteArray(masked_summary.filled(ndv), xoff=x, yoff=y)
                raster.GetRasterBand(band.value).ComputeStatistics(True)

                raster.FlushCache()

                masked_summary = None
                _log.debug("NONE-ing the masked summary")
                log_max_rss()

            stack = None
            _log.debug("Just NONE-ed the stack")
            log_max_rss()

        raster = None

        _log.debug("Just NONE'd the raster")
        log_max_rss()

        _log.info("Memory usage was [%d MB]", resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)
        _log.info("CPU time used [%s]", timedelta(seconds=int(resource.getrusage(resource.RUSAGE_SELF).ru_utime)))
Exemple #7
0
    def doit(self):
        """
        Build a "best pixel" NBAR composite plus satellite and date provenance.

        Tiles are visited in the order returned by
        ``self.get_tiles(sort=SortType.DESC)``.  For every pixel the first
        value seen that is not the no data value is kept, and the satellite
        code and acquisition date (as the integer YYYYMMDD) of the tile that
        supplied it are recorded.  Processing stops early once no band
        contains a no data pixel.  Three rasters are written: "NBAR", "SAT"
        and "DATE".
        """
        shape = (4000, 4000)
        no_data_value = NDV

        # TODO
        bands = Ls8Arg25Bands if Satellite.LS8.value in self.satellites else Ls57Arg25Bands

        best_pixel_data = dict()
        for band in bands:
            best_pixel_data[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=no_data_value)

        best_pixel_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
        best_pixel_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

        # Scratch arrays that are refilled with a constant for every tile
        current_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
        current_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

        # TODO: an epoch provenance layer (calendar.timegm of tile.end_datetime) is currently disabled

        metadata = None

        SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

        for tile in self.get_tiles(sort=SortType.DESC):
            # Get ARG25 dataset
            dataset = tile.datasets[DatasetType.ARG25]
            _log.info("Processing ARG tile [%s]", dataset.path)

            if not metadata:
                metadata = get_dataset_metadata(dataset)

            if self.apply_pq_filter:
                band_data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25])
            else:
                band_data = get_dataset_data(dataset)

            # Provenance has to be updated BEFORE the band values are selected,
            # because it keys off the pixels that do not have a value yet.
            # Band values are propagated "as a job lot" so any band can be
            # checked.
            # TODO better way than just saying....RED....?
            unfilled = best_pixel_data[bands.RED] == no_data_value

            # Satellite
            current_satellite.fill(SATELLITE_DATA_VALUES[dataset.satellite])
            best_pixel_satellite = numpy.where(unfilled, current_satellite, best_pixel_satellite)

            # Date dataset (20150101)
            acquired = tile.end_datetime
            current_date.fill(acquired.year * 10000 + acquired.month * 100 + acquired.day)
            best_pixel_date = numpy.where(unfilled, current_date, best_pixel_date)

            # Replace any NO DATA best pixels with data pixels
            # TODO should I explicitly do the AND data is not NO DATA VALUE?
            for band in bands:
                current_best = best_pixel_data[band]
                best_pixel_data[band] = numpy.where(current_best == no_data_value, band_data[band], current_best)

            # Stop as soon as every pixel of every band has a value
            still_no_data = any(numpy.any(best_pixel_data[b] == no_data_value) for b in bands)

            if not still_no_data:
                break

        # Now want to mask out values in the provenance datasets if we haven't actually got a value

        # TODO better way than just saying....RED....?
        band = bands.RED

        mask = numpy.ma.masked_equal(best_pixel_data[band], NDV).mask

        best_pixel_satellite = numpy.ma.array(best_pixel_satellite, mask=mask).filled(NDV)
        best_pixel_date = numpy.ma.array(best_pixel_date, mask=mask).filled(NDV)

        # Composite NBAR dataset
        raster_create(self.get_output_path("NBAR"), [best_pixel_data[b] for b in bands],
                      metadata.transform, metadata.projection, NDV, gdal.GDT_Int16)

        # Provenance datasets: satellite, then date
        provenance_outputs = (("SAT", best_pixel_satellite, gdal.GDT_Int16),
                              ("DATE", best_pixel_date, gdal.GDT_Int32))

        for label, layer, gdal_type in provenance_outputs:
            raster_create(self.get_output_path(label),
                          [layer],
                          metadata.transform, metadata.projection, no_data_value,
                          gdal_type)
Exemple #8
0
    def doit(self):
        """
        Build the bare soil "best pixel" composites for a cell.

        For every tile the FC25 bare soil band is masked (value -999, values
        outside 0..8000, the PQ mask and pixels whose NDVI is no data or
        outside 0..0.3) and compared with the running maximum.  The NBAR
        bands, the remaining FC bands and the provenance layers (satellite,
        year, month, epoch) are then updated through
        ``propagate_using_selected_pixel``.  Six rasters are written: "FC",
        "NBAR", "SAT", "YEAR", "MONTH" and "EPOCH".
        """
        _log.debug("Bare Soil Cell Task - doit()")
        shape = (4000, 4000)
        no_data_value = NDV

        def blank(dtype, ndv=NDV):
            # A new array of the working shape filled with the no data value
            return empty_array(shape=shape, dtype=dtype, ndv=ndv)

        best_pixel_fc = dict()
        for band in Fc25Bands:
            best_pixel_fc[band] = blank(numpy.int16, ndv=INT16_MIN)

        best_pixel_nbar = dict()
        for band in Ls57Arg25Bands:
            best_pixel_nbar[band] = blank(numpy.int16)

        best_pixel_satellite = blank(numpy.int16)
        best_pixel_year = blank(numpy.int16)
        best_pixel_month = blank(numpy.int16)
        best_pixel_epoch = blank(numpy.int32)

        # Scratch arrays that are refilled with a constant for every tile
        current_satellite = blank(numpy.int16)
        current_year = blank(numpy.int16)
        current_month = blank(numpy.int16)
        current_epoch = blank(numpy.int32)

        SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

        metadata_nbar = None
        metadata_fc = None

        for tile in self.get_tiles():
            # Get the PQ mask
            pq = tile.datasets[DatasetType.PQ25]
            data_pq = get_dataset_data(pq, [Pq25Bands.PQ])[Pq25Bands.PQ]
            mask_pq = get_pq_mask(data_pq)

            # Get NBAR dataset
            nbar = tile.datasets[DatasetType.ARG25]
            _log.info("Processing NBAR tile [%s]", nbar.path)

            if not metadata_nbar:
                metadata_nbar = get_dataset_metadata(nbar)

            data_nbar = get_dataset_data_with_pq(nbar, Ls57Arg25Bands, pq)

            # Get the NDVI mask: no data or outside 0..0.3
            ndvi_data = calculate_ndvi(data_nbar[Ls57Arg25Bands.RED], data_nbar[Ls57Arg25Bands.NEAR_INFRARED])
            ndvi_data = numpy.ma.masked_equal(ndvi_data, NDV)
            ndvi_data = numpy.ma.masked_outside(ndvi_data, 0, 0.3, copy=False)
            mask_ndvi = ndvi_data.mask

            # Get FC25 dataset
            fc = tile.datasets[DatasetType.FC25]
            _log.info("Processing FC tile [%s]", fc.path)

            if not metadata_fc:
                metadata_fc = get_dataset_metadata(fc)

            _log.debug("metadata fc is %s", metadata_fc)

            data_fc = get_dataset_data(fc, Fc25Bands)

            # Bare soil candidate for this tile with every exclusion applied
            data_bare_soil = numpy.ma.masked_equal(data_fc[Fc25Bands.BS], -999)
            data_bare_soil = numpy.ma.masked_outside(data_bare_soil, 0, 8000)
            data_bare_soil.mask = (data_bare_soil.mask | mask_pq | mask_ndvi)
            data_bare_soil = data_bare_soil.filled(NDV)

            # Compare the bare soil value from this dataset to the current "best" value
            best_pixel_fc[Fc25Bands.BS] = numpy.fmax(best_pixel_fc[Fc25Bands.BS], data_bare_soil)

            def carry_over(candidate, current_best):
                # Update another layer using the bare soil selection made above
                return propagate_using_selected_pixel(best_pixel_fc[Fc25Bands.BS],
                                                      data_bare_soil,
                                                      candidate,
                                                      current_best)

            # Now update the other best pixel datasets/bands to grab the pixels we just selected
            for band in Ls57Arg25Bands:
                best_pixel_nbar[band] = carry_over(data_nbar[band], best_pixel_nbar[band])

            for band in [Fc25Bands.PV, Fc25Bands.NPV, Fc25Bands.ERROR]:
                best_pixel_fc[band] = carry_over(data_fc[band], best_pixel_fc[band])

            # And now the other "provenance" data
            current_satellite.fill(SATELLITE_DATA_VALUES[fc.satellite])
            best_pixel_satellite = carry_over(current_satellite, best_pixel_satellite)

            current_year.fill(tile.end_datetime_year)
            best_pixel_year = carry_over(current_year, best_pixel_year)

            current_month.fill(tile.end_datetime_month)
            best_pixel_month = carry_over(current_month, best_pixel_month)

            current_epoch.fill(calendar.timegm(tile.end_datetime.timetuple()))
            best_pixel_epoch = carry_over(current_epoch, best_pixel_epoch)

        # Create the output datasets

        # FC composite
        fc_band_metadata = metadata_fc.bands[Fc25Bands.BS]
        raster_create(self.get_dataset_filename("FC"),
                      [best_pixel_fc[b] for b in Fc25Bands],
                      metadata_fc.transform, metadata_fc.projection, fc_band_metadata.no_data_value,
                      fc_band_metadata.data_type)

        # NBAR composite
        nbar_band_metadata = metadata_nbar.bands[Ls57Arg25Bands.BLUE]
        raster_create(self.get_dataset_filename("NBAR"),
                      [best_pixel_nbar[b] for b in Ls57Arg25Bands],
                      metadata_nbar.transform, metadata_nbar.projection,
                      nbar_band_metadata.no_data_value,
                      nbar_band_metadata.data_type)

        # "Provenance" composites, written in this order
        provenance_outputs = (("SAT", best_pixel_satellite, gdal.GDT_Int16),
                              ("YEAR", best_pixel_year, gdal.GDT_Int16),
                              ("MONTH", best_pixel_month, gdal.GDT_Int16),
                              ("EPOCH", best_pixel_epoch, gdal.GDT_Int32))

        for label, layer, gdal_type in provenance_outputs:
            raster_create(self.get_dataset_filename(label),
                          [layer],
                          metadata_nbar.transform, metadata_nbar.projection, no_data_value,
                          gdal_type)
Exemple #9
0
def zonal_stats(dataset, rasterised_fname, dataset_type):
    """
    Computes the Observed Count, Min, Max, Sum and Sum of Squares for
    the segments defined by the rasterised image. The stats are
    derived from the `dataset` defined by the `dataset_type`.

    Pixels equal to the no data value (-999) after the PQ dataset has
    been applied are treated as NaN and excluded from the statistics.
    Segments for which no data is returned are skipped.

    :param dataset:
        A class of type `Dataset`.

    :param rasterised_fname:
        A string containing the full file pathname of an image
        containing the rasterised features. These features will be
        interpreted as segments.

    :param dataset_type:
        A class of type `DatasetType`.

    :return:
        A `pandas.DataFrame` containing the statistics for each segment
        and for each raster band contained with the `dataset_type`.
        The columns are SID, Timestamp, Band, Observed_Count, Min, Max,
        Sum and Sum_of_Squares. When there is nothing to report an
        empty frame with those columns is returned; otherwise the column
        dtypes are inferred by pandas from the row values.
    """
    headings = ["SID", "Timestamp", "Band", "Observed_Count", "Min", "Max",
                "Sum", "Sum_of_Squares"]

    # Read the rasterised image
    with rasterio.open(rasterised_fname) as src:
        img = src.read(1)

    # Initialise the segment visitor
    seg_vis = Segments(img)

    # Do we have any data to analyse???
    if seg_vis.n_segments == 0:
        # The builtin float replaces the removed numpy.float alias
        return pandas.DataFrame(columns=headings, dtype=float)

    # We need to get the PQ data and the DatasetType of interest
    pq_ds = dataset.datasets[DatasetType.PQ25]
    ds = dataset.datasets[dataset_type]
    timestamp = dataset.start_datetime
    bands = ds.bands

    no_data = -999

    # TODO have a user choice at the config level to determine which PQ flags
    # to apply
    # pq_flags = [PqaMask.PQ_MASK_CLEAR] # The default will msk everything
    # NOTE(review): pq_flags is built but never passed to
    # get_dataset_data_with_pq below, so that function's default masks are
    # what actually gets applied - confirm whether that is intended.
    pq_flags = [PqaMask.PQ_MASK_SATURATION,
                PqaMask.PQ_MASK_CONTIGUITY,
                PqaMask.PQ_MASK_CLOUD] # cloud and cloud shadow

    # One dict per (band, segment); the frame is built once at the end
    # instead of appending to a DataFrame row by row.
    rows = []

    for band in bands:
        # When the api has a release of get_pq_mask this will have to do
        # It'll re-compute the PQ every time which is not ideal
        # Otherwise go back to eotools???
        ds_data = (get_dataset_data_with_pq(ds, pq_ds, bands=[band],
                                            ndv=no_data)[band]).astype('float')
        # Set no-data to NaN
        ds_data[ds_data == no_data] = numpy.nan

        # Loop over each segment and get the data.
        # In other instances we may just need the locations
        for seg_id in seg_vis.ids:
            data = seg_vis.data(ds_data, segment_id=seg_id)

            # Returns are 1D arrays, so check if we have an empty array
            if data.shape[0] == 0:
                continue # Empty bin, (no data), skipping

            # Compute the stats, ignoring NaN (no data) pixels
            rows.append({"SID": seg_id,
                         "Timestamp": timestamp,
                         "Band": band.name,
                         "Observed_Count": numpy.sum(numpy.isfinite(data)),
                         "Min": numpy.nanmin(data),
                         "Max": numpy.nanmax(data),
                         "Sum": numpy.nansum(data),
                         "Sum_of_Squares": numpy.nansum(data**2)})

    if not rows:
        # Every segment was empty: same blank frame as the no-segment case
        return pandas.DataFrame(columns=headings, dtype=float)

    return pandas.DataFrame(rows, columns=headings)
Exemple #10
0
def classifier(arg25_dataset, pq25_dataset):
    """
    Runs the classifier designed by SF.

    Reads the ARG25 data masked via the PQ dataset, derives the tassel cap
    wetness index and NDVI from it, and walks a fixed threshold tree over
    those two indices to give every pixel a class code.

    :param arg25_dataset: ARG25 dataset to classify. Its ``satellite``
        selects the wetness coefficients and its ``bands`` supplies the
        RED and NEAR_INFRARED bands used for NDVI.
    :param pq25_dataset: PQ25 dataset handed to
        ``get_dataset_data_with_pq`` to mask the ARG25 data.
    :return: ``uint8`` array of shape ``(rows, cols)`` with one code per
        pixel:

        * 0 -- null (NDVI is not finite, or wetness is NaN)
        * 1 -- water
        * 2 -- non-veg
        * 3 -- saltmarsh
        * 4 -- mangrove/saltmarsh
        * 5 -- mangrove
    """
    # Get the metadata; the shape is unpacked as (cols, rows)
    md = get_dataset_metadata(arg25_dataset)
    cols, rows = md.shape

    # Read the data and mask pixels via the PQ dataset
    data = get_dataset_data_with_pq(arg25_dataset, pq25_dataset)

    # Get the wetness coefficients and calculate
    coef = TCI_COEFFICIENTS[arg25_dataset.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(data, coef)

    # NDVI, with NaN requested as the output no-data value
    ndvi = calculate_ndvi(data[arg25_dataset.bands.RED],
                          data[arg25_dataset.bands.NEAR_INFRARED],
                          output_ndv=numpy.nan)

    # Dump the reflectance data, the classifier only needs tc_wetness and ndvi
    del data

    # Allocate the result; anything never written below stays 0 (null)
    classified = numpy.zeros((rows, cols), dtype='uint8')

    def _label_by_ndvi(region):
        """Apply the NDVI ladder shared by two branches of the tree."""
        # Written from the loosest rule to the tightest, so each tighter
        # threshold overwrites the label left by the previous line.
        classified[region] = 5                      # mangrove
        classified[region & (ndvi <= 0.6)] = 4      # mangrove/saltmarsh
        classified[region & (ndvi <= 0.45)] = 3     # saltmarsh
        classified[region & (ndvi <= 0.3)] = 2      # non-veg

    # Water
    water = wetness > 0
    classified[water] = 1
    not_water = ~water

    # Branch: -250 <= wetness < 0
    wet_mid = (wetness >= -250) & (wetness < 0)
    _label_by_ndvi(not_water & wet_mid)

    # Everything else that is not water (wetness == 0 or wetness < -250)
    remainder = not_water & ~wet_mid

    # Branch: wetness < -750 is split by a single NDVI threshold
    wet_low = wetness < -750
    low_region = remainder & wet_low
    ndvi_ge_03 = ndvi >= 0.3
    classified[low_region & ndvi_ge_03] = 3     # saltmarsh
    classified[low_region & ~ndvi_ge_03] = 2    # non-veg

    # Branch: the rest of the remainder uses the NDVI ladder again
    _label_by_ndvi(remainder & ~wet_low)

    # Set any nulls. A NaN wetness fails every comparison above, so such a
    # pixel would otherwise fall through to the NDVI ladder and be labelled
    # as land cover; treat it as null just like a non-finite NDVI.
    nulls = ~numpy.isfinite(ndvi) | numpy.isnan(wetness)
    classified[nulls] = 0

    return classified
Exemple #11
0
def classifier(arg25_dataset, pq25_dataset):
    """
    Runs the classifier designed by SF.

    Reads the ARG25 data masked via the PQ dataset, derives the tassel cap
    wetness index and NDVI from it, and walks a fixed threshold tree over
    those two indices to give every pixel a class code.

    :param arg25_dataset: ARG25 dataset to classify. Its ``satellite``
        selects the wetness coefficients and its ``bands`` supplies the
        RED and NEAR_INFRARED bands used for NDVI.
    :param pq25_dataset: PQ25 dataset handed to
        ``get_dataset_data_with_pq`` to mask the ARG25 data.
    :return: ``uint8`` array of shape ``(rows, cols)`` with one code per
        pixel:

        * 0 -- null (NDVI is not finite, or wetness is NaN)
        * 1 -- water
        * 2 -- non-veg
        * 3 -- saltmarsh
        * 4 -- mangrove/saltmarsh
        * 5 -- mangrove
    """
    # Get the metadata; the shape is unpacked as (cols, rows)
    md = get_dataset_metadata(arg25_dataset)
    cols, rows = md.shape

    # Read the data and mask pixels via the PQ dataset
    data = get_dataset_data_with_pq(arg25_dataset, pq25_dataset)

    # Get the wetness coefficients and calculate
    coef = TCI_COEFFICIENTS[arg25_dataset.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(data, coef)

    # NDVI, with NaN requested as the output no-data value
    ndvi = calculate_ndvi(data[arg25_dataset.bands.RED],
                          data[arg25_dataset.bands.NEAR_INFRARED],
                          output_ndv=numpy.nan)

    # Dump the reflectance data, the classifier only needs tc_wetness and ndvi
    del data

    # Allocate the result; anything never written below stays 0 (null)
    classified = numpy.zeros((rows, cols), dtype='uint8')

    def _label_by_ndvi(region):
        """Apply the NDVI ladder shared by two branches of the tree."""
        # Written from the loosest rule to the tightest, so each tighter
        # threshold overwrites the label left by the previous line.
        classified[region] = 5                      # mangrove
        classified[region & (ndvi <= 0.6)] = 4      # mangrove/saltmarsh
        classified[region & (ndvi <= 0.45)] = 3     # saltmarsh
        classified[region & (ndvi <= 0.3)] = 2      # non-veg

    # Water
    water = wetness > 0
    classified[water] = 1
    not_water = ~water

    # Branch: -250 <= wetness < 0
    wet_mid = (wetness >= -250) & (wetness < 0)
    _label_by_ndvi(not_water & wet_mid)

    # Everything else that is not water (wetness == 0 or wetness < -250)
    remainder = not_water & ~wet_mid

    # Branch: wetness < -750 is split by a single NDVI threshold
    wet_low = wetness < -750
    low_region = remainder & wet_low
    ndvi_ge_03 = ndvi >= 0.3
    classified[low_region & ndvi_ge_03] = 3     # saltmarsh
    classified[low_region & ~ndvi_ge_03] = 2    # non-veg

    # Branch: the rest of the remainder uses the NDVI ladder again
    _label_by_ndvi(remainder & ~wet_low)

    # Set any nulls. A NaN wetness fails every comparison above, so such a
    # pixel would otherwise fall through to the NDVI ladder and be labelled
    # as land cover; treat it as null just like a non-finite NDVI.
    nulls = ~numpy.isfinite(ndvi) | numpy.isnan(wetness)
    classified[nulls] = 0

    return classified