def retrieve_data(dataset, pq, pq_masks, path, x, y, overwrite=False, stack=False):
    """
    Read a dataset (optionally PQ masked) and write its bands to a raster file.

    :param dataset: dataset to read; its ``path``, ``bands``, ``satellite`` and
        ``dataset_type`` attributes are used here.
    :param pq: pixel quality dataset used to mask the data, or a false value to
        read the data unmasked.
    :param pq_masks: PQ masks handed on to ``get_dataset_data_with_pq``.
    :param path: path of the raster to create.
    :param x: cell x index - only used to build the stack file list name.
    :param y: cell y index - only used to build the stack file list name.
    :param overwrite: when false an already existing ``path`` is an error.
    :param stack: when true also append ``path`` to the file list kept in the
        output directory.
    :raises Exception: if ``path`` exists and ``overwrite`` is false.
    """
    pq_path = (pq.path or "") if pq else ""
    pq_mask_text = (pq_masks or "") if pq else ""
    _log.info("Retrieving data from [%s] with pq [%s] and pq mask [%s] to [%s]",
              dataset.path, pq_path, pq_mask_text, path)

    if os.path.exists(path) and not overwrite:
        _log.error("Output file [%s] exists", path)
        raise Exception("Output file [%s] already exists" % path)

    metadata = get_dataset_metadata(dataset)

    if pq:
        data = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks)
    else:
        data = get_dataset_data(dataset)

    _log.debug("data is [%s]", data)

    raster_create(path, [data[b] for b in dataset.bands],
                  metadata.transform, metadata.projection, NDV, gdal.GDT_Int16)

    # If we are creating a stack then also add to a file list file...
    if stack:
        path_file_list = os.path.join(
            os.path.dirname(path),
            get_filename_file_list(dataset.satellite, dataset.dataset_type, x, y))

        _log.info("Also going to write file list to [%s]", path_file_list)

        # One path per line.  A plain write() to a text-mode file behaves the
        # same on Python 2 and Python 3, unlike the "print >>f" statement.
        with open(path_file_list, "a") as f:
            f.write(path + "\n")
def create_water_tile(self, tile):
    """
    Write a Tasselled Cap wetness raster for the ARG25 dataset of a tile.

    The output lives in ``self.output_directory`` and is named after the
    input file with ``NBAR`` replaced by ``WETNESS`` and ``.vrt`` by ``.tif``.

    :param tile: tile whose ``datasets`` mapping holds an ARG25 and a PQ25
        dataset.
    """
    nbar = tile.datasets[DatasetType.ARG25]
    pq = tile.datasets[DatasetType.PQ25]

    _log.info("ARG tile [%s]", nbar)
    _log.info("PQ tile [%s]", pq)

    output_name = os.path.basename(nbar.path).replace("NBAR", "WETNESS").replace(".vrt", ".tif")
    output_path = os.path.join(self.output_directory, output_name)

    metadata = get_dataset_metadata(nbar)

    # NOTE(review): the band enumeration is passed as the second positional
    # argument and the PQ dataset as the third - confirm this matches the
    # signature of get_dataset_data_with_pq in use.
    band_data = get_dataset_data_with_pq(nbar, Ls57Arg25Bands, pq)

    # Calculate TCI Wetness
    wetness_coefficients = TCI_COEFFICIENTS[nbar.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(band_data, coefficients=wetness_coefficients)

    _log.info("TCI shape is %s | min = %s | max = %s", numpy.shape(wetness), wetness.min(), wetness.max())

    # Single band float raster with NaN as the no data value
    raster_create(output_path, [wetness],
                  metadata.transform, metadata.projection, numpy.nan, gdal.GDT_Float32)
def generate_derived_nbar(self, dataset_types, nbar, pqa, pqa_masks, overwrite=False):
    """
    Write one single band float raster per requested NBAR derived product.

    Supported products are NDVI, EVI and NBR; any other dataset type is
    logged but produces no output.

    :param dataset_types: iterable of ``DatasetType`` values to generate.
    :param nbar: NBAR dataset the products are derived from.
    :param pqa: pixel quality dataset used to mask the NBAR data, or a false
        value to use the data unmasked.
    :param pqa_masks: PQ masks handed on to ``get_dataset_data_with_pq``.
    :param overwrite: accepted for interface compatibility; not used by this
        method.
    """
    metadata = None
    data = None

    for dataset_type in dataset_types:

        filename = self.get_output_filename_derived_nbar(nbar, dataset_type)

        _log.info("Generating data from [%s] with pq [%s] and pq mask [%s] to [%s]",
                  nbar.path,
                  (pqa.path or "") if pqa else "",
                  (pqa_masks or "") if pqa else "",
                  filename)

        # Every product is derived from the same NBAR input, so read it on
        # first use only rather than once per product.
        # (assumes the calculate_* helpers do not modify their inputs - TODO confirm)
        if data is None:
            metadata = get_dataset_metadata(nbar)

            if pqa:
                data = get_dataset_data_with_pq(nbar, pqa, pq_masks=pqa_masks)
            else:
                data = get_dataset_data(nbar)

            _log.debug("data is [%s]", data)

        if dataset_type == DatasetType.NDVI:
            derived = calculate_ndvi(data[nbar.bands.RED], data[nbar.bands.NEAR_INFRARED])

        elif dataset_type == DatasetType.EVI:
            derived = calculate_evi(data[nbar.bands.RED], data[nbar.bands.BLUE],
                                    data[nbar.bands.NEAR_INFRARED])

        elif dataset_type == DatasetType.NBR:
            derived = calculate_nbr(data[nbar.bands.NEAR_INFRARED],
                                    data[nbar.bands.SHORT_WAVE_INFRARED_2])

        else:
            # Not a product this method knows how to derive
            continue

        raster_create(filename, [derived],
                      metadata.transform, metadata.projection, NDV, gdal.GDT_Float32)
def retrieve_pixel_value(dataset, pq, pq_masks, latitude, longitude, ndv=NDV):
    """
    Read the value(s) of the single pixel found at a latitude/longitude.

    :param dataset: dataset to read from.
    :param pq: pixel quality dataset used to mask the value, or a false value
        to read it unmasked.
    :param pq_masks: PQ masks handed on to ``get_dataset_data_with_pq``.
    :param latitude: latitude of the pixel.
    :param longitude: longitude of the pixel.
    :param ndv: no data value; only passed on when ``pq`` is supplied.
    :return: whatever the underlying 1x1 window read returns.
    """
    pq_path = (pq.path or "") if pq else ""
    pq_mask_text = (pq_masks or "") if pq else ""

    _log.debug("Retrieving pixel value(s) at lat=[%f] lon=[%f] from [%s] with pq [%s] and pq mask [%s]",
               latitude, longitude, dataset.path, pq_path, pq_mask_text)

    # Convert the geographic position into a pixel offset using the dataset transform
    transform = get_dataset_metadata(dataset).transform
    pixel_x, pixel_y = latlon_to_xy(latitude, longitude, transform)

    _log.debug("Retrieving value at x=[%d] y=[%d]", pixel_x, pixel_y)

    # A 1x1 window anchored on the pixel
    window = dict(x=pixel_x, y=pixel_y, x_size=1, y_size=1)

    if pq:
        value = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks, ndv=ndv, **window)
    else:
        value = get_dataset_data(dataset, **window)

    _log.debug("data is [%s]", value)

    return value
def test_retrieve_data_ls5_arg_with_pqa(config=None):
    """
    Read a PQ masked window of the LS5 ARG dataset, write it out as a GeoTIFF
    and compare the file against the stored reference file of the same name.
    """
    filename_template = "LS5_TM_NBAR_WITH_PQA_{x:03d}_{y:04d}_{date}.{x_offset:04d}_{y_offset:04d}.{x_size:04d}x{y_size:04d}.tif"
    filename = filename_template.format(x=CELL_X, y=CELL_Y, date=DATE,
                                        x_offset=X_OFFSET, y_offset=Y_OFFSET,
                                        x_size=X_SIZE, y_size=Y_SIZE)

    # Exactly one tile carrying both the ARG and the PQ dataset is expected
    tiles = list_tiles_as_list(x=[CELL_X], y=[CELL_Y],
                               acq_min=ACQ_LS5, acq_max=ACQ_LS5,
                               satellites=[Satellite.LS5],
                               dataset_types=[ARG_DATASET_TYPE, PQ_DATASET_TYPE],
                               config=config)
    assert len(tiles) == 1

    datasets = tiles[0].datasets

    assert ARG_DATASET_TYPE in datasets
    dataset = datasets[ARG_DATASET_TYPE]

    assert PQ_DATASET_TYPE in datasets
    pqa = datasets[PQ_DATASET_TYPE]

    window = dict(x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)
    data = get_dataset_data_with_pq(dataset=dataset, dataset_pqa=pqa, **window)
    assert data

    _log.info("data is [%s]\n%s", numpy.shape(data), data)

    ndv = get_dataset_ndv(dataset)
    assert is_ndv(ndv, ARG_NDV)

    data_type = get_dataset_datatype(dataset)
    assert data_type == ARG_DATA_TYPE

    metadata = generate_dataset_metadata(x=CELL_X, y=CELL_Y, acq_dt=ACQ_LS5,
                                         dataset=dataset, bands=None,
                                         mask_pqa_apply=False, mask_pqa_mask=None,
                                         mask_wofs_apply=False, mask_wofs_mask=None)

    band_arrays = [data[b] for b in dataset.bands]
    band_names = [b.name for b in dataset.bands]

    raster_create_geotiff(filename, band_arrays, CELL_GEO_TRANSFORM, CELL_PROJECTION, ndv, data_type,
                          dataset_metadata=metadata, band_ids=band_names)

    assert filecmp.cmp(filename, get_test_data_path(filename))
def run(self):
    """
    Summarise a time series of datasets into a single multi band GeoTIFF.

    The 4000 x 4000 cell is processed in ``chunk_size_x`` by ``chunk_size_y``
    windows.  For every window each matching dataset is read (PQ masked when
    ``apply_pqa_filter`` is set), the layers of each band are stacked and the
    configured ``summary_method`` reduces the stack to one layer that is
    written into the output raster at the window offset.

    With ``list_only`` set the datasets that would be summarised are logged
    and nothing is read or written.

    :raises Exception: if the output file exists and ``overwrite`` is not
        set, if no datasets are found, or if the summary method is not
        supported.
    """
    import itertools

    def log_max_rss():
        _log.debug("Current MAX RSS usage is [%d] MB",
                   resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

    def nearest_rank(masked_layers, fraction):
        # Non interpolated percentile: sort along the time axis, then pick
        # the observation at index floor(valid count * fraction) per pixel.
        ordered = numpy.ma.sort(masked_layers, axis=0)
        index = numpy.ma.floor(numpy.ma.count(ordered, axis=0) * fraction).astype(numpy.int16)
        return numpy.ma.choose(index, ordered)

    def first_valid(layers, ndv):
        # Fill each pixel from the first layer (in the given order) that has
        # a value there.  Rows are y and columns are x.
        filled = empty_array(shape=(self.chunk_size_y, self.chunk_size_x), dtype=numpy.int16, ndv=ndv)
        for layer in layers:
            filled = numpy.where(filled == ndv, layer, filled)
            # If the summary doesn't contain any no data values then we can stop
            if not numpy.any(filled == ndv):
                break
        # The caller expects a masked array (it calls .filled() on the result)
        return numpy.ma.masked_equal(filled, ndv)

    self.parse_arguments()

    config = Config()
    _log.debug(config.to_str())

    path = self.get_output_filename(self.dataset_type)
    _log.info("Output file is [%s]", path)

    if os.path.exists(path):
        if not self.overwrite:
            _log.error("Output file [%s] exists", path)
            raise Exception("Output file [%s] already exists" % path)
        _log.info("Removing existing output file [%s]", path)
        os.remove(path)

    # TODO
    bands = get_bands(self.dataset_type, self.satellites[0])

    # TODO once WOFS is in the cube
    tiles = list_tiles_as_list(x=[self.x], y=[self.y], acq_min=self.acq_min, acq_max=self.acq_max,
                               satellites=list(self.satellites),
                               dataset_types=[self.dataset_type],
                               database=config.get_db_database(),
                               user=config.get_db_username(),
                               password=config.get_db_password(),
                               host=config.get_db_host(), port=config.get_db_port())

    # Listing is independent of the chunking, so report each dataset once and
    # stop before anything is read.
    if self.list_only:
        for tile in tiles:
            _log.info("Would summarise dataset [%s]", tile.datasets[self.dataset_type].path)
        return

    if not tiles:
        _log.error("No datasets found to summarise")
        raise Exception("No datasets found to summarise into [%s]" % path)

    raster = None
    metadata = None

    # TODO - PQ is UNIT16 (others are INT16) and so -999 NDV doesn't work
    ndv = UINT16_MAX if self.dataset_type == DatasetType.PQ25 else NDV

    log_max_rss()

    for x, y in itertools.product(range(0, 4000, self.chunk_size_x), range(0, 4000, self.chunk_size_y)):

        chunk = dict(xmin=x, ymin=y, xmax=x + self.chunk_size_x - 1, ymax=y + self.chunk_size_y - 1)

        _log.info("About to read data chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d})".format(**chunk))
        log_max_rss()

        # band -> list of 2D layers, one per dataset, in tile order
        stack = dict()

        for tile in tiles:
            dataset = tile.datasets[self.dataset_type]

            _log.debug("Reading dataset [%s]", dataset.path)

            if not metadata:
                metadata = get_dataset_metadata(dataset)

            # Apply PQA if specified
            if self.apply_pqa_filter:
                data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25],
                                                bands=bands, x=x, y=y,
                                                x_size=self.chunk_size_x, y_size=self.chunk_size_y,
                                                pq_masks=self.pqa_mask, ndv=ndv)
            else:
                data = get_dataset_data(dataset, bands=bands, x=x, y=y,
                                        x_size=self.chunk_size_x, y_size=self.chunk_size_y)

            for band in bands:
                stack.setdefault(band, []).append(data[band])

                _log.debug("data[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(data[band]), data[band].nbytes/1000/1000)
                _log.debug("stack[%s] has [%s] elements", band.name, len(stack[band]))

        # Apply summary method
        _log.info("Finished reading {count} datasets for chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d}) - about to summarise them".format(count=len(tiles), **chunk))
        log_max_rss()

        for band in bands:
            masked_layers = numpy.ma.masked_equal(stack[band], ndv)

            _log.debug("masked_stack[%s] is %s", band.name, masked_layers)
            _log.debug("masked stack[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(masked_layers), masked_layers.nbytes/1000/1000)
            log_max_rss()

            method = self.summary_method

            if method == TimeSeriesSummaryMethod.MIN:
                masked_summary = numpy.min(masked_layers, axis=0)

            elif method == TimeSeriesSummaryMethod.MAX:
                masked_summary = numpy.max(masked_layers, axis=0)

            elif method == TimeSeriesSummaryMethod.MEAN:
                masked_summary = numpy.mean(masked_layers, axis=0)

            elif method == TimeSeriesSummaryMethod.MEDIAN:
                # numpy.median does not honour the mask, so no data values
                # would take part in the median - use the masked variant
                masked_summary = numpy.ma.median(masked_layers, axis=0)

            # aka 50th percentile
            elif method == TimeSeriesSummaryMethod.MEDIAN_NON_INTERPOLATED:
                masked_summary = nearest_rank(masked_layers, 0.5)

            elif method == TimeSeriesSummaryMethod.COUNT:
                # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                masked_summary = numpy.ma.masked_equal(masked_layers.count(axis=0), ndv)

            elif method == TimeSeriesSummaryMethod.SUM:
                masked_summary = numpy.sum(masked_layers, axis=0)

            elif method == TimeSeriesSummaryMethod.STANDARD_DEVIATION:
                masked_summary = numpy.std(masked_layers, axis=0)

            elif method == TimeSeriesSummaryMethod.VARIANCE:
                masked_summary = numpy.var(masked_layers, axis=0)

            # currently 95th percentile
            elif method == TimeSeriesSummaryMethod.PERCENTILE:
                masked_summary = nearest_rank(masked_layers, 0.95)

            elif method == TimeSeriesSummaryMethod.YOUNGEST_PIXEL:
                # TODO the fact that this is band at a time might be problematic.  We really should be considering
                # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                # it's probably all good but if not then we might get odd results....

                # Note the reversed as the stack is created oldest first
                masked_summary = first_valid(reversed(stack[band]), ndv)

            elif method == TimeSeriesSummaryMethod.OLDEST_PIXEL:
                # Same band-at-a-time caveat as for the youngest pixel.

                # Note the NOT reversed as the stack is created oldest first
                masked_summary = first_valid(stack[band], ndv)

            else:
                raise Exception("Unsupported summary method [%s]" % method)

            # Drop the masked copy of the stack before writing to keep memory down
            masked_layers = None
            _log.debug("NONE-ing masked stack[%s]", band.name)
            log_max_rss()

            _log.debug("masked summary is [%s]", masked_summary)
            log_max_rss()

            # Create the output file
            if raster is None:
                _log.info("Creating raster [%s]", path)

                driver = gdal.GetDriverByName("GTiff")
                assert driver

                raster = driver.Create(path, metadata.shape[0], metadata.shape[1], len(bands), gdal.GDT_Int16)
                assert raster

                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

                for b in bands:
                    raster.GetRasterBand(b.value).SetNoDataValue(ndv)

            _log.info("Writing band [%s] data to raster [%s]", band.name, path)
            log_max_rss()

            raster_band = raster.GetRasterBand(band.value)
            raster_band.WriteArray(masked_summary.filled(ndv), xoff=x, yoff=y)
            raster_band.ComputeStatistics(True)

            raster.FlushCache()

            masked_summary = None
            _log.debug("NONE-ing the masked summary")
            log_max_rss()

        stack = None
        _log.debug("Just NONE-ed the stack")
        log_max_rss()

    # Dropping the last reference to the raster
    raster = None

    _log.debug("Just NONE'd the raster")
    log_max_rss()

    _log.info("Memory usage was [%d MB]", resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)
    _log.info("CPU time used [%s]", timedelta(seconds=int(resource.getrusage(resource.RUSAGE_SELF).ru_utime)))
def doit(self):
    """
    Build a "best pixel" NBAR composite plus satellite and date provenance.

    Tiles are visited in the order returned by ``get_tiles(sort=SortType.DESC)``.
    Each pixel takes its values from the first visited tile that has data for
    it, and the satellite and acquisition date (as YYYYMMDD) of that tile are
    recorded alongside.  Processing stops early once no band has a no data
    pixel left.

    Three rasters are written: ``NBAR`` (all bands), ``SAT`` and ``DATE``.
    The epoch provenance dataset is currently not produced.

    :raises Exception: if there are no tiles to process.
    """
    shape = (4000, 4000)
    no_data_value = NDV

    # TODO
    if Satellite.LS8.value in self.satellites:
        bands = Ls8Arg25Bands
    else:
        bands = Ls57Arg25Bands

    best_pixel_data = dict()
    for band in bands:
        best_pixel_data[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=no_data_value)

    best_pixel_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=no_data_value)
    best_pixel_date = empty_array(shape=shape, dtype=numpy.int32, ndv=no_data_value)

    metadata = None

    SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    for tile in self.get_tiles(sort=SortType.DESC):

        # Get ARG25 dataset
        dataset = tile.datasets[DatasetType.ARG25]
        _log.info("Processing ARG tile [%s]", dataset.path)

        if not metadata:
            metadata = get_dataset_metadata(dataset)

        if self.apply_pq_filter:
            band_data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25])
        else:
            band_data = get_dataset_data(dataset)

        # Create the provenance datasets
        # NOTE: need to do this BEFORE selecting the pixel since it is actually using the fact that the
        # selected pixel currently doesn't have a value
        # NOTE: band values are propagated "as a job lot" so can just check any band
        # TODO better way than just saying....RED....?
        unfilled = best_pixel_data[bands.RED] == no_data_value

        # Satellite - typed scalars broadcast in numpy.where and keep the
        # result dtype, so no full size scratch array is needed
        satellite_value = numpy.int16(SATELLITE_DATA_VALUES[dataset.satellite])
        best_pixel_satellite = numpy.where(unfilled, satellite_value, best_pixel_satellite)

        # Date dataset (20150101)
        date_value = numpy.int32(tile.end_datetime.year * 10000 +
                                 tile.end_datetime.month * 100 +
                                 tile.end_datetime.day)
        best_pixel_date = numpy.where(unfilled, date_value, best_pixel_date)

        for band in bands:
            # Replace any NO DATA best pixels with data pixels
            # TODO should I explicitly do the AND data is not NO DATA VALUE?
            best_pixel_data[band] = numpy.where(best_pixel_data[band] == no_data_value,
                                                band_data[band], best_pixel_data[band])

        # Checked band by band (stopping at the first hit) instead of
        # copying every band into one big temporary array
        still_no_data = any(numpy.any(best_pixel_data[b] == no_data_value) for b in bands)

        if not still_no_data:
            break

    if metadata is None:
        raise Exception("No tiles found - nothing to composite")

    # Now want to mask out values in the provenance datasets if we haven't actually got a value
    # TODO better way than just saying....RED....?
    mask = numpy.ma.masked_equal(best_pixel_data[bands.RED], no_data_value).mask

    best_pixel_satellite = numpy.ma.array(best_pixel_satellite, mask=mask).filled(no_data_value)
    best_pixel_date = numpy.ma.array(best_pixel_date, mask=mask).filled(no_data_value)

    # Composite NBAR dataset
    raster_create(self.get_output_path("NBAR"), [best_pixel_data[b] for b in bands],
                  metadata.transform, metadata.projection, no_data_value, gdal.GDT_Int16)

    # Provenance (satellite) dataset
    raster_create(self.get_output_path("SAT"), [best_pixel_satellite],
                  metadata.transform, metadata.projection, no_data_value, gdal.GDT_Int16)

    # Provenance (date) dataset
    raster_create(self.get_output_path("DATE"), [best_pixel_date],
                  metadata.transform, metadata.projection, no_data_value, gdal.GDT_Int32)
def doit(self):
    """
    Build the bare soil "best pixel" composites for a cell.

    For every tile the FC bare soil band is masked (no data, values outside
    0..8000, the PQ mask and NDVI outside 0..0.3) and merged into the running
    best bare soil layer with ``numpy.fmax``.  The remaining FC bands, the
    NBAR bands and the provenance layers (satellite, year, month, epoch) are
    then updated through ``propagate_using_selected_pixel``.

    Six rasters are written: ``FC``, ``NBAR``, ``SAT``, ``YEAR``, ``MONTH``
    and ``EPOCH``.
    """
    _log.debug("Bare Soil Cell Task - doit()")

    shape = (4000, 4000)
    no_data_value = NDV

    def blank(dtype, ndv=NDV):
        return empty_array(shape=shape, dtype=dtype, ndv=ndv)

    # Running "best" layers.  The FC layers start at INT16_MIN, all others at NDV.
    best_pixel_fc = {band: blank(numpy.int16, INT16_MIN) for band in Fc25Bands}
    best_pixel_nbar = {band: blank(numpy.int16) for band in Ls57Arg25Bands}

    best_pixel_satellite = blank(numpy.int16)
    best_pixel_year = blank(numpy.int16)
    best_pixel_month = blank(numpy.int16)
    best_pixel_epoch = blank(numpy.int32)

    # Scratch layers holding the constant provenance value of the current tile
    current_satellite = blank(numpy.int16)
    current_year = blank(numpy.int16)
    current_month = blank(numpy.int16)
    current_epoch = blank(numpy.int32)

    SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    metadata_nbar = None
    metadata_fc = None

    for tile in self.get_tiles():

        # Get the PQ mask
        pq = tile.datasets[DatasetType.PQ25]
        mask_pq = get_pq_mask(get_dataset_data(pq, [Pq25Bands.PQ])[Pq25Bands.PQ])

        # Get NBAR dataset
        nbar = tile.datasets[DatasetType.ARG25]
        _log.info("Processing NBAR tile [%s]", nbar.path)

        if not metadata_nbar:
            metadata_nbar = get_dataset_metadata(nbar)

        data_nbar = get_dataset_data_with_pq(nbar, Ls57Arg25Bands, pq)

        # Get the NDVI mask: no data or outside 0..0.3
        ndvi = calculate_ndvi(data_nbar[Ls57Arg25Bands.RED], data_nbar[Ls57Arg25Bands.NEAR_INFRARED])
        ndvi = numpy.ma.masked_outside(numpy.ma.masked_equal(ndvi, NDV), 0, 0.3, copy=False)
        mask_ndvi = ndvi.mask

        # Get FC25 dataset
        fc = tile.datasets[DatasetType.FC25]
        _log.info("Processing FC tile [%s]", fc.path)

        if not metadata_fc:
            metadata_fc = get_dataset_metadata(fc)

        _log.debug("metadata fc is %s", metadata_fc)

        data_fc = get_dataset_data(fc, Fc25Bands)

        # Candidate bare soil values: mask no data and out of range values,
        # add the PQ and NDVI masks, then fill what is masked with NDV
        candidate = numpy.ma.masked_outside(numpy.ma.masked_equal(data_fc[Fc25Bands.BS], -999), 0, 8000)
        candidate.mask = (candidate.mask | mask_pq | mask_ndvi)
        data_bare_soil = candidate.filled(NDV)

        # Compare the bare soil value from this dataset to the current "best" value
        best_bare_soil = numpy.fmax(best_pixel_fc[Fc25Bands.BS], data_bare_soil)
        best_pixel_fc[Fc25Bands.BS] = best_bare_soil

        def take_selected(current, best):
            return propagate_using_selected_pixel(best_bare_soil, data_bare_soil, current, best)

        # Now update the other best pixel datasets/bands to grab the pixels we just selected
        for band in Ls57Arg25Bands:
            best_pixel_nbar[band] = take_selected(data_nbar[band], best_pixel_nbar[band])

        for band in [Fc25Bands.PV, Fc25Bands.NPV, Fc25Bands.ERROR]:
            best_pixel_fc[band] = take_selected(data_fc[band], best_pixel_fc[band])

        # And now the other "provenance" data
        current_satellite.fill(SATELLITE_DATA_VALUES[fc.satellite])
        best_pixel_satellite = take_selected(current_satellite, best_pixel_satellite)

        current_year.fill(tile.end_datetime_year)
        best_pixel_year = take_selected(current_year, best_pixel_year)

        current_month.fill(tile.end_datetime_month)
        best_pixel_month = take_selected(current_month, best_pixel_month)

        current_epoch.fill(calendar.timegm(tile.end_datetime.timetuple()))
        best_pixel_epoch = take_selected(current_epoch, best_pixel_epoch)

    # Create the output datasets

    # FC composite
    fc_band_metadata = metadata_fc.bands[Fc25Bands.BS]
    raster_create(self.get_dataset_filename("FC"),
                  [best_pixel_fc[b] for b in Fc25Bands],
                  metadata_fc.transform, metadata_fc.projection,
                  fc_band_metadata.no_data_value, fc_band_metadata.data_type)

    # NBAR composite
    nbar_band_metadata = metadata_nbar.bands[Ls57Arg25Bands.BLUE]
    raster_create(self.get_dataset_filename("NBAR"),
                  [best_pixel_nbar[b] for b in Ls57Arg25Bands],
                  metadata_nbar.transform, metadata_nbar.projection,
                  nbar_band_metadata.no_data_value, nbar_band_metadata.data_type)

    # "Provenance" composites
    provenance_outputs = (
        ("SAT", best_pixel_satellite, gdal.GDT_Int16),
        ("YEAR", best_pixel_year, gdal.GDT_Int16),
        ("MONTH", best_pixel_month, gdal.GDT_Int16),
        ("EPOCH", best_pixel_epoch, gdal.GDT_Int32),
    )
    for suffix, layer, gdal_type in provenance_outputs:
        raster_create(self.get_dataset_filename(suffix), [layer],
                      metadata_nbar.transform, metadata_nbar.projection,
                      no_data_value, gdal_type)
def zonal_stats(dataset, rasterised_fname, dataset_type):
    """
    Computes the Observed Count, Min, Max, Sum and Sum of Squares for the
    segments defined by the rasterised image. The stats are derived from
    the `dataset` defined by the `dataset_type`.

    :param dataset:
        A class of type `Dataset`.  Its `datasets` mapping must hold both
        the PQ25 dataset and the dataset given by `dataset_type`.

    :param rasterised_fname:
        A string containing the full file pathname of an image
        containing the rasterised features. These features will
        be interpreted as segments.

    :param dataset_type:
        A class of type `DatasetType`.

    :return:
        A `pandas.DataFrame` containing the statistics for each segment
        and for each raster band contained with the `dataset_type`.
        One row per (band, segment) pair that has pixels; the frame is
        empty when there is nothing to report.  The SID and statistic
        columns are stored as floats.
    """
    headings = ["SID", "Timestamp", "Band", "Observed_Count", "Min", "Max",
                "Sum", "Sum_of_Squares"]
    numeric_headings = ["SID", "Observed_Count", "Min", "Max", "Sum",
                        "Sum_of_Squares"]

    def empty_frame():
        # Blank dataframe with the expected columns
        return pandas.DataFrame(columns=headings, dtype=float)

    # Read the rasterised image
    with rasterio.open(rasterised_fname) as src:
        img = src.read(1)

    # Initialise the segment visitor
    seg_vis = Segments(img)

    # Do we have any data to analyse???
    if seg_vis.n_segments == 0:
        return empty_frame()

    # We need to get the PQ data and the DatasetType of interest
    pq_ds = dataset.datasets[DatasetType.PQ25]
    ds = dataset.datasets[dataset_type]
    timestamp = dataset.start_datetime
    bands = ds.bands

    no_data = -999

    # TODO have a user choice at the config level to determine which PQ flags
    # to apply
    # pq_flags = [PqaMask.PQ_MASK_CLEAR]  # The default will mask everything
    # NOTE(review): pq_flags is built but never handed to
    # get_dataset_data_with_pq below, so the reader's default PQ masks are
    # what is actually applied - confirm which behaviour is intended.
    pq_flags = [PqaMask.PQ_MASK_SATURATION, PqaMask.PQ_MASK_CONTIGUITY,
                PqaMask.PQ_MASK_CLOUD]  # cloud and cloud shadow

    # Rows are collected in a list and turned into a frame once at the end;
    # growing a DataFrame row by row copies it on every append.
    rows = []

    for band in bands:
        # When the api has a release of get_pq_mask this will have to do
        # It'll re-compute the PQ every time which is not ideal
        # Otherwise go back to eotools???
        ds_data = (get_dataset_data_with_pq(ds, pq_ds, bands=[band],
                                            ndv=no_data)[band]).astype('float')

        # Set no-data to NaN
        ds_data[ds_data == no_data] = numpy.nan

        # Loop over each segment and get the data.
        # In other instances we may just need the locations
        for seg_id in seg_vis.ids:
            data = seg_vis.data(ds_data, segment_id=seg_id)

            # Returns are 1D arrays, so check if we have an empty array
            if data.shape[0] == 0:
                continue  # Empty bin, (no data), skipping

            # Compute the stats; NaN (no data) pixels are ignored
            rows.append({"SID": seg_id,
                         "Timestamp": timestamp,
                         "Band": band.name,
                         "Observed_Count": numpy.sum(numpy.isfinite(data)),
                         "Min": numpy.nanmin(data),
                         "Max": numpy.nanmax(data),
                         "Sum": numpy.nansum(data),
                         "Sum_of_Squares": numpy.nansum(data**2)})

    if not rows:
        return empty_frame()

    df = pandas.DataFrame(rows, columns=headings)

    # Keep the numeric columns as floats, matching the float dtype declared
    # for the empty frame
    df[numeric_headings] = df[numeric_headings].astype(float)

    return df
def classifier(arg25_dataset, pq25_dataset):
    """
    Runs the classifier designed by SF.

    Every pixel is classified from its Tasselled Cap wetness and its NDVI:

    * 0 - null (NDVI is not finite)
    * 1 - water
    * 2 - non-veg
    * 3 - saltmarsh
    * 4 - mangrove/saltmarsh
    * 5 - mangrove

    :param arg25_dataset: the ARG25 dataset to classify.
    :param pq25_dataset: the PQ25 dataset used to mask the ARG25 data.
    :return: a `uint8` array of class codes with shape (rows, cols) taken
        from the dataset metadata.
    """
    # Get the metadata
    cols, rows = get_dataset_metadata(arg25_dataset).shape

    # Read the data and mask pixels via the PQ dataset
    data = get_dataset_data_with_pq(arg25_dataset, pq25_dataset)

    # Get the wetness coefficients and calculate
    coefficients = TCI_COEFFICIENTS[arg25_dataset.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(data, coefficients)

    # NDVI
    ndvi = calculate_ndvi(data[arg25_dataset.bands.RED],
                          data[arg25_dataset.bands.NEAR_INFRARED],
                          output_ndv=numpy.nan)

    # Dump the reflectance data, the classifier only needs tc_wetness and ndvi
    del data

    # Allocate the result
    classified = numpy.zeros((rows, cols), dtype='uint8')

    # The decision tree splits on wetness into three disjoint regimes:
    #   wetness > 0      -> water
    #   wetness < -750   -> saltmarsh or non-veg, decided by a single NDVI test
    #   everything else  -> NDVI threshold ladder
    # Both the [-250, 0) range and the rest of the middle range use the same
    # ladder, so they are handled together.
    water = wetness > 0
    very_dry = wetness < -750
    middle = ~water & ~very_dry

    # Water
    classified[water] = 1

    # Very dry pixels: saltmarsh when NDVI >= 0.3, otherwise non-veg
    vegetated = ndvi >= 0.3
    classified[very_dry & vegetated] = 3
    classified[very_dry & ~vegetated] = 2

    # NDVI ladder: non-veg, saltmarsh, mangrove/saltmarsh; the first
    # threshold a pixel falls under wins
    remaining = middle
    for upper_bound, code in ((0.3, 2), (0.45, 3), (0.6, 4)):
        below = ndvi <= upper_bound
        classified[remaining & below] = code
        remaining = remaining & ~below

    # mangrove - whatever is above the last threshold
    classified[remaining] = 5

    # set any nulls
    classified[~numpy.isfinite(ndvi)] = 0

    return classified
def classifier(arg25_dataset, pq25_dataset):
    """
    Runs the classifier designed by SF.

    Every pixel is classified from its Tasselled Cap wetness and its NDVI:

    * 0 - null (NDVI is not finite)
    * 1 - water
    * 2 - non-veg
    * 3 - saltmarsh
    * 4 - mangrove/saltmarsh
    * 5 - mangrove

    :param arg25_dataset: the ARG25 dataset to classify.
    :param pq25_dataset: the PQ25 dataset used to mask the ARG25 data.
    :return: a `uint8` array of class codes with shape (rows, cols) taken
        from the dataset metadata.
    """
    # Get the metadata
    cols, rows = get_dataset_metadata(arg25_dataset).shape

    # Read the data and mask pixels via the PQ dataset
    data = get_dataset_data_with_pq(arg25_dataset, pq25_dataset)

    # Get the wetness coefficients and calculate
    coefficients = TCI_COEFFICIENTS[arg25_dataset.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(data, coefficients)

    # NDVI
    ndvi = calculate_ndvi(data[arg25_dataset.bands.RED],
                          data[arg25_dataset.bands.NEAR_INFRARED],
                          output_ndv=numpy.nan)

    # Dump the reflectance data, the classifier only needs tc_wetness and ndvi
    del data

    # Allocate the result
    classified = numpy.zeros((rows, cols), dtype='uint8')

    # The decision tree splits on wetness into three disjoint regimes:
    #   wetness > 0      -> water
    #   wetness < -750   -> saltmarsh or non-veg, decided by a single NDVI test
    #   everything else  -> NDVI threshold ladder
    # Both the [-250, 0) range and the rest of the middle range use the same
    # ladder, so they are handled together.
    water = wetness > 0
    very_dry = wetness < -750
    middle = ~water & ~very_dry

    # Water
    classified[water] = 1

    # Very dry pixels: saltmarsh when NDVI >= 0.3, otherwise non-veg
    vegetated = ndvi >= 0.3
    classified[very_dry & vegetated] = 3
    classified[very_dry & ~vegetated] = 2

    # NDVI ladder: non-veg, saltmarsh, mangrove/saltmarsh; the first
    # threshold a pixel falls under wins
    remaining = middle
    for upper_bound, code in ((0.3, 2), (0.45, 3), (0.6, 4)):
        below = ndvi <= upper_bound
        classified[remaining & below] = code
        remaining = remaining & ~below

    # mangrove - whatever is above the last threshold
    classified[remaining] = 5

    # set any nulls
    classified[~numpy.isfinite(ndvi)] = 0

    return classified