def create_tasks(self):
    """Generate a task for every cell found for each epoch/season pairing.

    Every epoch from ``self.get_epochs()`` is combined with every season
    from ``self.get_seasons()`` (epochs vary slowest).  For each pairing the
    cells are looked up using the *extended* acquisition range and criteria
    returned by ``build_season_date_criteria``; the task that is yielded is
    nevertheless created with the epoch's own ``acq_min``/``acq_max``.

    Yields:
        Whatever ``self.create_task`` returns, once per cell reported by
        ``list_cells_as_generator``.
    """
    xs = range(self.x_min, self.x_max + 1)
    ys = range(self.y_min, self.y_max + 1)

    # The PQ25 dataset is only requested when PQA masking is switched on.
    wanted_types = [self.dataset_type]
    if self.mask_pqa_apply:
        wanted_types += [DatasetType.PQ25]

    # Materialise both inputs up front so the pairing order and evaluation
    # match a cartesian product of the two sequences.
    epochs = tuple(self.get_epochs())
    seasons = tuple(self.get_seasons())

    for epoch in epochs:
        for season in seasons:
            acq_min, acq_max = epoch

            _log.info("%s %s %s", acq_min, acq_max, season)

            extended = build_season_date_criteria(acq_min, acq_max, season,
                                                  seasons=SEASONS, extend=True)
            acq_min_extended, acq_max_extended, criteria = extended

            _log.info("\tcriteria is %s", criteria)

            cells = list_cells_as_generator(x=xs, y=ys,
                                            satellites=self.satellites,
                                            acq_min=acq_min_extended,
                                            acq_max=acq_max_extended,
                                            dataset_types=wanted_types,
                                            include=criteria)

            for cell in cells:
                _log.info("\t%3d %4d", cell.x, cell.y)
                yield self.create_task(x=cell.x, y=cell.y,
                                       acq_min=acq_min, acq_max=acq_max,
                                       season=season)
def create_tasks(self):
    """Yield one task per cell for each (epoch, season) combination.

    The cell search runs over ``x_min..x_max`` / ``y_min..y_max`` (both
    inclusive) and uses the extended date range and criteria produced by
    ``build_season_date_criteria``.  The resulting task is built from the
    original, un-extended epoch bounds plus the season.
    """
    from itertools import product

    # Parameters of the cell query that do not change between pairings.
    query = dict(
        x=range(self.x_min, self.x_max + 1),
        y=range(self.y_min, self.y_max + 1),
        satellites=self.satellites,
    )

    # Ask for the PQ25 dataset as well only when PQA masking is requested.
    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)
    query["dataset_types"] = dataset_types

    pairings = product(self.get_epochs(), self.get_seasons())

    for (acq_min, acq_max), season in pairings:
        _log.info("%s %s %s", acq_min, acq_max, season)

        acq_min_extended, acq_max_extended, criteria = \
            build_season_date_criteria(acq_min, acq_max, season,
                                       seasons=SEASONS, extend=True)

        _log.info("\tcriteria is %s", criteria)

        found = list_cells_as_generator(acq_min=acq_min_extended,
                                        acq_max=acq_max_extended,
                                        include=criteria,
                                        **query)

        for cell in found:
            _log.info("\t%3d %4d", cell.x, cell.y)
            yield self.create_task(x=cell.x, y=cell.y, acq_min=acq_min,
                                   acq_max=acq_max, season=season)
def run(self):
    """Write ``self.band`` from every matching tile into one stacked raster.

    Tiles for cell (``self.x``, ``self.y``) are listed for the acquisition
    range and criteria returned by ``build_season_date_criteria``.  The
    output raster (``self.output().path``) is created lazily, from the
    metadata of the first usable tile, with one band slot per listed tile.
    Each tile's data - masked with PQA when ``self.mask_pqa_apply`` is set
    and the tile carries a PQ25 dataset - is written to the raster band
    whose number is the tile's 1-based position in the listing.

    A tile that has no ``self.dataset_type`` dataset is logged and skipped;
    its band slot in the output is left unwritten.
    """
    _log.info("Creating stack for band [%s]", self.band.name)

    data_type = get_dataset_type_datatype(self.dataset_type)
    ndv = get_dataset_type_ndv(self.dataset_type)

    metadata = None
    driver = None
    raster = None

    acq_min, acq_max, criteria = build_season_date_criteria(
        self.acq_min, self.acq_max, self.season, seasons=SEASONS, extend=True)

    _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_list(x=[self.x], y=[self.y],
                               satellites=self.satellites,
                               acq_min=acq_min, acq_max=acq_max,
                               dataset_types=dataset_types, include=criteria)

    for index, tile in enumerate(tiles, start=1):

        # Test for the dataset BEFORE indexing it.  Previously the lookup
        # and assert came first, so a tile without the dataset failed on
        # the lookup and this skip branch could never be taken.
        if self.dataset_type not in tile.datasets:
            _log.debug("No [%s] dataset present for [%s] - skipping",
                       self.dataset_type.name, tile.end_datetime)
            continue

        dataset = tile.datasets[self.dataset_type]
        assert dataset

        band = self.band

        # PQA dataset to mask with, or None when masking is off, the tile
        # has no PQ25 dataset, or the stored entry is falsy.
        pqa = None
        if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25] or None

        filename = self.output().path

        if not metadata:
            metadata = get_dataset_metadata(dataset)
            assert metadata

        if not driver:
            if self.output_format == OutputFormat.GEOTIFF:
                driver = gdal.GetDriverByName("GTiff")
            elif self.output_format == OutputFormat.ENVI:
                driver = gdal.GetDriverByName("ENVI")
            assert driver

        if not raster:
            # One band slot per listed tile, including any skipped above.
            if self.output_format == OutputFormat.GEOTIFF:
                raster = driver.Create(
                    filename, metadata.shape[0], metadata.shape[1],
                    len(tiles), data_type,
                    options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
            elif self.output_format == OutputFormat.ENVI:
                raster = driver.Create(
                    filename, metadata.shape[0], metadata.shape[1],
                    len(tiles), data_type,
                    options=["INTERLEAVE=BSQ"])
            assert raster

            # NOTE: could do this without the metadata!!
            raster.SetGeoTransform(metadata.transform)
            raster.SetProjection(metadata.projection)

        raster.SetMetadata(self.generate_raster_metadata())

        mask = None
        if pqa:
            mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

        _log.info(
            "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
            band.name, dataset.path,
            (pqa.path or "") if pqa else "",
            (self.mask_pqa_mask or "") if pqa else "",
            filename)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

        _log.debug("data is [%s]", data)

        stack_band = raster.GetRasterBand(index)

        stack_band.SetDescription(os.path.basename(dataset.path))
        stack_band.SetNoDataValue(ndv)
        stack_band.WriteArray(data[band])
        stack_band.ComputeStatistics(True)
        stack_band.SetMetadata({
            "ACQ_DATE": format_date(tile.end_datetime),
            "SATELLITE": dataset.satellite.name,
        })

        stack_band.FlushCache()
        del stack_band

    if raster:
        raster.FlushCache()
        # Drop the reference once flushed; nothing else in this method
        # uses the raster after this point.
        raster = None
def run(self):
    """Compute the requested statistics for one chunk and save each to disk.

    A data stack is built for the chunk described by ``self.x_offset``,
    ``self.y_offset``, ``self.x_chunk_size`` and ``self.y_chunk_size`` from
    the tiles of cell (``self.x``, ``self.y``) that match the season
    criteria.  For every statistic listed in ``self.statistics`` the
    corresponding ``calculate_stack_statistic_*`` helper is run and its
    result is written with ``numpy.save`` to
    ``self.get_statistic_filename(statistic)``.

    Nothing is written when the stack is empty.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)

    acq_min, acq_max, criteria = build_season_date_criteria(
        self.acq_min, self.acq_max, self.season, seasons=SEASONS, extend=True)

    _log.info("\tcriteria is %s", criteria)

    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_generator(x=[self.x], y=[self.y],
                                    satellites=self.satellites,
                                    acq_min=acq_min, acq_max=acq_max,
                                    dataset_types=dataset_types,
                                    include=criteria)

    stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name,
                                   ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size,
                                   y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    if len(stack) == 0:
        return

    # TODO get statistics to be generated from command line argument

    # Statistics that share the (stack, ndv) call shape, in the order they
    # must be produced.
    simple_statistics = (
        (Statistic.COUNT, "COUNT", calculate_stack_statistic_count),
        (Statistic.MIN, "MIN", calculate_stack_statistic_min),
        (Statistic.MAX, "MAX", calculate_stack_statistic_max),
        (Statistic.MEAN, "MEAN", calculate_stack_statistic_mean),
    )

    for statistic, label, calculate in simple_statistics:
        if statistic not in self.statistics:
            continue

        log_mem("Before " + label)

        # Single-argument, parenthesised print: same output on Python 2,
        # valid syntax on Python 3.
        print(label)
        stack_stat = calculate(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(statistic), stack_stat)
        # Release the result before the next statistic is calculated.
        del stack_stat

    for percentile in [Statistic.PERCENTILE_25, Statistic.PERCENTILE_50,
                       Statistic.PERCENTILE_75, Statistic.PERCENTILE_90,
                       Statistic.PERCENTILE_95]:

        if percentile in self.statistics:
            message = "Before {p}".format(p=percentile.name)
            log_mem(message)

            print(message)
            stack_stat = calculate_stack_statistic_percentile(
                stack=stack, percentile=PERCENTILE[percentile], ndv=ndv)
            numpy.save(self.get_statistic_filename(percentile), stack_stat)
            del stack_stat

    if Statistic.COUNT_OBSERVED in self.statistics:
        log_mem("Before OBSERVED COUNT")

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        print("COUNT OBSERVED")
        stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
        del stack_stat

    log_mem("DONE")
def run(self):
    """Stack ``self.band`` from each matching tile into a multi-band raster.

    The tiles of cell (``self.x``, ``self.y``) are listed for the
    acquisition range and criteria given by ``build_season_date_criteria``.
    On the first usable tile the output raster at ``self.output().path`` is
    created with ``len(tiles)`` bands, sized and georeferenced from that
    tile's dataset metadata.  Every tile's data - PQA-masked when
    ``self.mask_pqa_apply`` is set and a PQ25 dataset is present - is then
    written to the band numbered by the tile's 1-based position.

    Tiles lacking a ``self.dataset_type`` dataset are logged and skipped,
    leaving their band slot unwritten.
    """
    _log.info("Creating stack for band [%s]", self.band.name)

    data_type = get_dataset_type_datatype(self.dataset_type)
    ndv = get_dataset_type_ndv(self.dataset_type)

    metadata = None
    driver = None
    raster = None

    acq_min, acq_max, criteria = build_season_date_criteria(
        self.acq_min, self.acq_max, self.season, seasons=SEASONS, extend=True)

    _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_list(x=[self.x], y=[self.y],
                               satellites=self.satellites,
                               acq_min=acq_min, acq_max=acq_max,
                               dataset_types=dataset_types, include=criteria)

    for index, tile in enumerate(tiles, start=1):

        # The presence check has to run before the dataset is indexed;
        # with the lookup first, a missing dataset raised instead of
        # reaching this skip branch.
        if self.dataset_type not in tile.datasets:
            _log.debug("No [%s] dataset present for [%s] - skipping",
                       self.dataset_type.name, tile.end_datetime)
            continue

        dataset = tile.datasets[self.dataset_type]
        assert dataset

        band = self.band

        # Resolve the PQA dataset explicitly rather than with the
        # ``cond and value or None`` idiom; a falsy entry still maps to None.
        pqa = None
        if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25] or None

        filename = self.output().path

        if not metadata:
            metadata = get_dataset_metadata(dataset)
            assert metadata

        if not driver:
            if self.output_format == OutputFormat.GEOTIFF:
                driver = gdal.GetDriverByName("GTiff")
            elif self.output_format == OutputFormat.ENVI:
                driver = gdal.GetDriverByName("ENVI")
            assert driver

        if not raster:
            # The band count covers every listed tile, skipped ones too.
            if self.output_format == OutputFormat.GEOTIFF:
                raster = driver.Create(
                    filename, metadata.shape[0], metadata.shape[1],
                    len(tiles), data_type,
                    options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
            elif self.output_format == OutputFormat.ENVI:
                raster = driver.Create(
                    filename, metadata.shape[0], metadata.shape[1],
                    len(tiles), data_type,
                    options=["INTERLEAVE=BSQ"])
            assert raster

            # NOTE: could do this without the metadata!!
            raster.SetGeoTransform(metadata.transform)
            raster.SetProjection(metadata.projection)

        raster.SetMetadata(self.generate_raster_metadata())

        mask = None
        if pqa:
            mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

        _log.info(
            "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
            band.name, dataset.path,
            (pqa.path or "") if pqa else "",
            (self.mask_pqa_mask or "") if pqa else "",
            filename)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

        _log.debug("data is [%s]", data)

        stack_band = raster.GetRasterBand(index)

        stack_band.SetDescription(os.path.basename(dataset.path))
        stack_band.SetNoDataValue(ndv)
        stack_band.WriteArray(data[band])
        stack_band.ComputeStatistics(True)
        stack_band.SetMetadata({
            "ACQ_DATE": format_date(tile.end_datetime),
            "SATELLITE": dataset.satellite.name,
        })

        stack_band.FlushCache()
        del stack_band

    if raster:
        raster.FlushCache()
        # Release the raster reference after the final flush.
        raster = None