def create_tasks(args):
    """Yield the Luigi statistics tasks described by the parsed arguments.

    When ``args.file_per_statistic`` is set, one :class:`Arg25EpochStatisticsTask`
    is yielded per (season, band, statistic) for every matching cell; otherwise
    one :class:`Arg25BandStatisticsTask` is yielded per (epoch, season) for
    every matching cell.

    :param args: parsed workflow arguments (cell range, dates, satellites,
        bands, statistics, PQA masking and output options)
    :return: generator of Luigi task instances
    """
    from itertools import product

    x_list = range(args.x_min, args.x_max + 1)
    y_list = range(args.y_min, args.y_max + 1)

    dataset_types = [args.dataset_type]
    if args.mask_pqa_apply:
        # PQ25 tiles are required to apply the pixel-quality mask.
        dataset_types.append(DatasetType.PQ25)

    if args.file_per_statistic:
        for season, band, statistic in product(args.get_seasons(), args.bands, args.statistics):
            # Extend the acquisition range so seasons spanning a year boundary
            # are fully covered.
            acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
                args.acq_min, args.acq_max, season, seasons=SEASONS, extend=True)
            for cell in list_cells_as_list(x=x_list, y=y_list, satellites=args.satellites,
                                           acq_min=acq_min_extended, acq_max=acq_max_extended,
                                           dataset_types=dataset_types, include=criteria):
                yield Arg25EpochStatisticsTask(x=cell.x, y=cell.y,
                                               acq_min=acq_min_extended,
                                               acq_max=acq_max_extended,
                                               season=season,
                                               epochs=list(args.get_epochs()),
                                               satellites=args.satellites,
                                               dataset_type=args.dataset_type,
                                               band=band, bands=args.bands,
                                               mask_pqa_apply=args.mask_pqa_apply,
                                               tidal_workflow=args.tidal_workflow,
                                               tidal_ifile=args.tidal_ifile,
                                               mask_pqa_mask=args.mask_pqa_mask,
                                               x_chunk_size=args.x_chunk_size,
                                               y_chunk_size=args.y_chunk_size,
                                               statistic=statistic,
                                               statistics=args.statistics,
                                               interpolation=args.interpolation,
                                               output_directory=args.output_directory)
        return

    for (acq_min, acq_max), season in product(args.get_epochs(), args.get_seasons()):
        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, season, seasons=SEASONS, extend=True)
        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=x_list, y=y_list, satellites=args.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):
            _log.debug("\t%3d %4d", cell.x, cell.y)
            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, season)
            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended,
                                          acq_max=acq_max_extended,
                                          season=season,
                                          satellites=args.satellites,
                                          dataset_type=args.dataset_type,
                                          bands=args.bands,
                                          mask_pqa_apply=args.mask_pqa_apply,
                                          mask_pqa_mask=args.mask_pqa_mask,
                                          x_chunk_size=args.x_chunk_size,
                                          y_chunk_size=args.y_chunk_size,
                                          statistics=args.statistics,
                                          interpolation=args.interpolation,
                                          output_directory=args.output_directory)
def create_tasks(args):
    """Generate the statistics tasks requested on the command line.

    With ``args.file_per_statistic`` set, emits one Arg25EpochStatisticsTask
    per (season, band, statistic, cell); otherwise emits one
    Arg25BandStatisticsTask per (epoch, season, cell).
    """
    from itertools import product

    cells_x = range(args.x_min, args.x_max + 1)
    cells_y = range(args.y_min, args.y_max + 1)

    # PQ25 tiles are also needed whenever pixel-quality masking is requested.
    required_types = [args.dataset_type]
    if args.mask_pqa_apply:
        required_types.append(DatasetType.PQ25)

    if args.file_per_statistic:
        for season, band, statistic in product(args.get_seasons(), args.bands, args.statistics):
            start, end, criteria = build_season_date_criteria(
                args.acq_min, args.acq_max, season, seasons=SEASONS, extend=True)
            matching_cells = list_cells_as_list(x=cells_x, y=cells_y, satellites=args.satellites,
                                                acq_min=start, acq_max=end,
                                                dataset_types=required_types, include=criteria)
            for cell in matching_cells:
                yield Arg25EpochStatisticsTask(x=cell.x, y=cell.y,
                                               acq_min=start, acq_max=end,
                                               season=season,
                                               epochs=list(args.get_epochs()),
                                               satellites=args.satellites,
                                               dataset_type=args.dataset_type,
                                               band=band, bands=args.bands,
                                               mask_pqa_apply=args.mask_pqa_apply,
                                               mask_pqa_mask=args.mask_pqa_mask,
                                               x_chunk_size=args.x_chunk_size,
                                               y_chunk_size=args.y_chunk_size,
                                               statistic=statistic,
                                               statistics=args.statistics,
                                               interpolation=args.interpolation,
                                               output_directory=args.output_directory)
        return

    for (acq_min, acq_max), season in product(args.get_epochs(), args.get_seasons()):
        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, season.name)

        start, end, criteria = build_season_date_criteria(acq_min, acq_max, season,
                                                          seasons=SEASONS, extend=True)
        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   start, end, criteria)

        for cell in list_cells_as_list(x=cells_x, y=cells_y, satellites=args.satellites,
                                       acq_min=start, acq_max=end,
                                       dataset_types=required_types, include=criteria):
            _log.debug("\t%3d %4d", cell.x, cell.y)
            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, season)
            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=start, acq_max=end,
                                          season=season,
                                          satellites=args.satellites,
                                          dataset_type=args.dataset_type,
                                          bands=args.bands,
                                          mask_pqa_apply=args.mask_pqa_apply,
                                          mask_pqa_mask=args.mask_pqa_mask,
                                          x_chunk_size=args.x_chunk_size,
                                          y_chunk_size=args.y_chunk_size,
                                          statistics=args.statistics,
                                          interpolation=args.interpolation,
                                          output_directory=args.output_directory)
def requires(self):
    """Yield the per-epoch Arg25BandStatisticsTask dependencies of this task.

    For each epoch, extends the acquisition range to cover the task's season
    and yields one band-statistics task per matching cell.
    """
    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        # PQ25 tiles are required to apply the pixel-quality mask.
        dataset_types.append(DatasetType.PQ25)

    for acq_min, acq_max in self.epochs:
        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, self.season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, self.season, seasons=SEASONS, extend=True)
        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):
            _log.debug("\t%3d %4d", cell.x, cell.y)
            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, self.season)

            # BUG FIX: the original referenced the undefined name ``args``
            # (args.satellites, args.dataset_type, ...) which raised a
            # NameError at runtime inside this method; use ``self`` instead.
            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended,
                                          acq_max=acq_max_extended,
                                          season=self.season,
                                          satellites=self.satellites,
                                          dataset_type=self.dataset_type,
                                          bands=self.bands,
                                          mask_pqa_apply=self.mask_pqa_apply,
                                          tidal_workflow=self.tidal_workflow,
                                          mask_pqa_mask=self.mask_pqa_mask,
                                          x_chunk_size=self.x_chunk_size,
                                          y_chunk_size=self.y_chunk_size,
                                          statistics=self.statistics,
                                          interpolation=self.interpolation,
                                          output_directory=self.output_directory)
def requires(self):
    """Yield the per-epoch Arg25BandStatisticsTask dependencies of this task."""
    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        # PQ25 tiles are required to apply the pixel-quality mask.
        dataset_types.append(DatasetType.PQ25)

    for acq_min, acq_max in self.epochs:
        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, self.season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, self.season, seasons=SEASONS, extend=True)
        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):
            _log.debug("\t%3d %4d", cell.x, cell.y)
            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, self.season)

            # BUG FIX: the original referenced the undefined name ``args``
            # (args.satellites, args.dataset_type, ...) which raised a
            # NameError at runtime inside this method; use ``self`` instead.
            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended,
                                          acq_max=acq_max_extended,
                                          season=self.season,
                                          satellites=self.satellites,
                                          dataset_type=self.dataset_type,
                                          bands=self.bands,
                                          mask_pqa_apply=self.mask_pqa_apply,
                                          mask_pqa_mask=self.mask_pqa_mask,
                                          x_chunk_size=self.x_chunk_size,
                                          y_chunk_size=self.y_chunk_size,
                                          statistics=self.statistics,
                                          interpolation=self.interpolation,
                                          output_directory=self.output_directory)
def test_list_cells_120_020_2005_ls578_summer(config=None):
    """Cell listing for LS5/7/8, summer 2005, must match the stored reference CSV."""
    out_csv = "cells_120_020_2005_ls578_summer.csv"

    start, end, date_criteria = build_season_date_criteria(ACQ_MIN_2005, ACQ_MAX_2005, Season.SUMMER)

    list_cells_to_file(x=[CELL_X], y=[CELL_Y],
                       acq_min=start, acq_max=end,
                       satellites=SATELLITE_LS578,
                       dataset_types=DATASET_TYPE_ARG25,
                       filename=out_csv, include=date_criteria,
                       config=config)

    assert filecmp.cmp(out_csv, get_test_data_path(out_csv))
def get_tiles(x, y, satellites, acq_min, acq_max, season, dataset_type, mask_pqa_apply):
    """Return the tiles covering cell (x, y) for the given season and date range.

    The acquisition range is extended so the season is fully covered, and PQ25
    tiles are included whenever pixel-quality masking is requested.
    """
    start, end, criteria = build_season_date_criteria(acq_min, acq_max, season,
                                                      seasons=SEASONS, extend=True)

    wanted_types = [dataset_type]
    if mask_pqa_apply:
        wanted_types.append(DatasetType.PQ25)

    return list_tiles_as_list(x=[x], y=[y], satellites=satellites,
                              acq_min=start, acq_max=end,
                              dataset_types=wanted_types, include=criteria)
def test_list_tiles_120_020_2000_2010_ls578_summer_arg25_stats(config=None):
    """Tile listing for summers 2000-2010 must match the stored reference CSV."""
    out_csv = "tiles_120_020_2000_2010_ls578_summer_arg25_stats.csv"

    start, end, date_criteria = build_season_date_criteria(ACQ_MIN_2000, ACQ_MAX_2010, Season.SUMMER,
                                                           seasons=SEASONS_ARG25_STATS)

    list_tiles_to_file(x=[CELL_X], y=[CELL_Y],
                       acq_min=start, acq_max=end,
                       satellites=SATELLITE_LS578,
                       dataset_types=DATASET_TYPE_ARG25_FC25_PQ25,
                       filename=out_csv, include=date_criteria,
                       config=config)

    assert filecmp.cmp(out_csv, get_test_data_path(out_csv))
def get_tiles(self):
    """Return the season-restricted tiles covering this task's cell."""
    start, end, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                      seasons=SEASONS, extend=True)
    _log.info("\tcriteria is %s", criteria)

    # Include PQ25 tiles when pixel-quality masking will be applied.
    types_needed = [self.dataset_type]
    if self.mask_pqa_apply:
        types_needed.append(DatasetType.PQ25)

    return list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                              acq_min=start, acq_max=end,
                              dataset_types=types_needed, include=criteria)
def test_list_cells_120_020_2000_2010_ls578_summer_arg25_stats(config=None):
    """Cell listing for summers 2000-2010 against WOfS datasets must match the reference CSV.

    NOTE(review): the output filename contains "wofs" while the test name does
    not — presumably intentional since DATASET_TYPE_WOFS is queried; confirm.
    """
    out_csv = "cells_120_020_2000_2010_ls578_wofs_summer_arg25_stats.csv"

    start = parse_date_min("2000")
    end = parse_date_max("2010")
    start, end, date_criteria = build_season_date_criteria(start, end, Season.SUMMER,
                                                           seasons=SEASONS_ARG25_STATS)

    list_cells_to_file(x=[CELL_X], y=[CELL_Y],
                       acq_min=start, acq_max=end,
                       satellites=SATELLITE_LS578,
                       dataset_types=DATASET_TYPE_WOFS,
                       filename=out_csv, include=date_criteria,
                       config=config)

    assert filecmp.cmp(out_csv, get_test_data_path(out_csv))
def test_query(): workflow = Arg25BandStatisticsWorkflow() workflow.x_min = workflow.x_max = TEST_X workflow.y_min = workflow.y_max = TEST_Y workflow.acq_min = parse_date_min("1985") workflow.acq_max = parse_date_max("2014") workflow.epoch = EpochParameter(5, 6) workflow.seasons = Season workflow.seasons = [Season.SUMMER] workflow.satellites = [Satellite.LS5, Satellite.LS7] workflow.mask_pqa_apply = True workflow.mask_pqa_mask = [PqaMask.PQ_MASK_SATURATION, PqaMask.PQ_MASK_CONTIGUITY, PqaMask.PQ_MASK_CLOUD] workflow.dataset_type = DatasetType.ARG25 workflow.bands = Ls57Arg25Bands epochs = list(workflow.get_epochs()) print "" print "epochs are", epochs for season, epoch in product(workflow.seasons, epochs): print season, epoch from datacube.api.utils import build_season_date_criteria acq_min, acq_max, criteria = build_season_date_criteria(epoch[0], epoch[1], season, seasons=SEASONS, extend=True) print acq_min, acq_max, criteria from datacube.api.query import list_tiles_as_list tiles = list_tiles_as_list(x=[workflow.x_min], y=[workflow.y_min], satellites=workflow.satellites, acq_min=acq_min, acq_max=acq_max, dataset_types=[workflow.dataset_type], include=criteria) print "Tiles found is ", len(tiles)
def run(self):
    """Aggregate the per-chunk NPY statistic files into one multi-band GeoTIFF.

    One raster band is written per epoch; each band is assembled by stitching
    the chunk files back together at their pixel offsets.
    """
    _log.info("*** Aggregating chunk NPY files into TIF")

    ndv = get_dataset_type_ndv(self.dataset_type)

    # TODO
    # Geo-transform anchored at the cell's top-left corner: 0.00025 degrees
    # per pixel over the 4000x4000 grid spans exactly one 1-degree cell.
    transform = (self.x, 0.00025, 0.0, self.y + 1, 0.0, -0.00025)

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)  # WGS84 geographic lat/lon

    projection = srs.ExportToWkt()

    driver = gdal.GetDriverByName("GTiff")
    assert driver

    # Create the output TIF

    # TODO
    gdal_type = gdal.GDT_Int16
    # NDVI statistics (other than the two counts) are fractional -> floats.
    if self.dataset_type == DatasetType.NDVI and self.statistic not in [
            Statistic.COUNT, Statistic.COUNT_OBSERVED]:
        gdal_type = gdal.GDT_Float32

    raster = driver.Create(
        self.output().path, 4000, 4000, len(self.epochs), gdal_type,
        options=["INTERLEAVE=BAND", "COMPRESS=LZW", "TILED=YES"])
    assert raster

    # TODO
    raster.SetGeoTransform(transform)
    raster.SetProjection(projection)

    raster.SetMetadata(self.generate_raster_metadata())

    from itertools import product
    from datetime import date

    # One raster band per epoch; GDAL band indices start at 1.
    for index, (acq_min, acq_max) in enumerate(self.epochs, start=1):

        _log.info(
            "Doing band [%s] statistic [%s] which is band number [%s]",
            self.band.name, self.statistic.name, index)

        # Extend the acquisition range so the season is fully covered.
        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, self.season, seasons=SEASONS, extend=True)

        band = raster.GetRasterBand(index)
        assert band

        # SEASONS maps the season to its ((start_month, start_day), ...) spec;
        # used below to render the band description's start date.
        season = SEASONS[self.season]

        acq_min_str = date(acq_min_extended.year, season[0][0].value, season[0][1]).strftime("%Y%m%d")
        acq_max_str = acq_max_extended.strftime("%Y%m%d")

        # TODO
        band.SetNoDataValue(ndv)
        band.SetDescription("{band} {stat} {start}-{end}".format(
            band=self.band.name, stat=self.statistic.name,
            start=acq_min_str, end=acq_max_str))

        # Walk the chunk grid and write each chunk at its pixel offset.
        for x_offset, y_offset in product(
                range(0, 4000, self.x_chunk_size),
                range(0, 4000, self.y_chunk_size)):
            filename = self.get_statistic_filename(acq_min_extended, acq_max_extended, x_offset, y_offset)

            _log.info("Processing chunk [%4d|%4d] for [%s] from [%s]",
                      x_offset, y_offset, self.statistic.name, filename)

            # read the chunk
            try:
                data = numpy.load(filename)
            except IOError:
                # Best-effort: a missing chunk file is logged and skipped.
                _log.info("Failed to load chunk")
                continue

            _log.info("data is [%s]\n[%s]", numpy.shape(data), data)
            _log.info("Writing it to (%d,%d)", x_offset, y_offset)

            # write the chunk to the TIF at the offset
            band.WriteArray(data, x_offset, y_offset)
            band.FlushCache()

        band.ComputeStatistics(True)
        band.FlushCache()
        # Drop the band reference before moving to the next one.
        del band

    raster.FlushCache()
    # Deleting the dataset object makes GDAL finalise and close the file.
    del raster
def run(self):
    """Aggregate the per-chunk NPY statistic files into one multi-band GeoTIFF,
    one raster band per epoch."""
    _log.info("*** Aggregating chunk NPY files into TIF")

    ndv = get_dataset_type_ndv(self.dataset_type)

    # TODO
    # 0.00025 degrees/pixel over a 4000x4000 grid = one 1-degree cell,
    # anchored at the cell's top-left corner.
    transform = (self.x, 0.00025, 0.0, self.y+1, 0.0, -0.00025)

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)  # WGS84 geographic lat/lon

    projection = srs.ExportToWkt()

    driver = gdal.GetDriverByName("GTiff")
    assert driver

    # Create the output TIF

    # TODO
    gdal_type = gdal.GDT_Int16
    # NDVI statistics (other than the two counts) are fractional -> floats.
    if self.dataset_type == DatasetType.NDVI and self.statistic not in [Statistic.COUNT, Statistic.COUNT_OBSERVED]:
        gdal_type = gdal.GDT_Float32

    raster = driver.Create(self.output().path, 4000, 4000, len(self.epochs), gdal_type,
                           options=["INTERLEAVE=BAND", "COMPRESS=LZW", "TILED=YES"])
    assert raster

    # TODO
    raster.SetGeoTransform(transform)
    raster.SetProjection(projection)

    raster.SetMetadata(self.generate_raster_metadata())

    from itertools import product
    from datetime import date

    # One raster band per epoch; GDAL band indices start at 1.
    for index, (acq_min, acq_max) in enumerate(self.epochs, start=1):

        _log.info("Doing band [%s] statistic [%s] which is band number [%s]",
                  self.band.name, self.statistic.name, index)

        # Extend the acquisition range so the season is fully covered.
        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, self.season, seasons=SEASONS, extend=True)

        band = raster.GetRasterBand(index)
        assert band

        # SEASONS maps the season to its ((start_month, start_day), ...) spec.
        season = SEASONS[self.season]

        acq_min_str = date(acq_min_extended.year, season[0][0].value, season[0][1]).strftime("%Y%m%d")
        acq_max_str = acq_max_extended.strftime("%Y%m%d")

        # TODO
        band.SetNoDataValue(ndv)
        band.SetDescription("{band} {stat} {start}-{end}".format(band=self.band.name,
                                                                 stat=self.statistic.name,
                                                                 start=acq_min_str,
                                                                 end=acq_max_str))

        # Walk the chunk grid and write each chunk at its pixel offset.
        for x_offset, y_offset in product(range(0, 4000, self.x_chunk_size),
                                          range(0, 4000, self.y_chunk_size)):
            filename = self.get_statistic_filename(acq_min_extended, acq_max_extended, x_offset, y_offset)

            _log.info("Processing chunk [%4d|%4d] for [%s] from [%s]",
                      x_offset, y_offset, self.statistic.name, filename)

            # read the chunk
            try:
                data = numpy.load(filename)
            except IOError:
                # Best-effort: a missing chunk file is logged and skipped.
                _log.info("Failed to load chunk")
                continue

            _log.info("data is [%s]\n[%s]", numpy.shape(data), data)
            _log.info("Writing it to (%d,%d)", x_offset, y_offset)

            # write the chunk to the TIF at the offset
            band.WriteArray(data, x_offset, y_offset)
            band.FlushCache()

        band.ComputeStatistics(True)
        band.FlushCache()
        # Drop the band reference before moving to the next one.
        del band

    raster.FlushCache()
    # Deleting the dataset object makes GDAL finalise and close the file.
    del raster