def test_get_band_stack_filename_seasons_ord_wet():
    """Check band stack filenames for a user-defined WET season.

    The season is declared as 1 November to 31 March. The same request is
    made for three satellite combinations; only the satellite prefix of the
    expected filename changes between them.
    """
    wet_season = SeasonParameter("WET", (Month.NOVEMBER, 1), (Month.MARCH, 31))

    # (satellites requested, filename expected) -- checked in this order.
    cases = [
        ([Satellite.LS5],
         "LS5_NBAR_120_-020_2000_01_01_2005_12_31_WET_NOV_01_MAR_31_BLUE_STACK.tif"),
        ([Satellite.LS5, Satellite.LS7],
         "LS57_NBAR_120_-020_2000_01_01_2005_12_31_WET_NOV_01_MAR_31_BLUE_STACK.tif"),
        ([Satellite.LS5, Satellite.LS7, Satellite.LS8],
         "LS578_NBAR_120_-020_2000_01_01_2005_12_31_WET_NOV_01_MAR_31_BLUE_STACK.tif"),
    ]

    for satellites, expected in cases:
        actual = get_dataset_band_stack_filename(
            satellites=satellites,
            dataset_type=DatasetType.ARG25,
            band=Ls57Arg25Bands.BLUE,
            x=120, y=-20,
            acq_min=parse_date_min("2000"),
            acq_max=parse_date_max("2005"),
            season=wet_season,
            mask_pqa_apply=False,
            mask_wofs_apply=False,
            mask_vector_apply=False,
            output_format=OutputFormat.GEOTIFF)

        assert actual == expected
def test_get_band_stack_filename_seasons_bom():
    """Check band stack filenames for the predefined SUMMER season.

    The season bounds are looked up in the SEASONS table and wrapped in a
    SeasonParameter named after the Season member.
    """
    summer = Season.SUMMER
    start = SEASONS[summer][0]
    end = SEASONS[summer][1]
    summer_parameter = SeasonParameter(summer.name, (start[0], start[1]), (end[0], end[1]))

    # NOTE(review): the expected names contain "FEB_31"; presumably SEASONS
    # stores 31 as an end-of-month marker for February -- confirm.
    cases = [
        ([Satellite.LS5],
         "LS5_NBAR_120_-020_2000_01_01_2005_12_31_SUMMER_DEC_01_FEB_31_BLUE_STACK.tif"),
        ([Satellite.LS5, Satellite.LS7],
         "LS57_NBAR_120_-020_2000_01_01_2005_12_31_SUMMER_DEC_01_FEB_31_BLUE_STACK.tif"),
        ([Satellite.LS5, Satellite.LS7, Satellite.LS8],
         "LS578_NBAR_120_-020_2000_01_01_2005_12_31_SUMMER_DEC_01_FEB_31_BLUE_STACK.tif"),
    ]

    for satellites, expected in cases:
        actual = get_dataset_band_stack_filename(
            satellites=satellites,
            dataset_type=DatasetType.ARG25,
            band=Ls57Arg25Bands.BLUE,
            x=120, y=-20,
            acq_min=parse_date_min("2000"),
            acq_max=parse_date_max("2005"),
            season=summer_parameter,
            mask_pqa_apply=False,
            mask_wofs_apply=False,
            mask_vector_apply=False,
            output_format=OutputFormat.GEOTIFF)

        assert actual == expected
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this instance.

    ``args`` is any object exposing the attributes read below (presumably
    the parsed command-line namespace -- confirm against the caller).
    The acquisition bounds go through parse_date_min / parse_date_max;
    everything else is stored as given. The module logger level is set
    last, from ``args.log_level``.
    """
    for name in ("output_directory", "x_min", "x_max", "y_min", "y_max"):
        setattr(self, name, getattr(args, name))

    self.acq_min = parse_date_min(args.acq_min)
    self.acq_max = parse_date_max(args.acq_max)

    # The argument is singular, the attribute plural.
    self.satellites = args.satellite

    for name in ("csv", "dummy",
                 "mask_pqa_apply", "mask_pqa_mask",
                 "mask_wofs_apply", "mask_wofs_mask",
                 "local_scheduler", "workers"):
        setattr(self, name, getattr(args, name))

    _log.setLevel(args.log_level)
def test_get_epochs_5_6():
    """Check the epochs produced for 1985-2014 with EpochParameter(5, 6).

    The expected windows start every 5 years, each running from 1 January
    of the start year to 31 December five years later, with the final
    window cut off at the end of 2014.
    """
    wf = Arg25BandStatisticsWorkflow()

    wf.x_min = TEST_X
    wf.x_max = TEST_X
    wf.y_min = TEST_Y
    wf.y_max = TEST_Y

    wf.acq_min = parse_date_min("1985")
    wf.acq_max = parse_date_max("2014")

    wf.epoch = EpochParameter(5, 6)
    wf.seasons = Season

    actual = list(wf.get_epochs())

    # Start years 1985, 1990, ..., 2010; end years 1990, ..., 2010, 2014.
    expected = [(date(year, 1, 1), date(min(year + 5, 2014), 12, 31))
                for year in range(1985, 2015, 5)]

    assert actual == expected
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this workflow.

    ``args.x`` and ``args.y`` must each unpack into exactly two values
    (minimum, maximum). ``args.acq`` is indexed as [0] (start) and [1]
    (end), which are passed through parse_date_min / parse_date_max.
    All other values are stored unchanged.

    NOTE: the super class implementation is deliberately not invoked;
    the calls that did so are disabled in the original code.
    """
    self.x_min, self.x_max = args.x
    self.y_min, self.y_max = args.y

    self.output_directory = args.output_directory

    acq_range = args.acq
    self.acq_min = parse_date_min(acq_range[0])
    self.acq_max = parse_date_max(acq_range[1])

    self.satellites = args.satellites
    self.epoch = args.epoch

    # The argument is singular, the attribute plural.
    self.seasons = args.season

    for name in ("mask_pqa_apply", "mask_pqa_mask", "local_scheduler", "workers"):
        setattr(self, name, getattr(args, name))

    _log.setLevel(args.log_level)

    for name in ("dataset_type", "bands", "output_format"):
        setattr(self, name, getattr(args, name))
def test_list_tiles_120_020_2005_ls578_no_ls8_pre_wrs_2(config=None):
    """List tiles for the test cell/year while excluding pre-WRS-2 LS8 data.

    Every tile returned must belong to the requested cell and year, carry
    all requested dataset types, and -- if it is an LS8 tile -- must not
    end before LS8_PRE_WRS_2_ACQ_MAX.

    :param config: passed straight through to list_tiles_as_list
    """
    dataset_types = [DatasetType.ARG25, DatasetType.PQ25, DatasetType.FC25]

    tiles = list_tiles_as_list(x=[TEST_CELL_X], y=[TEST_CELL_Y],
                               acq_min=parse_date_min(TEST_YEAR_STR),
                               acq_max=parse_date_max(TEST_YEAR_STR),
                               satellites=[Satellite.LS5, Satellite.LS7, Satellite.LS8],
                               dataset_types=dataset_types,
                               exclude=[LS8_PRE_WRS_2_EXCLUSION],
                               config=config)

    assert(tiles and len(list(tiles)) > 0)

    for tile in tiles:
        _log.info("Found tile xy = %s", tile.xy)

        dataset = tile.datasets[DatasetType.ARG25]
        assert dataset

        _log.info("Found ARG25 dataset [%s]", dataset.path)

        assert tile.x == TEST_CELL_X and tile.y == TEST_CELL_Y
        assert tile.xy == (TEST_CELL_X, TEST_CELL_Y)
        assert tile.end_datetime_year == TEST_YEAR

        # all() is required here: a bare generator expression is always
        # truthy, so without it this condition could never fail.
        assert all(ds in tile.datasets for ds in dataset_types)

        assert (dataset.satellite != Satellite.LS8
                or tile.end_datetime.date() >= LS8_PRE_WRS_2_ACQ_MAX)
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this instance.

    The acquisition bounds go through parse_date_min / parse_date_max.
    When ``args.epoch`` is truthy its first two items are converted to
    int and wrapped in an EpochParameter.

    NOTE(review): when ``args.epoch`` is falsy this method does not
    assign ``self.epoch`` at all -- confirm a default exists elsewhere.
    """
    self.input_directory = args.input_directory
    self.output_directory = args.output_directory

    self.acq_min = parse_date_min(args.acq_min)
    self.acq_max = parse_date_max(args.acq_max)

    self.satellites = args.satellites

    epoch_values = args.epoch
    if epoch_values:
        self.epoch = EpochParameter(int(epoch_values[0]), int(epoch_values[1]))

    # Singular argument names feed plural attributes.
    self.seasons = args.season
    self.statistics = args.statistic

    for name in ("samples", "dataset_type", "bands",
                 "mask_pqa_apply", "mask_pqa_mask"):
        setattr(self, name, getattr(args, name))

    _log.setLevel(args.log_level)

    self.cells = args.cell
    self.interpolation = args.interpolation
def get_workflow_1985_2014():
    """Build an Arg25BandStatisticsWorkflow configured for 1985-2014.

    The workflow covers the single cell (129, -26), all members of Season,
    LS5 and LS7, every band in Ls57Arg25Bands, and the 25th/50th/75th
    percentile statistics, writing to "/tmp". ``local_scheduler`` and
    ``workers`` are intentionally left unset.

    :return: the configured workflow instance
    """
    settings = [
        ("x_min", 129),
        ("x_max", 129),
        ("y_min", -26),
        ("y_max", -26),
        ("acq_min", parse_date_min("1985")),
        ("acq_max", parse_date_max("2014")),
        ("epoch", EpochParameter(5, 6)),
        ("seasons", Season),
        ("satellites", [Satellite.LS5, Satellite.LS7]),
        ("output_directory", "/tmp"),
        ("mask_pqa_apply", True),
        ("mask_pqa_mask", [PqaMask.PQ_MASK_SATURATION,
                           PqaMask.PQ_MASK_CONTIGUITY,
                           PqaMask.PQ_MASK_CLOUD]),
        ("dataset_type", DatasetType.ARG25),
        ("bands", Ls57Arg25Bands),
        ("x_chunk_size", 4000),
        ("y_chunk_size", 4000),
        ("statistics", [Statistic.PERCENTILE_25,
                        Statistic.PERCENTILE_50,
                        Statistic.PERCENTILE_75]),
    ]

    wf = Arg25BandStatisticsWorkflow()

    for attribute, value in settings:
        setattr(wf, attribute, value)

    return wf
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this instance.

    The module logger level is set first. The acquisition bounds go
    through parse_date_min / parse_date_max. When ``args.season`` is
    truthy it is read as (name, start, end) and turned into a
    SeasonParameter using parse_season_min / parse_season_max;
    otherwise ``self.season`` is not touched by this method.

    NOTE: processing-date and ingest-date ranges are not handled here;
    the code for them is disabled in the original.
    """
    _log.setLevel(args.log_level)

    self.acq_min = parse_date_min(args.acq_min)
    self.acq_max = parse_date_max(args.acq_max)

    season_spec = args.season
    if season_spec:
        season_name, season_start, season_end = season_spec[0], season_spec[1], season_spec[2]
        self.season = SeasonParameter(season_name,
                                      parse_season_min(season_start),
                                      parse_season_max(season_end))

    # The argument is singular, the attribute plural.
    self.satellites = args.satellite

    for name in ("mask_pqa_apply", "mask_pqa_mask",
                 "mask_wofs_apply", "mask_wofs_mask",
                 "include_ls7_slc_off", "include_ls8_pre_wrs2"):
        setattr(self, name, getattr(args, name))
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this workflow.

    The acquisition bounds go through parse_date_min / parse_date_max.
    When ``args.epoch`` is truthy its first two items are converted to
    int and wrapped in an EpochParameter; otherwise ``self.epoch`` is not
    touched by this method. All other values are stored unchanged, some
    under a different attribute name than the argument they come from.

    NOTE: the super class implementation is deliberately not invoked, and
    the check that every requested satellite provides every requested
    band is disabled in the original code.
    """
    for name in ("x_min", "x_max", "y_min", "y_max", "output_directory"):
        setattr(self, name, getattr(args, name))

    self.acq_min = parse_date_min(args.acq_min)
    self.acq_max = parse_date_max(args.acq_max)

    self.satellites = args.satellites

    epoch_values = args.epoch
    if epoch_values:
        self.epoch = EpochParameter(int(epoch_values[0]), int(epoch_values[1]))

    # The argument is singular, the attribute plural.
    self.seasons = args.season

    for name in ("mask_pqa_apply", "tidal_workflow", "tidal_ifile",
                 "mask_pqa_mask", "local_scheduler", "workers"):
        setattr(self, name, getattr(args, name))

    _log.setLevel(args.log_level)

    self.dataset_type = args.dataset_type
    self.bands = args.bands

    # (attribute on self, attribute on args) where the two names differ.
    renamed = (("x_chunk_size", "chunk_size_x"),
               ("y_chunk_size", "chunk_size_y"),
               ("statistics", "statistic"))

    for target, source in renamed:
        setattr(self, target, getattr(args, source))

    self.interpolation = args.interpolation
    self.file_per_statistic = args.file_per_statistic
def test_list_cells_120_020_2005_ls578(config=None):
    """List ARG25 cells for the test cell/year across LS5, LS7 and LS8.

    At least one cell must come back, and each one must be the test cell.

    :param config: passed straight through to list_cells_as_list
    """
    requested_satellites = [Satellite.LS5, Satellite.LS7, Satellite.LS8]

    cells = list_cells_as_list(x=[TEST_CELL_X], y=[TEST_CELL_Y],
                               acq_min=parse_date_min(TEST_YEAR_STR),
                               acq_max=parse_date_max(TEST_YEAR_STR),
                               satellites=requested_satellites,
                               dataset_types=[DatasetType.ARG25],
                               config=config)

    assert cells
    assert len(list(cells)) > 0

    for found in cells:
        _log.info("Found cell xy = %s", found.xy)

        assert found.x == TEST_CELL_X
        assert found.y == TEST_CELL_Y
        assert found.xy == (TEST_CELL_X, TEST_CELL_Y)
def test_list_cells_act_2005_ls578(config=None):
    """List ARG25 cells intersecting the test vector feature for the test year.

    Exactly two cells are expected: x of 148 or 149, both at y of -36.

    :param config: passed through to list_cells_vector_file_as_list
    """
    # Forward the caller's config; previously None was hard-coded here, so
    # the parameter had no effect.
    cells = list_cells_vector_file_as_list(vector_file=TEST_VECTOR_FILE,
                                           vector_layer=TEST_VECTOR_LAYER,
                                           vector_feature=TEST_VECTOR_FEATURE,
                                           satellites=[Satellite.LS5, Satellite.LS7, Satellite.LS8],
                                           acq_min=parse_date_min(TEST_YEAR_STR),
                                           acq_max=parse_date_max(TEST_YEAR_STR),
                                           dataset_types=[DatasetType.ARG25],
                                           config=config)

    assert(cells and len(list(cells)) == 2)

    for cell in cells:
        _log.info("Found cell xy = %s", cell.xy)
        assert((cell.x == 148 or cell.x == 149) and cell.y == -36)
def test_list_tiles_120_020_2005_ls578(config=None):
    """List tiles for the test cell/year across LS5, LS7 and LS8.

    At least one tile must come back. Every tile must belong to the test
    cell and year and carry all of the requested dataset types.

    :param config: passed straight through to list_tiles_as_list
    """
    dataset_types = [DatasetType.ARG25, DatasetType.PQ25, DatasetType.FC25]

    tiles = list_tiles_as_list(x=[TEST_CELL_X], y=[TEST_CELL_Y],
                               acq_min=parse_date_min(TEST_YEAR_STR),
                               acq_max=parse_date_max(TEST_YEAR_STR),
                               satellites=[Satellite.LS5, Satellite.LS7, Satellite.LS8],
                               dataset_types=dataset_types,
                               config=config)

    assert(tiles and len(list(tiles)) > 0)

    for tile in tiles:
        _log.info("Found tile xy = %s", tile.xy)

        # These used to be one expression ending in "... for ds in
        # dataset_types", which made the whole assert argument a generator
        # expression -- always truthy, so nothing was ever checked.
        assert tile.x == TEST_CELL_X and tile.y == TEST_CELL_Y
        assert tile.xy == (TEST_CELL_X, TEST_CELL_Y)
        assert tile.end_datetime_year == TEST_YEAR
        assert all(ds in tile.datasets for ds in dataset_types)
def test_list_cells_120_020_2000_2010_ls578_summer_arg25_stats(config=None):
    """Write the SUMMER 2000-2010 cell listing to CSV and compare it.

    The acquisition range and include criteria come from
    build_season_date_criteria using the SEASONS_ARG25_STATS table. The
    file written to the working directory must compare equal (per
    filecmp.cmp) to the reference file of the same name in the test data.

    :param config: passed straight through to list_cells_to_file
    """
    output_name = "cells_120_020_2000_2010_ls578_summer_arg25_stats.csv"

    season_min, season_max, criteria = build_season_date_criteria(
        parse_date_min("2000"), parse_date_max("2010"),
        Season.SUMMER, seasons=SEASONS_ARG25_STATS)

    list_cells_to_file(x=[CELL_X], y=[CELL_Y],
                       acq_min=season_min, acq_max=season_max,
                       satellites=SATELLITE_LS578,
                       dataset_types=DATASET_TYPE_ARG25,
                       filename=output_name,
                       include=criteria,
                       config=config)

    reference_path = get_test_data_path(output_name)

    assert filecmp.cmp(output_name, reference_path)
def test_query():
    """Print the tiles found for each SUMMER epoch between 1985 and 2014.

    This is an exploratory test: it makes no assertions and only prints
    the epochs, the season date criteria derived for each epoch and the
    number of tiles the query returns.
    """
    # Imported once here rather than on every loop iteration. Kept local to
    # the function, as in the original.
    from datacube.api.utils import build_season_date_criteria
    from datacube.api.query import list_tiles_as_list

    workflow = Arg25BandStatisticsWorkflow()

    workflow.x_min = workflow.x_max = TEST_X
    workflow.y_min = workflow.y_max = TEST_Y

    workflow.acq_min = parse_date_min("1985")
    workflow.acq_max = parse_date_max("2014")

    workflow.epoch = EpochParameter(5, 6)

    # Only SUMMER is queried (an earlier assignment of the full Season
    # enum was immediately overwritten and has been dropped).
    workflow.seasons = [Season.SUMMER]

    workflow.satellites = [Satellite.LS5, Satellite.LS7]

    workflow.mask_pqa_apply = True
    workflow.mask_pqa_mask = [PqaMask.PQ_MASK_SATURATION, PqaMask.PQ_MASK_CONTIGUITY, PqaMask.PQ_MASK_CLOUD]

    workflow.dataset_type = DatasetType.ARG25
    workflow.bands = Ls57Arg25Bands

    epochs = list(workflow.get_epochs())

    # Each print below takes one pre-formatted string so the output is the
    # same as the original print statements on Python 2 and also valid on
    # Python 3.
    print("")
    print("epochs are %s" % (epochs,))

    for season, epoch in product(workflow.seasons, epochs):
        print("%s %s" % (season, epoch))

        acq_min, acq_max, criteria = build_season_date_criteria(epoch[0], epoch[1], season,
                                                                seasons=SEASONS, extend=True)

        print("%s %s %s" % (acq_min, acq_max, criteria))

        tiles = list_tiles_as_list(x=[workflow.x_min], y=[workflow.y_min],
                                   satellites=workflow.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=[workflow.dataset_type],
                                   include=criteria)

        # Two spaces: the original literal ended in a space and the print
        # statement added its own separator.
        print("Tiles found is  %s" % len(tiles))
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this workflow.

    The acquisition bounds go through parse_date_min / parse_date_max;
    every other value is stored unchanged.

    NOTE: the super class implementation is deliberately not invoked, and
    the check that every requested satellite provides every requested
    band is disabled in the original code.
    """
    for name in ("x_min", "x_max", "y_min", "y_max", "output_directory"):
        setattr(self, name, getattr(args, name))

    self.acq_min = parse_date_min(args.acq_min)
    self.acq_max = parse_date_max(args.acq_max)

    self.satellites = args.satellites
    self.epoch = args.epoch

    # The argument is singular, the attribute plural.
    self.seasons = args.season

    for name in ("mask_pqa_apply", "mask_pqa_mask", "local_scheduler", "workers"):
        setattr(self, name, getattr(args, name))

    _log.setLevel(args.log_level)

    for name in ("dataset_type", "bands", "output_format"):
        setattr(self, name, getattr(args, name))
def process_arguments(self, args):
    """Copy the settings carried by ``args`` onto this instance.

    The module logger level is set first. The acquisition bounds go
    through parse_date_min / parse_date_max; the satellite list and the
    PQA / WOFS mask settings are stored unchanged.

    NOTE: processing-date and ingest-date ranges are not handled here;
    the code for them is disabled in the original.
    """
    _log.setLevel(args.log_level)

    self.acq_min = parse_date_min(args.acq_min)
    self.acq_max = parse_date_max(args.acq_max)

    # The argument is singular, the attribute plural.
    self.satellites = args.satellite

    for name in ("mask_pqa_apply", "mask_pqa_mask",
                 "mask_wofs_apply", "mask_wofs_mask"):
        setattr(self, name, getattr(args, name))
def __init__(self):
    """Initialise the instance with a fixed set of default settings.

    Defaults: the single cell (140, -36), acquisitions from 1985 to 2014,
    an epoch of 5, every member of Season, LS5 and LS7, the RED and GREEN
    ARG25 bands, 2000 x 2000 chunks, PQA masking enabled and the
    module-level STATISTICS.
    """
    # Single-cell extent: minimum and maximum coincide.
    self.x_min = 140
    self.x_max = 140
    self.y_min = -36
    self.y_max = -36

    self.epoch = 5

    self.acq_min = parse_date_min("1985")
    self.acq_max = parse_date_max("2014")

    self.seasons = list(Season)

    self.satellites = [Satellite.LS5, Satellite.LS7]

    self.dataset_type = DatasetType.ARG25
    self.bands = [Ls57Arg25Bands.RED, Ls57Arg25Bands.GREEN]

    self.x_chunk_size = 2000
    self.y_chunk_size = 2000

    self.mask_pqa_apply = True
    self.mask_pqa_mask = [
        PqaMask.PQ_MASK_SATURATION,
        PqaMask.PQ_MASK_CONTIGUITY,
        PqaMask.PQ_MASK_CLOUD,
    ]

    self.statistics = STATISTICS
def test_get_band_stack_filename():
    """Check band stack filenames when no season is supplied.

    The same request is made for three satellite combinations; only the
    satellite prefix of the expected filename changes between them.
    """
    # (satellites requested, filename expected) -- checked in this order.
    cases = [
        ([Satellite.LS5],
         "LS5_NBAR_120_-020_2000_01_01_2005_12_31_BLUE_STACK.tif"),
        ([Satellite.LS5, Satellite.LS7],
         "LS57_NBAR_120_-020_2000_01_01_2005_12_31_BLUE_STACK.tif"),
        ([Satellite.LS5, Satellite.LS7, Satellite.LS8],
         "LS578_NBAR_120_-020_2000_01_01_2005_12_31_BLUE_STACK.tif"),
    ]

    for satellites, expected in cases:
        actual = get_dataset_band_stack_filename(
            satellites=satellites,
            dataset_type=DatasetType.ARG25,
            band=Ls57Arg25Bands.BLUE,
            x=120, y=-20,
            acq_min=parse_date_min("2000"),
            acq_max=parse_date_max("2005"),
            mask_pqa_apply=False,
            mask_wofs_apply=False,
            mask_vector_apply=False,
            output_format=OutputFormat.GEOTIFF)

        assert actual == expected
def _ensure_directory(path):
    """Create ``path`` (and any missing parents) unless it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # Re-raise anything other than "the directory is already there".
        if not os.path.isdir(path):
            raise


def main():
    """Create the output directory tree for an ARG25 band statistics run.

    A workflow is configured for cells x 125-126, y -35 to -34 over
    1985-2014. For every task it creates, one directory is made (named
    after the task output path without "_STATS.tif"), and inside it one
    directory per chunk-task output (named after that output path without
    ".npy"). Tasks and outputs are logged along the way.

    Directories that already exist are left alone, so the script can be
    run again over the same output location.
    """
    from luigi.task import flatten

    workflow = Arg25BandStatisticsWorkflow()

    workflow.x_min = 125
    workflow.x_max = 126

    workflow.y_min = -35
    workflow.y_max = -34

    workflow.acq_min = parse_date_min("1985")
    workflow.acq_max = parse_date_max("2014")

    workflow.epoch = 5

    workflow.seasons = Season

    workflow.satellites = [Satellite.LS5, Satellite.LS7]

    # An earlier assignment of the parent directory was immediately
    # overwritten by this one and has been dropped.
    workflow.output_directory = "/Users/simon/tmp/cube/output/test/arg25_stats_tasks/ARG25_125_126_-035_-034_1985_2014_SUMMER_AUTUMN_WINTER_SPRING"

    workflow.mask_pqa_apply = True
    workflow.mask_pqa_mask = [PqaMask.PQ_MASK_SATURATION, PqaMask.PQ_MASK_CONTIGUITY, PqaMask.PQ_MASK_CLOUD]

    # NOTE(review): a list here, whereas other code in this file assigns a
    # single DatasetType -- confirm which form create_tasks() expects.
    workflow.dataset_type = [DatasetType.ARG25]
    workflow.bands = Ls57Arg25Bands

    workflow.x_chunk_size = 1000
    workflow.y_chunk_size = 1000

    workflow.statistics = [Statistic.PERCENTILE_25, Statistic.PERCENTILE_50, Statistic.PERCENTILE_75]

    tasks = flatten(workflow.create_tasks())

    # Single-argument call form: same output on Python 2, valid on Python 3.
    print(tasks)

    for task in tasks:
        _log.info("task = %s", task)

        path = os.path.join(workflow.output_directory, task.output().path.replace("_STATS.tif", ""))
        _ensure_directory(path)

        for output in flatten(task.output()):
            _log.info("output %s", output.path)

        for chunk_task in flatten(task.requires()):
            _log.info("chunk task %s", chunk_task)

            for output in flatten(chunk_task.output()):
                _log.info("output %s", output.path)
                _ensure_directory(os.path.join(path, output.path.replace(".npy", "")))
def test_create_tasks():
    """Check the tasks, chunk tasks and tile counts created for 1985-1994.

    With EpochParameter(5, 5) over 1985-1994 one task is expected per
    season and epoch start year. Each task must require one chunk task per
    band in Ls57Arg25Bands, and each chunk task must find the number of
    tiles recorded below for its start year and season.
    """
    from luigi.task import flatten

    # Expected tile count keyed by epoch start year, then season.
    expected_tile_counts = {
        1985: {Season.SUMMER: 34, Season.AUTUMN: 29, Season.WINTER: 36, Season.SPRING: 34},
        1990: {Season.SUMMER: 53, Season.AUTUMN: 65, Season.WINTER: 65, Season.SPRING: 57},
    }

    wf = Arg25BandStatisticsWorkflow()

    wf.x_min = TEST_X
    wf.x_max = TEST_X
    wf.y_min = TEST_Y
    wf.y_max = TEST_Y

    wf.acq_min = parse_date_min("1985")
    wf.acq_max = parse_date_max("1994")

    wf.epoch = EpochParameter(5, 5)
    wf.seasons = Season
    wf.satellites = [Satellite.LS5, Satellite.LS7]

    wf.output_directory = "/tmp"

    wf.mask_pqa_apply = True
    wf.mask_pqa_mask = [
        PqaMask.PQ_MASK_SATURATION,
        PqaMask.PQ_MASK_CONTIGUITY,
        PqaMask.PQ_MASK_CLOUD,
    ]

    # local_scheduler and workers are intentionally left unset.

    wf.dataset_type = DatasetType.ARG25
    wf.bands = Ls57Arg25Bands

    wf.x_chunk_size = 4000
    wf.y_chunk_size = 4000

    wf.statistics = [
        Statistic.PERCENTILE_25,
        Statistic.PERCENTILE_50,
        Statistic.PERCENTILE_75,
    ]

    tasks = flatten(wf.create_tasks())

    expected_task_count = len(wf.seasons) * len(expected_tile_counts)
    assert len(tasks) == expected_task_count

    for task in tasks:
        _log.info("task = %s", task)

        for task_output in flatten(task.output()):
            _log.info("output %s", task_output.path)

        chunk_tasks = flatten(task.requires())
        assert len(chunk_tasks) == len(Ls57Arg25Bands)

        for chunk_task in chunk_tasks:
            _log.info("chunk task %s", chunk_task)

            for chunk_output in flatten(chunk_task.output()):
                _log.info("output %s", chunk_output.path)

            tiles = list(chunk_task.get_tiles())
            _log.info("Found %d tiles", len(tiles))

            expected_tiles = expected_tile_counts[chunk_task.acq_min.year][chunk_task.season]
            assert len(tiles) == expected_tiles

            for tile in tiles:
                _log.info("\t%s", tile.end_datetime)