def get_tiffs_to_eopatches_workflow(config: TiffsToEopatchConfig, delete_tiffs: bool = False) -> EOWorkflow:
    """Assemble the linear workflow that ingests tiff files into EOPatches.

    Steps, in execution order: import one tiff per band in
    ``config.band_names``, import the ``config.clp_name`` and
    ``config.mask_name`` tiffs, swap the time and band axes, load timestamps,
    merge the band features into ``config.data_name``, remove the single-band
    features, rename the mask to ``config.is_data_mask``, compute the CLM mask
    from CLP and save the EOPatch.  When ``delete_tiffs`` is true a final step
    deletes the ingested batch files.

    :param config: Settings holding bucket name, folders and feature names.
    :param delete_tiffs: Append the file-deletion step to the workflow.
    :return: A ``LinearWorkflow`` chaining the tasks listed above.
    """
    # Set up credentials in sh config
    sh_config = set_sh_config(config)

    # Every import task and the timestamp loader read from the same location.
    tiffs_location = f's3://{config.bucket_name}/{config.tiffs_folder}'

    steps = [
        (ImportFromTiff((FeatureType.DATA, band), folder=tiffs_location, config=sh_config),
         f'Import band {band}')
        for band in config.band_names
    ]
    steps.append(
        (ImportFromTiff((FeatureType.DATA, config.clp_name), folder=tiffs_location, config=sh_config),
         f'Import {config.clp_name}')
    )
    steps.append(
        (ImportFromTiff((FeatureType.MASK, config.mask_name), folder=tiffs_location, config=sh_config),
         f'Import {config.mask_name}')
    )
    steps.append((RearrangeBands(), 'Swap time and band axis'))
    steps.append((AddTimestampsUpdateTime(tiffs_location), 'Load timestamps'))
    steps.append(
        (MergeFeatureTask(input_features={FeatureType.DATA: config.band_names},
                          output_feature=(FeatureType.DATA, config.data_name)),
         'Merge band features')
    )
    steps.append((RemoveFeature(features={FeatureType.DATA: config.band_names}), 'Remove bands'))
    steps.append(
        (RenameFeature((FeatureType.MASK, config.mask_name, config.is_data_mask)),
         'Rename is data mask')
    )
    steps.append((CloudMasking(), 'Get CLM mask from CLP'))
    steps.append(
        (SaveTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                  config=sh_config,
                  overwrite_permission=OverwritePermission.OVERWRITE_FEATURES),
         'Save EOPatch')
    )

    # The deletion task is always constructed, but only scheduled on request.
    batch_files = [
        *(f'{band}.tif' for band in config.band_names),
        f'{config.mask_name}.tif',
        f'{config.clp_name}.tif',
        'userdata.json',
    ]
    delete_step = (DeleteFiles(path=config.tiffs_folder, filenames=batch_files), 'Delete batch files')
    if delete_tiffs:
        steps.append(delete_step)

    return LinearWorkflow(*steps)
def test_time_dependent_feature(self):
    """Export a time-dependent feature with a wildcard name and re-import it.

    The import is given one explicit file name per timestamp.  After the last
    timestamp is changed, the rebuilt name list points to a file that was not
    exported and the import is expected to raise ``ResourceNotFound``.
    """
    ndvi = FeatureType.DATA, 'NDVI'
    export_pattern = 'relative-path/*.tiff'

    def per_timestamp_names():
        # One tiff name per timestamp currently stored in the patch.
        return [
            f'relative-path/{stamp.strftime("%Y%m%dT%H%M%S")}.tiff'
            for stamp in self.eopatch.timestamp
        ]

    import_names = per_timestamp_names()

    exporter = ExportToTiff(ndvi, folder=self.path)
    # NOTE(review): 68 presumably matches the number of timestamps in the fixture - confirm.
    importer = ImportFromTiff(ndvi, folder=self.path, timestamp_size=68)

    exporter.execute(self.eopatch, filename=export_pattern)
    reloaded = importer.execute(filename=import_names)

    round_trip_ok = np.array_equal(reloaded[ndvi], self.eopatch[ndvi])
    self.assertTrue(round_trip_ok)

    # Move the last timestamp so that its file name no longer exists.
    self.eopatch.timestamp[-1] = datetime.datetime(2020, 10, 10)
    import_names = per_timestamp_names()

    with self.assertRaises(ResourceNotFound):
        importer.execute(filename=import_names)
def test_import_tiff_intersecting(self):
    """Import a tiff into a timeless mask and check data, dtype and no-data value.

    The compared slices are the last six rows / first three columns of the
    tiff and the first six rows / last three columns of the imported feature.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(test_dir, '../../../example_data/import-tiff-test2.tiff')

    mask_feature = FeatureType.MASK_TIMELESS, 'TEST_TIF'
    mask_type, mask_name = mask_feature
    no_data_value = 1.0

    importer = ImportFromTiff(mask_feature, path, image_dtype=np.float64, no_data_value=no_data_value)
    importer.execute(self.eopatch)

    tiff_img = read_data(path)

    overlap_matches = np.array_equal(tiff_img[-6:, :3, :],
                                     self.eopatch[mask_type][mask_name][:6, -3:, :])
    self.assertTrue(overlap_matches, msg='Imported tiff data should be the same as original')

    feature_dtype = self.eopatch[mask_type][mask_name].dtype
    dtype_msg = 'Feature should have dtype numpy.float64 but {} found'.format(feature_dtype)
    self.assertEqual(feature_dtype, np.float64, msg=dtype_msg)

    # NOTE(review): the slice checked below is the one overwritten on the next
    # line, so this assertion passes regardless of what was imported. Possibly
    # the whole array was meant to be inspected - confirm against the fixture.
    self.eopatch[mask_type][mask_name][:6, -3:, :] = no_data_value
    overwritten = self.eopatch[mask_type][mask_name][:6, -3:, :]
    unique_values = list(np.unique(overwritten))
    no_data_msg = 'No data values should all be equal to {}'.format(no_data_value)
    self.assertEqual(unique_values, [no_data_value], msg=no_data_msg)
def test_export_import(self):
    """Export every test case to a temporary tiff and import it back.

    Each case is imported twice, once into a fresh EOPatch and once into
    ``self.eopatch``; both results are compared with the expected raster and
    the dtype of the fresh import is checked as well.
    """
    for test_case in self.test_cases:
        with self.subTest(msg='Test case {}'.format(test_case.name)):
            ftype, fname = test_case.feature_type, test_case.name
            self.eopatch[ftype][fname] = test_case.data

            with tempfile.TemporaryDirectory() as tmp_dir_name:
                tmp_file_name = 'temp_file.tiff'
                feature = ftype, fname

                exporter = ExportToTiff(
                    feature,
                    folder=tmp_dir_name,
                    band_indices=test_case.bands,
                    date_indices=test_case.times,
                )
                exporter.execute(self.eopatch, filename=tmp_file_name)

                importer = ImportFromTiff(
                    feature,
                    folder=tmp_dir_name,
                    timestamp_size=test_case.get_expected_timestamp_size(),
                )

                expected_raster = test_case.get_expected()

                # First import creates a new patch, second one reuses the fixture patch.
                new_eop = importer.execute(filename=tmp_file_name)
                old_eop = importer.execute(self.eopatch, filename=tmp_file_name)

                new_matches = np.array_equal(expected_raster, new_eop[ftype][fname])
                self.assertTrue(
                    new_matches,
                    msg='Tiff imported into new EOPatch is not the same as expected')

                old_matches = np.array_equal(expected_raster, old_eop[ftype][fname])
                self.assertTrue(
                    old_matches,
                    msg='Tiff imported into old EOPatch is not the same as expected')

                self.assertEqual(
                    expected_raster.dtype,
                    new_eop[ftype][fname].dtype,
                    msg='Tiff imported into new EOPatch has different dtype as expected')
def test_time_dependent_feature_with_timestamps(self):
    """Round-trip a time-dependent feature using a strftime-style file name."""
    ndvi = FeatureType.DATA, 'NDVI'
    name_template = 'relative-path/%Y%m%dT%H%M%S.tiff'

    exporter = ExportToTiff(ndvi, folder=self.path)
    importer = ImportFromTiff(ndvi, folder=self.path)

    exporter.execute(self.eopatch, filename=name_template)
    # NOTE(review): the import targets self.eopatch; if execute returns the
    # patch it was given, the comparison below is the feature against itself - confirm.
    reloaded = importer.execute(self.eopatch, filename=name_template)

    unchanged = np.array_equal(reloaded[ndvi], self.eopatch[ndvi])
    self.assertTrue(unchanged)
def test_timeless_feature(self):
    """Round-trip a timeless feature through a single tiff in a sub-folder."""
    dem = FeatureType.DATA_TIMELESS, 'DEM'
    tiff_name = 'relative-path/my-filename.tiff'

    exporter = ExportToTiff(dem, folder=self.path)
    importer = ImportFromTiff(dem, folder=self.path)

    exporter.execute(self.eopatch, filename=tiff_name)
    # NOTE(review): the import targets self.eopatch; if execute returns the
    # patch it was given, the comparison below is the feature against itself - confirm.
    reloaded = importer.execute(self.eopatch, filename=tiff_name)

    unchanged = np.array_equal(reloaded[dem], self.eopatch[dem])
    self.assertTrue(unchanged)
def load_tiffs(datapath: str,
               feature: Tuple[FeatureType, str],
               filename: str = None,
               image_dtype: np.generic = np.float32,
               no_data_value: float = np.nan,
               data_source: str = 's5p',
               offset: int = 2100):
    """Helper function to load the data sources provided as tiffs.

    Files in ``datapath`` (or only ``filename`` when given) are prepared as
    follows: ``.gz`` files are decompressed and the archive deleted, ``.zip``
    files are extracted into ``datapath``, and only names ending in ``.tif``
    are kept.  The remaining tiles are imported into ``feature``.

    :param datapath: Folder holding the tiles. A plain string is converted to
        ``pathlib.Path``; other path objects supporting ``/`` are used as is.
    :param feature: ``(feature type, feature name)`` the tiffs are imported into.
    :param filename: Restrict loading to this single file instead of listing
        the whole folder.
    :param image_dtype: Forwarded to ``ImportFromTiff``.
    :param no_data_value: Forwarded to ``ImportFromTiff``.
    :param data_source: One of ``'s5p'``, ``'modis'``, ``'era5'``, ``'cams'``,
        ``'s3'``; selects the entry of ``TIMESTAMP_PARSER`` used to derive
        timestamps from the file names.
    :param offset: Passed as second argument to the timestamp parser.
        NOTE(review): its meaning is defined by ``TIMESTAMP_PARSER`` - confirm there.
    :return: The object returned by ``ImportFromTiff.execute``; for
        time-dependent features its ``timestamp`` attribute is set from the
        sorted file names.
    :raises AssertionError: If ``data_source`` is not supported, if a timeless
        feature does not resolve to exactly one tile, or if no tile is found
        for a time-dependent feature.
    """
    from pathlib import Path

    assert data_source in ['s5p', 'modis', 'era5', 'cams', 's3'], \
        f'Unsupported data source: {data_source}'

    # The annotation allows a string, but the code below joins with "/".
    if isinstance(datapath, str):
        datapath = Path(datapath)

    tiles = sorted(os.listdir(datapath)) if filename is None else [filename]

    # only keep files
    tiles = [tile for tile in tiles if not os.path.isdir(datapath / tile)]

    # unzip tiffs if they have .gz extension and delete them
    gz_suffix = '.gz'
    decompressed = []
    for tile in tiles:
        if tile.endswith(gz_suffix):
            # Strip only the trailing extension, not the first ".gz" in the name.
            target = tile[:-len(gz_suffix)]
            ungz_file(str(datapath / tile), str(datapath / target), delete=True)
            tile = target
        decompressed.append(tile)
    tiles = decompressed

    # Extract zip archives; each one is assumed to contain a tif of the same stem.
    zip_suffix = '.zip'
    for ztile in [tile for tile in tiles if tile.endswith(zip_suffix)]:
        unzip_file(str(datapath / ztile), str(datapath))
        tiles.append(ztile[:-len(zip_suffix)] + '.tif')

    # remove files which don't have .tif extension; the set drops a tif that
    # is listed both directly and through its archive, so it is imported once.
    tiles = sorted({tile for tile in tiles if os.path.splitext(tile)[1] == '.tif'})

    time_dependent = feature[0].is_time_dependent()
    # A time dimension is only requested for time-dependent features.
    timestamp_size = len(tiles) if time_dependent else None

    import_task = ImportFromTiff(feature=feature,
                                 folder=datapath,
                                 image_dtype=image_dtype,
                                 no_data_value=no_data_value,
                                 timestamp_size=timestamp_size)

    if not time_dependent:
        assert len(tiles) == 1, \
            f'Expected exactly one tile for a timeless feature, found {len(tiles)}'
        return import_task.execute(filename=tiles[0])

    assert len(tiles) >= 1, f'No .tif tiles found in {datapath}'

    eop = import_task.execute(filename=tiles)
    eop.timestamp = TIMESTAMP_PARSER[data_source](tiles, offset)

    return eop
def test_import_tiff_subset(self):
    """Import a tiff into a timeless mask and compare it with a window of the raw image.

    The imported feature's first band must equal rows 20-52 and columns 21-53
    of the tiff as read by ``read_data``.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(test_dir, '../../../example_data/import-tiff-test1.tiff')

    mask_feature = FeatureType.MASK_TIMELESS, 'TEST_TIF'
    mask_type, mask_name = mask_feature

    importer = ImportFromTiff(mask_feature, path)
    importer.execute(self.eopatch)

    tiff_img = read_data(path)

    expected_window = tiff_img[20: 53, 21: 54]
    imported_band = self.eopatch[mask_type][mask_name][..., 0]
    self.assertTrue(np.array_equal(expected_window, imported_band),
                    msg='Imported tiff data should be the same as original')