def test_export2tiff_data_band_tuple_time_tuple(self):
    """Export DATA with band and date indices given as (first, last) tuples.

    The expected subset covers both tuple endpoints inclusively; the exported
    raster is reshaped back to (time, height, width, band) before comparison.
    """
    data = np.arange(10 * 3 * 2 * 6, dtype=float).reshape(10, 3, 2, 6)
    bands = (1, 4)
    times = (2, 8)

    first_band, last_band = bands
    first_time, last_time = times
    bands_selection = np.arange(first_band, last_band + 1)
    times_selection = np.arange(first_time, last_time + 1)
    subset = data[times_selection][..., bands_selection].squeeze()

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.data['data'] = data

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff(
            (FeatureType.DATA, 'data'),
            folder=tmp_dir_name,
            band_indices=bands,
            date_indices=times,
            image_dtype=data.dtype,
        )
        export_task.execute(eop, filename=tmp_file_name)

        # split times and bands in raster and mimic the initial shape
        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))
        height, width = raster.shape[0], raster.shape[1]
        raster = raster.reshape(height, width, len(times_selection), len(bands_selection))
        raster = np.moveaxis(raster, -2, 0)

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def save_to_tiff(self, file_path, feature=None, no_data_value=None, merge_method="last", padding=0):
    """
    Save indexed EOPatches to a complete tiff.

    :param file_path: path to save tiff
    :type file_path: str
    :param feature: Feature which will be exported. Falls back to ``self.feature`` when falsy.
    :type feature: (FeatureType, str)
    :param no_data_value: Value of pixels of tiff image with no data in EOPatch
    :type no_data_value: int or float
    :param merge_method: How to merge overlap EOPatches. "last" mean latter array overwrite former array,
        "first" mean former array overwrite latter array.
    :type merge_method: str
    :param padding: Forwarded unchanged to ``self._patch_joint``.
    :raises PatchSetError: If the tiff export or the COG translation fails. The original exception
        is attached as ``__cause__``.
    """
    if not feature:
        feature = self.feature
    if not self._is_loaded():
        self._load_with_index(feature=feature)

    union_patch = self._patch_joint(self.patch_index, feature=feature, merge_method=merge_method, padding=padding)
    self._assure_folder_exist(path=file_path, path_type="file")

    # tempfile.mktemp() is deprecated: the name it returns can be claimed by another
    # process before we write to it. A private temporary directory avoids that race
    # and is removed (with the intermediate tiff) when the block exits, on success or failure.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file = os.path.join(temp_dir, "export.tiff")
        try:
            export_tiff = ExportToTiff(feature, no_data_value=no_data_value)
            export_tiff.execute(union_patch, filename=temp_file)
            self._cog_translate(src_path=temp_file, dst_path=file_path)
        except Exception as e:
            # Keep the public exception type but preserve the original traceback for debugging.
            raise PatchSetError(str(e)) from e
def test_time_dependent_feature(self):
    """Round-trip a time-dependent feature through one tiff per timestamp.

    After the round trip the last timestamp is changed so that one of the
    requested import files is missing, which must raise ResourceNotFound.
    """
    feature = FeatureType.DATA, 'NDVI'
    filename_export = 'relative-path/*.tiff'

    def per_timestamp_filenames():
        # Evaluated lazily so it reflects the current state of self.eopatch.timestamp.
        names = []
        for timestamp in self.eopatch.timestamp:
            names.append(f'relative-path/{timestamp.strftime("%Y%m%dT%H%M%S")}.tiff')
        return names

    filename_import = per_timestamp_filenames()

    export_task = ExportToTiff(feature, folder=self.path)
    import_task = ImportFromTiff(feature, folder=self.path, timestamp_size=68)

    export_task.execute(self.eopatch, filename=filename_export)
    new_eopatch = import_task.execute(filename=filename_import)

    round_trip_ok = np.array_equal(new_eopatch[feature], self.eopatch[feature])
    self.assertTrue(round_trip_ok)

    self.eopatch.timestamp[-1] = datetime.datetime(2020, 10, 10)
    filename_import = per_timestamp_filenames()

    with self.assertRaises(ResourceNotFound):
        import_task.execute(filename=filename_import)
def test_export2tiff_order(self):
    """Check that exported layers follow the requested time and band order."""
    data = np.arange(10 * 3 * 2 * 6, dtype=float).reshape(10, 3, 2, 6)
    bands = [2, 3, 0]
    times = [1, 7]

    # create ordered subset: times vary slowest, bands fastest
    layers = [data[t][..., b] for t in times for b in bands]
    ordered_subset = np.moveaxis(np.array(layers), 0, -1)

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.data['data'] = data

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff(
            (FeatureType.DATA, 'data'),
            folder=tmp_dir_name,
            band_indices=bands,
            date_indices=times,
            image_dtype=data.dtype,
        )
        export_task.execute(eop, filename=tmp_file_name)

        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))

        rasters_match = np.all(ordered_subset == raster)
        self.assertTrue(rasters_match)
def test_export2tiff_mask_tuple_string(self):
    """Export a MASK limited by a pair of ISO date strings and compare with the matching frames."""
    eop = EOPatch.load(self.PATCH_FILENAME)
    dates = np.array(eop.timestamp)

    mask = np.arange(len(dates) * 3 * 2 * 1).reshape(len(dates), 3, 2, 1)
    eop.mask['mask'] = mask

    indices = [2, 4]
    first_index, last_index = indices

    # day time gets floored
    times = (datetime_to_iso(dates[first_index]), datetime_to_iso(dates[last_index]))

    interval_start = iso_to_datetime(times[0])
    interval_end = iso_to_datetime(times[1])
    inside_interval = (dates >= interval_start) & (dates <= interval_end)
    selection = np.nonzero(np.where(inside_interval, dates, 0))
    subset = mask[selection].squeeze()

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff((FeatureType.MASK, 'mask'), folder=tmp_dir_name, date_indices=times)
        export_task.execute(eop, filename=tmp_file_name)

        # rasterio saves `bands` to the last dimension, move it up front
        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))
        raster = np.moveaxis(raster, -1, 0)

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def test_export_import(self):
    """Export every test case to tiff and import it back into a new and into the existing EOPatch."""
    for test_case in self.test_cases:
        with self.subTest(msg=f'Test case {test_case.name}'):
            feature_type = test_case.feature_type
            feature_name = test_case.name
            self.eopatch[feature_type][feature_name] = test_case.data

            with tempfile.TemporaryDirectory() as tmp_dir_name:
                tmp_file_name = 'temp_file.tiff'
                feature = feature_type, feature_name

                export_task = ExportToTiff(
                    feature,
                    folder=tmp_dir_name,
                    band_indices=test_case.bands,
                    date_indices=test_case.times,
                )
                export_task.execute(self.eopatch, filename=tmp_file_name)

                import_task = ImportFromTiff(
                    feature,
                    folder=tmp_dir_name,
                    timestamp_size=test_case.get_expected_timestamp_size(),
                )

                expected_raster = test_case.get_expected()

                # Import once without an EOPatch (a new one is returned) and once into the existing one.
                new_eop = import_task.execute(filename=tmp_file_name)
                old_eop = import_task.execute(self.eopatch, filename=tmp_file_name)

                imported_new = new_eop[feature_type][feature_name]
                self.assertTrue(
                    np.array_equal(expected_raster, imported_new),
                    msg='Tiff imported into new EOPatch is not the same as expected',
                )

                imported_old = old_eop[feature_type][feature_name]
                self.assertTrue(
                    np.array_equal(expected_raster, imported_old),
                    msg='Tiff imported into old EOPatch is not the same as expected',
                )

                self.assertEqual(
                    expected_raster.dtype,
                    new_eop[feature_type][feature_name].dtype,
                    msg='Tiff imported into new EOPatch has different dtype as expected',
                )
def test_timeless_feature(self):
    """Round-trip a timeless feature through a single tiff under a relative sub-path."""
    feature = FeatureType.DATA_TIMELESS, 'DEM'
    filename = 'relative-path/my-filename.tiff'

    exporter = ExportToTiff(feature, folder=self.path)
    importer = ImportFromTiff(feature, folder=self.path)

    exporter.execute(self.eopatch, filename=filename)
    new_eopatch = importer.execute(self.eopatch, filename=filename)

    round_trip_ok = np.array_equal(new_eopatch[feature], self.eopatch[feature])
    self.assertTrue(round_trip_ok)
def test_export2tiff_wrong_format(self):
    """A non-integer entry in either the band or the date index list must raise ValueError."""
    data = np.arange(10 * 3 * 2 * 6, dtype=float).reshape(10, 3, 2, 6)
    self.eopatch.data['data'] = data

    # (band_indices, date_indices) pairs, each containing one invalid 'string' entry
    invalid_selections = [
        ([2, 'string', 1, 0], [1, 7, 0, 2, 3]),
        ([2, 3, 1, 0], [1, 7, 'string', 2, 3]),
    ]

    for bands, times in invalid_selections:
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            with self.assertRaises(ValueError):
                tmp_file_name = 'temp_file.tiff'
                export_task = ExportToTiff(
                    (FeatureType.DATA, 'data'),
                    folder=tmp_dir_name,
                    band_indices=bands,
                    date_indices=times,
                    image_dtype=data.dtype,
                )
                export_task.execute(self.eopatch, filename=tmp_file_name)
def test_time_dependent_feature_with_timestamps(self):
    """Round-trip a time-dependent feature using a strftime pattern as the file name."""
    feature = FeatureType.DATA, 'NDVI'
    filename = 'relative-path/%Y%m%dT%H%M%S.tiff'

    exporter = ExportToTiff(feature, folder=self.path)
    importer = ImportFromTiff(feature, folder=self.path)

    exporter.execute(self.eopatch, filename=filename)
    new_eopatch = importer.execute(self.eopatch, filename=filename)

    round_trip_ok = np.array_equal(new_eopatch[feature], self.eopatch[feature])
    self.assertTrue(round_trip_ok)
def test_export2tiff_mask_timeless(self):
    """Export a single-band MASK_TIMELESS feature and compare it with the squeezed source array."""
    mask_timeless = np.arange(3 * 3 * 1).reshape(3, 3, 1)
    subset = mask_timeless.squeeze()

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.mask_timeless['mask_timeless'] = mask_timeless

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff((FeatureType.MASK_TIMELESS, 'mask_timeless'), folder=tmp_dir_name)
        export_task.execute(eop, filename=tmp_file_name)

        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def test_export2tiff_mask_single(self):
    """Export a single time frame of a MASK feature selected through a one-element index list."""
    mask = np.arange(5 * 3 * 3 * 1).reshape(5, 3, 3, 1)
    times = [4]
    subset = mask[times].squeeze()

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.mask['mask'] = mask

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff((FeatureType.MASK, 'mask'), folder=tmp_dir_name, date_indices=times)
        export_task.execute(eop, filename=tmp_file_name)

        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def test_export2tiff_wrong_dates_format(self):
    """A non-integer entry in the date index list must raise ValueError."""
    data = np.arange(10 * 3 * 2 * 6, dtype=float).reshape(10, 3, 2, 6)
    bands = [2, 3, 1, 0]
    times = [1, 7, 'string', 2, 3]

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.data['data'] = data

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        with self.assertRaises(ValueError):
            tmp_file_name = 'temp_file.tiff'
            export_task = ExportToTiff(
                (FeatureType.DATA, 'data'),
                folder=tmp_dir_name,
                band_indices=bands,
                date_indices=times,
                image_dtype=data.dtype,
            )
            export_task.execute(eop, filename=tmp_file_name)
def test_export2tiff_scalar_timeless_list(self):
    """Export selected entries of a SCALAR_TIMELESS feature in the requested band order."""
    scalar_timeless = np.arange(5)
    bands = [3, 0, 2]
    subset = scalar_timeless[bands]

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.scalar_timeless['scalar_timeless'] = scalar_timeless

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        feature = (FeatureType.SCALAR_TIMELESS, 'scalar_timeless')
        export_task = ExportToTiff(feature, folder=tmp_dir_name, band_indices=bands)
        export_task.execute(eop, filename=tmp_file_name)

        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def get_post_processing_workflow(config: PostProcessConfig) -> LinearWorkflow:
    """Assemble the linear post-processing workflow described by ``config``.

    Task order: load EOPatch, temporal merging of the extent feature per time
    interval, temporal merging of the boundary feature per time interval,
    combine/upsample per interval, save the produced timeless features, and
    export one tiff per interval.

    :param config: Post-processing configuration (bucket, folders, features, time intervals, ...).
    :return: Workflow whose tasks are given as ``(task, description)`` pairs.
    """
    sh_config = set_sh_config(config)

    def extent_feature(month):
        return FeatureType.DATA_TIMELESS, f'{config.feature_extent[1]}_{month}'

    def boundary_feature(month):
        return FeatureType.DATA_TIMELESS, f'{config.feature_boundary[1]}_{month}'

    def predicted_feature(month):
        return FeatureType.DATA_TIMELESS, f'PREDICTED_{config.model_version}_{month}'

    loader = LoadTask(
        path=f's3://{config.bucket_name}/{config.eopatches_folder}',
        features=[
            config.feature_extent,
            config.feature_boundary,
            (FeatureType.MASK, 'CLM'),
            (FeatureType.MASK, 'IS_DATA'),
            FeatureType.TIMESTAMP,
            FeatureType.META_INFO,
            FeatureType.BBOX,
        ],
        config=sh_config,
    )
    load_task = loader, 'Load EOPatch'

    merge_extent_tasks = []
    for month, (woy_start, woy_end) in config.time_intervals.items():
        merging = TemporalMerging(
            feature=config.feature_extent,
            feature_merged=extent_feature(month),
            woy_start=woy_start,
            woy_end=woy_end,
            percentile=config.percentile,
            max_cloud_coverage=config.max_cloud_coverage,
        )
        merge_extent_tasks.append((merging, f'Merge EXTENT for {month}'))

    merge_boundary_tasks = []
    for month, (woy_start, woy_end) in config.time_intervals.items():
        merging = TemporalMerging(
            feature=config.feature_boundary,
            feature_merged=boundary_feature(month),
            woy_start=woy_start,
            woy_end=woy_end,
            percentile=config.percentile,
            max_cloud_coverage=config.max_cloud_coverage,
        )
        merge_boundary_tasks.append((merging, f'Merge BOUNDARY for {month}'))

    combine_tasks = []
    for month in config.time_intervals:
        combiner = CombineUpsample(
            feature_extent=extent_feature(month),
            feature_boundary=boundary_feature(month),
            feature_output=predicted_feature(month),
            scale_factor=config.scale_factor,
            disk_size=config.disk_size,
        )
        combine_tasks.append((combiner, f'Combine masks for {month}'))

    # Persist every intermediate and final timeless feature: extents, boundaries, predictions.
    saved_features = [extent_feature(month) for month in config.time_intervals]
    saved_features += [boundary_feature(month) for month in config.time_intervals]
    saved_features += [predicted_feature(month) for month in config.time_intervals]
    saver = SaveTask(
        path=f's3://{config.bucket_name}/{config.eopatches_folder}',
        features=saved_features,
        overwrite_permission=OverwritePermission.OVERWRITE_FEATURES,
        config=sh_config,
    )
    save_task = saver, 'Save Task'

    export_tasks = []
    for month in config.time_intervals:
        exporter = ExportToTiff(
            feature=predicted_feature(month),
            folder=f's3://{config.bucket_name}/{config.tiffs_folder}/{month}/',
            image_dtype=np.float32,
        )
        export_tasks.append((exporter, f'Export tiffs for {month}'))

    workflow = LinearWorkflow(
        load_task,
        *merge_extent_tasks,
        *merge_boundary_tasks,
        *combine_tasks,
        save_task,
        *export_tasks,
    )
    return workflow
def test_export2tiff_data_timeless_band_list(self):
    """Export selected bands of a DATA_TIMELESS feature in the requested order."""
    data_timeless = np.arange(3 * 2 * 5, dtype=float).reshape(3, 2, 5)
    bands = [2, 4, 1, 0]
    subset = data_timeless[..., bands].squeeze()

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.data_timeless['data_timeless'] = data_timeless

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff(
            (FeatureType.DATA_TIMELESS, 'data_timeless'),
            folder=tmp_dir_name,
            band_indices=bands,
            image_dtype=data_timeless.dtype,
        )
        export_task.execute(eop, filename=tmp_file_name)

        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def test_export2tiff_mask_list(self):
    """Export two time frames of a MASK feature given as an unsorted index list."""
    mask = np.arange(5 * 3 * 2 * 1).reshape(5, 3, 2, 1)
    times = [4, 2]
    subset = mask[times].squeeze()

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.mask['mask'] = mask

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff((FeatureType.MASK, 'mask'), folder=tmp_dir_name, date_indices=times)
        export_task.execute(eop, filename=tmp_file_name)

        # rasterio saves `bands` to the last dimension, move it up front
        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))
        raster = np.moveaxis(raster, -1, 0)

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def test_export2tiff_scalar_band_single_time_single(self):
    """Export one band at one time index of a SCALAR feature."""
    scalar = np.arange(10 * 6, dtype=float).reshape(10, 6)
    bands = [3]
    times = [7]
    subset = scalar[times][..., bands].squeeze()

    eop = EOPatch.load(self.PATCH_FILENAME)
    eop.scalar['scalar'] = scalar

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        export_task = ExportToTiff(
            (FeatureType.SCALAR, 'scalar'),
            folder=tmp_dir_name,
            band_indices=bands,
            date_indices=times,
            image_dtype=scalar.dtype,
        )
        export_task.execute(eop, filename=tmp_file_name)

        raster = read_data(os.path.join(tmp_dir_name, tmp_file_name))

        rasters_match = np.all(subset == raster)
        self.assertTrue(rasters_match)
def test_export2tiff_wrong_feature(self, mocked_logger):
    """Exporting a missing feature logs once when fail_on_missing=False and raises ValueError when True.

    ``mocked_logger`` is presumably injected by a patch decorator that is not
    visible in this block; only its ``call_count`` and ``call_args`` are used.
    """
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file.tiff'
        feature = FeatureType.MASK_TIMELESS, 'feature-not-present'

        lenient_task = ExportToTiff(feature, folder=tmp_dir_name, fail_on_missing=False)
        lenient_task.execute(self.eopatch, filename=tmp_file_name)
        assert mocked_logger.call_count == 1

        # call_args is (positional_args, keyword_args); exactly one positional argument is expected.
        positional_args, _ = mocked_logger.call_args
        (logged_error,) = positional_args
        expected_message = (
            'Feature feature-not-present of type FeatureType.MASK_TIMELESS '
            'was not found in EOPatch'
        )
        assert str(logged_error) == expected_message

        with self.assertRaises(ValueError):
            strict_task = ExportToTiff(feature, folder=tmp_dir_name, fail_on_missing=True)
            strict_task.execute(self.eopatch, filename=tmp_file_name)
def test_export2tiff_separate_timestamps(self):
    """Export one tiff per timestamp, first via a '*' wildcard name and then via a strftime folder pattern."""
    test_case = self.test_cases[-1]

    eopatch = copy.deepcopy(self.eopatch)
    eopatch[test_case.feature_type][test_case.name] = test_case.data
    # Keep only as many timestamps as the test data has frames.
    eopatch.timestamp = self.eopatch.timestamp[:test_case.data.shape[0]]

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        tmp_file_name = 'temp_file_*'
        tmp_file_name_reproject = 'temp_file_4326_%Y%m%d.tif'
        feature = test_case.feature_type, test_case.name

        def assert_file_per_timestamp(name_pattern):
            for timestamp in eopatch.timestamp:
                expected_path = os.path.join(tmp_dir_name, timestamp.strftime(name_pattern))
                self.assertTrue(os.path.exists(expected_path), f'Path {expected_path} does not exist')

        export_task = ExportToTiff(feature, band_indices=test_case.bands, date_indices=test_case.times)
        full_path = os.path.join(tmp_dir_name, tmp_file_name)
        export_task.execute(eopatch, filename=full_path)

        assert_file_per_timestamp('temp_file_%Y%m%dT%H%M%S.tif')

        # Second export: the full pattern is passed as `folder` and no filename is given.
        full_path = os.path.join(tmp_dir_name, tmp_file_name_reproject)
        export_task = ExportToTiff(
            feature,
            folder=full_path,
            band_indices=test_case.bands,
            date_indices=test_case.times,
            crs='EPSG:4326',
            compress='lzw',
        )
        export_task.execute(eopatch)

        assert_file_per_timestamp(tmp_file_name_reproject)
} }) executor = EOExecutor(workflow, execution_args, save_logs=True) executor.run(workers=5, multiprocess=False) # should install graphviz # executor.make_report() # Load GeogeniusEOPatch eopatch = GeogeniusEOPatch.load(path=os.path.join(path_out, 'eopatch_{}'.format(0)), lazy_loading=True) print(eopatch) # Print data print(eopatch.get_feature(FeatureType.DATA, 'BANDS')) # Convert all patches to tiff tiff_out = get_current_folder("tiff") if not os.path.isdir(tiff_out): os.makedirs(tiff_out) export_to_tiff = ExportToTiff(feature=(FeatureType.DATA, 'BANDS'), folder=tiff_out) for idx, bbox in enumerate(bbox_list[patchIDs]): patch_patch = os.path.join(path_out, 'eopatch_{}'.format(idx)) sub_patch = GeogeniusEOPatch.load(path=os.path.join( path_out, 'eopatch_{}'.format(idx)), lazy_loading=True) export_to_tiff.execute(eopatch=sub_patch, filename='eopatch_{}.tiff'.format(idx))
#%% # TASK TO LOAD EXISTING EOPATCHES load = LoadFromDisk(path_out_sampled) # TASK FOR PREDICTION predict = PredictPatch(model, (FeatureType.DATA, 'FEATURES'), 'LBL_GBM', 'SCR_GBM') # TASK FOR SAVING save = SaveToDisk(str(path_out_sampled), overwrite_permission=OverwritePermission.OVERWRITE_PATCH) # TASK TO EXPORT TIFF export_tiff = ExportToTiff((FeatureType.MASK_TIMELESS, 'LBL_GBM')) tiff_location = './predicted_tiff' if not os.path.isdir(tiff_location): os.makedirs(tiff_location) workflow = LinearWorkflow(load, predict, export_tiff, save) #%% # create a list of execution arguments for each patch execution_args = [] for i in range(len(patchIDs)): execution_args.append({ load: { 'eopatch_folder': 'eopatch_{}'.format(i) }, export_tiff: {
def predict_raster_patch(path_EOPatch, patch_n, scale, debug=False):
    """Predict, export and plot land-cover rasters for every frame of one EOPatch.

    Steps, as performed by the code below:

    1. Load the model from ``path_module / "model.pkl"`` (``path_module`` is
       defined outside this function).
    2. Run a load -> concatenate -> filter -> save workflow on the EOPatch.
    3. For each remaining frame: run load -> predict -> export tiff -> save,
       rewrite the exported tiff with ``gdal_merge.py`` (LZW compression) and
       save a two-panel PNG next to the EOPatch folder.

    NOTE(review): the statement nesting was reconstructed from a source whose
    indentation was lost. The plotting section is assumed to sit inside the
    per-frame loop because it uses ``pic_n`` — confirm against the original file.

    :param path_EOPatch: Path of the EOPatch folder; its parent is used as the
        load/save root and its stem as ``eopatch_folder``.
    :param patch_n: Only used to build output file names.
    :param scale: Multiplies the figure height passed to ``plt.subplots``.
    :param debug: When true, print progress messages and call ``executor.make_report()``.
    :return: List of paths of the merged tiffs, one per frame.
    """
    path_EOPatch = Path(path_EOPatch)
    model_path = path_module / "model.pkl"
    model = joblib.load(model_path)

    # TASK TO LOAD EXISTING EOPATCHES
    load = LoadFromDisk(path_EOPatch.parent)

    # TASK FOR CONCATENATION
    concatenate = ConcatenateData("FEATURES", ["BANDS", "NDVI", "NDWI", "NORM"])

    # TASK FOR FILTERING OUT TOO CLOUDY SCENES
    # keep frames with > 80 % valid coverage
    valid_data_predicate = ValidDataFractionPredicate(0.8)
    filter_task = SimpleFilterTask((FeatureType.MASK, 'IS_VALID'), valid_data_predicate)

    save = SaveToDisk(path_EOPatch.parent, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    workflow = LinearWorkflow(
        load,
        concatenate,
        filter_task,
        save,
    )

    # Single-iteration loop: exactly one set of execution arguments is created.
    execution_args = []
    for idx in range(0, 1):
        execution_args.append({
            load: {
                "eopatch_folder": path_EOPatch.stem
            },
            save: {
                "eopatch_folder": path_EOPatch.stem
            },
        })

    if debug:
        print("Saving the features ...")
    executor = EOExecutor(workflow, execution_args, save_logs=False)
    executor.run(workers=5, multiprocess=False)
    if debug:
        executor.make_report()

    # load from disk to determine number of valid pictures
    eopatch = EOPatch.load(path_EOPatch, lazy_loading=True)
    # assumes axis 0 of "BANDS" is the frame/time axis — TODO confirm
    n_pics = eopatch.data["BANDS"].shape[0]
    print(f'Number of valid pictures detected: {n_pics}')

    list_path_raster = []
    for pic_n in range(n_pics):
        # TASK TO LOAD EXISTING EOPATCHES
        load = LoadFromDisk(path_EOPatch.parent)

        # TASK FOR PREDICTION
        predict = PredictPatch(model, (FeatureType.DATA, "FEATURES"), "LBL", pic_n, "SCR")

        # TASK FOR SAVING
        save = SaveToDisk(
            str(path_EOPatch.parent),
            overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

        # TASK TO EXPORT TIFF
        export_tiff = ExportToTiff((FeatureType.MASK_TIMELESS, "LBL"))
        tiff_location = (path_EOPatch.parent / f"predicted_tiff")
        if not os.path.isdir(tiff_location):
            os.makedirs(tiff_location)

        workflow = LinearWorkflow(load, predict, export_tiff, save)

        # create a list of execution arguments for each patch
        execution_args = []
        path_predict = tiff_location / f"prediction-eopatch_{patch_n}-pic_{pic_n}.tiff"
        for i in range(0, 1):
            execution_args.append({
                load: {
                    "eopatch_folder": path_EOPatch.stem
                },
                export_tiff: {
                    "filename": path_predict
                },
                save: {
                    "eopatch_folder": path_EOPatch.stem
                },
            })

        # run the executor (the call below uses workers=5, multiprocess=False)
        executor = EOExecutor(workflow, execution_args)
        # uncomment below save the logs in the current directory and produce a report!
        # executor = EOExecutor(workflow, execution_args, save_logs=True)
        if debug:
            print("Predicting the land cover ...")
        executor.run(workers=5, multiprocess=False)
        if debug:
            executor.make_report()

        # PATH = path_out / "predicted_tiff" / f"patch{patch_n}"
        path_merged = tiff_location / f"merged_prediction-eopatch_{patch_n}-pic_{pic_n}.tiff"
        # Remove a stale output first so gdal_merge.py writes a fresh file.
        if path_merged.exists():
            path_merged.unlink()
        # NOTE(review): the command is built by string interpolation and run through the
        # shell; paths containing spaces or shell metacharacters would break it, and the
        # exit status of os.system is ignored.
        cmd = f"gdal_merge.py -o {path_merged} -co compress=LZW {path_predict}"
        os.system(cmd)

        # save path
        list_path_raster.append(path_merged)

        # Reference colormap things
        lulc_cmap = mpl.colors.ListedColormap([entry.color for entry in LULC])
        lulc_norm = mpl.colors.BoundaryNorm(np.arange(-0.5, 3, 1), lulc_cmap.N)

        size = 20
        # NOTE(review): the figure created here is never closed in this function.
        fig, ax = plt.subplots(figsize=(2 * size * 1, 1 * size * scale),
                               nrows=1,
                               ncols=2)
        eopatch = EOPatch.load(path_EOPatch, lazy_loading=True)
        # Left panel: predicted labels. `im` is not used again in this function.
        im = ax[0].imshow(eopatch.mask_timeless["LBL"].squeeze(),
                          cmap=lulc_cmap,
                          norm=lulc_norm)
        ax[0].set_xticks([])
        ax[0].set_yticks([])
        ax[0].set_aspect("auto")

        fig.subplots_adjust(wspace=0, hspace=0)
        for i in range(0, 1):
            eopatch = EOPatch.load(path_EOPatch, lazy_loading=True)
            # Rebinds `ax` from the axes array to its second element; this only works
            # because the loop runs exactly once.
            ax = ax[1]
            # Right panel: bands [2, 1, 0] of frame pic_n, multiplied by 3.5 and clipped to [0, 1].
            plt.imshow(
                np.clip(
                    eopatch.data["BANDS"][pic_n, :, :, :][..., [2, 1, 0]] * 3.5,
                    0, 1))
            plt.xticks([])
            plt.yticks([])
            ax.set_aspect("auto")
            del eopatch

        if debug:
            print("saving the predicted image ...")
        plt.savefig(path_EOPatch.parent / f"predicted_vs_real_{patch_n}-{pic_n}.png")

    return list_path_raster