def test_nonexistent_location(self):
    path = './folder/subfolder/new-eopatch/'
    empty_eop = EOPatch()

    for fs_loader in self.filesystem_loaders:
        with fs_loader() as temp_fs:
            with self.assertRaises(ResourceNotFound):
                EOPatch.load(path, filesystem=temp_fs)

            empty_eop.save(path, filesystem=temp_fs)

    with TempFS() as temp_fs:
        full_path = os.path.join(temp_fs.root_path, path)
        with self.assertRaises(CreateFailed):
            EOPatch.load(full_path)

        load_task = LoadTask(full_path)
        with self.assertRaises(CreateFailed):
            load_task.execute()

        empty_eop.save(full_path)
        self.assertTrue(os.path.exists(full_path))

    with TempFS() as temp_fs:
        full_path = os.path.join(temp_fs.root_path, path)
        save_task = SaveTask(full_path)
        save_task.execute(empty_eop)
        self.assertTrue(os.path.exists(full_path))
def test_nonexistent_location(fs_loader):
    path = "./folder/subfolder/new-eopatch/"
    empty_eop = EOPatch()

    with fs_loader() as temp_fs:
        with pytest.raises(ResourceNotFound):
            EOPatch.load(path, filesystem=temp_fs)

        empty_eop.save(path, filesystem=temp_fs)

    with TempFS() as temp_fs:
        full_path = os.path.join(temp_fs.root_path, path)
        with pytest.raises(CreateFailed):
            EOPatch.load(full_path)

        load_task = LoadTask(full_path)
        with pytest.raises(CreateFailed):
            load_task.execute()

        empty_eop.save(full_path)
        assert os.path.exists(full_path)

    with TempFS() as temp_fs:
        full_path = os.path.join(temp_fs.root_path, path)
        save_task = SaveTask(full_path)
        save_task.execute(empty_eop)
        assert os.path.exists(full_path)
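# The pytest-style tests here assume an `fs_loader` fixture. A minimal sketch,
# assuming pyfilesystem2 backends (the fixture name comes from the tests; the
# parametrisation below is an illustrative assumption, not from the source):
import pytest
from fs.memoryfs import MemoryFS
from fs.tempfs import TempFS


@pytest.fixture(name="fs_loader", params=[TempFS, MemoryFS])
def fs_loader_fixture(request):
    """Yield a filesystem class; tests open it with `with fs_loader() as temp_fs:`."""
    return request.param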
def test_cleanup_different_compression(fs_loader, eopatch):
    folder = "foo-folder"
    patch_folder = "patch-folder"
    with fs_loader() as temp_fs:
        temp_fs.makedir(folder)

        save_compressed_task = SaveTask(folder, filesystem=temp_fs, compress_level=9, overwrite_permission=1)
        save_noncompressed_task = SaveTask(folder, filesystem=temp_fs, compress_level=0, overwrite_permission=1)

        bbox_path = fs.path.join(folder, patch_folder, "bbox.geojson")
        compressed_bbox_path = bbox_path + ".gz"
        data_timeless_path = fs.path.join(folder, patch_folder, "data_timeless", "mask.npy")
        compressed_data_timeless_path = data_timeless_path + ".gz"

        save_compressed_task(eopatch, eopatch_folder=patch_folder)
        save_noncompressed_task(eopatch, eopatch_folder=patch_folder)

        # the non-compressed save overwrites and cleans up the compressed files
        assert temp_fs.exists(bbox_path)
        assert temp_fs.exists(data_timeless_path)
        assert not temp_fs.exists(compressed_bbox_path)
        assert not temp_fs.exists(compressed_data_timeless_path)

        save_compressed_task(eopatch, eopatch_folder=patch_folder)

        # the compressed save cleans up the non-compressed files
        assert not temp_fs.exists(bbox_path)
        assert not temp_fs.exists(data_timeless_path)
        assert temp_fs.exists(compressed_bbox_path)
        assert temp_fs.exists(compressed_data_timeless_path)
def get_tiffs_to_eopatches_workflow(config: TiffsToEopatchConfig, delete_tiffs: bool = False) -> EOWorkflow:
    """ Set up workflow to ingest tiff files into EOPatches """

    # Set up credentials in sh config
    sh_config = set_sh_config(config)

    import_bands = [
        (ImportFromTiff((FeatureType.DATA, band),
                        folder=f's3://{config.bucket_name}/{config.tiffs_folder}',
                        config=sh_config),
         f'Import band {band}')
        for band in config.band_names
    ]
    import_clp = (ImportFromTiff((FeatureType.DATA, config.clp_name),
                                 folder=f's3://{config.bucket_name}/{config.tiffs_folder}',
                                 config=sh_config),
                  f'Import {config.clp_name}')
    import_mask = (ImportFromTiff((FeatureType.MASK, config.mask_name),
                                  folder=f's3://{config.bucket_name}/{config.tiffs_folder}',
                                  config=sh_config),
                   f'Import {config.mask_name}')

    rearrange_bands = (RearrangeBands(), 'Swap time and band axis')
    add_timestamps = (AddTimestampsUpdateTime(f's3://{config.bucket_name}/{config.tiffs_folder}'),
                      'Load timestamps')

    merge_bands = (MergeFeatureTask(input_features={FeatureType.DATA: config.band_names},
                                    output_feature=(FeatureType.DATA, config.data_name)),
                   'Merge band features')
    remove_bands = (RemoveFeature(features={FeatureType.DATA: config.band_names}),
                    'Remove bands')
    rename_mask = (RenameFeature((FeatureType.MASK, config.mask_name, config.is_data_mask)),
                   'Rename is data mask')
    calculate_clm = (CloudMasking(), 'Get CLM mask from CLP')

    save_task = (SaveTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                          config=sh_config,
                          overwrite_permission=OverwritePermission.OVERWRITE_FEATURES),
                 'Save EOPatch')

    filenames = [f'{band}.tif' for band in config.band_names] + \
                [f'{config.mask_name}.tif', f'{config.clp_name}.tif', 'userdata.json']
    delete_files = (DeleteFiles(path=config.tiffs_folder, filenames=filenames),
                    'Delete batch files')

    workflow = [
        *import_bands,
        import_clp,
        import_mask,
        rearrange_bands,
        add_timestamps,
        merge_bands,
        remove_bands,
        rename_mask,
        calculate_clm,
        save_task
    ]

    if delete_tiffs:
        workflow.append(delete_files)

    return LinearWorkflow(*workflow)
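# A hedged sketch of executing the ingestion workflow built above for a single
# patch. Task names come from the tuples in the factory; the folder name and
# any per-task arguments the import tasks may require are assumptions.
# workflow = get_tiffs_to_eopatches_workflow(config)
# tasks = workflow.get_tasks()  # maps unique task name -> task instance
# workflow.execute({tasks['Save EOPatch']: {'eopatch_folder': 'eopatch-0000'}})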
def predict_using_model(patch_dir, model_file, method, window_size):
    ''' Defines a workflow that will perform the prediction step on a given EOPatch.

        For a given EOPatch, use the specified model to apply the prediction step.

        Parameters:
            - patch_dir: the directory that contains the patch
            - model_file: the path to the model file.
            - method: the local normalization method, one of 'min', 'median' or 'mean'.
              This should be the same as the one used to train the model.
            - window_size: the window_size used in the local normalization step.
              Should be the same as that used to train the model.

        Returns:
            Nothing. Updates the EOPatch on disk.
    '''
    path = patch_dir
    if not isinstance(path, str):
        path = str(path)

    save = SaveTask(path=path, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)
    load_task = LoadTask(path=path)
    local_norm = LocalNormalization()
    detect_plastics = DetectPlastics(model_file=model_file)

    workflow = LinearWorkflow(load_task, local_norm, detect_plastics, save)
    workflow.execute({
        local_norm: {
            'method': method,
            'window_size': window_size
        }
    })
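# A hypothetical driver for predict_using_model: map it over a directory of
# saved patches. PATCH_ROOT and MODEL_FILE are illustrative assumptions, not
# paths from the source.
from pathlib import Path

PATCH_ROOT = Path('data/eopatches')         # hypothetical folder of EOPatches
MODEL_FILE = 'models/plastic_detector.pkl'  # hypothetical model file

for patch_dir in sorted(PATCH_ROOT.glob('feature_*')):
    # method and window_size must match the values used when training the model
    predict_using_model(patch_dir, MODEL_FILE, method='min', window_size=10)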
def get_gsaa_to_eopatch_workflow(config: GsaaToEopatchConfig) -> EOWorkflow:
    # set up AWS credentials
    sh_config = set_sh_config(config)

    # load patch
    load_task = LoadTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}', config=sh_config)
    # add original vectors to patch
    vec2vec = DB2Vector(database=config.database,
                        user=config.user,
                        password=config.password,
                        host=config.host,
                        port=config.port,
                        crs=config.crs,
                        vector_output_feature=config.vector_feature)

    # get extent mask from vector
    vec2ras = VectorToRaster(config.vector_feature,
                             config.extent_feature,
                             values=1,
                             raster_shape=(config.width, config.height),
                             no_data_value=config.no_data_value,
                             buffer=config.buffer_poly,
                             write_to_existing=False)

    # get boundary mask from extent mask
    ras2bound = Extent2Boundary(config.extent_feature,
                                config.boundary_feature,
                                structure=disk(config.disk_radius))
    # get distance from extent mask
    ras2dist = Extent2Distance(config.extent_feature,
                               config.distance_feature,
                               normalize=True)

    # save new features
    save_task = SaveTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                         features=[config.vector_feature,
                                   config.extent_feature,
                                   config.boundary_feature,
                                   config.distance_feature],
                         overwrite_permission=OverwritePermission.OVERWRITE_FEATURES,
                         config=sh_config)

    return LinearWorkflow(load_task, vec2vec, vec2ras, ras2bound, ras2dist, save_task)
def process_feature(feature, feature_index):
    '''A function to download a given target pixel and its surroundings as an EOPatch

       Parameters:
            feature (GeoSeries): A row from the GeoDataFrame produced by load_fetures_from_file
            feature_index (int): The integer used in saving the EOPatch to disk.

       Returns:
            The resulting EOPatch.
    '''
    save = SaveTask(path=f'{base_dir}/feature_{feature_index}/',
                    overwrite_permission=OverwritePermission.OVERWRITE_PATCH)
    train_test_workflow = LinearWorkflow(input_task, true_color, add_l2a, ndvi, ndwi, add_fdi,
                                         cloud_detection, water_detection, combine_mask, save)

    feature_result = train_test_workflow.execute({
        input_task: {
            'bbox': BBox(bounds.iloc[feature_index], bbox_list[0].crs),
            'time_interval': [feature.date_start, feature.date_end]
        },
        combine_mask: {
            'use_water': False  # (target.reduced_label != 'Timber')
        },
        add_fdi: {
            'band_layer': USE_BANDS,
            'band_names': band_names
        }
    })
    patch = feature_result.eopatch()
    return patch
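# A hypothetical driver for process_feature, assuming `features` is the
# GeoDataFrame the docstring refers to and that the module-level tasks
# (input_task, true_color, ...) are already defined:
def process_all_features(features):
    """Run process_feature over every row and collect the patches."""
    return [process_feature(feature, idx) for idx, feature in features.iterrows()]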
def get_post_processing_workflow(config: PostProcessConfig) -> LinearWorkflow:
    sh_config = set_sh_config(config)

    load_task = (LoadTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                          features=[config.feature_extent,
                                    config.feature_boundary,
                                    (FeatureType.MASK, 'CLM'),
                                    (FeatureType.MASK, 'IS_DATA'),
                                    FeatureType.TIMESTAMP,
                                    FeatureType.META_INFO,
                                    FeatureType.BBOX],
                          config=sh_config),
                 'Load EOPatch')

    merge_extent_tasks = [(TemporalMerging(feature=config.feature_extent,
                                           feature_merged=(FeatureType.DATA_TIMELESS,
                                                           f'{config.feature_extent[1]}_{month}'),
                                           woy_start=woy_start, woy_end=woy_end,
                                           percentile=config.percentile,
                                           max_cloud_coverage=config.max_cloud_coverage),
                           f'Merge EXTENT for {month}')
                          for month, (woy_start, woy_end) in config.time_intervals.items()]

    merge_boundary_tasks = [(TemporalMerging(feature=config.feature_boundary,
                                             feature_merged=(FeatureType.DATA_TIMELESS,
                                                             f'{config.feature_boundary[1]}_{month}'),
                                             woy_start=woy_start, woy_end=woy_end,
                                             percentile=config.percentile,
                                             max_cloud_coverage=config.max_cloud_coverage),
                             f'Merge BOUNDARY for {month}')
                            for month, (woy_start, woy_end) in config.time_intervals.items()]

    combine_tasks = [(CombineUpsample(
        feature_extent=(FeatureType.DATA_TIMELESS, f'{config.feature_extent[1]}_{month}'),
        feature_boundary=(FeatureType.DATA_TIMELESS, f'{config.feature_boundary[1]}_{month}'),
        feature_output=(FeatureType.DATA_TIMELESS, f'PREDICTED_{config.model_version}_{month}'),
        scale_factor=config.scale_factor,
        disk_size=config.disk_size),
        f'Combine masks for {month}')
        for month in config.time_intervals]

    save_task = (SaveTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                          features=[(FeatureType.DATA_TIMELESS, f'{config.feature_extent[1]}_{month}')
                                    for month in config.time_intervals] +
                                   [(FeatureType.DATA_TIMELESS, f'{config.feature_boundary[1]}_{month}')
                                    for month in config.time_intervals] +
                                   [(FeatureType.DATA_TIMELESS, f'PREDICTED_{config.model_version}_{month}')
                                    for month in config.time_intervals],
                          overwrite_permission=OverwritePermission.OVERWRITE_FEATURES,
                          config=sh_config),
                 'Save Task')

    export_tasks = [(ExportToTiff(feature=(FeatureType.DATA_TIMELESS,
                                           f'PREDICTED_{config.model_version}_{month}'),
                                  folder=f's3://{config.bucket_name}/{config.tiffs_folder}/{month}/',
                                  image_dtype=np.float32),
                     f'Export tiffs for {month}')
                    for month in config.time_intervals]

    workflow = LinearWorkflow(load_task, *merge_extent_tasks, *merge_boundary_tasks,
                              *combine_tasks, save_task, *export_tasks)
    return workflow
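# A hedged sketch of running the post-processing workflow over many patches
# with EOExecutor, assuming EOWorkflow.get_tasks() (which maps unique task
# names to task instances) and hypothetical eopatch folder names:
def run_post_processing(config, eopatch_names, workers=4):
    workflow = get_post_processing_workflow(config)
    tasks = workflow.get_tasks()
    exec_args = [{tasks['Load EOPatch']: {'eopatch_folder': name},
                  tasks['Save Task']: {'eopatch_folder': name}}
                 for name in eopatch_names]
    executor = EOExecutor(workflow, exec_args, save_logs=True)
    executor.run(workers=workers)
    return executor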
def get_and_process_patch(bounds, time_range, base_dir, index):
    ''' Defines a workflow that will download and process a specific EOPatch.

        The pipeline has the following steps:
            - Download data
            - Calculate NDVI
            - Calculate NDWI
            - Calculate FDI
            - Add cloud mask
            - Add water mask
            - Combine all masks
            - Perform local normalization
            - Save the results.

        Parameters:
            - bounds: The bounding box of the EOPatch we wish to process
            - time_range: An array of [start_time, end_time]. Any satellite pass
              in that range will be processed.
            - base_dir: the directory to save the patches to
            - index: An index to label this patch

        Returns:
            The EOPatch for this region and time range.
    '''
    save = SaveTask(path=f'{base_dir}/feature_{index}/',
                    overwrite_permission=OverwritePermission.OVERWRITE_PATCH)
    add_fdi = CalcFDI()
    water_detection = WaterDetector()
    combine_mask = CombineMask()
    local_norm = LocalNormalization()

    fetch_workflow = LinearWorkflow(input_task, true_color, add_l2a,
                                    ndvi_task(), ndwi_task(), add_fdi,
                                    cloud_classifier_task(), water_detection,
                                    combine_mask, local_norm, save)

    feature_result = fetch_workflow.execute({
        input_task: {
            'bbox': BBox(bounds, CRS.WGS84),
            'time_interval': time_range
        },
        combine_mask: {
            'use_water': False
        },
        local_norm: {
            'method': 'min',
            'window_size': 10,
        }
    })
    patch = feature_result.eopatch()
    return patch
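# Hypothetical invocation of get_and_process_patch; the bounding box and time
# range below are illustrative values, not taken from the source.
patch = get_and_process_patch(
    [12.3, 45.0, 12.4, 45.1],      # (min_lon, min_lat, max_lon, max_lat) in WGS84
    ['2021-06-01', '2021-06-30'],  # any satellite pass in this window is processed
    base_dir='data/eopatches',
    index=0,
)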
def test_save_and_load_tasks(eopatch, fs_loader):
    folder = "foo-folder"
    patch_folder = "patch-folder"
    with fs_loader() as temp_fs:
        temp_fs.makedir(folder)

        save_task = SaveTask(folder, filesystem=temp_fs, compress_level=9)
        load_task = LoadTask(folder, filesystem=temp_fs, lazy_loading=False)

        saved_eop = save_task(eopatch, eopatch_folder=patch_folder)
        bbox_path = fs.path.join(folder, patch_folder, "bbox.geojson.gz")
        assert temp_fs.exists(bbox_path)
        assert saved_eop == eopatch

        eop = load_task(eopatch_folder=patch_folder)
        assert eop == eopatch
def download_patches(path, shp, bbox_list, indexes):
    add_data = S2L1CWCSInput(
        layer='BANDS-S2-L1C',
        feature=(FeatureType.DATA, 'BANDS'),  # save under name 'BANDS'
        resx='10m',  # resolution x
        resy='10m',  # resolution y
        maxcc=0.8,   # maximum allowed cloud cover of original ESA tiles
    )
    path_out = path + '/Slovenia/'
    if not os.path.isdir(path_out):
        os.makedirs(path_out)
    save = SaveTask(path_out, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    workflow = LinearWorkflow(add_data, save)

    time_interval = ['2017-01-01', '2017-12-31']  # time interval for the SH request

    execution_args = []
    for idx, bbox in zip(indexes, bbox_list[indexes]):
        execution_args.append({
            add_data: {'bbox': bbox, 'time_interval': time_interval},
            save: {'eopatch_folder': 'eopatch_{}'.format(idx)}
        })

    start_time = time.time()
    executor = EOExecutor(workflow, execution_args, save_logs=True)
    executor.run(workers=1, multiprocess=False)

    with open('timing.txt', 'a') as file:
        running = str(dt.datetime.now()) + ' Running time: {}\n'.format(time.time() - start_time)
        print(running)
        file.write(running)
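# Optional follow-up (a sketch, not in the original): after run() completes,
# EOExecutor can render an HTML execution report to complement the timing log.
# executor.make_report()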
def test_save_and_load_tasks(self):
    folder = 'foo-folder'
    patch_folder = 'patch-folder'
    for fs_loader in self.filesystem_loaders:
        with fs_loader() as temp_fs:
            temp_fs.makedir(folder)

            save_task = SaveTask(folder, filesystem=temp_fs, compress_level=9)
            load_task = LoadTask(folder, filesystem=temp_fs, lazy_loading=False)

            saved_eop = save_task(self.eopatch, eopatch_folder=patch_folder)
            bbox_path = fs.path.join(folder, patch_folder, 'bbox.pkl.gz')
            self.assertTrue(temp_fs.exists(bbox_path))
            self.assertEqual(saved_eop, self.eopatch)

            eop = load_task(eopatch_folder=patch_folder)
            self.assertEqual(eop, self.eopatch)
    resx='10m',
    resy='10m',
    maxcc=0.2)

# task to get ground-truth from Geopedia
geopedia_data = AddGeopediaFeature(
    (FeatureType.MASK_TIMELESS, 'TREE_COVER'),
    layer='ttl2275',
    theme='QP',
    raster_value=raster_value)

# task to compute median values
get_median_pixel = MedianPixel(
    (FeatureType.DATA, 'TRUE-COLOR-S2-L2A'),
    feature_out=(FeatureType.DATA_TIMELESS, 'MEDIAN_PIXEL'))

# task to save to disk
save = SaveTask(
    op.join(output_path, 'eopatch'),
    overwrite_permission=OverwritePermission.OVERWRITE_PATCH,
    compress_level=2)

# initialize workflow
workflow = LinearWorkflow(input_task, geopedia_data, get_median_pixel, save)

# use a function to run this workflow on a single bbox
def execute_workflow(index):
    bbox = bbox_splitter.bbox_list[index]
    info = bbox_splitter.info_list[index]
    patch_name = 'eopatch_{0}_row-{1}_col-{2}'.format(
        index, info['index_x'], info['index_y'])
    results = workflow.execute({
    (FeatureType.DATA, 'BANDS'), (FeatureType.DATA, 'NDVI'),
    [band_names.index('B08'), band_names.index('B04')])

ndwi = NormalizedDifferenceIndexTask(
    (FeatureType.DATA, 'BANDS'), (FeatureType.DATA, 'NDWI'),
    [band_names.index('B03'), band_names.index('B08')])

ndbi = NormalizedDifferenceIndexTask(
    (FeatureType.DATA, 'BANDS'), (FeatureType.DATA, 'NDBI'),
    [band_names.index('B11'), band_names.index('B08')])

# SaveTask
save = SaveTask(SAVE_PATH, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

# Workflow
workflow = LinearWorkflow(
    add_data,
    ndvi,
    ndwi,
    ndbi,
    save,
)

time_interval = {
    '18/19': ['2018-11-01', '2019-05-01'],
    '19/20': ['2019-11-01', '2020-05-01'],
}
                                gradient[..., np.newaxis])
        return eopatch


if __name__ == '__main__':
    # path = 'E:/Data/PerceptiveSentinel'
    path = '/home/beno/Documents/test/Slovenia/'

    size_small = (337, 333)
    size_big = (505, 500)

    load = LoadTask(path, lazy_loading=True)

    save_path_location = path
    if not os.path.isdir(save_path_location):
        os.makedirs(save_path_location)
    save = SaveTask(save_path_location, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    dem = SentinelHubDemTask((FeatureType.DATA_TIMELESS, 'DEM'), size=size_big)
    grad = AddGradientTask((FeatureType.DATA_TIMELESS, 'DEM'),
                           (FeatureType.DATA_TIMELESS, 'INCLINATION'))

    workflow = LinearWorkflow(load, dem, grad, save)

    no_patches = 1061

    execution_args = []
    for i in range(no_patches):
        i = i + 2
        execution_args.append({
            load: {
                'eopatch_folder': 'eopatch_{}'.format(i)
def run_prediction_on_eopatch(eopatch_name: str,
                              config: PredictionConfig,
                              model: ResUnetA = None,
                              normalisation_factors: pd.DataFrame = None) -> dict:
    """ Run prediction workflow on one eopatch. Model and dataframe can be provided to avoid loading them every time """
    sh_config = set_sh_config(config)

    filesystem = prepare_filesystem(config)

    if normalisation_factors is None:
        normalisation_factors = load_metadata(filesystem, config)

    if model is None:
        model = load_model(filesystem, config)

    load_task = LoadTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                         features=[config.feature_bands,
                                   config.reference_distance,
                                   config.reference_extent,
                                   config.reference_boundary,
                                   FeatureType.TIMESTAMP,
                                   FeatureType.META_INFO,
                                   FeatureType.BBOX],
                         config=sh_config)

    save_task = SaveTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                         features=[config.feature_extent,
                                   config.feature_boundary,
                                   config.feature_distance,
                                   FeatureType.META_INFO],
                         overwrite_permission=OverwritePermission.OVERWRITE_FEATURES,
                         config=sh_config)

    try:
        eop = load_task.execute(eopatch_folder=eopatch_name)

        eop = prediction_fn(eop,
                            normalisation_factors=normalisation_factors,
                            normalise=config.normalise,
                            model=model,
                            model_name=config.model_name,
                            extent_feature=config.feature_extent,
                            boundary_feature=config.feature_boundary,
                            distance_feature=config.feature_distance,
                            suffix=config.model_version,
                            batch_size=config.batch_size,
                            n_classes=config.n_classes,
                            bands_feature=config.feature_bands,
                            reference_boundary=config.reference_boundary,
                            reference_distance=config.reference_distance,
                            reference_extent=config.reference_extent)

        _ = save_task.execute(eop, eopatch_folder=eopatch_name)

        del eop

        return dict(name=eopatch_name, status='Success')
    except Exception as exc:
        return dict(name=eopatch_name, status=exc)
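# A sketch (an assumption, not from the source) of mapping
# run_prediction_on_eopatch over many patches with a process pool. The model
# and normalisation factors are reloaded inside each call, since model objects
# generally do not pickle well across processes.
from concurrent.futures import ProcessPoolExecutor
from functools import partial


def run_prediction_batch(eopatch_names, config, max_workers=4):
    """Run prediction per patch and collect the status dictionaries."""
    fn = partial(run_prediction_on_eopatch, config=config)
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        return list(pool.map(fn, eopatch_names))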