def _run_and_test_execution(self, workers, multiprocess, filter_logs):
    for execution_names in [None, [4, 'x', 'y', 'z']]:
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            executor = EOExecutor(
                self.workflow, self.execution_args, save_logs=True,
                logs_folder=tmp_dir_name,
                logs_filter=CustomLogFilter() if filter_logs else None,
                execution_names=execution_names
            )
            executor.run(workers=workers, multiprocess=multiprocess)

            self.assertEqual(len(executor.execution_logs), 4)
            for log in executor.execution_logs:
                self.assertTrue(len(log.split()) >= 3)

            log_filenames = sorted(os.listdir(executor.report_folder))
            self.assertEqual(len(log_filenames), 4)

            if execution_names:
                for name, log_filename in zip(execution_names, log_filenames):
                    self.assertTrue(log_filename == 'eoexecution-{}.log'.format(name))

            log_path = os.path.join(executor.report_folder, log_filenames[0])
            with open(log_path, 'r') as fp:
                line_count = len(fp.readlines())
            expected_line_count = 2 if filter_logs else 12
            self.assertEqual(line_count, expected_line_count)
def _load_with_index(self, feature=None):
    """ Split the image into a number of EOPatches (data is lazily loaded) with the given
    splitter, and index each EOPatch in a two-dimensional list.

    :param feature: Feature to be loaded; defaults to ``self.feature``
    :type feature: (FeatureType, feature_name) or FeatureType
    """
    if feature is None:
        feature = self.feature
    add_data = ImportFromGeogenius(feature=feature, geogenius_image=self.geogenius_image)
    tile_rows, tile_columns = self._get_tile_rows_columns()
    self.patch_index = [[0] * tile_columns for _ in range(tile_rows)]
    index_feature = IndexTask(patch_index=self.patch_index)

    workflow = LinearWorkflow(add_data, index_feature)

    execution_args = []
    bbox_list = np.array(self.splitter.get_pixel_bbox_list())
    for idx, bbox in enumerate(bbox_list):
        row = idx % tile_rows
        column = idx // tile_rows
        execution_args.append({
            add_data: {'pixelbox': bbox},
            index_feature: {'row': row, 'column': column}
        })

    executor = EOExecutor(workflow, execution_args)
    executor.run(workers=1, multiprocess=False)
    return self.patch_index
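# A minimal sketch of the IndexTask used above, assuming (this is not confirmed
# by the surrounding code) that it is an EOTask which records each executed
# EOPatch at its grid position in the shared two-dimensional patch index.
from eolearn.core import EOTask

class IndexTask(EOTask):
    def __init__(self, patch_index):
        self.patch_index = patch_index

    def execute(self, eopatch, *, row, column):
        # Store the (lazily loaded) EOPatch at its grid position and pass it on.
        self.patch_index[row][column] = eopatch
        return eopatch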
def test_exceptions(workflow, execution_kwargs):
    with pytest.raises(ValueError):
        EOExecutor(workflow, {})

    with pytest.raises(ValueError):
        EOExecutor(workflow, execution_kwargs, execution_names={1, 2, 3, 4})
    with pytest.raises(ValueError):
        EOExecutor(workflow, execution_kwargs, execution_names=["a", "b"])
def test_read_logs(test_args, execution_names, workflow, execution_kwargs):
    workers, multiprocess, filter_logs = test_args
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(
            workflow,
            execution_kwargs,
            save_logs=True,
            logs_folder=tmp_dir_name,
            logs_filter=CustomLogFilter() if filter_logs else None,
            execution_names=execution_names,
        )
        executor.run(workers=workers, multiprocess=multiprocess)

        execution_logs = executor.read_logs()
        assert len(execution_logs) == 4
        for log in execution_logs:
            assert len(log.split()) >= 3

        log_filenames = sorted(executor.filesystem.listdir(executor.report_folder))
        assert len(log_filenames) == 4

        if execution_names:
            for name, log_filename in zip(execution_names, log_filenames):
                assert log_filename == f"eoexecution-{name}.log"

        log_path = os.path.join(executor.report_folder, log_filenames[0])
        with executor.filesystem.open(log_path, "r") as fp:
            line_count = len(fp.readlines())
        expected_line_count = 2 if filter_logs else 12
        assert line_count == expected_line_count
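# A possible shape for the CustomLogFilter used above, assuming it is a
# logging.Filter subclass that passes only a subset of records. The exact rule
# is an assumption here (keep warnings and above); whatever the real rule is,
# it explains why the filtered log file holds 2 lines instead of 12.
import logging

class CustomLogFilter(logging.Filter):
    def filter(self, record):
        # Hypothetical rule: keep only warning-level records and above.
        return record.levelno >= logging.WARNING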
def test_execution_logs(self):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(self.workflow, self.execution_args, save_logs=True,
                              logs_folder=tmp_dir_name)
        executor.run()

        self.assertEqual(len(executor.execution_logs), 4)
        for log in executor.execution_logs:
            self.assertTrue(len(log.split()) >= 3)
def test_exceptions(self):
    with self.assertRaises(ValueError):
        EOExecutor(self.workflow, {})

    with self.assertRaises(ValueError):
        EOExecutor(self.workflow, self.execution_args, execution_names={1, 2, 3, 4})
    with self.assertRaises(ValueError):
        EOExecutor(self.workflow, self.execution_args, execution_names=['a', 'b'])
def test_execution_stats(self):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(self.workflow, self.execution_args, logs_folder=tmp_dir_name)
        executor.run(workers=2)

        self.assertEqual(len(executor.execution_stats), 4)
        for stats in executor.execution_stats:
            for time_stat in ['start_time', 'end_time']:
                self.assertTrue(time_stat in stats and isinstance(stats[time_stat], datetime.datetime))
def test_execution_results(workflow, execution_kwargs):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(workflow, execution_kwargs, logs_folder=tmp_dir_name)
        executor.run(workers=2)

        assert len(executor.execution_results) == 4
        for results in executor.execution_results:
            for time_stat in [results.start_time, results.end_time]:
                assert isinstance(time_stat, datetime.datetime)
def test_execution_results2(workflow, execution_kwargs):
    executor = EOExecutor(workflow, execution_kwargs)
    results = executor.run(workers=2, multiprocess=True)

    assert isinstance(results, list)
    for idx, workflow_results in enumerate(results):
        assert isinstance(workflow_results, WorkflowResults)
        if idx != 3:
            assert workflow_results.outputs["output"] == 42
def test_execution_errors(self):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(self.workflow, self.execution_args, logs_folder=tmp_dir_name)
        executor.run(workers=5)

        for idx, stats in enumerate(executor.execution_stats):
            if idx != 3:
                self.assertFalse('error' in stats, 'Workflow {} should be executed without errors'.format(idx))
            else:
                self.assertTrue('error' in stats and stats['error'],
                                'This workflow should be executed with an error')
def test_mix_with_eoexecutor(workflow, execution_kwargs, simple_cluster):
    rayexecutor = RayExecutor(workflow, execution_kwargs)
    eoexecutor = EOExecutor(workflow, execution_kwargs)
    for _ in range(10):
        ray_results = rayexecutor.run()
        eo_results = eoexecutor.run()

        ray_outputs = [results.outputs for results in ray_results]
        eo_outputs = [results.outputs for results in eo_results]
        assert ray_outputs == eo_outputs
def test_execution_errors(self):
    task = RaiserErrorTask()
    workflow = EOWorkflow(dependencies=[
        Dependency(task=task, inputs=[]),
    ])
    execution_args = [
        {'arg1': 1}
    ]
    with tempfile.TemporaryDirectory() as tmpdirname:
        executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
        executor.run()
        self.assertTrue('error' in executor.execution_stats[0])
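# A minimal sketch of the RaiserErrorTask assumed by the test above: an EOTask
# whose execute method always raises, so the executor records an 'error' entry
# in its execution stats. The real helper in the test suite may differ.
from eolearn.core import EOTask

class RaiserErrorTask(EOTask):
    def execute(self, *args, **kwargs):
        raise Exception('This task failed on purpose')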
def test_report_creation(self):
    task = ExampleTask()
    workflow = EOWorkflow(dependencies=[
        Dependency(task=task, inputs=[]),
    ])
    execution_args = [
        {'arg1': 1}
    ]
    with tempfile.TemporaryDirectory() as tmpdirname:
        executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
        executor.run()
        self.assertIsNotNone(executor.make_report())
def save_patch(self, save_folder, feature=None, overwrite_permission=OverwritePermission.OVERWRITE_PATCH,
               compress_level=0):
    """ Save indexed EOPatches to a folder.

    :param save_folder: Folder to save the EOPatches to
    :type save_folder: str
    :param feature: Feature to be exported
    :type feature: (FeatureType, feature_name) or FeatureType
    :param overwrite_permission: Permission to overwrite an existing EOPatch. Permissions are in the
        following hierarchy:

        - `ADD_ONLY` - Only new features can be added, anything that is already saved cannot be changed.
        - `OVERWRITE_FEATURES` - Overwrite only data for features which have to be saved. The remaining
          content of the saved EOPatch will stay unchanged.
        - `OVERWRITE_PATCH` - Overwrite the entire content of the saved EOPatch and replace it with the
          new content.
    :type overwrite_permission: OverwritePermission
    :param compress_level: The level of data compression, specified as an integer from 0 (no compression)
        to 9 (highest compression).
    :type compress_level: int
    """
    if not feature:
        feature = self.feature
    if not self._is_loaded():
        self._load_with_index(feature=feature)
    tile_rows, tile_columns = self._get_tile_rows_columns()
    self._assure_folder_exist(save_folder)
    save_task = SaveToDisk(save_folder, features=[feature, FeatureType.BBOX],
                           overwrite_permission=overwrite_permission, compress_level=compress_level)

    workflow = LinearWorkflow(save_task)

    execution_args = []
    for row in range(tile_rows):
        for column in range(tile_columns):
            execution_args.append({
                save_task: {
                    'eopatch_folder': 'patch_{row}_{column}'.format(row=row, column=column),
                    'eopatch': self.patch_index[row][column]
                }
            })

    executor = EOExecutor(workflow, execution_args)
    executor.run(workers=1, multiprocess=False)
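# Illustrative (hypothetical) usage of save_patch; `loader` stands in for an
# instance of the unnamed class that owns _load_with_index and save_patch.
# loader.save_patch(
#     save_folder='./eopatches',
#     overwrite_permission=OverwritePermission.ADD_ONLY,  # refuse to change saved data
#     compress_level=1,
# )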
def test_execution_logs(self):
    for execution_names in [None, [4, 'x', 'y', 'z']]:
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            executor = EOExecutor(self.workflow, self.execution_args, save_logs=True,
                                  logs_folder=tmp_dir_name, execution_names=execution_names)
            executor.run()

            self.assertEqual(len(executor.execution_logs), 4)
            for log in executor.execution_logs:
                self.assertTrue(len(log.split()) >= 3)

            log_filenames = sorted(os.listdir(executor.report_folder))
            self.assertEqual(len(log_filenames), 4)

            if execution_names:
                for name, log_filename in zip(execution_names, log_filenames):
                    self.assertTrue(log_filename == 'eoexecution-{}.log'.format(name))
def test_execution_stats(self):
    task = ExampleTask()
    workflow = EOWorkflow(dependencies=[
        Dependency(task=task, inputs=[]),
    ])
    execution_args = [
        {'arg1': 1},
        {'arg1': 2}
    ]
    with tempfile.TemporaryDirectory() as tmpdirname:
        executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
        executor.run()
        self.assertEqual(len(executor.execution_stats), 2)
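# A minimal sketch of the ExampleTask used by the report/stats tests above,
# assuming it is a trivial EOTask that accepts the 'arg1' keyword and succeeds;
# the actual test helper may do more (e.g. emit log records).
from eolearn.core import EOTask

class ExampleTask(EOTask):
    def execute(self, *, arg1):
        return arg1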
def test_report_creation(self):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(self.workflow, self.execution_args, logs_folder=tmp_dir_name)
        executor.run(workers=10)
        executor.make_report()

        self.assertTrue(os.path.exists(executor.get_report_filename()), 'Execution report was not created')
def test_execution_results(self):
    for return_results in [True, False]:
        executor = EOExecutor(self.workflow, self.execution_args)
        results = executor.run(workers=2, multiprocess=True, return_results=return_results)

        if return_results:
            self.assertTrue(isinstance(results, list))
            for idx, workflow_results in enumerate(results):
                if idx == 3:
                    self.assertEqual(workflow_results, None)
                else:
                    self.assertTrue(isinstance(workflow_results, WorkflowResults))
                    self.assertEqual(workflow_results[self.final_task], 42)
                    self.assertTrue(self.task not in workflow_results)
        else:
            self.assertEqual(results, None)
def execute_workflow(workflow, input_task, save_task, bbox_splitter, time_interval):
    """ Helper function for executing the eo-learn workflow """
    bbox_list = np.array(bbox_splitter.get_bbox_list())
    total_patches = len(bbox_list)

    # Define additional parameters of the workflow
    execution_args = [
        {
            input_task: {'bbox': bbox_list[idx], 'time_interval': time_interval},
            save_task: {'eopatch_folder': 'eopatch_{}'.format(idx)}
        }
        for idx in range(total_patches)
    ]

    executor = EOExecutor(workflow, execution_args, save_logs=True)
    executor.run(workers=5, multiprocess=True)

    return total_patches
def test_keyboard_interrupt():
    exception_node = EONode(KeyboardExceptionTask())
    workflow = EOWorkflow([exception_node])
    execution_kwargs = []
    for _ in range(10):
        execution_kwargs.append({exception_node: {"arg1": 1}})

    run_kwargs = [{"workers": 1}, {"workers": 3, "multiprocess": True}, {"workers": 3, "multiprocess": False}]
    for kwarg in run_kwargs:
        with pytest.raises(KeyboardInterrupt):
            EOExecutor(workflow, execution_kwargs).run(**kwarg)
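# A minimal sketch of the KeyboardExceptionTask assumed above: an EOTask whose
# execute method raises KeyboardInterrupt, letting the test check that the
# executor propagates the interrupt under every worker configuration.
from eolearn.core import EOTask

class KeyboardExceptionTask(EOTask):
    def execute(self, *args, **kwargs):
        raise KeyboardInterrupt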
def download_patches(path, shp, bbox_list, indexes):
    add_data = S2L1CWCSInput(
        layer='BANDS-S2-L1C',
        feature=(FeatureType.DATA, 'BANDS'),  # save under name 'BANDS'
        resx='10m',  # resolution x
        resy='10m',  # resolution y
        maxcc=0.8,  # maximum allowed cloud cover of original ESA tiles
    )
    path_out = path + '/Slovenia/'
    if not os.path.isdir(path_out):
        os.makedirs(path_out)
    save = SaveTask(path_out, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    workflow = LinearWorkflow(add_data, save)

    time_interval = ['2017-01-01', '2017-12-31']  # time interval for the SH request

    execution_args = []
    for idx, bbox in zip(indexes, bbox_list[indexes]):
        execution_args.append({
            add_data: {'bbox': bbox, 'time_interval': time_interval},
            save: {'eopatch_folder': 'eopatch_{}'.format(idx)}
        })

    start_time = time.time()
    executor = EOExecutor(workflow, execution_args, save_logs=True)
    executor.run(workers=1, multiprocess=False)

    running = str(dt.datetime.now()) + ' Running time: {}\n'.format(time.time() - start_time)
    print(running)
    with open('timing.txt', 'a') as file:
        file.write(running)
def test_report_creation(save_logs, include_logs):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(
            WORKFLOW,
            EXECUTION_KWARGS,
            logs_folder=tmp_dir_name,
            save_logs=save_logs,
            execution_names=["ex 1", 2, 0.4, None],
        )
        executor.run(workers=10)
        executor.make_report(include_logs=include_logs)

        assert os.path.exists(executor.get_report_path()), "Execution report was not created"
def test_execution_errors(multiprocess, workflow, execution_kwargs):
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        executor = EOExecutor(workflow, execution_kwargs, logs_folder=tmp_dir_name)
        executor.run(workers=5, multiprocess=multiprocess)

        for idx, results in enumerate(executor.execution_results):
            if idx == 3:
                assert results.workflow_failed()
            else:
                assert not results.workflow_failed()

        assert executor.get_successful_executions() == [0, 1, 2]
        assert executor.get_failed_executions() == [3]
def test_keyboard_interrupt(self):
    exception_task = KeyboardExceptionTask()
    workflow = LinearWorkflow(exception_task)
    execution_args = []
    for _ in range(10):
        execution_args.append({exception_task: {'arg1': 1}})

    run_args = [
        {'workers': 1},
        {'workers': 3, 'multiprocess': True},
        {'workers': 3, 'multiprocess': False}
    ]
    for arg in run_args:
        self.assertRaises(KeyboardInterrupt, EOExecutor(workflow, execution_args).run, **arg)
def test_execution_errors(self):
    for multiprocess in [True, False]:
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            executor = EOExecutor(self.workflow, self.execution_args, logs_folder=tmp_dir_name)
            executor.run(workers=5, multiprocess=multiprocess)

            for idx, stats in enumerate(executor.execution_stats):
                if idx != 3:
                    self.assertFalse('error' in stats, 'Workflow {} should be executed without errors'.format(idx))
                else:
                    self.assertTrue('error' in stats and stats['error'],
                                    'This workflow should be executed with an error')

            self.assertEqual(executor.get_successful_executions(), [0, 1, 2])
            self.assertEqual(executor.get_failed_executions(), [3])
for i in range(len(patchIDs)):
    execution_args.append({
        load: {'eopatch_folder': 'eopatch_{}'.format(i)},
        export_tiff: {'filename': '{}/prediction_eopatch_{}.tiff'.format(tiff_location, i)},
        save: {'eopatch_folder': 'eopatch_{}'.format(i)}
    })

# run the executor on 2 cores
executor = EOExecutor(workflow, execution_args)
executor.run(workers=2)

# uncomment below to save the logs in the current directory and produce a report!
# executor = EOExecutor(workflow, execution_args, save_logs=True)
# executor.run(workers=2)
# executor.make_report()

# %% Plot: frequency of classes
fig = plt.figure(figsize=(15, 8))
label_ids, label_counts = np.unique(labels_train, return_counts=True)
plt.bar(range(len(label_ids)), label_counts)
plt.xticks(range(len(label_ids)), [class_names[i] for i in label_ids])
workflow = LinearWorkflow(add_data, save)

# Execute the workflow
# define additional parameters of the workflow
execution_args = []
for idx, bbox in enumerate(bbox_list[patchIDs]):
    execution_args.append({
        add_data: {'bbox': bbox},
        save: {'eopatch_folder': 'eopatch_{}'.format(idx)}
    })

executor = EOExecutor(workflow, execution_args, save_logs=True)
executor.run(workers=5, multiprocess=False)

# requires graphviz to be installed
# executor.make_report()

# Load GeogeniusEOPatch
eopatch = GeogeniusEOPatch.load(path=os.path.join(path_out, 'eopatch_{}'.format(0)), lazy_loading=True)
print(eopatch)

# Print data
print(eopatch.get_feature(FeatureType.DATA, 'BANDS'))

# Convert all patches to tiff
tiff_out = get_current_folder("tiff")
def download_data(path_save, coords_top, coords_bot, patch_n, s_date, e_date, debug=False):
    # before moving onto actual tasks, check setup
    check_sentinel_cfg()

    [lat_left_top, lon_left_top] = coords_top
    [lat_right_bot, lon_right_bot] = coords_bot

    # TASK FOR BAND DATA
    # add a request for B(B02), G(B03), R(B04), NIR (B08), SWIR1(B11), SWIR2(B12)
    # from default layer 'ALL_BANDS' at 10m resolution
    # Here we also do a simple filter of cloudy scenes. A detailed cloud cover
    # detection is performed in the next step
    custom_script = "return [B02, B03, B04, B08, B11, B12];"
    add_data = S2L1CWCSInput(
        layer="BANDS-S2-L1C",
        feature=(FeatureType.DATA, "BANDS"),  # save under name 'BANDS'
        # custom url for 6 specific bands
        custom_url_params={CustomUrlParam.EVALSCRIPT: custom_script},
        resx="10m",  # resolution x
        resy="10m",  # resolution y
        maxcc=0.1,  # maximum allowed cloud cover of original ESA tiles
    )

    # TASK FOR CLOUD INFO
    # cloud detection is performed at 80m resolution
    # and the resulting cloud probability map and mask
    # are scaled to EOPatch's resolution
    cloud_classifier = get_s2_pixel_cloud_detector(average_over=2, dilation_size=1, all_bands=False)
    add_clm = AddCloudMaskTask(
        cloud_classifier,
        "BANDS-S2CLOUDLESS",
        cm_size_y="80m",
        cm_size_x="80m",
        cmask_feature="CLM",  # cloud mask name
        cprobs_feature="CLP",  # cloud prob. map name
    )

    # TASKS FOR CALCULATING NEW FEATURES
    # NDVI: (B08 - B04)/(B08 + B04)
    # NDWI: (B03 - B08)/(B03 + B08)
    # NORM: sqrt(B02^2 + B03^2 + B04^2 + B08^2 + B11^2 + B12^2)
    ndvi = NormalizedDifferenceIndex("NDVI", "BANDS/3", "BANDS/2")
    ndwi = NormalizedDifferenceIndex("NDWI", "BANDS/1", "BANDS/3")
    norm = EuclideanNorm("NORM", "BANDS")

    # TASK FOR VALID MASK
    # validate pixels using SentinelHub's cloud detection mask and region of acquisition
    add_sh_valmask = AddValidDataMaskTask(
        SentinelHubValidData(),
        "IS_VALID"  # name of output mask
    )

    # TASK FOR COUNTING VALID PIXELS
    # count number of valid observations per pixel using valid data mask
    count_val_sh = CountValid(
        "IS_VALID",  # name of existing mask
        "VALID_COUNT"  # name of output scalar
    )

    # TASK FOR SAVING TO OUTPUT (if needed)
    path_save = Path(path_save)
    path_save.mkdir(exist_ok=True)
    # if not os.path.isdir(path_save):
    #     os.makedirs(path_save)
    save = SaveToDisk(path_save, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    # Define the workflow
    workflow = LinearWorkflow(add_data, add_clm, ndvi, ndwi, norm, add_sh_valmask, count_val_sh, save)

    # Execute the workflow
    # time interval for the SH request
    # TODO: need to check if specified time interval is valid
    time_interval = [s_date, e_date]

    # define additional parameters of the workflow
    execution_args = []
    path_EOPatch = path_save / f"eopatch_{patch_n}"
    execution_args.append({
        add_data: {
            "bbox": BBox(
                ((lon_left_top, lat_left_top), (lon_right_bot, lat_right_bot)),
                crs=CRS.WGS84,
            ),
            "time_interval": time_interval,
        },
        save: {"eopatch_folder": path_EOPatch.stem},
    })

    executor = EOExecutor(workflow, execution_args, save_logs=True)
    if debug:
        print("Downloading Satellite data ...")
    executor.run(workers=2, multiprocess=False)
    if executor.get_failed_executions():
        raise RuntimeError("EOExecutor failed in finishing tasks!")
    if debug:
        executor.make_report()
        print("Satellite data is downloaded")
    return path_EOPatch
    ndwi,
    ndbi,
    save,
)

time_interval = {
    '18/19': ['2018-11-01', '2019-05-01'],
    '19/20': ['2019-11-01', '2020-05-01'],
}

downloaded = os.listdir(SAVE_PATH)
downloaded = list(map(lambda x: int(x.split('_')[1]), downloaded))

execution_args = []
for idx, row in gdf.loc[~gdf.index.isin(downloaded), :].iterrows():
    bbox = row.geometry.bounds
    bbox = BBox(bbox, CRS('32720'))
    execution_args.append({
        add_data: {'bbox': bbox, 'time_interval': time_interval[row.Campania]},
        save: {'eopatch_folder': f'eopatch_{idx}'}
    })

executor = EOExecutor(workflow, execution_args)
executor.run(workers=None, multiprocess=True)
executor.make_report()
def load_LPIS(country, year, path, no_patches):
    patch_location = path + '/{}/'.format(country)
    load = LoadFromDisk(patch_location)

    save_path_location = patch_location
    if not os.path.isdir(save_path_location):
        os.makedirs(save_path_location)
    save = SaveToDisk(save_path_location, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    # workflow_data = get_create_and_add_lpis_workflow(country, year, save_path_location)

    name_of_feature = 'LPIS_{}'.format(year)

    groups_to_number, crops_to_number = create_mapping(country)

    layer_id = GEOPEDIA_LPIS_LAYERS[f'{country}_LPIS_{year}']
    ftr_name = f'LPIS_{year}'
    year_filter = (GEOPEDIA_LPIS_YEAR_NAME[country], year) if GEOPEDIA_LPIS_YEAR_NAME[country] is not None else None

    add_lpis = AddGeopediaVectorFeature(
        (FeatureType.VECTOR_TIMELESS, ftr_name),
        layer=layer_id, year_filter=year_filter, drop_duplicates=True)
    area_ratio = AddAreaRatio(
        (FeatureType.VECTOR_TIMELESS, ftr_name),
        (FeatureType.SCALAR_TIMELESS, 'FIELD_AREA_RATIO'))
    fixlpis = FixLPIS(feature=name_of_feature, country=country)

    rasterize = VectorToRaster(
        vector_input=(FeatureType.VECTOR_TIMELESS, name_of_feature),
        raster_feature=(FeatureType.MASK_TIMELESS, name_of_feature),
        values=None,
        values_column='GROUP',
        raster_shape=(FeatureType.DATA, 'BANDS'),
        raster_dtype=np.int16,
        no_data_value=np.nan)

    add_group = AddGroup(crops_to_number, name_of_feature)
    remove_dtf = RemoveFeature(FeatureType.VECTOR_TIMELESS, name_of_feature)

    exclude = WorkflowExclude(area_ratio, fixlpis, add_group, rasterize, remove_dtf)

    workflow = LinearWorkflow(load, add_lpis, exclude, save)

    execution_args = []
    for i in range(no_patches):
        execution_args.append({
            load: {'eopatch_folder': 'eopatch_{}'.format(i)},
            save: {'eopatch_folder': 'eopatch_{}'.format(i)}
        })

    # Choose how many processes/threads to run on here; workers=None uses the
    # maximum number of available processors.
    executor = EOExecutor(workflow, execution_args, save_logs=True, logs_folder='ExecutionLogs')
    # executor.run(workers=None, multiprocess=True)
    executor.run()