def get_gsaa_to_eopatch_workflow(config: GsaaToEopatchConfig) -> EOWorkflow:
    """Assemble the linear workflow that adds GSAA-derived features to stored EOPatches.

    The tasks run in this order: load the patch, add the vectors read from the
    database, rasterise them into an extent mask, derive the boundary and the
    distance features from that mask, and save the four new features back to
    the same S3 location the patch was loaded from.

    :param config: Settings providing the bucket and folder of the EOPatches,
        the database connection, the feature names and the rasterisation
        parameters.
    :return: A ``LinearWorkflow`` chaining the six tasks.
    """
    # set up AWS credentials
    sh_config = set_sh_config(config)

    # Loading and saving use the same S3 location.
    eopatches_path = f's3://{config.bucket_name}/{config.eopatches_folder}'

    # Features written back to storage once the workflow has finished.
    new_features = [
        config.vector_feature,
        config.extent_feature,
        config.boundary_feature,
        config.distance_feature,
    ]

    tasks = [
        # load patch
        LoadTask(path=eopatches_path, config=sh_config),
        # add original vectors to patch
        DB2Vector(
            database=config.database,
            user=config.user,
            password=config.password,
            host=config.host,
            port=config.port,
            crs=config.crs,
            vector_output_feature=config.vector_feature,
        ),
        # get extent mask from vector
        # NOTE(review): the shape is passed as (width, height) -- confirm this
        # is the order VectorToRaster expects for non-square rasters.
        VectorToRaster(
            config.vector_feature,
            config.extent_feature,
            values=1,
            raster_shape=(config.width, config.height),
            no_data_value=config.no_data_value,
            buffer=config.buffer_poly,
            write_to_existing=False,
        ),
        # get boundary mask from extent mask
        Extent2Boundary(
            config.extent_feature,
            config.boundary_feature,
            structure=disk(config.disk_radius),
        ),
        # get distance from extent mask
        Extent2Distance(
            config.extent_feature,
            config.distance_feature,
            normalize=True,
        ),
        # save new features
        SaveTask(
            path=eopatches_path,
            features=new_features,
            overwrite_permission=OverwritePermission.OVERWRITE_FEATURES,
            config=sh_config,
        ),
    ]

    return LinearWorkflow(*tasks)
def create_workflow(resolution, land_cover_data, output_bucket):
    """Build the eo-learn workflow producing a median image and a land-cover mask.

    The workflow chains four tasks: download of the ``TRUE_COLOR`` layer,
    rasterisation of the land-cover vectors, computation of the pixel-wise
    median over the time series, and saving of both results.

    :param resolution: Pixel size; it is formatted as ``'<resolution>m'`` and
        used for both the x and the y resolution of the input task.
    :param land_cover_data: Vector input handed to ``VectorToRaster``.
    :param output_bucket: Destination handed to ``SaveToGcp``.
    :return: Tuple ``(workflow, input_task, save_task)``.
    """
    # Maximum allowed cloud cover of original ESA tiles
    max_cloud_cover = 0.2
    pixel_size = f'{resolution}m'

    true_color = (FeatureType.DATA, 'TRUE_COLOR')
    median_pixel = (FeatureType.DATA_TIMELESS, 'MEDIAN_PIXEL')
    land_cover = (FeatureType.MASK_TIMELESS, 'LAND_COVER')

    # Task to get S2 L2A images
    input_task = S2L2AWCSInput(
        layer='TRUE_COLOR',
        resx=pixel_size,
        resy=pixel_size,
        maxcc=max_cloud_cover,
    )

    # Task to rasterize ground-truth from Corine Land Cover 2018; the raster
    # shape is taken from the IS_DATA mask.
    rasterization_task = VectorToRaster(
        land_cover_data,
        land_cover,
        values_column='LABEL_ID',
        raster_shape=(FeatureType.MASK, 'IS_DATA'),
        raster_dtype=np.uint8,
    )

    # Task to compute pixel-wise median values over the time series
    get_median_pixel_task = MedianPixel(true_color, feature_out=median_pixel)

    save_task = SaveToGcp(median_pixel, land_cover, output_bucket)

    # Putting workflow together
    workflow = LinearWorkflow(
        input_task,
        rasterization_task,
        get_median_pixel_task,
        save_task,
    )
    return workflow, input_task, save_task
def test_transformation_back(self):
    """Rasterise each reversible case's vector feature again and compare with the original raster.

    Only test cases whose ``test_reverse`` flag is truthy are exercised. For
    each of them the vector feature stored in the case result is rasterised
    into a ``<name>_NEW`` feature, using the original raster feature as the
    shape reference, and the new raster must equal the original one.
    """
    reversible_cases = (case for case in self.test_cases if case.test_reverse)

    for case in reversible_cases:
        with self.subTest(msg=f'Test case {case.name}'):
            reference_feature = case.feature[:2]
            new_type = case.feature[0]
            new_name = f'{case.feature[1]}_NEW'

            rasterize_back = VectorToRaster(
                case.vector_feature,
                (new_type, new_name),
                values_column=case.task.values_column,
                raster_shape=reference_feature,
            )
            eopatch = rasterize_back(case.result)

            recreated = eopatch[new_type][new_name]
            original = eopatch[reference_feature[0]][reference_feature[1]]

            self.assertTrue(
                np.array_equal(recreated, original),
                msg='Old and new raster features should be the same',
            )
def lulc_arr(lulc_mask, lulc_codes):
    """Build one ``VectorToRaster`` task per LULC code.

    The vector dataset is read once and, for every code, the rows whose
    ``lulcid`` equals that code are selected. Each selection gets its own
    task that burns the code into the shared ``LULC`` timeless mask
    (``write_to_existing=True``). No rasterisation happens here; the tasks
    are only constructed and returned.

    :param lulc_mask: Path or source accepted by ``geopandas.read_file``.
    :param lulc_codes: Iterable of code values found in the ``lulcid`` column.
        It is traversed exactly once, so one-shot iterables are supported.
    :return: List of ``VectorToRaster`` tasks in the order of ``lulc_codes``.
    """
    land_cover = gpd.read_file(lulc_mask)

    # The raster shape is taken from the IS_VALID mask.
    shape_feature = (FeatureType.MASK, 'IS_VALID')
    target_feature = (FeatureType.MASK_TIMELESS, 'LULC')

    def _rows_with_code(code):
        # reset_index returns a new frame, so the filtered selection is not
        # mutated in place.
        return land_cover[land_cover.lulcid == code].reset_index(drop=True)

    return [
        VectorToRaster(
            raster_feature=target_feature,
            vector_input=_rows_with_code(code),
            values=code,
            raster_shape=shape_feature,
            write_to_existing=True,
            raster_dtype=np.uint8,
        )
        for code in lulc_codes
    ]
bands_feature=(FeatureType.DATA, 'BANDS'), resolution=20, maxcc=0.5, bands=['B02', 'B03', 'B04', 'B08'], additional_data=[(FeatureType.MASK, 'dataMask', 'IS_DATA'), (FeatureType.MASK, 'CLM')], config=config) calculate_ndwi = NormalizedDifferenceIndexTask( (FeatureType.DATA, 'BANDS'), (FeatureType.DATA, 'NDWI'), (1, 3)) dam_gdf = gpd.GeoDataFrame(crs=CRS.WGS84.pyproj_crs(), geometry=[dam_nominal]) add_nominal_water = VectorToRaster( dam_gdf, (FeatureType.MASK_TIMELESS, 'NOMINAL_WATER'), values=1, raster_shape=(FeatureType.MASK, 'IS_DATA'), raster_dtype=np.uint8) add_valid_mask = AddValidDataMaskTask( predicate=calculate_valid_data_mask) add_coverage = AddValidDataCoverage() cloud_coverage_threshold = 0.05 remove_cloudy_scenes = SimpleFilterTask( (FeatureType.MASK, 'VALID_DATA'), ValidDataCoveragePredicate(cloud_coverage_threshold)) water_detection = WaterDetector() # Define the EOWorkflow
maxy = maxy + dely * inflate_bbox dam_bbox = BBox(bbox=[minx, miny, maxx, maxy], crs=CRS.WGS84) input_task = S2L1CWCSInput('BANDS-S2-L1C', resx='20m', resy='20m', maxcc=1., time_difference=datetime.timedelta(hours=2), instance_id=WMS_INSTANCE) add_ndwi = S2L1CWCSInput('NDWI', instance_id=WMS_INSTANCE) gdf = gpd.GeoDataFrame(crs={'init': 'epsg:4326'}, geometry=[dam_nominal]) gdf.plot() add_nominal_water = VectorToRaster( (FeatureType.MASK_TIMELESS, 'NOMINAL_WATER'), gdf, 1, (FeatureType.MASK, 'IS_DATA'), np.uint8) cloud_classifier = get_s2_pixel_cloud_detector(average_over=2, dilation_size=1, all_bands=False) cloud_det = AddCloudMaskTask(cloud_classifier, 'BANDS-S2CLOUDLESS', cm_size_y='60m', cm_size_x='60m', cmask_feature='CLM', cprobs_feature='CLP', instance_id=WMS_INSTANCE) class ValidDataPredicate:
def setUpClass(cls):
    """Create and execute the rasterisation test cases shared by this class.

    Every case pairs a configured ``VectorToRaster`` task with the statistics
    expected of the produced raster (min, max, mean, median, dtype, shape).
    All cases are executed once here; their results stay on the case objects
    stored in ``cls.test_cases``.
    """
    cls.vector_feature = FeatureType.VECTOR_TIMELESS, 'LULC'
    cls.raster_feature = FeatureType.MASK_TIMELESS, 'RASTERIZED_LULC'

    # Feature used as shape reference, and the raster shape expected from it.
    bands_feature = (FeatureType.DATA, 'BANDS-S2-L1C')
    bands_raster_shape = (101, 100, 1)

    # Dataframe-based cases only use polygons with AREA below 10**3.
    lulc_dataframe = EOPatch.load(
        cls.TestCase.TEST_PATCH_FILENAME).vector_timeless['LULC']
    small_polygons = lulc_dataframe[lulc_dataframe['AREA'] < 10**3]
    # Filtering on a name that does not occur yields an empty dataframe.
    no_polygons = small_polygons[
        small_polygons.LULC_NAME == 'some_none_existent_name']

    basic_case = cls.TestCase(
        'basic test',
        VectorToRaster(
            cls.vector_feature,
            cls.raster_feature,
            values_column='LULC_ID',
            raster_shape=bands_feature,
            no_data_value=20,
        ),
        img_min=0, img_max=8, img_mean=2.33267, img_median=2,
        img_dtype=np.uint8, img_shape=bands_raster_shape,
    )

    single_value_case = cls.TestCase(
        'single value filter, fixed shape',
        VectorToRaster(
            cls.vector_feature,
            cls.raster_feature,
            values=8,
            values_column='LULC_ID',
            raster_shape=(50, 50),
            no_data_value=20,
            write_to_existing=True,
            raster_dtype=np.int32,
        ),
        img_min=8, img_max=20, img_mean=19.76, img_median=20,
        img_dtype=np.int32, img_shape=(50, 50, 1),
    )

    multiple_values_case = cls.TestCase(
        'multiple values filter, resolution, all touched',
        VectorToRaster(
            cls.vector_feature,
            cls.raster_feature,
            values=[1, 5],
            values_column='LULC_ID',
            raster_resolution='60m',
            no_data_value=13,
            raster_dtype=np.uint16,
            all_touched=True,
            write_to_existing=False,
        ),
        img_min=1, img_max=13, img_mean=12.7093, img_median=13,
        img_dtype=np.uint16, img_shape=(17, 17, 1),
    )

    custom_resolution_case = cls.TestCase(
        'deprecated parameters, single value, custom resolution',
        VectorToRaster(
            vector_input=small_polygons,
            raster_feature=cls.raster_feature,
            values=14,
            raster_resolution=(32, 15),
            no_data_value=-1,
            raster_dtype=np.int32,
        ),
        img_min=-1, img_max=14, img_mean=-0.8411, img_median=-1,
        img_dtype=np.int32, img_shape=(67, 31, 1),
    )

    empty_data_case = cls.TestCase(
        'empty vector data test',
        VectorToRaster(
            vector_input=no_polygons,
            raster_feature=cls.raster_feature,
            values_column='LULC_ID',
            raster_shape=bands_feature,
            no_data_value=0,
        ),
        img_min=0, img_max=0, img_mean=0, img_median=0,
        img_dtype=np.uint8, img_shape=bands_raster_shape,
    )

    negative_buffer_case = cls.TestCase(
        'negative polygon buffering',
        VectorToRaster(
            vector_input=small_polygons,
            raster_feature=cls.raster_feature,
            values_column='LULC_ID',
            buffer=-2,
            raster_shape=bands_feature,
            no_data_value=0,
        ),
        img_min=0, img_max=8, img_mean=0.0229, img_median=0,
        img_dtype=np.uint8, img_shape=bands_raster_shape,
    )

    positive_buffer_case = cls.TestCase(
        'positive polygon buffering',
        VectorToRaster(
            vector_input=small_polygons,
            raster_feature=cls.raster_feature,
            values_column='LULC_ID',
            buffer=2,
            raster_shape=bands_feature,
            no_data_value=0,
        ),
        img_min=0, img_max=8, img_mean=0.0664, img_median=0,
        img_dtype=np.uint8, img_shape=bands_raster_shape,
    )

    cls.test_cases = [
        basic_case,
        single_value_case,
        multiple_values_case,
        custom_resolution_case,
        empty_data_case,
        negative_buffer_case,
        positive_buffer_case,
    ]

    for prepared_case in cls.test_cases:
        prepared_case.execute()
def test_polygon_overlap(self):
    """Check how the ``overlap_value`` setting affects rasterisation of overlapping polygons.

    Two rectangular test polygons (class 10 in the first row, class 5 in the
    last row) are placed into the first 50 LULC polygons of the example
    patch. The frame is rasterised with several ``overlap_value`` settings
    and pixel counts of individual classes are compared between the results.
    """
    this_dir = os.path.dirname(os.path.realpath(__file__))
    patch = EOPatch.load(
        os.path.join(this_dir, '../../../example_data', 'TestEOPatch'))

    # Two test rectangles derived from the total bounds of the LULC polygons.
    min_x, min_y, max_x, max_y = (
        patch.vector_timeless['LULC'].total_bounds[i] for i in range(4))
    test_bounds1 = min_x + 500, min_y + 1000, max_x - 1450, max_y - 1650
    test_bounds2 = min_x + 300, min_y + 1400, max_x - 1750, max_y - 1300

    dframe = patch.vector_timeless['LULC'][0:50]

    # The first row becomes a test polygon of class 10 ...
    first_row = dframe.index[0]
    dframe.at[first_row, 'LULC_ID'] = 10
    dframe.at[first_row, 'geometry'] = Polygon.from_bounds(*test_bounds1)

    # ... and the last row a test polygon of class 5.
    last_row = dframe.index[-1]
    dframe.at[last_row, 'LULC_ID'] = 5
    dframe.at[last_row, 'geometry'] = Polygon.from_bounds(*test_bounds2)

    patch.vector_timeless['TEST'] = dframe

    shape_feature = FeatureType.DATA, 'BANDS-S2-L1C'

    def rasterize(eopatch, vector_data, mask_name, **task_kwargs):
        # Burn the LULC_ID of `vector_data` into the named timeless mask.
        task = VectorToRaster(
            vector_data,
            (FeatureType.MASK_TIMELESS, mask_name),
            values_column='LULC_ID',
            raster_shape=shape_feature,
            **task_kwargs)
        return task(eopatch)

    # Reference without the two test polygons, i.e. without the added overlap.
    patch = rasterize(patch, dframe[1:-1], 'OVERLAP_0', overlap_value=5)
    # Overlapping polygons, intersections not treated specially.
    patch = rasterize(patch, dframe, 'OVERLAP_1', overlap_value=None)
    # Overlapping polygons with overlap_value=0.
    patch = rasterize(patch, dframe, 'OVERLAP_2', overlap_value=0)
    # Overlapping polygons with overlap_value=7 (rendered, but not asserted on below).
    patch = rasterize(patch, dframe, 'OVERLAP_3', overlap_value=7)
    # The two test polygons rendered on their own, for comparison in the asserts.
    patch = rasterize(patch, dframe[:1], 'TEST_BBOX1')
    patch = rasterize(patch, dframe[-1:], 'TEST_BBOX2')

    masks = patch.mask_timeless
    bbox1 = masks['TEST_BBOX1']
    bbox2 = masks['TEST_BBOX2']
    overlap0 = masks['OVERLAP_0']
    overlap1 = masks['OVERLAP_1']
    overlap2 = masks['OVERLAP_2']

    count = np.count_nonzero
    bbox1_pixels = count(bbox1)
    bbox2_pixels = count(bbox2)

    # Class 4 is expected to lose pixels once the test polygons are added.
    self.assertTrue(count(overlap0 == 4) > count(overlap1 == 4))

    # Class 2 is expected to keep the same pixel count.
    self.assertTrue(count(overlap0 == 2) == count(overlap1 == 2))

    # Class 10 (TEST_BBOX1, first row) is expected to be partly covered by other classes.
    self.assertTrue(bbox1_pixels > count(overlap1 == 10))

    # Class 5 (TEST_BBOX2, last row) is expected to keep all of its pixels.
    self.assertTrue(bbox2_pixels == count(overlap1 == 5))

    # With overlap_value=0 every class is expected to lose its intersected pixels.
    self.assertTrue(bbox1_pixels > count(overlap2 == 10))
    self.assertTrue(bbox2_pixels > count(overlap2 == 5))
    self.assertTrue(count(overlap0 == 4) > count(overlap2 == 4))

    # Class 2 is expected to disappear completely.
    self.assertTrue(count(overlap2 == 2) == 0)
def load_LPIS(country, year, path, no_patches):
    """Add the LPIS vector feature of a country and year to a series of stored EOPatches.

    For every patch folder ``eopatch_0`` .. ``eopatch_{no_patches - 1}`` under
    ``<path>/<country>/`` the workflow loads the patch, adds the LPIS vector
    feature from Geopedia and saves the patch back to the same folder.

    :param country: Country key used for the patch folder, the Geopedia layer
        lookup and the crop mapping.
    :param year: Year used in the feature name ``LPIS_<year>`` and in the layer
        lookup.
    :param path: Base directory containing one sub-folder per country.
    :param no_patches: Number of consecutive ``eopatch_<i>`` folders to process.
    :return: None. Execution logs are written to the ``ExecutionLogs`` folder.
    """
    patch_location = f'{path}/{country}/'
    load = LoadFromDisk(patch_location)

    # Patches are saved where they were loaded from. exist_ok replaces the
    # earlier isdir() check, which was open to a check-then-create race.
    os.makedirs(patch_location, exist_ok=True)
    save = SaveToDisk(
        patch_location,
        overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    lpis_name = f'LPIS_{year}'
    lpis_vector = (FeatureType.VECTOR_TIMELESS, lpis_name)

    # Only the crop mapping is used below.
    _, crops_to_number = create_mapping(country)

    layer_id = GEOPEDIA_LPIS_LAYERS[f'{country}_LPIS_{year}']
    year_column = GEOPEDIA_LPIS_YEAR_NAME[country]
    year_filter = (year_column, year) if year_column is not None else None

    add_lpis = AddGeopediaVectorFeature(
        lpis_vector,
        layer=layer_id,
        year_filter=year_filter,
        drop_duplicates=True)

    area_ratio = AddAreaRatio(
        lpis_vector,
        (FeatureType.SCALAR_TIMELESS, 'FIELD_AREA_RATIO'))
    fixlpis = FixLPIS(feature=lpis_name, country=country)

    # NOTE(review): NaN is not representable in an int16 raster -- confirm how
    # VectorToRaster treats this no_data_value.
    rasterize = VectorToRaster(
        vector_input=lpis_vector,
        raster_feature=(FeatureType.MASK_TIMELESS, lpis_name),
        values=None,
        values_column='GROUP',
        raster_shape=(FeatureType.DATA, 'BANDS'),
        raster_dtype=np.int16,
        no_data_value=np.nan)

    add_group = AddGroup(crops_to_number, lpis_name)
    remove_dtf = RemoveFeature(FeatureType.VECTOR_TIMELESS, lpis_name)

    # These tasks are handed to WorkflowExclude instead of being listed in the
    # workflow directly.
    exclude = WorkflowExclude(
        area_ratio, fixlpis, add_group, rasterize, remove_dtf)

    workflow = LinearWorkflow(load, add_lpis, exclude, save)

    # One argument set per patch: load and save address the same folder.
    execution_args = [
        {
            load: {'eopatch_folder': f'eopatch_{i}'},
            save: {'eopatch_folder': f'eopatch_{i}'},
        }
        for i in range(no_patches)
    ]

    executor = EOExecutor(
        workflow,
        execution_args,
        save_logs=True,
        logs_folder='ExecutionLogs')

    # Runs with the executor's defaults. An earlier revision called
    # run(workers=None, multiprocess=True) to use multiple processes.
    executor.run()