Ejemplo n.º 1
0
def get_gsaa_to_eopatch_workflow(config: GsaaToEopatchConfig) -> EOWorkflow:
    """Assemble the workflow that adds vector-derived features to stored EOPatches.

    The tasks run in this order: load the patch, run ``DB2Vector`` to produce
    the vector feature, rasterise that feature into the extent feature, derive
    the boundary and distance features from the extent, and finally save the
    four features back to the location the patch was loaded from.

    :param config: Supplies the S3 bucket and folder, the database connection
        settings, the feature names and the rasterisation parameters read below.
    :return: A ``LinearWorkflow`` chaining the six tasks.
    """
    # Configuration carrying the AWS credentials; shared by the load and save tasks.
    sh_config = set_sh_config(config)

    # Patches are read from and written back to the same S3 prefix.
    patches_location = f's3://{config.bucket_name}/{config.eopatches_folder}'

    load_task = LoadTask(path=patches_location, config=sh_config)

    # Adds the original vectors to the patch.
    vec2vec = DB2Vector(
        database=config.database,
        user=config.user,
        password=config.password,
        host=config.host,
        port=config.port,
        crs=config.crs,
        vector_output_feature=config.vector_feature,
    )

    # Extent mask: the vector feature burnt in with the constant value 1.
    # NOTE(review): raster_shape is passed as (width, height) -- confirm this
    # is the order VectorToRaster expects.
    vec2ras = VectorToRaster(
        config.vector_feature,
        config.extent_feature,
        values=1,
        raster_shape=(config.width, config.height),
        no_data_value=config.no_data_value,
        buffer=config.buffer_poly,
        write_to_existing=False,
    )

    # Boundary mask derived from the extent mask with a disk structuring element.
    ras2bound = Extent2Boundary(
        config.extent_feature,
        config.boundary_feature,
        structure=disk(config.disk_radius),
    )

    # Normalised distance feature derived from the extent mask.
    ras2dist = Extent2Distance(
        config.extent_feature,
        config.distance_feature,
        normalize=True,
    )

    # Only the newly created features are written back.
    new_features = [
        config.vector_feature,
        config.extent_feature,
        config.boundary_feature,
        config.distance_feature,
    ]
    save_task = SaveTask(
        path=patches_location,
        features=new_features,
        overwrite_permission=OverwritePermission.OVERWRITE_FEATURES,
        config=sh_config,
    )

    tasks = (load_task, vec2vec, vec2ras, ras2bound, ras2dist, save_task)
    return LinearWorkflow(*tasks)
Ejemplo n.º 2
0
def create_workflow(resolution, land_cover_data, output_bucket):
    """Assemble the eo-learn workflow and return it with its first and last task.

    The workflow requests the 'TRUE_COLOR' layer, rasterises ``land_cover_data``
    into a land-cover mask, computes a median-pixel feature from the
    'TRUE_COLOR' data and hands both results to ``SaveToGcp``.

    :param resolution: Pixel size; formatted as ``'<resolution>m'`` and used
        for both ``resx`` and ``resy``.
    :param land_cover_data: Vector input passed to ``VectorToRaster``.
    :param output_bucket: Passed through to ``SaveToGcp``.
    :return: Tuple ``(workflow, input_task, save_task)``.
    """
    pixel_size = f'{resolution}m'
    land_cover_feature = (FeatureType.MASK_TIMELESS, 'LAND_COVER')
    median_feature = (FeatureType.DATA_TIMELESS, 'MEDIAN_PIXEL')

    # S2 L2A imagery; maxcc is the maximum allowed cloud cover of the
    # original ESA tiles.
    input_task = S2L2AWCSInput(
        layer='TRUE_COLOR',
        resx=pixel_size,
        resy=pixel_size,
        maxcc=0.2,
    )

    # Ground truth (Corine Land Cover 2018) rasterised from the 'LABEL_ID'
    # column; the raster shape is taken from the IS_DATA mask.
    rasterization_task = VectorToRaster(
        land_cover_data,
        land_cover_feature,
        values_column='LABEL_ID',
        raster_shape=(FeatureType.MASK, 'IS_DATA'),
        raster_dtype=np.uint8,
    )

    # Pixel-wise median over the time series.
    get_median_pixel_task = MedianPixel(
        (FeatureType.DATA, 'TRUE_COLOR'),
        feature_out=median_feature,
    )

    save_task = SaveToGcp(median_feature, land_cover_feature, output_bucket)

    workflow = LinearWorkflow(
        input_task,
        rasterization_task,
        get_median_pixel_task,
        save_task,
    )
    return workflow, input_task, save_task
Ejemplo n.º 3
0
    def test_transformation_back(self):
        """Rasterise each reversible case's vector result and compare it with the source raster.

        Cases whose ``test_reverse`` flag is falsy are skipped. For the others
        the vector feature is rasterised into a ``<name>_NEW`` feature, taking
        the shape from the original raster feature, and both arrays must match.
        """
        for case in self.test_cases:
            if not case.test_reverse:
                continue

            with self.subTest(msg='Test case {}'.format(case.name)):
                old_raster_feature = case.feature[:2]
                new_raster_feature = case.feature[0], '{}_NEW'.format(case.feature[1])

                back_to_raster = VectorToRaster(
                    case.vector_feature,
                    new_raster_feature,
                    values_column=case.task.values_column,
                    raster_shape=old_raster_feature,
                )
                eop = back_to_raster(case.result)

                new_type, new_name = new_raster_feature
                old_type, old_name = old_raster_feature
                rasters_match = np.array_equal(eop[new_type][new_name],
                                               eop[old_type][old_name])
                self.assertTrue(rasters_match,
                                msg='Old and new raster features should be the same')
Ejemplo n.º 4
0
def lulc_arr(lulc_mask, lulc_codes):
    """Build one ``VectorToRaster`` task per LULC code.

    The vector file is read once. For every code, the rows whose ``lulcid``
    equals that code are selected and wrapped in a task that burns the code
    into the ``(MASK_TIMELESS, 'LULC')`` feature. No rasterisation happens
    here; the tasks are only constructed.

    :param lulc_mask: Path (or other source accepted by ``gpd.read_file``) of
        the LULC vector dataset.
    :param lulc_codes: Iterable of LULC codes. It is consumed in a single
        pass, so one-shot iterators and generators work as well as lists.
    :return: List of ``VectorToRaster`` tasks, one per code, in input order.
    """
    land_cover = gpd.read_file(lulc_mask)

    # Raster shape is taken from the existing IS_VALID mask.
    rshape = (FeatureType.MASK, 'IS_VALID')

    land_cover_task_array = []
    for val in lulc_codes:
        # reset_index returns a new frame, so the filtered selection is never
        # modified in place.
        polygons = land_cover[land_cover.lulcid == val].reset_index(drop=True)
        land_cover_task_array.append(
            VectorToRaster(raster_feature=(FeatureType.MASK_TIMELESS, 'LULC'),
                           vector_input=polygons,
                           values=val,
                           raster_shape=rshape,
                           # every task writes into the same LULC feature
                           write_to_existing=True,
                           raster_dtype=np.uint8))

    return land_cover_task_array
            bands_feature=(FeatureType.DATA, 'BANDS'),
            resolution=20,
            maxcc=0.5,
            bands=['B02', 'B03', 'B04', 'B08'],
            additional_data=[(FeatureType.MASK, 'dataMask', 'IS_DATA'),
                             (FeatureType.MASK, 'CLM')],
            config=config)

        calculate_ndwi = NormalizedDifferenceIndexTask(
            (FeatureType.DATA, 'BANDS'), (FeatureType.DATA, 'NDWI'), (1, 3))

        dam_gdf = gpd.GeoDataFrame(crs=CRS.WGS84.pyproj_crs(),
                                   geometry=[dam_nominal])
        add_nominal_water = VectorToRaster(
            dam_gdf, (FeatureType.MASK_TIMELESS, 'NOMINAL_WATER'),
            values=1,
            raster_shape=(FeatureType.MASK, 'IS_DATA'),
            raster_dtype=np.uint8)

        add_valid_mask = AddValidDataMaskTask(
            predicate=calculate_valid_data_mask)
        add_coverage = AddValidDataCoverage()

        cloud_coverage_threshold = 0.05
        remove_cloudy_scenes = SimpleFilterTask(
            (FeatureType.MASK, 'VALID_DATA'),
            ValidDataCoveragePredicate(cloud_coverage_threshold))

        water_detection = WaterDetector()

        # Define the EOWorkflow
Ejemplo n.º 6
0
# Shift the upper y bound by the fraction ``inflate_bbox`` of ``dely``.
maxy = maxy + dely * inflate_bbox

# Bounding box built from the min/max coordinates, declared as WGS84.
dam_bbox = BBox(bbox=[minx, miny, maxx, maxy], crs=CRS.WGS84)

# Request for the 'BANDS-S2-L1C' layer at 20 m x 20 m with maxcc=1.
input_task = S2L1CWCSInput('BANDS-S2-L1C',
                           resx='20m',
                           resy='20m',
                           maxcc=1.,
                           time_difference=datetime.timedelta(hours=2),
                           instance_id=WMS_INSTANCE)
# Second request against the same instance, for the 'NDWI' layer.
add_ndwi = S2L1CWCSInput('NDWI', instance_id=WMS_INSTANCE)

# Single-geometry GeoDataFrame holding ``dam_nominal``, tagged as EPSG:4326.
gdf = gpd.GeoDataFrame(crs={'init': 'epsg:4326'}, geometry=[dam_nominal])
gdf.plot()
# Task burning ``gdf`` with value 1 into the NOMINAL_WATER timeless mask.
# NOTE(review): the positional arguments here are ordered (raster feature,
# vector data, value, shape feature, dtype) -- confirm this matches the
# VectorToRaster signature of the installed eo-learn version.
add_nominal_water = VectorToRaster(
    (FeatureType.MASK_TIMELESS, 'NOMINAL_WATER'), gdf, 1,
    (FeatureType.MASK, 'IS_DATA'), np.uint8)

# Cloud classifier and the task that uses it; the task is configured with
# the 'BANDS-S2CLOUDLESS' layer, 60 m cloud-mask sizes, and the feature
# names 'CLM' (mask) and 'CLP' (probabilities).
cloud_classifier = get_s2_pixel_cloud_detector(average_over=2,
                                               dilation_size=1,
                                               all_bands=False)
cloud_det = AddCloudMaskTask(cloud_classifier,
                             'BANDS-S2CLOUDLESS',
                             cm_size_y='60m',
                             cm_size_x='60m',
                             cmask_feature='CLM',
                             cprobs_feature='CLP',
                             instance_id=WMS_INSTANCE)


class ValidDataPredicate:
Ejemplo n.º 7
0
    def setUpClass(cls):
        """Define the vector-to-raster test cases and execute every one of them.

        Each ``TestCase`` pairs a configured ``VectorToRaster`` task with the
        statistics (min, max, mean, median, dtype, shape) expected for the
        raster it produces.
        """
        cls.vector_feature = FeatureType.VECTOR_TIMELESS, 'LULC'
        cls.raster_feature = FeatureType.MASK_TIMELESS, 'RASTERIZED_LULC'

        # Feature whose shape is reused by every case that does not set an
        # explicit raster shape or resolution.
        bands_shape = (FeatureType.DATA, 'BANDS-S2-L1C')

        # Dataframes passed directly as vector input: the LULC polygons with
        # AREA below 10**3, and a selection on a LULC_NAME value used by the
        # 'empty vector data test' case.
        lulc_dataframe = EOPatch.load(
            cls.TestCase.TEST_PATCH_FILENAME).vector_timeless['LULC']
        small_polygons = lulc_dataframe[lulc_dataframe['AREA'] < 10**3]
        no_polygons = small_polygons[
            small_polygons.LULC_NAME == 'some_none_existent_name']

        cls.test_cases = [
            cls.TestCase(
                'basic test',
                VectorToRaster(cls.vector_feature, cls.raster_feature,
                               values_column='LULC_ID',
                               raster_shape=bands_shape,
                               no_data_value=20),
                img_min=0, img_max=8, img_mean=2.33267, img_median=2,
                img_dtype=np.uint8, img_shape=(101, 100, 1)),
            cls.TestCase(
                'single value filter, fixed shape',
                VectorToRaster(cls.vector_feature, cls.raster_feature,
                               values=8,
                               values_column='LULC_ID',
                               raster_shape=(50, 50),
                               no_data_value=20,
                               write_to_existing=True,
                               raster_dtype=np.int32),
                img_min=8, img_max=20, img_mean=19.76, img_median=20,
                img_dtype=np.int32, img_shape=(50, 50, 1)),
            cls.TestCase(
                'multiple values filter, resolution, all touched',
                VectorToRaster(cls.vector_feature, cls.raster_feature,
                               values=[1, 5],
                               values_column='LULC_ID',
                               raster_resolution='60m',
                               no_data_value=13,
                               raster_dtype=np.uint16,
                               all_touched=True,
                               write_to_existing=False),
                img_min=1, img_max=13, img_mean=12.7093, img_median=13,
                img_dtype=np.uint16, img_shape=(17, 17, 1)),
            cls.TestCase(
                'deprecated parameters, single value, custom resolution',
                VectorToRaster(vector_input=small_polygons,
                               raster_feature=cls.raster_feature,
                               values=14,
                               raster_resolution=(32, 15),
                               no_data_value=-1,
                               raster_dtype=np.int32),
                img_min=-1, img_max=14, img_mean=-0.8411, img_median=-1,
                img_dtype=np.int32, img_shape=(67, 31, 1)),
            cls.TestCase(
                'empty vector data test',
                VectorToRaster(vector_input=no_polygons,
                               raster_feature=cls.raster_feature,
                               values_column='LULC_ID',
                               raster_shape=bands_shape,
                               no_data_value=0),
                img_min=0, img_max=0, img_mean=0, img_median=0,
                img_dtype=np.uint8, img_shape=(101, 100, 1)),
            cls.TestCase(
                'negative polygon buffering',
                VectorToRaster(vector_input=small_polygons,
                               raster_feature=cls.raster_feature,
                               values_column='LULC_ID',
                               buffer=-2,
                               raster_shape=bands_shape,
                               no_data_value=0),
                img_min=0, img_max=8, img_mean=0.0229, img_median=0,
                img_dtype=np.uint8, img_shape=(101, 100, 1)),
            cls.TestCase(
                'positive polygon buffering',
                VectorToRaster(vector_input=small_polygons,
                               raster_feature=cls.raster_feature,
                               values_column='LULC_ID',
                               buffer=2,
                               raster_shape=bands_shape,
                               no_data_value=0),
                img_min=0, img_max=8, img_mean=0.0664, img_median=0,
                img_dtype=np.uint8, img_shape=(101, 100, 1)),
        ]

        for case in cls.test_cases:
            case.execute()
Ejemplo n.º 8
0
    def test_polygon_overlap(self):
        """Check how ``overlap_value`` changes the rasterisation of overlapping polygons.

        Two rectangular test polygons (classes 10 and 5) replace the first and
        last of the first 50 LULC rows. The frame is then rasterised with
        different ``overlap_value`` settings and the per-class pixel counts are
        compared. OVERLAP_3 (``overlap_value=7``) is only rasterised; no
        assertion is made on its content.
        """
        patch_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  '../../../example_data', 'TestEOPatch')
        patch = EOPatch.load(patch_path)

        # create two test bboxes to overlap existing classes
        bounds = patch.vector_timeless['LULC'].total_bounds
        test_bounds1 = (bounds[0] + 500, bounds[1] + 1000,
                        bounds[2] - 1450, bounds[3] - 1650)
        test_bounds2 = (bounds[0] + 300, bounds[1] + 1400,
                        bounds[2] - 1750, bounds[3] - 1300)

        # Explicit copy: the rows are modified below, and writing through a
        # slice of the patch's dataframe is view/copy-ambiguous in pandas.
        dframe = patch.vector_timeless['LULC'][0:50].copy()

        # override 0th row with a test polygon of class 10
        test_row = dframe.index[0]
        dframe.at[test_row, 'LULC_ID'] = 10
        dframe.at[test_row, 'geometry'] = Polygon.from_bounds(*test_bounds1)

        # override the last row with a test polygon of class 5
        test_row = dframe.index[-1]
        dframe.at[test_row, 'LULC_ID'] = 5
        dframe.at[test_row, 'geometry'] = Polygon.from_bounds(*test_bounds2)

        patch.vector_timeless['TEST'] = dframe

        shape_feature = FeatureType.DATA, 'BANDS-S2-L1C'

        # no overlap: both test polygons are left out
        patch = VectorToRaster(dframe[1:-1],
                               (FeatureType.MASK_TIMELESS, 'OVERLAP_0'),
                               values_column='LULC_ID',
                               raster_shape=shape_feature,
                               overlap_value=5)(patch)

        # overlap without taking intersection into account
        patch = VectorToRaster(dframe,
                               (FeatureType.MASK_TIMELESS, 'OVERLAP_1'),
                               values_column='LULC_ID',
                               raster_shape=shape_feature,
                               overlap_value=None)(patch)

        # overlap with intersections set to 0
        patch = VectorToRaster(dframe,
                               (FeatureType.MASK_TIMELESS, 'OVERLAP_2'),
                               values_column='LULC_ID',
                               raster_shape=shape_feature,
                               overlap_value=0)(patch)

        # overlap with intersections set to class 7
        patch = VectorToRaster(dframe,
                               (FeatureType.MASK_TIMELESS, 'OVERLAP_3'),
                               values_column='LULC_ID',
                               raster_shape=shape_feature,
                               overlap_value=7)(patch)

        # separately render bboxes for comparisons in asserts
        patch = VectorToRaster(dframe[:1],
                               (FeatureType.MASK_TIMELESS, 'TEST_BBOX1'),
                               values_column='LULC_ID',
                               raster_shape=shape_feature)(patch)
        patch = VectorToRaster(dframe[-1:],
                               (FeatureType.MASK_TIMELESS, 'TEST_BBOX2'),
                               values_column='LULC_ID',
                               raster_shape=shape_feature)(patch)

        bbox1 = patch.mask_timeless['TEST_BBOX1']  # class 10 rendered alone
        bbox2 = patch.mask_timeless['TEST_BBOX2']  # class 5 rendered alone

        overlap0 = patch.mask_timeless['OVERLAP_0']
        overlap1 = patch.mask_timeless['OVERLAP_1']
        overlap2 = patch.mask_timeless['OVERLAP_2']

        # 4 gets partially covered by 5
        self.assertGreater(np.count_nonzero(overlap0 == 4),
                           np.count_nonzero(overlap1 == 4))
        # 2 doesn't get covered, stays the same
        self.assertEqual(np.count_nonzero(overlap0 == 2),
                         np.count_nonzero(overlap1 == 2))
        # 10 is bbox1 and it gets covered by other classes
        self.assertGreater(np.count_nonzero(bbox1),
                           np.count_nonzero(overlap1 == 10))
        # 5 is bbox2 and it is rendered on top of all others, so it doesn't get covered
        self.assertEqual(np.count_nonzero(bbox2),
                         np.count_nonzero(overlap1 == 5))

        # all classes have their parts intersected, so the sum should reduce
        self.assertGreater(np.count_nonzero(bbox1),
                           np.count_nonzero(overlap2 == 10))
        self.assertGreater(np.count_nonzero(bbox2),
                           np.count_nonzero(overlap2 == 5))
        self.assertGreater(np.count_nonzero(overlap0 == 4),
                           np.count_nonzero(overlap2 == 4))
        # 2 gets covered completely
        self.assertEqual(np.count_nonzero(overlap2 == 2), 0)
Ejemplo n.º 9
0
def load_LPIS(country, year, path, no_patches):
    """Add LPIS data for ``country`` and ``year`` to the EOPatches stored under ``path``.

    For patches ``eopatch_0`` .. ``eopatch_{no_patches - 1}`` located in
    ``<path>/<country>/`` the workflow loads the patch, adds the LPIS vector
    feature from Geopedia, runs the area-ratio, fix, group, rasterise and
    vector-removal tasks wrapped in ``WorkflowExclude``, and saves the patch
    back to the same folder. Execution logs go to ``ExecutionLogs``.

    :param country: Country key used for the folder name, the Geopedia layer
        lookup, the year-filter lookup and the crop mapping.
    :param year: Year used in the ``LPIS_<year>`` feature name and filter.
    :param path: Base folder containing one sub-folder per country.
    :param no_patches: Number of consecutively numbered patches to process.
    :return: None
    """
    patch_location = path + '/{}/'.format(country)
    load = LoadFromDisk(patch_location)

    # Patches are saved where they were loaded from. exist_ok replaces a
    # separate isdir() check and so avoids the check-then-create race.
    os.makedirs(patch_location, exist_ok=True)
    save = SaveToDisk(patch_location,
                      overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

    name_of_feature = f'LPIS_{year}'
    lpis_vector = (FeatureType.VECTOR_TIMELESS, name_of_feature)

    # Only the crop mapping is needed here; the group mapping is discarded.
    _, crops_to_number = create_mapping(country)

    layer_id = GEOPEDIA_LPIS_LAYERS[f'{country}_LPIS_{year}']

    # A year filter is applied only when a year field name is registered
    # for the country (the registry holds None otherwise).
    year_field = GEOPEDIA_LPIS_YEAR_NAME[country]
    year_filter = (year_field, year) if year_field is not None else None

    add_lpis = AddGeopediaVectorFeature(lpis_vector,
                                        layer=layer_id,
                                        year_filter=year_filter,
                                        drop_duplicates=True)
    area_ratio = AddAreaRatio(lpis_vector,
                              (FeatureType.SCALAR_TIMELESS, 'FIELD_AREA_RATIO'))
    fixlpis = FixLPIS(feature=name_of_feature, country=country)

    # NOTE(review): no_data_value is NaN while raster_dtype is int16, which
    # cannot represent NaN -- confirm the resulting no-data pixels are what
    # downstream code expects. Left unchanged to keep the output identical.
    rasterize = VectorToRaster(vector_input=lpis_vector,
                               raster_feature=(FeatureType.MASK_TIMELESS,
                                               name_of_feature),
                               values=None,
                               values_column='GROUP',
                               raster_shape=(FeatureType.DATA, 'BANDS'),
                               raster_dtype=np.int16,
                               no_data_value=np.nan)

    add_group = AddGroup(crops_to_number, name_of_feature)
    remove_dtf = RemoveFeature(FeatureType.VECTOR_TIMELESS, name_of_feature)

    exclude = WorkflowExclude(area_ratio, fixlpis, add_group, rasterize,
                              remove_dtf)

    workflow = LinearWorkflow(load, add_lpis, exclude, save)

    # One argument set per patch: load and save both address the same folder.
    execution_args = [
        {
            load: {'eopatch_folder': f'eopatch_{i}'},
            save: {'eopatch_folder': f'eopatch_{i}'},
        }
        for i in range(no_patches)
    ]

    executor = EOExecutor(workflow,
                          execution_args,
                          save_logs=True,
                          logs_folder='ExecutionLogs')
    # Runs with the executor's default worker settings; pass ``workers`` /
    # ``multiprocess`` to ``run`` to control parallelism.
    executor.run()