Esempio n. 1
0
def test_cog_mem_dask(tmpdir):
    """Check in-memory COG generation for dask-backed input.

    Both ``write_cog(..., ":mem:")`` and ``to_cog(...)`` are expected to
    hand back a ``Delayed`` that computes to ``bytes``. Those bytes are
    dumped to disk and read back to confirm that pixels, geobox and nodata
    match the source.
    """
    out_dir = Path(str(tmpdir))
    src, _ = gen_test_data(out_dir, dask=True)

    def check_roundtrip(delayed_cog, file_name):
        # The result has to stay lazy until it is explicitly computed.
        assert isinstance(delayed_cog, Delayed)
        payload = delayed_cog.compute()
        assert isinstance(payload, bytes)

        target = out_dir / file_name
        with open(str(target), "wb") as sink:
            sink.write(payload)

        loaded = rio_slurp_xarray(target)
        np.testing.assert_array_equal(loaded.values, src.values)
        assert loaded.geobox == src.geobox
        assert loaded.nodata == src.nodata

    # write to memory 1
    check_roundtrip(write_cog(src, ":mem:"), "cog1.tiff")

    # write to memory 2
    check_roundtrip(to_cog(src), "cog2.tiff")
def add_chirps(
    urls: Dict[Any, Any],
    ds: xr.Dataset,
    era: str,
    training: bool = True,
    dask_chunks: Dict[Any, Any] = {
        "x": "auto",
        "y": "auto"
    },
) -> Optional[xr.Dataset]:
    """
    Attach a CHIRPS rainfall layer to ``ds`` and suffix every band with ``era``.

    :param urls: mapping whose ``"chirps"`` entry holds the rainfall rasters;
        index 0 is read for ``"_S1"`` and index 1 for ``"_S2"``
    :param ds: dataset to extend; its geobox is the reprojection target and
        a ``"rain"`` variable is assigned onto it before the bands are renamed
    :param era: season suffix, either ``"_S1"`` or ``"_S2"``
    :param training: when True the whole raster is reprojected directly;
        otherwise it is first clipped to a buffered bounding box of ``ds``,
        NaNs are filled with the mean of the clip, and the result is chunked
    :param dask_chunks: chunking applied to the rainfall layer when
        ``training`` is False (the default dict is only read, never mutated)
    :return: the renamed dataset, or None when the rainfall raster holds
        fewer than two elements
    :raises ValueError: if ``era`` is not one of the supported suffixes
    """
    # load rainfall climatology
    if era == "_S1":
        chirps = rio_slurp_xarray(urls["chirps"][0])
    elif era == "_S2":
        chirps = rio_slurp_xarray(urls["chirps"][1])
    else:
        # Previously an unknown era fell through and failed later with an
        # UnboundLocalError on `chirps`; fail early with a clear message.
        raise ValueError(
            f"Unsupported era {era!r}; expected '_S1' or '_S2'")

    # Nothing usable was read: signal that to the caller.
    if chirps.size < 2:
        return None

    if training:
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        ds["rain"] = chirps
    else:
        # Clip CHIRPS to ~ S2 tile boundaries so we can handle NaNs local to S2 tile
        xmin, xmax = ds.x.values[0], ds.x.values[-1]
        ymin, ymax = ds.y.values[0], ds.y.values[-1]
        inProj = Proj("epsg:6933")
        outProj = Proj("epsg:4326")
        xmin, ymin = transform(inProj, outProj, xmin, ymin)
        xmax, ymax = transform(inProj, outProj, xmax, ymax)

        # create lat/lon indexing slices - buffer S2 bbox by 0.05deg
        if (xmin < 0) & (xmax < 0):
            x_slice = list(np.arange(xmin + 0.05, xmax - 0.05, -0.05))
        else:
            x_slice = list(np.arange(xmax - 0.05, xmin + 0.05, 0.05))

        y_slice = list(np.arange(ymin - 0.05, ymax + 0.1, 0.05))

        # index global chirps using buffered s2 tile bbox
        # NOTE(review): longitude is selected with the y-derived values and
        # latitude with the x-derived values; presumably this relies on the
        # transform returning (lat, lon) for epsg:4326 - confirm.
        chirps = assign_crs(
            chirps.sel(longitude=y_slice,
                       latitude=x_slice,
                       method="nearest"))
        # fill any NaNs in CHIRPS with local (s2-tile bbox) mean
        chirps = chirps.fillna(chirps.mean())
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk(dask_chunks)
        ds["rain"] = chirps

    # rename bands to include era (one rename instead of one per band)
    return ds.rename({band: band + era for band in ds.data_vars})
Esempio n. 3
0
def add_chirps(ds, era, training=True, dask_chunks={'x': 'auto', 'y': 'auto'}):
    """
    Attach a CHIRPS rainfall layer to ``ds`` and suffix every band with ``era``.

    :param ds: dataset to extend; its geobox is the reprojection target and
        a ``"rain"`` variable is assigned onto it before the bands are renamed
    :param era: season suffix; ``"_S1"`` reads the jan-jun cumulative raster,
        ``"_S2"`` the jul-dec one
    :param training: when True the whole raster is reprojected directly;
        otherwise it is first clipped to a buffered bounding box of ``ds``,
        NaNs are filled with the mean of the clip, and the result is chunked
    :param dask_chunks: chunking applied to the rainfall layer when
        ``training`` is False (the default dict is only read, never mutated)
    :return: the dataset with every data variable renamed to ``name + era``
    :raises ValueError: if ``era`` is not one of the supported suffixes
    """
    # load rainfall climatology
    if era == "_S1":
        chirps = rio_slurp_xarray(
            "s3://deafrica-input-datasets/rainfall/CHPclim_jan_jun_cumulative_rainfall.tif"
        )
    elif era == "_S2":
        chirps = rio_slurp_xarray(
            "s3://deafrica-input-datasets/rainfall/CHPclim_jul_dec_cumulative_rainfall.tif"
        )
    else:
        # Previously an unknown era fell through and failed later with an
        # UnboundLocalError on `chirps`; fail early with a clear message.
        raise ValueError(
            f"Unsupported era {era!r}; expected '_S1' or '_S2'")

    if training:
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        ds["rain"] = chirps

    else:
        # Clip CHIRPS to ~ S2 tile boundaries so we can handle NaNs local to S2 tile
        xmin, xmax = ds.x.values[0], ds.x.values[-1]
        ymin, ymax = ds.y.values[0], ds.y.values[-1]
        inProj = Proj("epsg:6933")
        outProj = Proj("epsg:4326")
        xmin, ymin = transform(inProj, outProj, xmin, ymin)
        xmax, ymax = transform(inProj, outProj, xmax, ymax)

        # create lat/lon indexing slices - buffer S2 bbox by 0.05deg
        if (xmin < 0) & (xmax < 0):
            x_slice = list(np.arange(xmin + 0.05, xmax - 0.05, -0.05))
        else:
            x_slice = list(np.arange(xmax - 0.05, xmin + 0.05, 0.05))

        if (ymin < 0) & (ymax < 0):
            y_slice = list(np.arange(ymin + 0.05, ymax - 0.05, -0.05))
        else:
            y_slice = list(np.arange(ymin - 0.05, ymax + 0.05, 0.05))

        # index global chirps using buffered s2 tile bbox
        # NOTE(review): longitude is selected with the y-derived values and
        # latitude with the x-derived values; presumably this relies on the
        # transform returning (lat, lon) for epsg:4326 - confirm.
        chirps = assign_crs(
            chirps.sel(longitude=y_slice, latitude=x_slice, method="nearest"))

        # fill any NaNs in CHIRPS with local (s2-tile bbox) mean
        chirps = chirps.fillna(chirps.mean())
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk(dask_chunks)
        ds["rain"] = chirps

    # rename bands to include era (one rename instead of one per band)
    return ds.rename({band: band + era for band in ds.data_vars})
Esempio n. 4
0
def post_processing(predicted: xr.Dataset, ) -> xr.DataArray:
    """
    Zero out predictions that fall under any of the post-processing masks.

    Simplified from production code: segmentation, probability and mode
    calculations are skipped.

    :param predicted: dataset exposing a ``Predictions`` variable and a geobox
    :return: the masked predictions cast to ``int8``
    """
    dc = Datacube(app='whatever')

    labels = predicted.Predictions

    # mask out classification beyond AEZ boundary
    aez = gpd.read_file('data/Southern.shp').to_crs('epsg:6933')
    with HiddenPrints():
        inside_aez = xr_rasterize(aez, predicted)
    labels = labels.where(inside_aez, 0)

    # mask with WDPA (any non-zero pixel counts as protected)
    protected = rio_slurp_xarray(
        "s3://deafrica-input-datasets/protected_areas/WDPA_southern.tif",
        gbox=predicted.geobox,
    ).astype(bool)
    labels = labels.where(~protected, 0)

    # mask with WOFS: drop pixels whose frequency exceeds the threshold
    wofs_summary = dc.load(
        product='wofs_ls_summary_annual',
        like=predicted.geobox,
        time='2019',
    )
    is_water = wofs_summary.frequency > 0.2
    labels = labels.where(~is_water, 0)

    # mask steep slopes
    too_steep = rio_slurp_xarray(
        "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif",
        gbox=predicted.geobox,
    ) > 50
    labels = labels.where(~too_steep, 0)

    # mask where the elevation is above 3600m
    dem = dc.load(product='dem_srtm', like=predicted.geobox)
    too_high = dem.elevation > 3600
    labels = labels.where(~too_high.squeeze(), 0)

    # set dtype
    return labels.astype(np.int8)
Esempio n. 5
0
def test_cog_file(tmpdir, opts):
    """Exercise file-based COG writing with the supplied writer options.

    Covers a single-band round trip, a two-band write through ``_write_cog``,
    rejection of 1-d input, an odd-sized image with explicit ``nodata=``,
    and the warning expected for ``blocksize=50``.
    """
    root = Path(str(tmpdir))
    src, _ = gen_test_data(root)

    # write to file
    cog_path = root / "cog.tif"
    written = write_cog(src, cog_path, **opts)
    assert isinstance(written, Path)
    assert written == cog_path
    assert written.exists()

    loaded = rio_slurp_xarray(cog_path)
    np.testing.assert_array_equal(loaded.values, src.values)
    assert loaded.geobox == src.geobox
    assert loaded.nodata == src.nodata

    # two identical bands, no overviews
    two_band_path = root / "cog-2-bands.tif"
    _write_cog(np.stack([src.values, src.values]),
               src.geobox,
               two_band_path,
               overview_levels=[],
               **opts)

    bands, meta = rio_slurp(two_band_path)
    assert meta.gbox == src.geobox
    assert bands.shape == (2, *src.shape)
    for layer in (bands[0], bands[1]):
        np.testing.assert_array_equal(layer, src.values)

    with pytest.raises(ValueError, match="Need 2d or 3d ndarray on input"):
        _write_cog(src.values.ravel(), src.geobox, root / "wontwrite.tif")

    # sizes that are not multiples of 16
    # also check that supplying `nodata=` doesn't break things
    odd = src[:23, :63]
    odd_path = root / "cog_odd.tif"
    written = write_cog(odd,
                        odd_path,
                        nodata=odd.attrs["nodata"],
                        **opts)
    assert isinstance(written, Path)
    assert written == odd_path
    assert written.exists()

    loaded = rio_slurp_xarray(odd_path)
    np.testing.assert_array_equal(loaded.values, odd.values)
    assert loaded.geobox == odd.geobox
    assert loaded.nodata == odd.nodata

    with pytest.warns(UserWarning):
        write_cog(src, root / "cog_badblocksize.tif", blocksize=50)
Esempio n. 6
0
def post_processing(predicted: xr.Dataset) -> xr.DataArray:
    """
    Zero out predictions that fall under any of the post-processing masks.

    :param predicted: The prediction results (must expose ``Predictions``
        and a geobox)
    :return: the masked predictions cast to ``int8``
    """
    dc = Datacube(app='whatever')

    # grab predictions for post process filtering
    labels = predicted.Predictions

    # mask out classification beyond AEZ boundary
    aez = gpd.read_file('data/Western.geojson')
    with HiddenPrints():
        inside_aez = xr_rasterize(aez, predicted)
    labels = labels.where(inside_aez, 0)

    # mask with WDPA (any non-zero pixel counts as protected)
    protected = rio_slurp_xarray(
        "s3://deafrica-input-datasets/protected_areas/WDPA_western.tif",
        gbox=predicted.geobox,
    ).astype(bool)
    labels = labels.where(~protected, 0)

    # mask with WOFS: drop pixels whose frequency exceeds the threshold
    wofs_summary = dc.load(product='ga_ls8c_wofs_2_summary',
                           like=predicted.geobox)
    is_water = wofs_summary.frequency > 0.2
    labels = labels.where(~is_water, 0)

    # mask steep slopes
    too_steep = rio_slurp_xarray(
        "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif",
        gbox=predicted.geobox,
    ) > 35
    labels = labels.where(~too_steep, 0)

    # mask where the elevation is above 3600m
    dem = dc.load(product='dem_srtm', like=predicted.geobox)
    too_high = dem.elevation > 3600
    labels = labels.where(~too_high.squeeze(), 0)

    # set dtype
    return labels.astype(np.int8)
Esempio n. 7
0
def test_cog_file(tmpdir):
    """Exercise file-based COG writing with default options.

    Covers a single-band round trip, a two-band write through ``_write_cog``
    and rejection of 1-d input.
    """
    root = Path(str(tmpdir))
    src, _ = gen_test_data(root)

    # write to file
    cog_path = root / "cog.tif"
    written = write_cog(src, cog_path)
    assert isinstance(written, Path)
    assert written == cog_path
    assert written.exists()

    loaded = rio_slurp_xarray(cog_path)
    np.testing.assert_array_equal(loaded.values, src.values)
    assert loaded.geobox == src.geobox
    assert loaded.nodata == src.nodata

    # two identical bands, no overviews
    two_band_path = root / "cog-2-bands.tif"
    _write_cog(np.stack([src.values, src.values]),
               src.geobox,
               two_band_path,
               overview_levels=[])

    bands, meta = rio_slurp(two_band_path)
    assert meta.gbox == src.geobox
    assert bands.shape == (2, *src.shape)
    for layer in (bands[0], bands[1]):
        np.testing.assert_array_equal(layer, src.values)

    with pytest.raises(ValueError, match="Need 2d or 3d ndarray on input"):
        _write_cog(src.values.ravel(), src.geobox, root / "wontwrite.tif")
Esempio n. 8
0
def test_cog_rgba(tmpdir, use_windowed_writes):
    """Round-trip a 4-band (y, x, band) image through ``write_cog``.

    Also checks that ``_write_cog`` raises ``ValueError`` when the pixel
    array has one row fewer than the geobox passed with it.
    """
    root = Path(str(tmpdir))
    src, _ = gen_test_data(root)

    # Stack the single band four times along a trailing band axis.
    stacked = np.dstack([src.values] * 4)
    rgba = xr.DataArray(
        stacked,
        attrs=src.attrs,
        dims=("y", "x", "band"),
        coords=src.coords,
    )
    assert rgba.geobox == src.geobox
    assert rgba.shape[:2] == rgba.geobox.shape

    written = write_cog(
        rgba,
        root / "cog.tif",
        use_windowed_writes=use_windowed_writes,
    )
    loaded = rio_slurp_xarray(written)

    assert loaded.geobox == rgba.geobox
    assert loaded.shape == rgba.shape
    np.testing.assert_array_equal(loaded.values, rgba.values)

    cropped = rgba.values[1:, :, :]
    with pytest.raises(ValueError):
        _write_cog(
            cropped,
            rgba.geobox,
            ":mem:",
            use_windowed_writes=use_windowed_writes,
        )
Esempio n. 9
0
def gm_mads_two_seasons_production(ds1, ds2):
    """
    Feature layer function for production run of
    eastern crop-mask.

    Builds one feature set per season (``ds1`` -> ``_S1``, ``ds2`` -> ``_S2``),
    merges them with a slope layer and returns the result as squeezed
    ``float32``. Note that the input datasets are rescaled in place.
    """
    rain_files = {
        "_S1": "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc",
        "_S2": "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc",
    }

    def season_features(season_ds, era):
        # normalise every band except sdev and bcdev
        for name in season_ds.data_vars:
            if name in ["sdev", "bcdev"]:
                continue
            season_ds[name] = season_ds[name] / 10000

        feats = calculate_indices(
            season_ds,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        # negative log of the three deviation bands
        for mad in ("sdev", "bcdev", "edev"):
            feats[mad] = -np.log(feats[mad])

        # rainfall climatology for this season, reprojected onto the tile
        rain = assign_crs(xr.open_rasterio(rain_files[era]), crs="epsg:4326")
        feats["rain"] = xr_reproject(rain, season_ds.geobox, "bilinear")

        # tag every band with the season suffix
        for name in feats.data_vars:
            feats = feats.rename({name: name + era})

        return feats

    first = season_features(ds1, era="_S1")
    second = season_features(ds2, era="_S2")

    # slope
    slope_url = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope_ds = rio_slurp_xarray(slope_url,
                                gbox=ds1.geobox).to_dataset(name="slope")

    merged = xr.merge([first, second, slope_ds], compat="override")

    return merged.astype(np.float32).squeeze()
Esempio n. 10
0
def gm_mads_two_seasons_training(query):
    """
    Build the two-season training feature stack for ``query``.

    Loads ``gm_s2_semiannual`` with the given query, derives features for the
    first two time steps via ``common_ops`` and ``add_chirps``, adds a slope
    layer and returns the merged result as squeezed ``float32``.
    """
    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')

    # load S2 geomedian
    ds = dc.load(product='gm_s2_semiannual', **query)

    # split into the two time steps
    first_half = ds.isel(time=0)
    second_half = ds.isel(time=1)

    # create features
    season_1 = add_chirps(common_ops(first_half, era="_S1"), era='_S1')
    season_2 = add_chirps(common_ops(second_half, era="_S2"), era='_S2')

    # add slope
    slope_url = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope_ds = rio_slurp_xarray(slope_url,
                                gbox=ds.geobox).to_dataset(name="slope")

    merged = xr.merge([season_1, season_2, slope_ds], compat="override")

    return merged.astype(np.float32).squeeze()
Esempio n. 11
0
def gm_mads_two_seasons_predict(ds):
    """
    Build the two-season prediction feature stack from a 2019 time series.

    The input is divided by 10000 and split into Jan-Jun and Jul-Dec; each
    half gets geomedian/MAD features, band indices and a rainfall layer, and
    both are merged with a slope layer.

    The original opened a Datacube connection here that was never used;
    it has been removed so the function no longer needs a database.

    :param ds: time series dataset exposing a ``time`` coordinate and a geobox
    :return: merged, squeezed feature dataset chunked 2000 x 2000 in x/y
    """
    tile_chunks = {"x": 2000, "y": 2000}
    rain_files = {
        "_S1": "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc",
        "_S2": "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc",
    }

    ds = ds / 10000
    ds1 = ds.sel(time=slice("2019-01", "2019-06"))
    ds2 = ds.sel(time=slice("2019-07", "2019-12"))

    def fun(ds, era):
        # geomedian and tmads (computed eagerly)
        gm_mads = xr_geomedian_tmad_new(ds).compute()
        gm_mads = calculate_indices(
            gm_mads,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        # negative log of the three deviation bands
        for mad in ("sdev", "bcdev", "edev"):
            gm_mads[mad] = -np.log(gm_mads[mad])
        gm_mads = gm_mads.chunk(tile_chunks)

        # rainfall climatology for this season, reprojected onto the tile
        chirps = assign_crs(xr.open_rasterio(rain_files[era]),
                            crs="epsg:4326")
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        gm_mads["rain"] = chirps.chunk(tile_chunks)

        # tag every band with the season suffix
        return gm_mads.rename(
            {band: band + era for band in gm_mads.data_vars})

    epoch1 = fun(ds1, era="_S1")
    epoch2 = fun(ds2, era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk(tile_chunks)

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.squeeze()
def gm_mads_two_seasons_prediction(
    datasets,
    geobox,
    measurements: List[str],
    urls: Dict[Any, Any],
    dask_chunks: Dict[str, Any] = {
        "x": -1,
        "y": -1
    },
) -> Optional[xr.Dataset]:
    """
    Feature layer function for production run of
    eastern crop-mask. Similar to the training function
    but data is loaded internally, CHIRPS is reprojected differently,
    and dask chunks are used.

    :param datasets: datasets handed to ``load_with_native_transform``
    :param geobox: target geobox for the load
    :param measurements: band names to load
    :param urls: mapping passed on to ``add_chirps``; its ``"slope"`` entry
        is read for the slope layer
    :param dask_chunks: chunking used for the load, the rainfall layers and
        the slope layer (the default dict is only read, never mutated)
    :return: merged ``float32`` feature dataset, or None as soon as
        ``add_chirps`` returns None for either season
    """
    ds = load_with_native_transform(
        datasets,
        geobox=geobox,
        # pass the function itself; wrapping it in a lambda added nothing
        native_transform=drop_nan_nodata,
        bands=measurements,
        chunks=dask_chunks,
        resampling="bilinear",
    )

    # create features, one season at a time
    seasons = []
    for index, era in enumerate(("_S1", "_S2")):
        season_ds = ds.isel(spec=index).drop(["spatial_ref", "spec"])
        epoch = common_ops(season_ds, era=era)
        epoch = add_chirps(urls,
                           epoch,
                           era=era,
                           training=False,
                           dask_chunks=dask_chunks)
        # Compare against None explicitly rather than relying on Dataset
        # truthiness, and stop before doing any further work for a tile
        # that cannot be completed.
        if epoch is None:
            return None
        seasons.append(epoch)

    # add slope
    url_slope = urls["slope"]
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk(dask_chunks)

    result = xr.merge([*seasons, slope], compat="override")

    result = result.astype(np.float32)
    return result.squeeze()
Esempio n. 13
0
def gm_mads_two_seasons_prediction(geobox, dask_chunks):
    """
    Feature layer function for production run of
    eastern crop-mask. Similar to the training function
    but data is loaded internally, CHIRPS is reprojected differently,
    and dask chunks are used.

    Returns the merged season-1, season-2 and slope layers as squeezed
    ``float32``.
    """
    dc = datacube.Datacube(app="prediction")

    # bands requested from the semiannual geomedian product
    spectral = ["blue", "green", "red", "nir", "swir_1", "swir_2"]
    red_edge = ["red_edge_1", "red_edge_2", "red_edge_3"]
    deviations = ["bcdev", "edev", "sdev"]
    measurements = spectral + red_edge + deviations

    # load the data
    ds = dc.load(
        product="gm_s2_semiannual",
        time="2019",
        measurements=measurements,
        like=geobox,
        dask_chunks=dask_chunks,
        resampling='bilinear',
    )

    first_half = ds.isel(time=0)
    second_half = ds.isel(time=1)

    # create features
    season_1 = add_chirps(
        common_ops(first_half, era="_S1"),
        era='_S1',
        training=False,
        dask_chunks=dask_chunks,
    )
    season_2 = add_chirps(
        common_ops(second_half, era="_S2"),
        era='_S2',
        training=False,
        dask_chunks=dask_chunks,
    )

    # add slope
    slope_url = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope_ds = rio_slurp_xarray(slope_url, gbox=ds.geobox)
    slope_ds = slope_ds.to_dataset(name="slope").chunk(dask_chunks)

    merged = xr.merge([season_1, season_2, slope_ds], compat="override")

    return merged.astype(np.float32).squeeze()
Esempio n. 14
0
def annual_gm_mads_evi_training(ds):
    """
    Build annual training features for the footprint of ``ds``.

    Combines the 2019 ``ga_s2_gm`` geomedian/MAD bands and indices with EVI
    statistics computed over the time axis of ``ds``, two seasonal rainfall
    layers and a slope layer. Returns the merged, squeezed dataset.
    """
    dc = datacube.Datacube(app='training')

    # grab gm+tmads
    wanted = ['red', 'blue', 'green', 'nir',
              'swir_1', 'swir_2', 'red_edge_1',
              'red_edge_2', 'red_edge_3', 'SMAD',
              'BCMAD', 'EMAD']
    feats = dc.load(product='ga_s2_gm',
                    time='2019',
                    like=ds.geobox,
                    measurements=wanted)

    # negative log of the MAD bands (EMAD is rescaled first)
    feats['SMAD'] = -np.log(feats['SMAD'])
    feats['BCMAD'] = -np.log(feats['BCMAD'])
    feats['EMAD'] = -np.log(feats['EMAD'] / 10000)

    # calculate band indices on gm
    feats = calculate_indices(feats,
                              index=['EVI', 'LAI', 'MNDWI'],
                              drop=False,
                              collection='s2')

    # normalise spectral GM bands 0-1
    already_scaled = ['SMAD', 'BCMAD', 'EMAD', 'EVI', 'LAI', 'MNDWI']
    for name in feats.data_vars:
        if name in already_scaled:
            continue
        feats[name] = feats[name] / 10000

    # calculate EVI on annual timeseries
    evi_series = calculate_indices(ds,
                                   index=['EVI'],
                                   drop=True,
                                   normalise=True,
                                   collection='s2').EVI

    # EVI stats
    feats['evi_std'] = evi_series.std(dim='time')
    for label, q in (('evi_10', 0.1), ('evi_25', 0.25),
                     ('evi_75', 0.75), ('evi_90', 0.9)):
        feats[label] = evi_series.quantile(q, dim='time')
    feats['evi_range'] = feats['evi_90'] - feats['evi_10']

    # rainfall climatology
    def seasonal_rain(path):
        raster = assign_crs(xr.open_rasterio(path), crs='epsg:4326')
        return xr_reproject(raster, ds.geobox, "bilinear")

    rain_first = seasonal_rain(
        '/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc')
    rain_second = seasonal_rain(
        '/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc')

    feats['rain_S1'] = rain_first
    feats['rain_S2'] = rain_second

    # slope
    slope_url = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope_ds = rio_slurp_xarray(slope_url,
                                gbox=ds.geobox).to_dataset(name='slope')

    merged = xr.merge([feats, slope_ds], compat='override')

    return merged.squeeze()
Esempio n. 15
0
def gm_mads_two_seasons_predict(ds):
    """
    Build the two-season prediction feature stack from a 2019 time series.

    The input is divided by 10000 and split into Jan-Jun and Jul-Dec; each
    half gets geomedian/MAD features, band indices and a rainfall layer, and
    both are merged with a slope layer.

    The original opened a Datacube connection here that was never used;
    it has been removed so the function no longer needs a database.

    :param ds: time series dataset exposing a ``time`` coordinate and a geobox
    :return: merged, squeezed feature dataset chunked 2000 x 2000 in x/y
    """
    tile_chunks = {'x': 2000, 'y': 2000}
    rain_files = {
        '_S1': '/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc',
        '_S2': '/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc',
    }

    ds = ds / 10_000
    ds1 = ds.sel(time=slice('2019-01', '2019-06'))
    ds2 = ds.sel(time=slice('2019-07', '2019-12'))

    def fun(ds, era):
        # geomedian and tmads (computed eagerly)
        gm_mads = xr_geomedian_tmad_new(ds).compute()
        gm_mads = calculate_indices(gm_mads,
                                    index=['NDVI', 'LAI', 'MNDWI'],
                                    drop=False,
                                    normalise=False,
                                    collection='s2')

        # negative log of the three deviation bands
        for mad in ('sdev', 'bcdev', 'edev'):
            gm_mads[mad] = -np.log(gm_mads[mad])
        gm_mads = gm_mads.chunk(tile_chunks)

        # rainfall climatology for this season, reprojected onto the tile
        chirps = assign_crs(xr.open_rasterio(rain_files[era]),
                            crs='epsg:4326')
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        gm_mads['rain'] = chirps.chunk(tile_chunks)

        # tag every band with the season suffix
        return gm_mads.rename(
            {band: band + era for band in gm_mads.data_vars})

    epoch1 = fun(ds1, era='_S1')
    epoch2 = fun(ds2, era='_S2')

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name='slope').chunk(tile_chunks)

    result = xr.merge([epoch1, epoch2, slope], compat='override')

    return result.squeeze()
Esempio n. 16
0
def test_cog_mem(tmpdir, shape):
    """Check in-memory COG generation for in-memory (non-dask) input.

    ``write_cog(..., ":mem:")`` and ``to_cog(...)`` (with and without
    overviews) must return ``bytes`` that, once written to disk, read back
    with the same pixels, geobox and nodata as the source.
    """
    out_dir = Path(str(tmpdir))
    src, _ = gen_test_data(out_dir, shape=shape)

    def check_roundtrip(payload, file_name):
        assert isinstance(payload, bytes)
        target = out_dir / file_name
        with open(str(target), "wb") as sink:
            sink.write(payload)

        loaded = rio_slurp_xarray(target)
        np.testing.assert_array_equal(loaded.values, src.values)
        assert loaded.geobox == src.geobox
        assert loaded.nodata == src.nodata

    # write to memory 1
    check_roundtrip(write_cog(src, ":mem:"), "cog1.tiff")

    # write to memory 2
    check_roundtrip(to_cog(src), "cog2.tiff")

    # write to memory 3 -- no overviews
    check_roundtrip(to_cog(src, overview_levels=[]), "cog3.tiff")
Esempio n. 17
0
def merge_tile_ds(
    x: int,
    y: int,
    config: FeaturePathConfig,
    geobox_dict: Dict[Tuple, GeoBox],
    gm_ds: Optional[xr.Dataset] = None,
) -> Tuple[str, GeoBox, xr.Dataset]:
    """
    Merge the per-season feature datasets of one tile with the slope layer.

    For every entry returned by ``get_tifs_paths`` a base dataset is taken
    from ``gm_ds`` when one is given (and truthy), otherwise built from the
    tifs; it is then passed through ``down_scale_gm_band`` and
    ``complete_gm_mads``. The ``_S1`` and ``_S2`` results are merged with
    the slope raster read from ``config.url_slope``.

    :param x: tile index x
    :param y: tile index y
    :param config: FeaturePathConfig providing ``TIF_path``, ``rename_dict``
        and ``url_slope``
    :param geobox_dict: mapping from ``(x, y)`` tile index to the tile geobox
    :param gm_ds: optional pre-loaded dataset used instead of the tifs
    :return: subfolder path, the tile geobox, and the merged feature
        dataset rechunked with ``-1`` along x and y
    """
    # this folder naming x, y will change
    subfld = f"x{x:+04d}/y{y:+04d}"
    P6M_tifs: Dict = get_tifs_paths(config.TIF_path, subfld)
    geobox = geobox_dict[(x, y)]

    per_season = {}
    for key, tif_paths in P6M_tifs.items():
        # Keys containing the first half-year marker are season 1;
        # everything else is treated as season 2.
        if "2019-01--P6M" in key:
            era = "_S1"
        else:
            era = "_S2"

        # fall back to building the base dataset when none was supplied
        source_ds = gm_ds if gm_ds else merge_tifs_into_ds(
            key, tif_paths, rename_dict=config.rename_dict)

        # TODO: to validate the 6month geomedia is down scaled already.
        scaled = down_scale_gm_band(source_ds)

        per_season[era] = complete_gm_mads(scaled, geobox, era)

    slope_raster = rio_slurp_xarray(config.url_slope, gbox=geobox)
    slope = slope_raster.drop("spatial_ref").to_dataset(name="slope")

    merged = xr.merge(
        [per_season["_S1"], per_season["_S2"], slope],
        compat="override",
    )

    return subfld, geobox, merged.chunk({"x": -1, "y": -1})
Esempio n. 18
0
def test_cog_file_dask(tmpdir):
    """Writing a dask-backed array must be lazy until ``compute()`` runs.

    The ``Delayed`` returned by ``write_cog`` must not create the file;
    computing it returns the path and produces a file whose contents match
    the source.
    """
    out_dir = Path(str(tmpdir))
    src, _ = gen_test_data(out_dir, dask=True)
    assert dask.is_dask_collection(src)

    target = out_dir / "cog.tif"
    pending = write_cog(src, target, overview_levels=[2, 4])
    assert isinstance(pending, Delayed)

    # nothing on disk before the graph is executed
    assert not target.exists()
    assert pending.compute() == target
    assert target.exists()

    loaded = rio_slurp_xarray(out_dir / "cog.tif")
    np.testing.assert_array_equal(loaded.values, src.values)
    assert loaded.geobox == src.geobox
    assert loaded.nodata == src.nodata
Esempio n. 19
0
def test_cog_rgba(tmpdir):
    """Round-trip a 4-band (y, x, band) image through ``write_cog``.

    Also checks that ``_write_cog`` raises ``ValueError`` when the pixel
    array has one row fewer than the geobox passed with it.
    """
    root = Path(str(tmpdir))
    src, _ = gen_test_data(root)

    # Stack the single band four times along a trailing band axis.
    stacked = np.dstack([src.values] * 4)
    rgba = xr.DataArray(
        stacked,
        attrs=src.attrs,
        dims=('y', 'x', 'band'),
        coords=src.coords,
    )
    assert rgba.geobox == src.geobox
    assert rgba.shape[:2] == rgba.geobox.shape

    written = write_cog(rgba, root / "cog.tif")
    loaded = rio_slurp_xarray(written)

    assert loaded.geobox == rgba.geobox
    assert loaded.shape == rgba.shape
    np.testing.assert_array_equal(loaded.values, rgba.values)

    cropped = rgba.values[1:, :, :]
    with pytest.raises(ValueError):
        _write_cog(cropped, rgba.geobox, ':mem:')
Esempio n. 20
0
def gm_mads_evi_rainfall(ds):
    """
    6 monthly and annual
    gm + mads
    evi stats (10, 50, 90 percentile, range, std)
    rainfall actual stats (min, mean, max, range, std) from monthly data
    rainfall clim stats (min, mean, max, range, std) from monthly data

    Three feature sets are produced - whole year (``_S0``), Jan-Jun (``_S1``)
    and Jul-Dec (``_S2``) - and merged with a slope layer.

    Changes from the original: the climatology read passed the misspelled
    keyword ``resapling``, so the requested bilinear resampling was never
    applied; it is now spelled ``resampling``. The unused Datacube connection
    and unused half-year selections were removed.

    :param ds: 2019 time series dataset with ``nir_1``/``nir_2`` bands,
        a ``time`` coordinate and a geobox
    :return: merged, squeezed feature dataset
    """
    ds = ds / 10000
    ds = ds.rename({'nir_1': 'nir_wide', 'nir_2': 'nir'})

    # monthly rainfall (actual and climatology), indexed by month number 1-12
    chirps = []
    chpclim = []
    for m in range(1, 13):
        monthly = xr.open_rasterio(
            f'/g/data/CHIRPS/monthly_2019/chirps-v2.0.2019.{m:02d}.tif'
        ).squeeze().expand_dims({'time': [m]})
        chirps.append(
            xr_reproject(assign_crs(monthly, crs='epsg:4326'),
                         ds.geobox, "bilinear"))
        chpclim.append(
            rio_slurp_xarray(
                f'https://deafrica-data-dev.s3.amazonaws.com/product-dev/deafrica_chpclim_50n_50s_{m:02d}.tif',
                gbox=ds.geobox,
                resampling='bilinear',
            ).expand_dims({'time': [m]}))

    chirps = xr.concat(chirps, dim='time')
    chpclim = xr.concat(chpclim, dim='time')

    def fun(ds, chirps, chpclim, era):
        ds = calculate_indices(ds,
                               index=['EVI'],
                               drop=False,
                               normalise=False,
                               collection='s2')
        # geomedian and tmads
        gm_mads = xr_geomedian_tmad(ds)
        gm_mads = calculate_indices(gm_mads,
                                    index=['EVI', 'NDVI', 'LAI', 'MNDWI'],
                                    drop=False,
                                    normalise=False,
                                    collection='s2')

        # negative log of the three deviation bands
        gm_mads['sdev'] = -np.log(gm_mads['sdev'])
        gm_mads['bcdev'] = -np.log(gm_mads['bcdev'])
        gm_mads['edev'] = -np.log(gm_mads['edev'])

        # EVI stats
        gm_mads['evi_10'] = ds.EVI.quantile(0.1, dim='time')
        gm_mads['evi_50'] = ds.EVI.quantile(0.5, dim='time')
        gm_mads['evi_90'] = ds.EVI.quantile(0.9, dim='time')
        gm_mads['evi_range'] = gm_mads['evi_90'] - gm_mads['evi_10']
        gm_mads['evi_std'] = ds.EVI.std(dim='time')

        # rainfall actual
        gm_mads['rain_min'] = chirps.min(dim='time')
        gm_mads['rain_mean'] = chirps.mean(dim='time')
        gm_mads['rain_max'] = chirps.max(dim='time')
        gm_mads['rain_range'] = gm_mads['rain_max'] - gm_mads['rain_min']
        gm_mads['rain_std'] = chirps.std(dim='time')

        # rainfall climatology
        gm_mads['rainclim_min'] = chpclim.min(dim='time')
        gm_mads['rainclim_mean'] = chpclim.mean(dim='time')
        gm_mads['rainclim_max'] = chpclim.max(dim='time')
        gm_mads['rainclim_range'] = gm_mads['rainclim_max'] - gm_mads['rainclim_min']
        gm_mads['rainclim_std'] = chpclim.std(dim='time')

        # tag every band with the season suffix
        return gm_mads.rename(
            {band: band + era for band in gm_mads.data_vars})

    epoch0 = fun(ds, chirps, chpclim, era='_S0')
    time, month = slice('2019-01', '2019-06'), slice(1, 6)
    epoch1 = fun(ds.sel(time=time), chirps.sel(time=month),
                 chpclim.sel(time=month), era='_S1')
    time, month = slice('2019-07', '2019-12'), slice(7, 12)
    epoch2 = fun(ds.sel(time=time), chirps.sel(time=month),
                 chpclim.sel(time=month), era='_S2')

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name='slope')

    result = xr.merge([epoch0,
                       epoch1,
                       epoch2,
                       slope], compat='override')

    return result.squeeze()
Esempio n. 21
0
def test_rio_slurp(tmpdir):
    """Round-trip a synthetic image through ``write_gtiff`` and the slurp readers.

    The same test image is written twice (once with a nodata value recorded,
    once without) and then read back with ``rio_slurp`` / ``rio_slurp_xarray``
    natively, into an explicit shape, into an explicit geobox, with a dtype
    override and with a remapped destination nodata value.
    """
    width, height = 96, 64
    dtype, nodata, nodata_width = 'int16', -999, 7
    out_dir = Path(str(tmpdir))

    image = mk_test_image(width, height, dtype, nodata, nodata_width=nodata_width)

    # sanity-check the generated image before anything is written
    assert image.shape == (height, width)
    assert image.dtype.name == dtype
    assert image[10, 30] == (30 << 8) | 10
    assert image[10, 11] == nodata

    expected = image.copy()
    meta_src = write_gtiff(out_dir / "rio-slurp-aa.tif",
                           image,
                           nodata=-999,
                           overwrite=True)
    meta_plain = write_gtiff(out_dir / "rio-slurp-aa-missing-nodata.tif",
                             image,
                             nodata=None,
                             overwrite=True)

    # native read: no target shape or geobox supplied
    pix, info = rio_slurp(meta_src.path)
    np.testing.assert_array_equal(pix, expected)
    assert info.gbox == meta_src.gbox
    assert pix.shape == info.gbox.shape
    xarr = rio_slurp_xarray(meta_src.path)
    assert info.gbox == xarr.geobox
    np.testing.assert_array_equal(xarr.values, expected)

    # requesting the native shape must reuse the source geobox object itself
    pix, info = rio_slurp(meta_src.path, expected.shape)
    np.testing.assert_array_equal(pix, expected)
    assert pix.shape == info.gbox.shape
    assert info.gbox is info.src_gbox
    xarr = rio_slurp_xarray(meta_src.path, expected.shape)
    assert info.gbox == xarr.geobox
    np.testing.assert_array_equal(xarr.values, expected)

    # requesting a different shape: new geobox, same extent as the source
    pix, info = rio_slurp(meta_src.path, (3, 7))
    assert pix.shape == (3, 7)
    assert pix.shape == info.gbox.shape
    assert info.gbox != info.src_gbox
    assert info.src_gbox == meta_src.gbox
    assert info.gbox.extent == meta_src.gbox.extent

    pix, info = rio_slurp(meta_src.path, expected.shape)
    np.testing.assert_array_equal(pix, expected)
    assert pix.shape == info.gbox.shape

    # read into an explicit geobox (passed positionally)
    pix, info = rio_slurp(meta_src.path, meta_src.gbox, resampling='nearest')
    np.testing.assert_array_equal(pix, expected)
    xarr = rio_slurp_xarray(meta_src.path, meta_src.gbox)
    assert info.gbox == xarr.geobox
    np.testing.assert_array_equal(xarr.values, expected)

    # read into an explicit geobox (passed by keyword) with a dtype override
    pix, info = rio_slurp(meta_src.path, gbox=meta_src.gbox, dtype='float32')
    assert pix.dtype == 'float32'
    np.testing.assert_array_equal(pix, expected.astype('float32'))
    xarr = rio_slurp_xarray(meta_src.path, gbox=meta_src.gbox)
    assert info.gbox == xarr.geobox
    assert info.nodata == xarr.nodata
    np.testing.assert_array_equal(xarr.values, expected)

    # source nodata pixels must come back as the requested destination nodata
    pix, info = rio_slurp(meta_src.path, meta_src.gbox, dst_nodata=-33)
    np.testing.assert_array_equal(pix == -33, expected == -999)

    # file written without nodata: pixels are returned unchanged
    pix, info = rio_slurp(meta_plain.path, meta_plain.gbox, dst_nodata=None)
    np.testing.assert_array_equal(pix, expected)
Esempio n. 22
0
def post_processing(predicted):
    """
    Apply the post-classification masks to a set of prediction results.

    The ``Predictions`` variable is wrapped in a dataset under the name
    ``mask`` and set to 0 wherever a pixel lies outside the AEZ polygon,
    inside a WDPA area, has a WOfS frequency above 0.2, a slope value
    above 35, or an elevation above 3600.

    :param predicted: The prediction results; must provide ``Predictions``
        and a ``geobox``
    :return: the masked dataset after ``squeeze()``
    """
    dc = Datacube(app='whatever')

    # Only the class prediction is filtered in this variant; the probability
    # layer and the image-segmentation filter are not produced here.
    predict = predicted.Predictions
    ds = xr.Dataset({"mask": predict})

    # --Post process masking------------------------------------------------

    # zero everything outside the AEZ boundary
    aez = gpd.read_file(
        'https://github.com/digitalearthafrica/crop-mask/blob/main/testing/eastern_cropmask/data/Eastern.geojson?raw=true'
    )
    with HiddenPrints():
        inside_aez = xr_rasterize(aez, predicted)
    inside_aez = inside_aez.chunk({})
    ds = ds.where(inside_aez, 0)

    # zero pixels flagged by the WDPA raster
    protected = rio_slurp_xarray(
        "s3://deafrica-input-datasets/protected_areas/WDPA_eastern.tif",
        gbox=predicted.geobox,
    ).chunk({}).astype(bool)
    ds = ds.where(~protected, 0)

    # zero pixels whose WOfS frequency exceeds the threshold
    wofs_summary = dc.load(product="ga_ls8c_wofs_2_summary",
                           like=predicted.geobox,
                           dask_chunks={})
    too_wet = wofs_summary.frequency > 0.2
    ds = ds.where(~too_wet, 0)

    # zero steep slopes
    too_steep = rio_slurp_xarray(
        'https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif',
        gbox=predicted.geobox,
    ).chunk({}) > 35
    ds = ds.where(~too_steep, 0)

    # zero pixels where the elevation is above 3600m
    dem = dc.load(product="dem_srtm",
                  like=predicted.geobox,
                  dask_chunks={})
    too_high = dem.elevation > 3600
    ds = ds.where(~too_high.squeeze(), 0)

    return ds.squeeze()
Esempio n. 23
0
def gm_mads_two_seasons_production(x, y):
    """
    Feature layer function for production run of
    eastern crop-mask.

    Locates the six-monthly tif groups of tile ``(x, y)``, merges each group
    into a dataset, converts both seasons to features, appends slope and
    returns the variables in the training order as float32.

    :param x: tile x index, formatted as a signed, zero-padded number in the
        tile sub-folder name
    :param y: tile y index, formatted the same way
    :return: squeezed float32 dataset holding the features listed below
    """
    # source band name -> name used by the feature functions
    band_aliases = {
        "B02": "blue",
        "B03": "green",
        "B04": "red",
        "B05": "red_edge_1",
        "B06": "red_edge_2",
        "B07": "red_edge_3",
        "B08": "nir",
        "B8A": "nir_narrow",
        "B11": "swir_1",
        "B12": "swir_2",
        "BCMAD": "bcdev",
        "EMAD": "edev",
        "SMAD": "sdev",
    }

    # every season contributes the same features, in this order
    per_season = [
        "red",
        "blue",
        "green",
        "nir",
        "swir_1",
        "swir_2",
        "red_edge_1",
        "red_edge_2",
        "red_edge_3",
        "edev",
        "sdev",
        "bcdev",
        "NDVI",
        "LAI",
        "MNDWI",
        "rain",
    ]
    feature_order = [
        name + suffix for suffix in ("_S1", "_S2") for name in per_season
    ]
    feature_order.append("slope")

    tile_folder = f"x{x:+04d}/y{y:+04d}/"
    tif_root = osp.join("/g/data/u23/data/", "tifs20")
    tif_groups = get_tifs_paths(tif_root, tile_folder)

    # a group whose key mentions "2019-01--P6M" is season 1, anything else season 2
    by_season = {
        ("_S1" if "2019-01--P6M" in key else "_S2"):
        merge_tifs_into_ds(key, paths, rename_dict=band_aliases)
        for key, paths in tif_groups.items()
    }

    # convert from bands to features
    first = features(by_season['_S1'], era='_S1')
    second = features(by_season['_S2'], era='_S2')

    # slope is read onto the grid of the second season
    slope = rio_slurp_xarray(
        "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif",
        second.geobox,
    ).to_dataset(name='slope')

    merged = xr.merge([first, second, slope], compat='override')

    # select in training order, then cast
    return merged[feature_order].astype(np.float32).squeeze()
Esempio n. 24
0
def gm_mads_two_seasons(geobox):
    """
    Feature layer function for production run of
    eastern crop-mask.

    Loads the geomedian + MAD bands twice (one dataset per season), derives
    NDVI/LAI/MNDWI, takes ``-log`` of the three MAD bands, adds the seasonal
    CHIRPS rainfall climatology as ``rain`` and appends slope.

    :param geobox: target grid, passed as ``like=`` to ``dc.load``
    :return: squeezed float32 dataset whose per-season variables carry a
        ``_S1`` / ``_S2`` suffix, plus ``slope``
    """
    dc = datacube.Datacube(app="prediction")
    # load the data
    measurements = [
        "blue",
        "green",
        "red",
        "nir",
        "swir_1",
        "swir_2",
        "red_edge_1",
        "red_edge_2",
        "red_edge_3",
        "bcdev",
        "edev",
        "sdev",
    ]
    # NOTE(review): both loads use the identical query (same product and
    # time="2019"), so the two seasons currently hold the same imagery --
    # confirm the intended product / time range for each season.
    ds1 = dc.load(
        product="ga_s2_gm", time="2019", measurements=measurements, like=geobox
    )
    ds2 = dc.load(
        product="ga_s2_gm", time="2019", measurements=measurements, like=geobox
    )

    dss = {"S1": ds1, "S2": ds2}

    def fun(ds, era):
        """Build the feature dataset of one season; ``ds`` is rescaled in place."""
        # normalise SR and edev bands (assignment modifies the caller's dataset)
        for band in ds.data_vars:
            if band not in ["sdev", "bcdev"]:
                ds[band] = ds[band] / 10000

        gm_mads = calculate_indices(
            ds,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])

        # rainfall climatology
        if era == "_S1":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        elif era == "_S2":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        else:
            # previously this fell through and failed later on an unbound name
            raise ValueError(f"unknown era: {era!r}")

        # Clip CHIRPS to ~ S2 tile boundaries so we can handle NaNs local to S2 tile
        xmin, xmax = ds.x.values[0], ds.x.values[-1]
        ymin, ymax = ds.y.values[0], ds.y.values[-1]
        inProj = Proj("epsg:6933")
        outProj = Proj("epsg:4326")
        xmin, ymin = transform(inProj, outProj, xmin, ymin)
        xmax, ymax = transform(inProj, outProj, xmax, ymax)

        # create lat/lon indexing slices - buffer S2 bbox by 0.05deg
        if (xmin < 0) & (xmax < 0):
            x_slice = list(np.arange(xmin + 0.05, xmax - 0.05, -0.05))
        else:
            x_slice = list(np.arange(xmax - 0.05, xmin + 0.05, 0.05))

        if (ymin < 0) & (ymax < 0):
            y_slice = list(np.arange(ymin + 0.05, ymax - 0.05, -0.05))
        else:
            y_slice = list(np.arange(ymin - 0.05, ymax + 0.05, 0.05))

        # index global chirps using buffered s2 tile bbox
        # NOTE(review): x is indexed with y_slice and y with x_slice --
        # presumably because the transform returns (lat, lon) order for
        # epsg:4326; confirm before changing.
        chirps = assign_crs(chirps.sel(x=y_slice, y=x_slice, method="nearest"))

        # fill any NaNs in CHIRPS with local (s2-tile bbox) mean
        chirps = chirps.fillna(chirps.mean())
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        gm_mads["rain"] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    # Each season must be computed from its own dataset: `fun` rescales its
    # input in place, so feeding dss["S1"] twice would divide the second
    # season's bands by 10000 a second time.
    epoch1 = fun(dss["S1"], era="_S1")
    epoch2 = fun(dss["S2"], era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds1.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    result = result.astype(np.float32)
    return result.squeeze()
Esempio n. 25
0
def annual_gm_mads_evi_training(ds):
    """
    Build the annual training feature layers on the grid of ``ds``.

    Combines the 2019 ``ga_s2_gm`` geomedian/MAD bands (MADs as ``-log``,
    spectral bands divided by 10000), the EVI/LAI/MNDWI indices of the
    geomedian, EVI statistics over the time dimension of ``ds``, the two
    half-year CHIRPS rainfall climatologies and slope.

    :param ds: time series dataset; supplies the target ``geobox`` and the
        bands the EVI statistics are computed from
    :return: the merged feature dataset after ``squeeze()``
    """
    dc = datacube.Datacube(app="training")

    # grab gm+tmads
    gm_bands = [
        "red",
        "blue",
        "green",
        "nir",
        "swir_1",
        "swir_2",
        "red_edge_1",
        "red_edge_2",
        "red_edge_3",
        "SMAD",
        "BCMAD",
        "EMAD",
    ]
    gm_mads = dc.load(
        product="ga_s2_gm", time="2019", like=ds.geobox, measurements=gm_bands
    )

    # MAD bands become -log(value); EMAD is divided by 10000 first
    gm_mads["SMAD"] = -np.log(gm_mads["SMAD"])
    gm_mads["BCMAD"] = -np.log(gm_mads["BCMAD"])
    gm_mads["EMAD"] = -np.log(gm_mads["EMAD"] / 10000)

    # calculate band indices on gm
    gm_mads = calculate_indices(
        gm_mads, index=["EVI", "LAI", "MNDWI"], drop=False, collection="s2"
    )

    # normalise spectral GM bands 0-1 (MADs and indices are left alone)
    not_spectral = ["SMAD", "BCMAD", "EMAD", "EVI", "LAI", "MNDWI"]
    spectral = [name for name in gm_mads.data_vars if name not in not_spectral]
    for name in spectral:
        gm_mads[name] = gm_mads[name] / 10000

    # calculate EVI on annual timeseries
    evi_series = calculate_indices(
        ds, index=["EVI"], drop=True, normalise=True, collection="s2"
    ).EVI

    # EVI stats
    gm_mads["evi_std"] = evi_series.std(dim="time")
    for label, q in (
        ("evi_10", 0.1),
        ("evi_25", 0.25),
        ("evi_75", 0.75),
        ("evi_90", 0.9),
    ):
        gm_mads[label] = evi_series.quantile(q, dim="time")
    gm_mads["evi_range"] = gm_mads["evi_90"] - gm_mads["evi_10"]

    # rainfall climatology, one layer per half-year
    climatologies = (
        (
            "rain_S1",
            "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc",
        ),
        (
            "rain_S2",
            "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc",
        ),
    )
    for label, path in climatologies:
        rainfall = assign_crs(xr.open_rasterio(path), crs="epsg:4326")
        gm_mads[label] = xr_reproject(rainfall, ds.geobox, "bilinear")

    # slope
    slope = rio_slurp_xarray(
        "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif",
        gbox=ds.geobox,
    ).to_dataset(name="slope")

    return xr.merge([gm_mads, slope], compat="override").squeeze()
Esempio n. 26
0
def gm_mads_evi_rainfall(ds):
    """
    6 monthly and annual
    gm + mads
    evi stats (10, 50, 90 percentile, range, std)
    rainfall actual stats (min, mean, max, range, std) from monthly data
    rainfall clim stats (min, mean, max, range, std) from monthly data

    :param ds: time series dataset with ``nir_1`` / ``nir_2`` bands; it is
        divided by 10000 and supplies the target ``geobox``
    :return: squeezed dataset merging the annual (``_S0``), January-June
        (``_S1``) and July-December (``_S2``) features with ``slope``
    """
    # NOTE(review): `dc` is not used below; kept so that opening the
    # datacube connection still happens exactly as before.
    dc = datacube.Datacube(app="training")
    ds = ds / 10000
    ds = ds.rename({"nir_1": "nir_wide", "nir_2": "nir"})

    # one layer per calendar month, stacked along an integer "time" axis 1..12
    chirps = []
    chpclim = []
    for m in range(1, 13):
        chirps.append(
            xr_reproject(
                assign_crs(
                    xr.open_rasterio(
                        f"/g/data/CHIRPS/monthly_2019/chirps-v2.0.2019.{m:02d}.tif"
                    )
                    .squeeze()
                    .expand_dims({"time": [m]}),
                    crs="epsg:4326",
                ),
                ds.geobox,
                "bilinear",
            )
        )
        chpclim.append(
            rio_slurp_xarray(
                f"https://deafrica-data-dev.s3.amazonaws.com/product-dev/deafrica_chpclim_50n_50s_{m:02d}.tif",
                gbox=ds.geobox,
                # was misspelt "resapling", so bilinear was never requested
                resampling="bilinear",
            ).expand_dims({"time": [m]})
        )

    chirps = xr.concat(chirps, dim="time")
    chpclim = xr.concat(chpclim, dim="time")

    def fun(ds, chirps, chpclim, era):
        """Compute all features of one period and suffix every variable with ``era``."""
        ds = calculate_indices(
            ds, index=["EVI"], drop=False, normalise=False, collection="s2"
        )
        # geomedian and tmads
        gm_mads = xr_geomedian_tmad(ds)
        gm_mads = calculate_indices(
            gm_mads,
            index=["EVI", "NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])

        # EVI stats
        gm_mads["evi_10"] = ds.EVI.quantile(0.1, dim="time")
        gm_mads["evi_50"] = ds.EVI.quantile(0.5, dim="time")
        gm_mads["evi_90"] = ds.EVI.quantile(0.9, dim="time")
        gm_mads["evi_range"] = gm_mads["evi_90"] - gm_mads["evi_10"]
        gm_mads["evi_std"] = ds.EVI.std(dim="time")

        # rainfall actual
        gm_mads["rain_min"] = chirps.min(dim="time")
        gm_mads["rain_mean"] = chirps.mean(dim="time")
        gm_mads["rain_max"] = chirps.max(dim="time")
        gm_mads["rain_range"] = gm_mads["rain_max"] - gm_mads["rain_min"]
        gm_mads["rain_std"] = chirps.std(dim="time")

        # rainfall climatology
        gm_mads["rainclim_min"] = chpclim.min(dim="time")
        gm_mads["rainclim_mean"] = chpclim.mean(dim="time")
        gm_mads["rainclim_max"] = chpclim.max(dim="time")
        gm_mads["rainclim_range"] = gm_mads["rainclim_max"] - gm_mads["rainclim_min"]
        gm_mads["rainclim_std"] = chpclim.std(dim="time")

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    # whole year, then each half-year; the rainfall stacks are selected by
    # month number, the imagery by date
    epoch0 = fun(ds, chirps, chpclim, era="_S0")
    time, month = slice("2019-01", "2019-06"), slice(1, 6)
    epoch1 = fun(
        ds.sel(time=time), chirps.sel(time=month), chpclim.sel(time=month), era="_S1"
    )
    time, month = slice("2019-07", "2019-12"), slice(7, 12)
    epoch2 = fun(
        ds.sel(time=time), chirps.sel(time=month), chpclim.sel(time=month), era="_S2"
    )

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch0, epoch1, epoch2, slope], compat="override")

    return result.squeeze()
Esempio n. 27
0
def post_processing(predicted: xr.Dataset, urls: Dict[str, Any]) -> xr.Dataset:
    """
    Run the delayed post_processing functions, then create a lazy
    xr.Dataset to satisfy odc-stats

    The returned dataset holds three variables -- ``mask`` (the class
    prediction), ``prob`` (the probability, replaced by ``100 - proba``
    wherever the prediction is not 1) and ``filtered`` (the output of
    ``image_segmentation``) -- each set to 0 outside the AEZ polygon, inside
    WDPA areas, where WOfS frequency exceeds 0.20, where slope exceeds 50 and
    where elevation exceeds 3600.

    :param predicted: dataset providing ``Predictions``, ``Probabilities``,
        ``NDVI_S1``, ``NDVI_S2`` and a ``geobox``
    :param urls: locations of the ancillary inputs; the keys ``"aez"``,
        ``"wdpa"`` and ``"slope"`` are read
    :return: the masked dataset after ``squeeze()``
    """
    dc = Datacube(app="whatever")

    # grab predictions and proba for post process filtering
    predict = predicted.Predictions
    proba = predicted.Probabilities
    proba = proba.where(predict == 1, 100 - proba)  # crop proba only

    # ------image seg and filtering -------------
    # write out ndvi for image seg
    ndvi = assign_crs(predicted[["NDVI_S1", "NDVI_S2"]], crs=predicted.geobox.crs)

    # call function with dask delayed
    filtered = image_segmentation(ndvi, predict)

    # convert delayed object to dask array
    filtered = dask.array.from_delayed(
        filtered.squeeze(), shape=predict.shape, dtype=np.uint8
    )

    # convert dask array to xr.Datarray
    filtered = xr.DataArray(filtered, coords=predict.coords, attrs=predict.attrs)

    # --Post process masking----------------------------------------

    # merge back together for masking
    ds = xr.Dataset({"mask": predict, "prob": proba, "filtered": filtered})

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file(urls["aez"])
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    mask = mask.chunk({})
    ds = ds.where(mask, 0)

    # mask with WDPA
    wdpa = rio_slurp_xarray(urls["wdpa"], gbox=predicted.geobox)
    wdpa = wdpa.chunk({})
    wdpa = wdpa.astype(bool)
    ds = ds.where(~wdpa, 0)

    # mask with WOFS
    wofs = dc.load(
        product="wofs_ls_summary_annual",
        like=predicted.geobox,
        dask_chunks={},
        time="2019",
    )
    wofs = wofs.frequency > 0.20  # threshold
    ds = ds.where(~wofs, 0)

    # mask steep slopes
    slope = rio_slurp_xarray(urls["slope"], gbox=predicted.geobox)
    slope = slope.chunk({})
    slope = slope > 50
    ds = ds.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product="dem_srtm", like=predicted.geobox, dask_chunks={})
    elevation = elevation.elevation > 3600  # threshold
    ds = ds.where(~elevation.squeeze(), 0)

    # a Dataset (not a tuple of arrays) is what callers receive
    return ds.squeeze()
Esempio n. 28
0
# from sklearn.impute import SimpleImputer

#dc = datacube.Datacube(config='/home/547/sc0554/datacube.conf', env='lccs_dev')

#query = {'time': ('2015-01-01', '2015-12-31')}
#query['crs'] = 'EPSG:3577'

#data = dc.load(product='fc_percentile_albers_annual', measurements='PV_PC_90', **query)
# Script: segment an LCCS raster with felzenszwalb and replace every pixel by
# the median value of the segment it belongs to.

# Reference raster; below it is only used for its geobox.
data = xr.open_rasterio(
    '/g/data/r78/LCCS_Aberystwyth/urban_tests/test_sites_peter/perth_2015_gm.tif'
)
data = assign_crs(data, crs='epsg:3577')
# quickshift expects multiband images with bands in the last dimension
data = data.transpose()
fname = '/g/data/r78/LCCS_Aberystwyth/continental_run_april2020/2015/lccs_2015_L4_0.5.0.tif'
# Read the LCCS raster onto the grid of the reference raster.
LCCS = rio_slurp_xarray(fname, gbox=data.geobox)
# NOTE(review): assumes the array returned above has a "band" dimension -- confirm.
LCCS = LCCS.isel(band=0)
print("LCCS shape", LCCS.shape)
# Copy kept as the source of dims / coords / attrs for the output array.
meta_d = LCCS.copy()  ##.squeeze().drop('time')
# Segment the transposed pixel array; `seg` holds one label per pixel.
seg = felzenszwalb(LCCS.data.transpose())
#seg = quickshift(LCCS.data.transpose(), kernel_size=3, convert2lab=False, max_dist=10, ratio=0.5)
print('seg shape', seg.shape)
# Passing the label image as `index` yields, for every pixel, the median of
# the segment that pixel belongs to (result has the shape of `seg`).
data_seg_med = scipy.ndimage.median(input=LCCS.data.transpose(),
                                    labels=seg,
                                    index=seg)
#data_seg_med = data_seg_med.squeeze("time").drop("time")
print("seg_med shape", data_seg_med.shape)
# Transpose back and re-attach the metadata of the LCCS array.
out = xr.DataArray(data=data_seg_med.transpose(),
                   dims=meta_d.dims,
                   coords=meta_d.coords,
                   attrs=meta_d.attrs)