from datacube import Datacube
from datacube.utils.cog import write_cog


def calculate_index_task(params):
    item = params.get('item')
    index = params.get('index', 'rgb')
    dc = Datacube(config="datacube.conf")
    product = "ls8_level1_usgs"
    # Spatial extent and acquisition date come from the STAC-like item
    x = (item["bbox"][0], item["bbox"][2])
    y = (item["bbox"][1], item["bbox"][3])
    time = item["properties"]["datetime"].split("T")[0]
    measurements = ["band_2", "band_3", "band_4"]

    query = {
        'x': x,
        'y': y,
        'time': time,
        'measurements': measurements,
        'output_crs': 'EPSG:4326',
        'resolution': (-0.001, 0.001),
    }

    ds = dc.load(product=product, **query)
    print(ds)
    rgb_da = ds.to_array()
    suffix = index
    filename = f'{item["id"]}_{suffix}.tif'
    # `config` is the application's own settings module providing STATIC_DIR
    path = config.STATIC_DIR / filename
    write_cog(geo_im=rgb_da, fname=str(path), overwrite=True)
    return {"success": True, "url": str(path)}
Example #2
def check_data_with_api(index, time_slices):
    """Chek retrieved data for specific values.

    We scale down by 100 and check for predefined values in the
    corners.
    """
    from datacube import Datacube
    dc = Datacube(index=index)

    # Make the retrieved data 100 times less granular
    shape_x = int(GEOTIFF['shape']['x'] / 100.0)
    shape_y = int(GEOTIFF['shape']['y'] / 100.0)
    pixel_x = int(GEOTIFF['pixel_size']['x'] * 100)
    pixel_y = int(GEOTIFF['pixel_size']['y'] * 100)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(
        shape_x + 1, shape_y + 1,
        Affine(pixel_x, 0.0, GEOTIFF['ul']['x'], 0.0, pixel_y,
               GEOTIFF['ul']['y']), geometry.CRS(GEOTIFF['crs']))
    observations = dc.find_datasets(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert hashlib.md5(
        data.green.data).hexdigest() == '7f5ace486e88d33edf3512e8de6b6996'
    assert hashlib.md5(
        data.blue.data).hexdigest() == 'b58204f1e10dd678b292df188c242c7e'
    for time_slice in range(time_slices):
        assert data.blue.values[time_slice][-1, -1] == -999
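The test reads a module-level GEOTIFF fixture; a hypothetical sketch of its structure, using only the keys referenced above (all values invented):

GEOTIFF = {
    'shape': {'x': 4000, 'y': 4000},           # raster size in pixels
    'pixel_size': {'x': 25.0, 'y': -25.0},     # pixel size in CRS units
    'ul': {'x': 1200000.0, 'y': -4300000.0},   # upper-left corner coordinates
    'crs': 'EPSG:3577',
}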
Example #3
def xadataset_from_odcdataset(datasets: Union[List[ODCDataset],
                                              ODCDataset] = None,
                              ids: Union[List[UUID], UUID] = None,
                              measurements: List[str] = None) -> xa.Dataset:
    """ Loads a xaDataset from ODCDatasets or ODCDataset ids
     :param datasets: ODCDataset(s), optional
     :param ids: ODCDataset id(s), optional
     :param measurements: list of measurements/bands to load, optional
     :return: xa.Dataset containing given ODCDatasets or IDs """

    dc = Datacube(app="dataset_from_ODCDataset")

    if not datasets:
        if not isinstance(ids, list):
            ids = [ids]
        datasets = [dc.index.datasets.get(id_) for id_ in ids]

    if not isinstance(datasets, list):
        datasets = [datasets]

    product_name = datasets[0].metadata_doc["product"]["name"]
    crs = datasets[0].crs
    res = (-10, 10)  # resolution in (y, x) order; TODO: handle other resolutions

    ds = dc.load(product=product_name,
                 dask_chunks={},
                 measurements=measurements,
                 output_crs=str(crs),
                 resolution=res,
                 datasets=datasets)
    return ds
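The snippet leaves its imports implicit; a plausible set is sketched below (ODCDataset is assumed to be an alias for datacube.model.Dataset), followed by a hypothetical call by id:

from typing import List, Union
from uuid import UUID

import xarray as xa
from datacube import Datacube
from datacube.model import Dataset as ODCDataset  # assumed alias

# Hypothetical usage: load three bands of a single indexed dataset by id.
rgb = xadataset_from_odcdataset(ids=UUID("1f8a3b1e-0000-0000-0000-000000000000"),
                                measurements=["red", "green", "blue"])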
Example #4
    def __call__(self, index, product, time, group_by) -> Tile:
        # Do for a specific poly whose boundary is known
        output_crs = CRS(self.storage['crs'])
        filtered_items = [
            'geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y'
        ]
        filtered_dict = {
            k: v
            for k, v in self.input_region.items() if k in filtered_items
        }
        if self.feature is not None:
            filtered_dict['geopolygon'] = self.feature.geopolygon
            geopoly = filtered_dict['geopolygon']
        else:
            geopoly = query_geopolygon(**self.input_region)

        dc = Datacube(index=index)
        datasets = dc.find_datasets(product=product,
                                    time=time,
                                    group_by=group_by,
                                    **filtered_dict)
        group_by = query_group_by(group_by=group_by)
        sources = dc.group_datasets(datasets, group_by)
        output_resolution = [
            self.storage['resolution'][dim] for dim in output_crs.dimensions
        ]
        geopoly = geopoly.to_crs(output_crs)
        geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

        return Tile(sources, geobox)
Example #5
class IndexGEETestCase(unittest.TestCase):
    def setUp(self):
        self.datacube = Datacube(config=DATACUBE_CONFIG)

        IndexerTestCase().test_product_generation()
        product = self.datacube.index.products.get_by_name('ls8_test')
        if product is None:
            self.skipTest('No product available to index')
        datasets = self.datacube.find_datasets(product='ls8_test')
        if datasets:
            self.skipTest('Indexed datasets already exist in database')

    def test_index_gee(self):
        product = 'ls8_test'
        latitude = (-4.15, -3.90)
        longitude = (39.50, 39.75)
        time = '2020-01'

        cmd = [
            "index_gee", "--product", product, "--latitude",
            str(latitude), "--longitude",
            str(longitude), "--time", time, "--config", DATACUBE_CONFIG,
            "--no_confirm", "-u"
        ]
        subprocess.check_output(cmd)
        datasets = self.datacube.find_datasets(product=product)
        self.assertGreater(len(datasets), 0,
                           'Expected to find datasets in index')
Example #6
def native_load(ds, measurements=None, basis=None, **kw):
    """Load single dataset in native resolution.

    :param ds: Dataset
    :param measurements: List of band names to load
    :param basis: Name of the band to use for computing reference frame, other
    bands might be reprojected if they use different pixel grid

    :param **kw: Any other parameter load_data accepts

    :return: Xarray dataset
    """
    from datacube import Datacube
    geobox = native_geobox(
        ds, measurements,
        basis)  # early exit via exception if no compatible grid exists
    if measurements is not None:
        mm = [ds.type.measurements[n] for n in measurements]
    else:
        mm = ds.type.measurements

    return Datacube.load_data(Datacube.group_datasets([ds], 'time'),
                              geobox,
                              measurements=mm,
                              **kw)
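A hypothetical usage sketch: fetch one dataset from the index by id (placeholder UUID) and load two of its bands at native resolution, lazily via dask (the band names are assumptions about the product):

from datacube import Datacube

dc = Datacube(app="native-load-example")
ds = dc.index.datasets.get("1f8a3b1e-0000-0000-0000-000000000000")  # placeholder id
xx = native_load(ds, measurements=["red", "nir"], dask_chunks={"x": 2048, "y": 2048})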
Example #7
def collect_uris(prod_index, products, expressions):
    """
    Collect all URIs of datasets from products
    matching search expressions.
    """
    dc = Datacube(index=prod_index)
    for prod in products:
        for dataset in dc.find_datasets_lazy(product=prod, **expressions):
            yield normalize_uri(dataset.local_uri)
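A hypothetical way to drive the generator (the product name and time range are made up):

dc = Datacube()
for uri in collect_uris(dc.index, ["ls8_nbar_scene"], {"time": ("2020-01", "2020-06")}):
    print(uri)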
Example #8
def list_products(index):
    """
    List products that are defined in the index
    """
    dc = Datacube(index)
    products = dc.list_products()

    echo(products.to_string(columns=('name', 'description', 'product_type', 'instrument',
                                     'format', 'platform'),
                            justify='left'))
Example #9
    def setUp(self):
        self.datacube = Datacube(config=DATACUBE_CONFIG)

        IndexerTestCase().test_product_generation()
        product = self.datacube.index.products.get_by_name('ls8_test')
        if product is None:
            self.skipTest('No product available to index')
        datasets = self.datacube.find_datasets(product='ls8_test')
        if datasets:
            self.skipTest('Indexed datasets already exist in database')
Example #10
    def __get_mask_datasets(self) -> List[ODCDataset]:
        """Finds mask datasets based on config"""
        dc = Datacube(app="mosaic_creator")
        time_range = (str(self.__start_date), str(self.__end_date))
        datasets = dc.find_datasets(product=self.__product_name, time=time_range)
        if not datasets:
            LOGGER.warning("No mask datasets found for "
                           f"product={self.__product_name}, time={time_range}")
            raise ValueError("No datasets found")  # TODO: custom exception
        return datasets
Example #11
def test_woffles(query, expected):
    dc = Datacube(app='test_wofls')

    bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2']  # inputs needed from EO data
    source = dc.load(product='ls8_nbar_albers', measurements=bands, **query)
    pq = dc.load(product='ls8_pq_albers', like=source)
    dsm = dc.load(product='dsm1sv10', like=source, time=('1900-01-01', '2100-01-01'), resampling='cubic')

    wofls_output = woffles(*(x.isel(time=0) for x in [source, pq, dsm]))

    assert (wofls_output == expected).all()
Example #12
def list_products(index):
    """
    List products that are defined in the index
    """
    dc = Datacube(index)
    products = dc.list_products()

    echo(
        products.to_string(columns=('name', 'description', 'product_type',
                                    'instrument', 'format', 'platform'),
                           justify='left'))
Example #13
def test_load_data(tmpdir):
    tmpdir = Path(str(tmpdir))

    group_by = query_group_by('time')
    spatial = dict(resolution=(15, -15),
                   offset=(11230, 1381110),)

    nodata = -999
    aa = mk_test_image(96, 64, 'int16', nodata=nodata)

    ds, gbox = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                                tmpdir,
                                prefix='ds1-',
                                timestamp='2018-07-19',
                                **spatial)
    assert ds.time is not None

    ds2, _ = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                              tmpdir,
                              prefix='ds2-',
                              timestamp='2018-07-19',
                              **spatial)
    assert ds2.time is not None
    assert ds.time == ds2.time

    sources = Datacube.group_datasets([ds], 'time')
    sources2 = Datacube.group_datasets([ds, ds2], group_by)

    mm = ['aa']
    mm = [ds.type.measurements[k] for k in mm]

    ds_data = Datacube.load_data(sources, gbox, mm)
    assert ds_data.aa.nodata == nodata
    np.testing.assert_array_equal(aa, ds_data.aa.values[0])

    custom_fuser_call_count = 0

    def custom_fuser(dest, delta):
        nonlocal custom_fuser_call_count
        custom_fuser_call_count += 1
        dest[:] += delta

    progress_call_data = []

    def progress_cbk(n, nt):
        progress_call_data.append((n, nt))

    ds_data = Datacube.load_data(sources2, gbox, mm, fuse_func=custom_fuser,
                                 progress_cbk=progress_cbk)
    assert ds_data.aa.nodata == nodata
    assert custom_fuser_call_count > 0
    np.testing.assert_array_equal(nodata + aa + aa, ds_data.aa.values[0])

    assert progress_call_data == [(1, 2), (2, 2)]
Example #14
def check_open_with_api(driver_manager, time_slices):
    from datacube import Datacube
    dc = Datacube(driver_manager=driver_manager)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(200, 200, Affine(25, 0.0, 638000, 0.0, -25, 6276000), geometry.CRS('EPSG:28355'))
    observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values(), driver_manager=driver_manager)
    assert data.blue.shape == (time_slices, 200, 200)
Example #15
def post_processing(predicted):
    """
    Filter prediction results with post-processing filters.

    Simplified from production code to skip
    segmentation, probability, and mode calcs.
    """

    dc = Datacube(app='whatever')

    predict = predicted.Predictions

    #--Post process masking---------------------------------------------------------------
    #print("  masking with AEZ,WDPA,WOfS,slope & elevation")

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file('data/Sahel.geojson')
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    predict = predict.where(mask, 0)

    # mask with WDPA
    #     url_wdpa="s3://deafrica-input-datasets/protected_areas/WDPA_southern.tif"
    #     wdpa=rio_slurp_xarray(url_wdpa, gbox=predicted.geobox)
    #     wdpa = wdpa.astype(bool)
    #     predict = predict.where(~wdpa, 0)

    #mask with WOFS
    wofs = dc.load(product='wofs_ls_summary_annual',
                   like=predicted.geobox,
                   time=('2019'))
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    #mask steep slopes
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=predicted.geobox)
    slope = slope > 50
    predict = predict.where(~slope, 0)

    #mask where the elevation is above 3600m
    elevation = dc.load(product='dem_srtm', like=predicted.geobox)
    elevation = elevation.elevation > 3600  # threshold
    predict = predict.where(~elevation.squeeze(), 0)

    #set dtype
    predict = predict.astype(np.int8)

    return predict
Example #16
async def get_products(lon1: float, lat1: float, lon2: float, lat2: float, date1: str = None, date2: str = None, limit: int = 10, days: int = 7):
    lon1, lon2 = min(lon1, lon2), max(lon1, lon2)
    lat1, lat2 = min(lat1, lat2), max(lat1, lat2)
    if not date1 or not date2:
        dates = None
    else:
        dates = (date1, date2)
    dc = Datacube(config=config.DATACUBE_CONF)
    product = dc.list_products(with_pandas=False)[0]
    print(product)
    # materialise the search generator so it can be printed and then iterated
    datasets = list(dc.index.datasets.search(product=product["name"]))
    print(datasets)
    for dataset in datasets:
        print(dataset)
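The bounding box and date range computed above are never applied to the search; a hedged sketch of how they could be used inside get_products via datacube's spatial and temporal query terms:

    # sketch only: apply the computed bounds and optional date range to the query
    query = dict(product=product["name"], x=(lon1, lon2), y=(lat1, lat2))
    if dates:
        query["time"] = dates
    datasets = dc.find_datasets(**query)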
Example #17
def test_query_dataset_multi_product(index: Index, ls5_dataset_w_children: Dataset):
    # We have one ls5 level1 and its child nbar
    dc = Datacube(index)

    # Can we query a single product name?
    datasets = dc.find_datasets(product='ls5_nbar_scene')
    assert len(datasets) == 1

    # Can we query multiple products?
    datasets = dc.find_datasets(product=['ls5_nbar_scene', 'ls5_level1_scene'])
    assert len(datasets) == 2

    # Can we query multiple products in a tuple
    datasets = dc.find_datasets(product=('ls5_nbar_scene', 'ls5_level1_scene'))
    assert len(datasets) == 2
Example #18
def check_open_with_api(index):
    from datacube import Datacube
    dc = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000),
                    CRS('EPSG:3577'))
    observations = dc.find_datasets(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
Example #19
def post_processing(predicted: xr.Dataset) -> xr.DataArray:
    """
    Filter prediction results with post-processing filters.

    :param predicted: The prediction results
    """

    dc = Datacube(app='whatever')

    # grab predictions for post-process filtering
    predict = predicted.Predictions

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file('data/Western.geojson')
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    predict = predict.where(mask, 0)

    # mask with WDPA
    url_wdpa = "s3://deafrica-input-datasets/protected_areas/WDPA_western.tif"
    wdpa = rio_slurp_xarray(url_wdpa, gbox=predicted.geobox)
    wdpa = wdpa.astype(bool)
    predict = predict.where(~wdpa, 0)

    # mask with WOfS
    wofs = dc.load(product='ga_ls8c_wofs_2_summary', like=predicted.geobox)
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=predicted.geobox)
    slope = slope > 35
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600 m
    elevation = dc.load(product='dem_srtm', like=predicted.geobox)
    elevation = elevation.elevation > 3600  # threshold
    predict = predict.where(~elevation.squeeze(), 0)

    # set dtype
    predict = predict.astype(np.int8)

    return predict
Example #20
def cli(limit, update_if_exists, bbox, product, add_product, workers):
    """
    Index the Copernicus DEM automatically.
    """
    if product not in PRODUCTS.keys():
        raise ValueError(
            f"Unknown product {product}, must be one of {' '.join(PRODUCTS.keys())}"
        )

    dc = Datacube()

    if add_product:
        add_cop_dem_product(dc, product)

    print(f"Indexing Copernicus DEM for {product} with bounding box of {bbox}")

    added, failed = cop_dem_to_dc(dc,
                                  product,
                                  bbox,
                                  limit,
                                  update_if_exists,
                                  n_workers=workers)

    print(f"Added {added} Datasets, failed {failed} Datasets")

    if failed > 0:
        sys.exit(failed)
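The click wiring for this command is not shown; a hedged sketch of decorators that might sit above the function (option names and defaults are assumptions, not the tool's actual interface):

import click

@click.command("cop-dem-to-dc")
@click.option("--limit", default=None, type=int, help="Stop after indexing this many datasets")
@click.option("--update-if-exists", is_flag=True, default=False)
@click.option("--bbox", default="-180,-90,180,90", help="Bounding box: left,bottom,right,top")
@click.option("--product", default="cop_30")
@click.option("--add-product", is_flag=True, default=False, help="Add the product definition first")
@click.option("--workers", default=20, type=int)
def cli(limit, update_if_exists, bbox, product, add_product, workers):
    ...  # body as defined above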
Example #21
    def compute_confidence_filtered(self):
        """
        Return the wofs filtered summary band data that is 10% filtered by confidence band.
        """

        con_layer = self.compute_confidence()
        env = self.cfg.get_env_of_product('wofs_summary')

        with Datacube(app='wofs_summary', env=env) as dc:
            gwf = GridWorkflow(dc.index, self.grid_spec)
            indexed_tile = gwf.list_cells(self.tile_index,
                                          product='wofs_summary')
            # load the data of the tile
            dataset = gwf.load(tile=indexed_tile[self.tile_index],
                               measurements=['frequency'])
            data = dataset.data_vars['frequency'].data.ravel().reshape(
                self.grid_spec.tile_resolution)

        con_filtering = self.cfg.cfg.get('confidence_filtering')
        threshold = None
        if con_filtering:
            threshold = con_filtering.get('threshold')

        if threshold:
            data[con_layer <= threshold] = DEFAULT_FLOAT_NODATA
        else:
            data[con_layer <= 0.10] = DEFAULT_FLOAT_NODATA

        return data
Example #22
    def load_tile_data(self, factors):
        """
        Load and return factor data for confidence band prediction.
        :param factors: List of factor info as given by Config
        """

        model_data = []
        for fac in factors:
            factor = self.cfg.get_factor_info(fac)
            with Datacube(app='confidence_layer', env=factor['env']) as dc:
                gwf = GridWorkflow(dc.index, self.grid_spec)
                indexed_tiles = gwf.list_cells(self.tile_index,
                                               product=factor['product'])
                # load the data of the tile
                dataset = gwf.load(tile=indexed_tiles[self.tile_index],
                                   measurements=[factor['band']])
                data = dataset.data_vars[factor['band']].data

            # Rescale where needed: keep an eye on this, since the scaling factors
            # used during training may differ from those stored in the datacube.
            if factor['name'].startswith('phat'):
                data = data * 100.0
                data[data < 0.0] = 0.0
            if factor['name'].startswith('mrvbf'):
                data[data > 10] = 10
            if factor['name'].startswith('modis'):
                data[data > 100] = 100
            model_data.append(data.ravel())
            del data
        return np.column_stack(model_data)
Example #23
def get_mapped_crses(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)

            # SQLAlchemy queries require "column == None", not "column is None" due to operator overloading:
            # pylint: disable=singleton-comparison
            res = (
                alchemy_engine(index)
                .execute(
                    select(
                        [
                            literal(product.name).label("product"),
                            get_dataset_srid_alchemy_expression(
                                product.metadata_type
                            ).label("crs"),
                        ]
                    )
                    .where(DATASET.c.dataset_type_ref == product.id)
                    .where(DATASET.c.archived == None)
                    .limit(1)
                )
                .fetchone()
            )
            if res:
                yield dict(res)
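A hypothetical invocation (the product name is a placeholder):

# Print the stored CRS of one sample (non-archived) dataset per product.
for row in get_mapped_crses("ls8_nbar_scene"):
    print(row["product"], row["crs"])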
Example #24
def run_one(config_file, input_dataset, environment=None):
    """
    Run with CONFIG_FILE on a single INPUT_DATASET

    INPUT_DATASET may be either a URL or a Dataset ID
    """
    alchemist = Alchemist(config_file=config_file, dc_env=environment)

    dc = Datacube(env=environment)
    try:
        ds = dc.index.datasets.get(input_dataset)
    except ValueError as e:
        _LOG.info("Couldn't find dataset with ID={} with exception {} trying by URL".format(
            input_dataset, e
        ))
        # Couldn't find a dataset by ID, try something
        if '://' in input_dataset:
            # Smells like a url
            input_url = input_dataset
        else:
            # Treat the input as a local file path
            input_url = Path(input_dataset).as_uri()

        ds = dc.index.datasets.get_datasets_for_location(input_url)

    # Currently this doesn't work by URL... TODO: fixme!
    task = alchemist.generate_task(ds)
    execute_task(task)
Example #25
    def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
        geopolygon = datasets.geopolygon
        selected = list(datasets.bag)

        # geobox
        merged = merge_search_terms(self, group_settings)

        try:
            geobox = output_geobox(datasets=selected,
                                   grid_spec=datasets.product_definitions[self._product].grid_spec,
                                   geopolygon=geopolygon, **select_keys(merged, self._GEOBOX_KEYS))
            load_natively = False

        except ValueError:
            # we are not calculating geoboxes here for the moment
            # since it may require filesystem access
            # in ODC 2.0 the dataset should know the information required
            geobox = None
            load_natively = True

        # group by time
        group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                                 geobox,
                                 load_natively,
                                 datasets.product_definitions,
                                 geopolygon=None if not load_natively else geopolygon)
Example #26
def cli(skip_lineage, fail_on_missing_lineage, verify_lineage, uri, product):
    """ Iterate through files in an S3 bucket and add them to datacube"""

    # Get a generator from the supplied S3 URI for metadata definitions
    fetcher = S3Fetcher()

    # TODO: Share Fetcher
    s3_obj_stream = s3_find_glob(uri, False)

    # Extract URLs from the output of the iterator before passing to the Fetcher
    s3_url_stream = (o.url for o in s3_obj_stream)

    # TODO: Capture S3 URLs in batches and perform bulk_location_has

    # Consume the generator and fetch the YAMLs
    dc = Datacube()
    added, failed = dump_to_odc(
        fetcher(s3_url_stream),
        dc,
        product,
        skip_lineage=skip_lineage,
        fail_on_missing_lineage=fail_on_missing_lineage,
        verify_lineage=verify_lineage,
    )

    print(f"Added {added} Datasets, Failed {failed} Datasets")
Example #27
def test_grouping_datasets():
    def group_func(d):
        return d.time

    dimension = 'time'
    units = None
    datasets = [
        SimpleNamespace(time=datetime.datetime(2016, 1, 1),
                        value='foo',
                        id=UUID(int=10)),
        SimpleNamespace(time=datetime.datetime(2016, 2, 1),
                        value='bar',
                        id=UUID(int=1)),
        SimpleNamespace(time=datetime.datetime(2016, 1, 1),
                        value='flim',
                        id=UUID(int=9)),
    ]

    group_by = GroupBy(dimension, group_func, units, sort_key=group_func)
    grouped = Datacube.group_datasets(datasets, group_by)
    dss = grouped.isel(time=0).values[()]
    assert isinstance(dss, tuple)
    assert len(dss) == 2
    assert [ds.value for ds in dss] == ['flim', 'foo']

    dss = grouped.isel(time=1).values[()]
    assert isinstance(dss, tuple)
    assert len(dss) == 1
    assert [ds.value for ds in dss] == ['bar']

    assert str(grouped.time.dtype) == 'datetime64[ns]'
    assert grouped.loc['2016-01-01':'2016-01-15']
Example #28
def test_wofs_filtered():
    cfg = Config('../configs/template_client.yaml')
    grid_spec = GridSpec(crs=CRS('EPSG:3577'),
                         tile_size=(100000, 100000),
                         resolution=(-25, 25))
    cell_index = (17, -39)
    wf = WofsFiltered(cfg, grid_spec, cell_index)
    confidence = wf.compute_confidence(cell_index)
    filtered = wf.compute_confidence_filtered()

    # Display images: to be removed later
    with Datacube(app='wofs_summary', env='dev') as dc:
        gwf = GridWorkflow(dc.index, grid_spec)
        indexed_tile = gwf.list_cells(cell_index,
                                      product='wofs_statistical_summary')
        # load the data of the tile
        dataset = gwf.load(tile=indexed_tile[cell_index],
                           measurements=['frequency'])
        frequency = dataset.data_vars['frequency'].data.ravel().reshape(
            grid_spec.tile_resolution)

    # Check with previous run
    with rasterio.open('confidenceFilteredWOfS_17_-39_epsilon=10.tiff') as f:
        data = f.read(1)
    plt.subplot(221)
    plt.imshow(frequency)
    plt.subplot(222)
    plt.imshow(data)
    plt.subplot(223)
    plt.imshow(confidence)
    plt.subplot(224)
    plt.imshow(filtered)
    plt.show()
    wf.compute_and_write()
Example #29
def mk_sample_xr_dataset(crs="EPSG:3578",
                         shape=(33, 74),
                         resolution=None,
                         xy=(0, 0),
                         time='2020-02-13T11:12:13.1234567Z',
                         name='band',
                         dtype='int16',
                         nodata=-999,
                         units='1'):
    """ Note that resolution is in Y,X order to match that of GeoBox.

        shape (height, width)
        resolution (y: float, x: float) - in YX, to match GeoBox/shape notation

        xy (x: float, y: float) -- location of the top-left corner of the top-left pixel in CRS units
    """

    if isinstance(crs, str):
        crs = CRS(crs)

    if resolution is None:
        resolution = (-10, 10) if crs is None or crs.projected else (-0.01, 0.01)

    t_coords = {}
    if time is not None:
        t_coords['time'] = mk_time_coord([time])

    transform = Affine.translation(*xy)*Affine.scale(*resolution[::-1])
    h, w = shape
    geobox = GeoBox(w, h, transform, crs)

    return Datacube.create_storage(t_coords, geobox, [Measurement(name=name, dtype=dtype, nodata=nodata, units=units)])
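A quick usage sketch (shape and nodata values are arbitrary):

xx = mk_sample_xr_dataset(crs="EPSG:3577", shape=(100, 200), nodata=-999)
print(xx.band.shape)    # (1, 100, 200): time, y, x
print(xx.band.nodata)   # -999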
Example #30
def get_dataset_values(product_name, product_config, time_range=None):
    """
    Extract the file list corresponding to a product for the given year and month using datacube API.
    """
    try:
        query = {**dict(product=product_name), **time_range}
    except TypeError:
        # Time range is None
        query = {**dict(product=product_name)}

    dc = Datacube(app='cog-worklist query')

    field_names = get_field_names(product_config)

    LOG.info(
        f"Perform a datacube dataset search returning only the specified fields, {field_names}."
    )
    ds_records = dc.index.datasets.search_returning(
        field_names=tuple(field_names), **query)

    search_results = False
    for ds_rec in ds_records:
        search_results = True
        yield check_prefix_from_query_result(ds_rec, product_config)

    if not search_results:
        LOG.warning(
            f"Datacube product query is empty for {product_name} product with time-range, {time_range}"
        )
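A hypothetical call; the product name and time range are placeholders, and product_config stands for the (unshown) product configuration object expected by get_field_names:

for result in get_dataset_values("ls8_nbar_scene",
                                 product_config,
                                 time_range={"time": ("2020-01-01", "2020-02-01")}):
    print(result)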
Example #31
def main():
    config_yaml = """
    sources:
      - product: ls8_nbar_albers
        measurements: [red, green, blue]
        group_by: solar_day

    date_ranges:
        start_date: 2014-06-01
        end_date: 2014-07-01

    storage:
        # this driver enables in-memory computation
        driver: xarray

        crs: EPSG:3577
        tile_size:
            x: 40000.0
            y: 40000.0
        resolution:
            x: 25
            y: -25
        chunking:
            x: 200
            y: 200
            time: 1
        dimension_order: [time, y, x]

    computation:
        chunking:
            x: 800
            y: 800

    input_region:
          tile: [15, -41]

    output_products:
        - name: nbar_mean
          statistic: simple
          statistic_args:
               reduction_function: mean
    """

    # or manually creating a config dictionary works too
    config = yaml.safe_load(config_yaml)

    print(yaml.dump(config, indent=4))

    dc = Datacube()
    app = StatsApp(config, dc.index)

    print('generating tasks')
    tasks = app.generate_tasks()

    print('running tasks')
    for task in tasks:
        # this method is only available for the xarray output driver
        output = app.execute_task(task)
        print('result for {}'.format(task.tile_index))
        print(output.result['nbar_mean'])
Example #32
#print "number of projects: ", len(projectList)
#for sfproject in projectList:
	#print "--"
	#print "name:    ", sfproject.getProjectName()
	#print "id:      ", sfproject.getProjectId()
	#pdl = sfproject.getProjectDependencyList()
	#for listItem in pdl:
		#print "deps:    ", listItem.getTimePeriodStart(), "-", listItem.getTimePeriodEnd(), ": ", listItem.getDependencies()
		#print "url:     ", listItem.getUrl()
		#print "version: ", listItem.getVersion()

#print "duration %1.8f seconds" % (testEnde1 - testStart1)

testStart2 = time.clock()
# create datacube
dataCube = Datacube()
# start with an empty list of data levels
dataLevelList = []
for sfproject in projectList:
	pdl = sfproject.getProjectDependencyList()
	for listItem in pdl:
		timePeriodStart = listItem.getTimePeriodStart()
		timePeriodEnd = listItem.getTimePeriodEnd()
		
		# verify data level existence
		# if not available -- create the corresponding data level entry
		currentTime = timePeriodStart
		while currentTime <= timePeriodEnd:
			dataLevelList.append(currentTime)
			currentTime = currentTime + datetime.timedelta(days=1)