Example #1
    def __init__(self, name: str = "ODCIndexer"):
        """ Sets up the indexer """
        self.dc: Datacube = Datacube(app=name)
        self.session: Session = Session(
            aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
            aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
            region_name='eu-central-1')
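
A minimal usage sketch for the constructor above; it assumes the surrounding ODCIndexer class with its imports (os, datacube.Datacube and, presumably, boto3's Session) and uses placeholder credential values:

# Hedged usage sketch; the credential values are placeholders, not real keys.
import os

os.environ.setdefault("AWS_ACCESS_KEY_ID", "example-access-key-id")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "example-secret-key")

indexer = ODCIndexer(name="my-indexer")
print(indexer.session.region_name)  # 'eu-central-1', as hard-coded above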
Example #2
def cli(
    skip_lineage: bool,
    fail_on_missing_lineage: bool,
    verify_lineage: bool,
    uri: str,
    product: str,
):
    skips = [".*NBAR.*", ".*SUPPLEMENTARY.*", ".*NBART.*", ".*/QA/.*"]
    select = [".*ARD-METADATA.yaml"]
    candidate_products = product.split()
    print(f"Crawling {uri} on Thredds")
    print(f"Matching to {candidate_products}")
    yaml_urls = thredds_find_glob(uri, skips, select)
    print(f"Found {len(yaml_urls)} datasets")

    yaml_contents = download_yamls(yaml_urls)

    # Consume generator and fetch YAMLs
    dc = Datacube()
    added, failed = dump_list_to_odc(
        yaml_contents,
        dc,
        candidate_products,
        skip_lineage=skip_lineage,
        fail_on_missing_lineage=fail_on_missing_lineage,
        verify_lineage=verify_lineage,
    )

    print(f"Added {added} Datasets, Failed {failed} Datasets")
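
The cli() above is presumably wired up with Click decorators in the original module (they are not shown in this snippet); a hedged sketch of what that wiring might look like, with command, option and argument names mirroring the function parameters rather than taken from the source:

import click

# Assumed Click wiring; the names used here are illustrative only.
@click.command("thredds-to-dc")
@click.option("--skip-lineage", is_flag=True, default=False)
@click.option("--fail-on-missing-lineage", is_flag=True, default=False)
@click.option("--verify-lineage", is_flag=True, default=False)
@click.argument("uri")
@click.argument("product")
def thredds_cli(skip_lineage, fail_on_missing_lineage, verify_lineage, uri, product):
    cli(skip_lineage, fail_on_missing_lineage, verify_lineage, uri, product)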
Example #3
def worker():
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)

    host = os.getenv("REDIS_SERVICE_HOST", "redis-master")
    q = rediswq.RedisWQ(name="jobProduct", host=host)

    logging.info("Worker with sessionID %s.", q.sessionID())
    logging.info("Initial queue state empty=%s.", q.empty())

    host = os.getenv("DASK_SCHEDULER_HOST",
                     "dask-scheduler.dask.svc.cluster.local")
    dask_client = Client(f"{host}:8786")

    dc = Datacube()

    s3_client = S3Client()

    lease_secs = int(os.getenv("JOB_LEASE_PERIOD", "3600"))

    while not q.empty():
        item = q.lease(lease_secs=lease_secs, block=True, timeout=600)
        if item is not None:
            itemstr = item.decode("utf-8")
            logging.info("Working on %s.", itemstr)
            process_job(dc, dask_client, s3_client, itemstr, lease_secs)
            q.complete(item)
        else:
            logging.info("Waiting for work.")

    logging.info("Queue empty, exiting.")
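
The worker above depends on a process_job() helper defined elsewhere; a purely illustrative stub showing the call contract it assumes:

import logging

# Hypothetical stub; the real process_job() implementation is not part of this snippet.
def process_job(dc, dask_client, s3_client, itemstr, lease_secs):
    # itemstr is the decoded queue payload; lease_secs bounds how long the
    # Redis work item stays leased while this job runs.
    logging.info("Processing %s (lease of %s seconds).", itemstr, lease_secs)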
Example #4
    def __call__(self, index, product, time, group_by) -> Tile:
        # Handle a specific polygon whose boundary is known
        output_crs = CRS(self.storage['crs'])
        filtered_items = [
            'geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y'
        ]
        filtered_dict = {
            k: v
            for k, v in self.input_region.items() if k in filtered_items
        }
        if self.feature is not None:
            filtered_dict['geopolygon'] = self.feature.geopolygon
            geopoly = filtered_dict['geopolygon']
        else:
            geopoly = query_geopolygon(**self.input_region)

        dc = Datacube(index=index)
        datasets = dc.find_datasets(product=product,
                                    time=time,
                                    group_by=group_by,
                                    **filtered_dict)
        group_by = query_group_by(group_by=group_by)
        sources = dc.group_datasets(datasets, group_by)
        output_resolution = [
            self.storage['resolution'][dim] for dim in output_crs.dimensions
        ]
        geopoly = geopoly.to_crs(output_crs)
        geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

        return Tile(sources, geobox)
Example #5
def cli(limit, update_if_exists, bbox, product, add_product, workers):
    """
    Index the Copernicus DEM automatically.
    """
    if product not in PRODUCTS.keys():
        raise ValueError(
            f"Unknown product {product}, must be one of {' '.join(PRODUCTS.keys())}"
        )

    dc = Datacube()

    if add_product:
        add_cop_dem_product(dc, product)

    print(f"Indexing Copernicus DEM for {product} with bounding box of {bbox}")

    added, failed = cop_dem_to_dc(dc,
                                  product,
                                  bbox,
                                  limit,
                                  update_if_exists,
                                  n_workers=workers)

    print(f"Added {added} Datasets, failed {failed} Datasets")

    if failed > 0:
        sys.exit(failed)
Example #6
def expected_bands(product_name):
    dc = Datacube(app='cog-worklist query')
    prod = dc.index.products.get_by_name(product_name)
    available_measurements = set(prod.measurements.keys())
    # TODO: Implement black and white listing
    # Actually, maybe delete references to that since I don't believe it's used
    return available_measurements
Example #7
def check_data_with_api(index, time_slices):
    """Chek retrieved data for specific values.

    We scale down by 100 and check for predefined values in the
    corners.
    """
    from datacube import Datacube
    dc = Datacube(index=index)

    # Make the retrieved data 100x less granular
    shape_x = int(GEOTIFF['shape']['x'] / 100.0)
    shape_y = int(GEOTIFF['shape']['y'] / 100.0)
    pixel_x = int(GEOTIFF['pixel_size']['x'] * 100)
    pixel_y = int(GEOTIFF['pixel_size']['y'] * 100)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(
        shape_x + 1, shape_y + 1,
        Affine(pixel_x, 0.0, GEOTIFF['ul']['x'], 0.0, pixel_y,
               GEOTIFF['ul']['y']), geometry.CRS(GEOTIFF['crs']))
    observations = dc.find_datasets(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert hashlib.md5(
        data.green.data).hexdigest() == '7f5ace486e88d33edf3512e8de6b6996'
    assert hashlib.md5(
        data.blue.data).hexdigest() == 'b58204f1e10dd678b292df188c242c7e'
    for time_slice in range(time_slices):
        assert data.blue.values[time_slice][-1, -1] == -999
Example #8
def cli(
    skip_lineage: bool,
    fail_on_missing_lineage: bool,
    verify_lineage: bool,
    account_url: str,
    container_name: str,
    credential: str,
    product_names: List[str],
    prefix: str,
    suffix: str,
):
    print(f"Opening AZ Container {container_name} on {account_url}")
    print(
        f"Searching on prefix '{prefix}' for files matching suffix '{suffix}'")
    yaml_urls = find_blobs(account_url, container_name, credential, prefix,
                           suffix)

    print(f"Found {len(yaml_urls)} datasets")
    yaml_contents = download_yamls(yaml_urls)

    print(f"Matching to {product_names} products")
    # Consume generator and fetch YAMLs
    dc = Datacube()
    added, failed = dump_list_to_odc(
        account_url,
        container_name,
        yaml_contents,
        dc,
        product_names,
        skip_lineage=skip_lineage,
        fail_on_missing_lineage=fail_on_missing_lineage,
        verify_lineage=verify_lineage)

    print(f"Added {added} Datasets, Failed to add {failed} Datasets")
Example #9
def run_one(config_file, input_dataset, environment=None):
    """
    Run with CONFIG_FILE on a single INPUT_DATASET

    INPUT_DATASET may be either a URL or a Dataset ID
    """
    alchemist = Alchemist(config_file=config_file, dc_env=environment)

    dc = Datacube(env=environment)
    try:
        ds = dc.index.datasets.get(input_dataset)
    except ValueError as e:
        _LOG.info("Couldn't find dataset with ID={} with exception {} trying by URL".format(
            input_dataset, e
        ))
        # Couldn't find a dataset by ID, try something
        if '://' in input_dataset:
            # Smells like a url
            input_url = input_dataset
        else:
            # Treat the input as a local file path
            input_url = Path(input_dataset).as_uri()

        ds = dc.index.datasets.get_datasets_for_location(input_url)

    # Currently this doesn't work by URL... TODO: fixme!
    task = alchemist.generate_task(ds)
    execute_task(task)
Example #10
def get_mapped_crses(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)

            # SQLAlchemy queries require "column == None", not "column is None" due to operator overloading:
            # pylint: disable=singleton-comparison
            res = (
                alchemy_engine(index)
                .execute(
                    select(
                        [
                            literal(product.name).label("product"),
                            get_dataset_srid_alchemy_expression(
                                product.metadata_type
                            ).label("crs"),
                        ]
                    )
                    .where(DATASET.c.dataset_type_ref == product.id)
                    .where(DATASET.c.archived == None)
                    .limit(1)
                )
                .fetchone()
            )
            if res:
                yield dict(res)
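
A hedged usage sketch for get_mapped_crses(); it assumes a reachable Datacube index, and the product name is a placeholder:

# Illustrative call only; 'ls8_nbar_albers' stands in for a real indexed product.
for row in get_mapped_crses("ls8_nbar_albers"):
    print(row["product"], row["crs"])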
Example #11
def test_wofs_filtered():
    cfg = Config('../configs/template_client.yaml')
    grid_spec = GridSpec(crs=CRS('EPSG:3577'),
                         tile_size=(100000, 100000),
                         resolution=(-25, 25))
    cell_index = (17, -39)
    wf = WofsFiltered(cfg, grid_spec, cell_index)
    confidence = wf.compute_confidence(cell_index)
    filtered = wf.compute_confidence_filtered()

    # Display images: to be removed later
    with Datacube(app='wofs_summary', env='dev') as dc:
        gwf = GridWorkflow(dc.index, grid_spec)
        indexed_tile = gwf.list_cells(cell_index,
                                      product='wofs_statistical_summary')
        # load the data of the tile
        dataset = gwf.load(tile=indexed_tile[cell_index],
                           measurements=['frequency'])
        frequency = dataset.data_vars['frequency'].data.ravel().reshape(
            grid_spec.tile_resolution)

    # Check with previous run
    with rasterio.open('confidenceFilteredWOfS_17_-39_epsilon=10.tiff') as f:
        data = f.read(1)
    plt.subplot(221)
    plt.imshow(frequency)
    plt.subplot(222)
    plt.imshow(data)
    plt.subplot(223)
    plt.imshow(confidence)
    plt.subplot(224)
    plt.imshow(filtered)
    plt.show()
    wf.compute_and_write()
Example #12
def test_init_null(null_config):
    from datacube.drivers.indexes import index_cache
    idxs = index_cache()
    assert "default" in idxs._drivers
    assert "null" in idxs._drivers
    with Datacube(config=null_config, validate_connection=True) as dc:
        assert (dc.index.url) == "null"
Example #13
    def load_tile_data(self, factors):
        """
        Load and return factor data for confidence band prediction.
        :param factors: List of factor info as given by Config
        """

        model_data = []
        for fac in factors:
            factor = self.cfg.get_factor_info(fac)
            with Datacube(app='confidence_layer', env=factor['env']) as dc:
                gwf = GridWorkflow(dc.index, self.grid_spec)
                indexed_tiles = gwf.list_cells(self.tile_index,
                                               product=factor['product'])
                # load the data of the tile
                dataset = gwf.load(tile=indexed_tiles[self.tile_index],
                                   measurements=[factor['band']])
                data = dataset.data_vars[factor['band']].data

            # Rescale where needed: keep an eye on this, since the scaling factors used
            # during training differ from what is stored in the datacube.
            if factor['name'].startswith('phat'):
                data = data * 100.0
                data[data < 0.0] = 0.0
            if factor['name'].startswith('mrvbf'):
                data[data > 10] = 10
            if factor['name'].startswith('modis'):
                data[data > 100] = 100
            model_data.append(data.ravel())
            del data
        return np.column_stack(model_data)
Example #14
    def compute_confidence_filtered(self):
        """
        Return the wofs filtered summary band data that is 10% filtered by confidence band.
        """

        con_layer = self.compute_confidence()
        env = self.cfg.get_env_of_product('wofs_summary')

        with Datacube(app='wofs_summary', env=env) as dc:
            gwf = GridWorkflow(dc.index, self.grid_spec)
            indexed_tile = gwf.list_cells(self.tile_index,
                                          product='wofs_summary')
            # load the data of the tile
            dataset = gwf.load(tile=indexed_tile[self.tile_index],
                               measurements=['frequency'])
            data = dataset.data_vars['frequency'].data.ravel().reshape(
                self.grid_spec.tile_resolution)

        con_filtering = self.cfg.cfg.get('confidence_filtering')
        threshold = None
        if con_filtering:
            threshold = con_filtering.get('threshold')

        if threshold:
            data[con_layer <= threshold] = DEFAULT_FLOAT_NODATA
        else:
            data[con_layer <= 0.10] = DEFAULT_FLOAT_NODATA

        return data
Example #15
def calculate_index_task(params):
    item = params.get('item')
    index = params.get('index', 'rgb')
    dc = Datacube(config="datacube.conf")
    product = "ls8_level1_usgs"
    x = (item["bbox"][0], item["bbox"][2])
    y = (item["bbox"][1], item["bbox"][3])
    time = item["properties"]["datetime"].split("T")[0]
    measurements = ["band_2", "band_3", "band_4"]

    query = {
        'x': x,
        'y': y,
        'time': time,
        'measurements': ['nbart_red', 'nbart_green', 'nbart_blue'],
        'output_crs': 'EPSG:4326',
        'resolution': (-0.001, 0.001),
    }

    ds = dc.load(product=product, **query)
    print(ds)
    rgb_da = ds.to_array()
    suffix = 'rgb'
    filename = f'{item["id"]}_{suffix}.tif'
    path = config.STATIC_DIR / filename
    write_cog(geo_im=rgb_da, fname=str(path), overwrite=True)
    return {"success": True, "url": str(path)}
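
calculate_index_task() reads a STAC-like item dict; a sketch of the expected input shape, with invented values:

# Illustrative params only; the id, bbox and datetime below are made up.
params = {
    "item": {
        "id": "LC08_L1TP_090084_20200101",
        "bbox": [148.5, -35.5, 149.5, -34.5],  # [min_x, min_y, max_x, max_y]
        "properties": {"datetime": "2020-01-01T23:55:00Z"},
    },
    "index": "rgb",
}
result = calculate_index_task(params)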
Example #16
def xadataset_from_odcdataset(datasets: Union[List[ODCDataset],
                                              ODCDataset] = None,
                              ids: Union[List[UUID], UUID] = None,
                              measurements: List[str] = None) -> xa.Dataset:
    """ Loads a xaDataset from ODCDatasets or ODCDataset ids
     :param datasets: ODCDataset(s), optional
     :param ids: ODCDataset id(s), optional
     :param measurements: list of measurements/bands to load, optional
     :return: xa.Dataset containing given ODCDatasets or IDs """

    dc = Datacube(app="dataset_from_ODCDataset")

    if not datasets:
        if not isinstance(ids, list):
            ids = [ids]
        datasets = [dc.index.datasets.get(id_) for id_ in ids]

    if not isinstance(datasets, list):
        datasets = [datasets]

    product_name = datasets[0].metadata_doc["product"]["name"]
    crs = datasets[0].crs
    res = (10, -10)  # TODO: handle other resolutions

    ds = dc.load(product=product_name,
                 dask_chunks={},
                 measurements=measurements,
                 output_crs=str(crs),
                 resolution=res,
                 datasets=datasets)
    return ds
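
A minimal usage sketch for xadataset_from_odcdataset(); the dataset UUID and band names are placeholders and assume an indexed 10 m product:

from uuid import UUID

# Placeholder id and measurements; substitute values from your own index.
ds = xadataset_from_odcdataset(
    ids=UUID("00000000-0000-0000-0000-000000000000"),
    measurements=["B04", "B03", "B02"],
)
print(ds)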
Example #17
def main():
    config_yaml = """
    sources:
      - product: ls8_nbar_albers
        measurements: [red, green, blue]
        group_by: solar_day

    date_ranges:
        start_date: 2014-06-01
        end_date: 2014-07-01

    storage:
        # this driver enables in-memory computation
        driver: xarray

        crs: EPSG:3577
        tile_size:
            x: 40000.0
            y: 40000.0
        resolution:
            x: 25
            y: -25
        chunking:
            x: 200
            y: 200
            time: 1
        dimension_order: [time, y, x]

    computation:
        chunking:
            x: 800
            y: 800

    input_region:
          tile: [15, -41]

    output_products:
        - name: nbar_mean
          statistic: simple
          statistic_args:
               reduction_function: mean
    """

    # or manually creating a config dictionary works too
    config = yaml.safe_load(config_yaml)

    print(yaml.dump(config, indent=4))

    dc = Datacube()
    app = StatsApp(config, dc.index)

    print('generating tasks')
    tasks = app.generate_tasks()

    print('running tasks')
    for task in tasks:
        # this method is only available for the xarray output driver
        output = app.execute_task(task)
        print('result for {}'.format(task.tile_index))
        print(output.result['nbar_mean'])
Example #18
def parse_path(path, parse_only, folders, styles, input_file, output_file):
    try:
        raw_cfg = read_config(path)
        cfg = OWSConfig(refresh=True, cfg=raw_cfg)
        if not parse_only:
            with Datacube() as dc:
                cfg.make_ready(dc)
    except ConfigException as e:
        print("Config exception for path", str(e))
        return False
    print("Configuration parsed OK")
    if folders:
        print()
        print("Folder/Layer Hierarchy")
        print("======================")
        print_layers(cfg.layers, styles, depth=0)
        print()
    elif styles:
        print()
        print("Layers and Styles")
        print("=================")
        for lyr in cfg.product_index.values():
            print(lyr.name, f"[{','.join(lyr.product_names)}]")
            print_styles(lyr)
        print()
    if input_file or output_file:
        layers_report(cfg.product_index, input_file, output_file)
    return True
Example #19
def get_dataset_values(product_name, product_config, time_range=None):
    """
    Extract the file list corresponding to a product for the given year and month using datacube API.
    """
    try:
        query = {**dict(product=product_name), **time_range}
    except TypeError:
        # Time range is None
        query = {**dict(product=product_name)}

    dc = Datacube(app='cog-worklist query')

    field_names = get_field_names(product_config)

    LOG.info(
        f"Perform a datacube dataset search returning only the specified fields, {field_names}."
    )
    ds_records = dc.index.datasets.search_returning(
        field_names=tuple(field_names), **query)

    search_results = False
    for ds_rec in ds_records:
        search_results = True
        yield check_prefix_from_query_result(ds_rec, product_config)

    if not search_results:
        LOG.warning(
            f"Datacube product query is empty for {product_name} product with time-range, {time_range}"
        )
Example #20
def cli(skip_lineage, fail_on_missing_lineage, verify_lineage, uri, product):
    """ Iterate through files in an S3 bucket and add them to datacube"""

    # Get a generator from supplied S3 Uri for metadata definitions
    fetcher = S3Fetcher()

    # TODO: Share Fetcher
    s3_obj_stream = s3_find_glob(uri, False)

    # Extract URLs from output of iterator before passing to Fetcher
    s3_url_stream = (o.url for o in s3_obj_stream)

    # TODO: Capture S3 URLs in batches and perform bulk_location_has

    # Consume generator and fetch YAMLs
    dc = Datacube()
    added, failed = dump_to_odc(
        fetcher(s3_url_stream),
        dc,
        product,
        skip_lineage=skip_lineage,
        fail_on_missing_lineage=fail_on_missing_lineage,
        verify_lineage=verify_lineage,
    )

    print(f"Added {added} Datasets, Failed {failed} Datasets")
Example #21
def test_multiple_environment_config(tmpdir):
    config_path = tmpdir.join('second.conf')

    config_path.write("""
[DEFAULT]
db_username: test_user
index_driver: default

[default]
db_hostname: db.opendatacube.test

[test_alt]
db_hostname: alt-db.opendatacube.test
    """)

    config_path = str(config_path)

    config = LocalConfig.find([config_path])
    assert config['db_hostname'] == 'db.opendatacube.test'
    alt_config = LocalConfig.find([config_path], env='test_alt')
    assert alt_config['db_hostname'] == 'alt-db.opendatacube.test'

    # Make sure the correct config is passed through the API
    # Parsed config:
    db_url = 'postgresql://{user}@db.opendatacube.test:5432/datacube'.format(
        user=config['db_username'])
    alt_db_url = 'postgresql://{user}@alt-db.opendatacube.test:5432/datacube'.format(
        user=config['db_username'])

    with Datacube(config=config, validate_connection=False) as dc:
        assert str(dc.index.url) == db_url

    # When none specified, default environment is loaded
    with Datacube(config=str(config_path), validate_connection=False) as dc:
        assert str(dc.index.url) == db_url
    # When specific environment is loaded
    with Datacube(config=config_path,
                  env='test_alt',
                  validate_connection=False) as dc:
        assert str(dc.index.url) == alt_db_url

    # An environment that isn't in any config files
    with pytest.raises(ValueError):
        with Datacube(config=config_path,
                      env='undefined-env',
                      validate_connection=False) as dc:
            pass
Example #22
def main(products, output_file, start_date, end_date, time_divs):
    """ Entry point. """
    datacube = Datacube(app='find-those-gaps')

    summary = find_gaps(datacube, products, time_query(start_date, end_date),
                        time_divs)

    yaml.dump(summary, output_file, default_flow_style=False)
Example #23
def test_null_dataset_resource(null_config):
    with Datacube(config=null_config, validate_connection=True) as dc:
        assert dc.index.datasets.get(test_uuid) is None
        assert dc.index.datasets.bulk_get([test_uuid, "foo"]) == []
        assert dc.index.datasets.get_derived(test_uuid) == []
        assert not dc.index.datasets.has(test_uuid)
        assert dc.index.datasets.bulk_has([test_uuid, "foo"]) == [False, False]
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.add(MagicMock())
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.can_update(MagicMock())
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.update(MagicMock())
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.archive([test_uuid, "foo"])
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.restore([test_uuid, "foo"])
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.purge([test_uuid, "foo"])

        assert dc.index.datasets.get_all_dataset_ids(True) == []
        assert dc.index.datasets.get_field_names() == []
        assert dc.index.datasets.get_locations(test_uuid) == []
        assert dc.index.datasets.get_archived_locations(test_uuid) == []
        assert dc.index.datasets.get_archived_location_times(test_uuid) == []
        assert dc.index.datasets.get_datasets_for_location(
            "http://a.uri/test") == []

        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.add_location(test_uuid, "http://a.uri/test")
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.remove_location(test_uuid, "http://a.uri/test")
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.archive_location(test_uuid, "http://a.uri/test")
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.restore_location(test_uuid, "http://a.uri/test")
        with pytest.raises(NotImplementedError) as e:
            dc.index.datasets.get_product_time_bounds("product1")

        assert dc.index.datasets.search_product_duplicates(MagicMock()) == []
        assert dc.index.datasets.search_by_metadata({}) == []
        assert dc.index.datasets.search(foo="bar", baz=12) == []
        assert dc.index.datasets.search_by_product(foo="bar", baz=12) == []
        assert dc.index.datasets.search_returning(["foo", "bar"],
                                                  foo="bar",
                                                  baz=12) == []
        assert dc.index.datasets.count(foo="bar", baz=12) == 0
        assert dc.index.datasets.count_by_product(foo="bar", baz=12) == []
        assert dc.index.datasets.count_by_product_through_time("1 month",
                                                               foo="bar",
                                                               baz=12) == []
        assert dc.index.datasets.count_product_through_time("1 month",
                                                            foo="bar",
                                                            baz=12) == []
        assert dc.index.datasets.search_summaries(foo="bar", baz=12) == []
        assert dc.index.datasets.search_eager(foo="bar", baz=12) == []
        assert dc.index.datasets.search_returning_datasets_light(
            ("foo", "baz"), foo="bar", baz=12) == []
Example #24
def test_null_user_resource(null_config):
    with Datacube(config=null_config, validate_connection=True) as dc:
        assert dc.index.users.list_users() == []
        with pytest.raises(NotImplementedError) as e:
            dc.index.users.create_user("user1", "password2", "role1")
        with pytest.raises(NotImplementedError) as e:
            dc.index.users.delete_user("user1", "user2")
        with pytest.raises(NotImplementedError) as e:
            dc.index.users.grant_role("role1", "user1", "user2")
Example #25
def collect_uris(prod_index, products, expressions):
    """
    Collect all URIs of datasets from products
    matching search expressions.
    """
    dc = Datacube(index=prod_index)
    for prod in products:
        for dataset in dc.find_datasets_lazy(product=prod, **expressions):
            yield normalize_uri(dataset.local_uri)
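
A hedged usage sketch for collect_uris(); the product name and time range are illustrative and an open Datacube index is assumed:

# Illustrative only; product and query values are placeholders.
dc = Datacube(app="uri-collector")
uris = collect_uris(dc.index, ["ls8_nbar_albers"],
                    {"time": ("2020-01-01", "2020-02-01")})
for uri in uris:
    print(uri)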
Example #26
def test_multiple_environment_config(tmpdir):
    config_path = tmpdir.join('second.conf')

    config_path.write("""
[user]
default_environment: test_default

[test_default]
db_hostname: db.opendatacube.test

[test_alt]
db_hostname: alt-db.opendatacube.test
    """)

    config_path = str(config_path)

    config = LocalConfig.find([config_path])
    assert config.db_hostname == 'db.opendatacube.test'
    alt_config = LocalConfig.find([config_path], env='test_alt')
    assert alt_config.db_hostname == 'alt-db.opendatacube.test'

    # Lazily connect: they shouldn't try to connect during this test as we're not using the API
    args = dict(validate_connection=False)

    # Make sure the correct config is passed through the API
    # Parsed config:
    db_url = 'postgresql://{user}@db.opendatacube.test:5432/datacube'.format(user=config.db_username)
    alt_db_url = 'postgresql://{user}@alt-db.opendatacube.test:5432/datacube'.format(user=config.db_username)

    with Datacube(config=config, **args) as dc:
        assert str(dc.index.url) == db_url

    # When none specified, default environment is loaded
    with Datacube(config=str(config_path), **args) as dc:
        assert str(dc.index.url) == db_url
    # When specific environment is loaded
    with Datacube(config=config_path, env='test_alt', **args) as dc:
        assert str(dc.index.url) == alt_db_url

    # An environment that isn't in any config files
    with pytest.raises(ValueError):
        with Datacube(config=config_path, env='undefined-env', **args) as dc:
            pass
Example #27
    def __get_mask_datasets(self) -> List[ODCDataset]:
        """ Finds mask datasets based on config """
        dc = Datacube(app="mosaic_creator")
        time_range = (str(self.__start_date), str(self.__end_date))
        datasets = dc.find_datasets(product=self.__product_name, time=time_range)
        if not datasets:
            LOGGER.warning("No mask datasets found for "
                           f"product={self.__product_name}, time={time_range}")
            raise ValueError("No datasets found")  # TODO: custom exception
        return datasets
Example #28
def get_sample_dataset(*product_names: str,
                       index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)
            res = (alchemy_engine(index).execute(
                _select_dataset_extent_query(product).limit(1)).fetchone())
            if res:
                yield dict(res)
Example #29
    def setUp(self):
        self.datacube = Datacube(config=DATACUBE_CONFIG)

        IndexerTestCase().test_product_generation()
        product = self.datacube.index.products.get_by_name('ls8_test')
        if product is None:
            self.skipTest('No product available to index')
        datasets = self.datacube.find_datasets(product='ls8_test')
        if datasets:
            self.skipTest('Indexed datasets already exist in database')
Example #30
    def _get_factor_datasets(self):
        dts = []
        for fac in self.confidence_model.factors:
            factor = self.cfg.get_factor_info(fac)
            with Datacube(app='confidence_layer', env=factor['env']) as dc:
                gwf = GridWorkflow(dc.index, self.grid_spec)
                obs = gwf.cell_observations(cell_index=self.tile_index,
                                            product=factor['product'])
                for ds in obs[self.tile_index]['datasets']:
                    dts.append(ds)
        return dts