コード例 #1
0
ファイル: _datasets.py プロジェクト: ceos-seo/Data_Cube_v2
    def add(self, type_):
        """
        Add a Product

        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        stored = self._db.get_dataset_type_by_name(type_.name)
        if not stored:
            self._db.add_dataset_type(
                name=type_.name,
                metadata=type_.metadata_doc,
                metadata_type_id=type_.metadata_type.id,
                definition=type_.definition,
            )
        else:
            # TODO: Support for adding/updating match rules?
            # Same collection passed again: verify it matches the stored doc.
            check_doc_unchanged(
                stored.definition,
                jsonify_document(type_.definition),
                'Dataset type {}'.format(type_.name),
            )
        return self.get_by_name(type_.name)
コード例 #2
0
ファイル: _dscache.py プロジェクト: danlipsa/odc-tools
    def mk_product(doc, name):
        """Build a DatasetType from *doc*, resolving its metadata type via ``mm``."""
        mt_name = doc.get("metadata_type")
        if mt_name is None:
            raise ValueError("Missing metadata_type key in product definition")

        mt = mm.get(mt_name)
        if mt is None:
            raise ValueError("No such metadata %s for product %s" % (mt_name, name))

        return DatasetType(mt, doc)
コード例 #3
0
 def _make(self, query_row):
     """
     Build a product model object from a database row.

     :rtype datacube.model.DatasetType
     """
     metadata = self.metadata_type_resource.get(query_row['metadata_type_ref'])
     return DatasetType(
         definition=query_row['definition'],
         metadata_type=metadata,
         id_=query_row['id'],
     )
コード例 #4
0
def mk_sample_product(name,
                      description='Sample',
                      measurements=('red', 'green', 'blue'),
                      with_grid_spec=False,
                      metadata_type=None,
                      storage=None,
                      load=None):
    """
    Build a sample product definition for tests.

    :param measurements: iterable of str | dict | (name, dtype, nodata)
    :param with_grid_spec: when True and no storage given, use a default Albers grid
    :param metadata_type: defaults to a sample 'eo' metadata type
    """
    if storage is None and with_grid_spec is True:
        storage = {'crs': 'EPSG:3577',
                   'resolution': {'x': 25, 'y': -25},
                   'tile_size': {'x': 100000.0, 'y': 100000.0}}

    if metadata_type is None:
        metadata_type = mk_sample_eo('eo')

    defaults = dict(dtype='int16',
                    nodata=-999,
                    units='1',
                    aliases=[])

    def normalise(m):
        # Accept a bare band name, a (name, dtype, nodata) triple, or a full dict.
        if isinstance(m, str):
            return dict(name=m, **defaults)
        if isinstance(m, tuple):
            band, dtype, nodata = m
            return dict(defaults, name=band, dtype=dtype, nodata=nodata)
        if isinstance(m, dict):
            return dict(defaults, **m)
        raise ValueError('Only support str|dict|(name, dtype, nodata)')

    definition = dict(
        name=name,
        description=description,
        metadata_type=metadata_type.name,
        metadata={},
        measurements=[normalise(m) for m in measurements]
    )

    # Optional sections are only emitted when supplied.
    for key, value in (('storage', storage), ('load', load)):
        if value is not None:
            definition[key] = value

    return DatasetType(metadata_type, definition)
コード例 #5
0
    def add(self, type_):
        """
        Add a Product

        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        existing = self._db.get_dataset_type_by_name(type_.name)
        if not existing:
            self._db.add_dataset_type(
                name=type_.name,
                metadata=type_.metadata_doc,
                metadata_type_id=type_.metadata_type.id,
                definition=type_.definition,
            )
        else:
            # TODO: Support for adding/updating match rules?
            # Re-adding an existing product: check the definition is unchanged.
            check_doc_unchanged(
                existing.definition,
                jsonify_document(type_.definition),
                'Dataset type {}'.format(type_.name),
            )
        return self.get_by_name(type_.name)
コード例 #6
0
def _get_product(index, definition, dry_run):
    """
    Get the database record corresponding to the given product definition
    """
    metadata_type = index.metadata_types.get_by_name(definition['metadata_type'])
    product = DatasetType(metadata_type, definition)

    if dry_run:
        return product

    _LOG.info('Add product definition to the database.')
    return index.products.add(product)  # idempotent operation
コード例 #7
0
def test_fuse_products(wo_definition, fc_definition):
    """fuse_products accepts compatible products and rejects conflicting ones."""
    mt_by_name = {d["name"]: metadata_from_doc(d)
                  for d in default_metadata_type_docs()}
    eo3 = mt_by_name["eo3"]

    wo_product = DatasetType(eo3, wo_definition)
    fc_product = DatasetType(eo3, fc_definition)
    fuse_products(wo_product, fc_product)

    # A mismatched file format must be rejected.
    broken = deepcopy(wo_definition)
    broken["metadata"]["properties"]["odc:file_format"] = "bad"
    with pytest.raises(AssertionError):
        fuse_products(DatasetType(eo3, broken), fc_product)

    # Overlapping measurement names must be rejected.
    broken = deepcopy(wo_definition)
    broken["measurements"].append(fc_definition["measurements"][1])
    with pytest.raises(AssertionError):
        fuse_products(DatasetType(eo3, broken), fc_product)
コード例 #8
0
ファイル: _datasets.py プロジェクト: terranis/agdc-v2
    def can_update(self, product, allow_unsafe_updates=False):
        """
        Check if product can be updated. Return bool,safe_changes,unsafe_changes

        (An unsafe change is anything that may potentially make the product
        incompatible with existing datasets of that type)

        :param datacube.model.DatasetType product: Product to update
        :param bool allow_unsafe_updates: Allow unsafe changes. Use with caution.
        :rtype: bool,list[change],list[change]
        """
        DatasetType.validate(product.definition)

        existing = self.get_by_name(product.name)
        if not existing:
            raise ValueError(
                'Unknown product %s, cannot update – did you intend to add it?'
                % product.name)

        # You can safely make the match rules looser but not tighter.
        # Tightening them could exclude datasets already matched to the product.
        # (which would make search results wrong)
        updates_allowed = {
            ('description',): changes.allow_any,
            ('metadata_type',): changes.allow_any,
            ('metadata',): changes.allow_truncation,
        }

        doc_changes = get_doc_changes(
            existing.definition, jsonify_document(product.definition))
        safe, unsafe = changes.classify_changes(doc_changes, updates_allowed)

        return allow_unsafe_updates or not unsafe, safe, unsafe
コード例 #9
0
ファイル: ingest.py プロジェクト: terranis/agdc-v2
def morph_dataset_type(source_type, config):
    """Derive the ingested (NetCDF) output product from *source_type* per *config*."""
    output_type = DatasetType(source_type.metadata_type,
                              deepcopy(source_type.definition))

    defn = output_type.definition
    defn['name'] = config['output_type']
    defn['managed'] = True
    defn['description'] = config['description']
    defn['storage'] = config['storage']
    output_type.metadata_doc['format'] = {'name': 'NetCDF'}

    def with_spec(measurement, spec):
        # Spec values override the source measurement's name/nodata/dtype.
        for key in ('name', 'nodata', 'dtype'):
            measurement[key] = spec.get(key, measurement[key])
        return measurement

    defn['measurements'] = [with_spec(output_type.measurements[s['src_varname']], s)
                            for s in config['measurements']]
    return output_type
コード例 #10
0
ファイル: ingest.py プロジェクト: Max-AR/datacube-core
def morph_dataset_type(source_type, config, index, storage_format):
    """
    Build the ingestion output product from *source_type* per the ingest *config*.

    Resolves an alternative metadata type from *index* when the config names one,
    and records *storage_format* as the output file format.
    """
    out_metadata_type = source_type.metadata_type
    if 'metadata_type' in config:
        out_metadata_type = index.metadata_types.get_by_name(config['metadata_type'])

    output_type = DatasetType(out_metadata_type, deepcopy(source_type.definition))

    defn = output_type.definition
    defn['name'] = config['output_type']
    defn['managed'] = True
    defn['description'] = config['description']
    # Keep only the storage keys that describe the output grid.
    defn['storage'] = {k: v for (k, v) in config['storage'].items()
                       if k in ('crs', 'tile_size', 'resolution', 'origin')}

    output_type.metadata_doc['format'] = {'name': storage_format}

    if 'metadata_type' in config:
        defn['metadata_type'] = config['metadata_type']

    def morph_measurement(src_measurements, spec):
        # The spec names its source variable explicitly, or falls back to 'name'.
        src_varname = spec.get('src_varname', spec.get('name', None))
        assert src_varname is not None

        measurement = src_measurements.get(src_varname, None)
        if measurement is None:
            raise ValueError(
                "No such variable in the source product: {}".format(
                    src_varname))

        # Spec values override the source measurement's name/nodata/dtype.
        for k in ('name', 'nodata', 'dtype'):
            measurement[k] = spec.get(k, measurement[k])
        return Measurement(**measurement)

    defn['measurements'] = [morph_measurement(output_type.measurements, spec)
                            for spec in config['measurements']]
    return output_type
コード例 #11
0
def _ensure_products(app_config: dict, index: Index, dry_run=False) -> Tuple[DatasetType, DatasetType]:
    """Resolve the source product and build (and optionally index) the output product."""
    source_product_name = app_config['source_product']
    source_product = index.products.get_by_name(source_product_name)
    if not source_product:
        raise ValueError(f"Source Product {source_product_name} does not exist")

    output_definition = _create_output_definition(app_config, source_product)
    output_product = DatasetType(source_product.metadata_type, output_definition)

    if not dry_run:
        _LOG.info('Built product %s. Adding to index.', output_product.name)
        output_product = index.products.add(output_product)

    return source_product, output_product
コード例 #12
0
ファイル: _datasets.py プロジェクト: terranis/agdc-v2
    def add(self, type_, allow_table_lock=False):
        """
        Add a Product.

        :param allow_table_lock:
            Allow an exclusive lock to be taken on the table while creating the indexes.
            This will halt other user's requests until completed.

            If false, creation will be slightly slower and cannot be done in a transaction.
        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        existing = self.get_by_name(type_.name)
        if existing:
            # Re-adding an existing product: the definition must be unchanged.
            # The label used to read 'Metadata Type {}' which was misleading —
            # this compares product definitions, not metadata types.
            check_doc_unchanged(existing.definition,
                                jsonify_document(type_.definition),
                                'Dataset type {}'.format(type_.name))
        else:
            metadata_type = self.metadata_type_resource.get_by_name(
                type_.metadata_type.name)
            if metadata_type is None:
                # Implicitly create the metadata type if it isn't indexed yet.
                _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.',
                             type_.metadata_type.name)
                metadata_type = self.metadata_type_resource.add(
                    type_.metadata_type, allow_table_lock=allow_table_lock)
            with self._db.connect() as connection:
                connection.add_dataset_type(
                    name=type_.name,
                    metadata=type_.metadata_doc,
                    metadata_type_id=metadata_type.id,
                    search_fields=metadata_type.dataset_fields,
                    definition=type_.definition,
                    concurrently=not allow_table_lock,
                )
        return self.get_by_name(type_.name)
コード例 #13
0
ファイル: _datasets.py プロジェクト: ceos-seo/Data_Cube_v2
    def from_doc(self, definition):
        """
        Create a Product from its definitions

        :param dict definition: product definition document
        :rtype: datacube.model.DatasetType
        """
        # This column duplication is getting out of hand:
        DatasetType.validate(definition)

        mt = definition['metadata_type']

        # 'metadata_type' is either a name referencing an indexed metadata
        # type, or an embedded metadata-type document.
        if isinstance(mt, compat.string_types):
            metadata_type = self.metadata_type_resource.get_by_name(mt)
        else:
            # Embedded document: add it if needed.
            metadata_type = self.metadata_type_resource.add(mt, allow_table_lock=False)

        if not metadata_type:
            raise InvalidDocException('Unknown metadata type: %r' % definition['metadata_type'])

        return DatasetType(metadata_type, definition)
コード例 #14
0
ファイル: ingest.py プロジェクト: loicdtx/datacube-core
def morph_dataset_type(source_type, config, driver_manager):
    """
    Build the ingestion output product from *source_type* and the ingest *config*.

    :param source_type: source DatasetType to derive from
    :param config: ingest configuration document
    :param driver_manager: supplies the index (for metadata types) and the output format
    :return: the new (not yet indexed) DatasetType
    """
    output_metadata_type = source_type.metadata_type
    if 'metadata_type' in config:
        output_metadata_type = driver_manager.index.metadata_types.get_by_name(config['metadata_type'])

    output_type = DatasetType(output_metadata_type, deepcopy(source_type.definition))
    output_type.definition['name'] = config['output_type']
    output_type.definition['managed'] = True
    output_type.definition['description'] = config['description']
    # Keep only the grid-related storage keys. (A previous unfiltered
    # `= config['storage']` assignment here was dead — it was immediately
    # overwritten by this filtered one — and has been removed.)
    output_type.definition['storage'] = {k: v for (k, v) in config['storage'].items()
                                         if k in ('crs', 'driver', 'tile_size', 'resolution', 'origin')}
    output_type.metadata_doc['format'] = {'name': driver_manager.driver.format}

    if 'metadata_type' in config:
        output_type.definition['metadata_type'] = config['metadata_type']

    def merge_measurement(measurement, spec):
        # The ingest spec may override the measurement's name/nodata/dtype.
        measurement.update({k: spec.get(k, measurement[k]) for k in ('name', 'nodata', 'dtype')})
        return measurement

    output_type.definition['measurements'] = [merge_measurement(output_type.measurements[spec['src_varname']], spec)
                                              for spec in config['measurements']]
    return output_type
コード例 #15
0
def mk_sample_product(name,
                      description='Sample',
                      measurements=('red', 'green', 'blue')):
    """
    Build a sample DatasetType with a minimal 'eo' metadata type, for tests.

    :param name: product name
    :param description: product description
    :param measurements: iterable of str | dict | (name, dtype, nodata)
    """
    # NOTE: the default was previously a list — a mutable default argument is
    # shared across calls; a tuple is safe and iterates identically.
    eo_type = MetadataType(
        {
            'name':
            'eo',
            'description':
            'Sample',
            'dataset':
            dict(
                id=['id'],
                label=['ga_label'],
                creation_time=['creation_dt'],
                measurements=['image', 'bands'],
                sources=['lineage', 'source_datasets'],
                format=['format', 'name'],
            )
        },
        dataset_search_fields={})

    common = dict(dtype='int16', nodata=-999, units='1', aliases=[])

    def mk_measurement(m):
        # Normalise the three accepted measurement shorthands to a full dict.
        if isinstance(m, str):
            return dict(name=m, **common)
        if isinstance(m, tuple):
            name, dtype, nodata = m
            m = common.copy()
            m.update(name=name, dtype=dtype, nodata=nodata)
            return m
        if isinstance(m, dict):
            m_merged = common.copy()
            m_merged.update(m)
            return m_merged

        # Previously `assert False and '...'`: the message was never shown
        # (`False and s` is just False) and asserts vanish under `python -O`.
        raise ValueError('Only support str|dict|(name, dtype, nodata)')

    measurements = [mk_measurement(m) for m in measurements]

    return DatasetType(
        eo_type,
        dict(name=name,
             description=description,
             metadata_type='eo',
             metadata={},
             measurements=measurements))
コード例 #16
0
ファイル: ingest.py プロジェクト: prasunkgupta/datacube-iirs
def morph_dataset_type(source_type, config):
    """Derive the NetCDF ingestion output product from *source_type* per *config*."""
    output_type = DatasetType(source_type.metadata_type, deepcopy(source_type.definition))

    for key, value in (('name', config['output_type']),
                       ('managed', True),
                       ('description', config['description']),
                       ('storage', config['storage'])):
        output_type.definition[key] = value
    output_type.metadata['format'] = {'name': 'NetCDF'}

    def merge_measurement(measurement, spec):
        # Spec values win over the source measurement's name/nodata/dtype.
        for k in ('name', 'nodata', 'dtype'):
            measurement[k] = spec.get(k, measurement[k])
        return measurement

    output_type.definition['measurements'] = [
        merge_measurement(output_type.measurements[spec['src_varname']], spec)
        for spec in config['measurements']]
    return output_type
コード例 #17
0
def _ensure_products(app_config: dict, index: Index, dry_run: bool,
                     input_source) -> DatasetType:
    """Look up the source product and build (and optionally index) the output product."""
    source_product_name = input_source
    source_product = index.products.get_by_name(source_product_name)
    if not source_product:
        raise ValueError(
            f"Source product {source_product_name} does not exist")

    output_definition = _create_output_definition(app_config, source_product)
    output_product = DatasetType(source_product.metadata_type, output_definition)

    if dry_run:
        return output_product

    _LOG.info('Add the output product definition for %s in the database.',
              output_product.name)
    return index.products.add(output_product)
コード例 #18
0
ファイル: utils.py プロジェクト: RichardScottOZ/odc-tools
def fuse_products(type_1: DatasetType, type_2: DatasetType) -> DatasetType:
    """
    Fuses two products. This function requires access to a Datacube to access the metadata type.

    Fusing two products requires that:
      - both metadata types are eo3
      - there are no conflicting band names
      - the file formats are identical
    """
    def_1, def_2 = type_1.definition, type_2.definition

    # Both products must share the eo3 metadata type.
    # (Kept as asserts: callers deliberately catch AssertionError.)
    assert def_1["metadata_type"] == def_2["metadata_type"]
    assert def_1["metadata_type"] == "eo3"

    # Band names must not overlap.
    names_1 = {m["name"] for m in def_1['measurements']}
    names_2 = {m["name"] for m in def_2['measurements']}
    assert len(names_1.intersection(names_2)) == 0

    # File formats must agree.
    file_format = def_1["metadata"]["properties"]["odc:file_format"]
    assert file_format == def_2["metadata"]["properties"]["odc:file_format"]

    name = f"fused__{def_1['name']}__{def_2['name']}"

    fused_def = {
        "name": name,
        "description": f"Fused products: {def_1['description']}, {def_2['description']}",
        "metadata": {
            "product": {
                "name": name
            },
            "properties": {
                "odc:file_format": file_format
            }
        },
        "measurements": def_1["measurements"] + def_2["measurements"],
        "metadata_type": def_1["metadata_type"],
    }

    return DatasetType(type_1.metadata_type, fused_def)
コード例 #19
0
            measurements=['image', 'bands'],
            sources=['lineage', 'source_datasets'],
            format=['format', 'name'],
        )
    },
    dataset_search_fields={})

# Minimal single-band Landsat-5 NBAR product used by the tests below.
_EXAMPLE_DATASET_TYPE = DatasetType(_EXAMPLE_METADATA_TYPE, {
    'name': 'ls5_nbar_scene',
    'description': "Landsat 5 NBAR 25 metre",
    'metadata_type': 'eo',
    'metadata': {},
    'measurements': [{
        'name': 'green',
        'dtype': 'int16',
        'nodata': -999,
        'units': '1',
        'aliases': ['band_2', '2'],
    }],
})


def test_multiband_support_in_datasetsource(example_gdal_path):
    defn = {
        "id": '12345678123456781234567812345678',
        "format": {
            "name": "GeoTiff"
コード例 #20
0
def test_fuse_dss(wo_definition, fc_definition):
    """
    fuse_ds combines a WO and an FC dataset of the same scene into one fused
    dataset, and rejects datasets whose key metadata disagrees.
    """

    standard_metadata_types = {
        d["name"]: metadata_from_doc(d)
        for d in default_metadata_type_docs()
    }
    eo3 = standard_metadata_types["eo3"]

    wo_product = DatasetType(eo3, wo_definition)
    fc_product = DatasetType(eo3, fc_definition)
    fused_product = fuse_products(wo_product, fc_product)

    # eo3 dataset document for the water-observations (WO) scene.
    wo_metadata = {
        'id': 'e9fb6737-b93d-5cd9-bfe6-7e634abc9905',
        'crs': 'epsg:32655',
        'grids': {
            'default': {
                'shape': [7211, 8311],
                'transform':
                [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0]
            }
        },
        'label': 'ga_ls_wo_3_091086_2020-04-04_final',
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'lineage': {
            'source_datasets': {}
        },
        'product': {
            'name': 'ga_ls_wo_3'
        },
        'properties': {
            'title':
            'ga_ls_wo_3_091086_2020-04-04_final',
            'eo:gsd':
            30.0,
            'created':
            '2021-03-09T23:22:42.130266Z',
            'datetime':
            '2020-04-04T23:33:10.644420Z',
            'proj:epsg':
            32655,
            'proj:shape': [7211, 8311],
            'eo:platform':
            'landsat-7',
            'odc:product':
            'ga_ls_wo_3',
            'odc:producer':
            'ga.gov.au',
            'eo:instrument':
            'ETM',
            'eo:cloud_cover':
            44.870310145260326,
            'eo:sun_azimuth':
            49.20198554,
            'proj:transform':
            [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0],
            'landsat:wrs_row':
            86,
            'odc:file_format':
            'GeoTIFF',
            'odc:region_code':
            '091086',
            'dtr:end_datetime':
            '2020-04-04T23:33:24.461679Z',
            'eo:sun_elevation':
            32.7056476,
            'landsat:wrs_path':
            91,
            'dtr:start_datetime':
            '2020-04-04T23:32:56.662365Z',
            'odc:product_family':
            'wo',
            'odc:dataset_version':
            '1.6.0',
            'dea:dataset_maturity':
            'final',
            'odc:collection_number':
            3,
            'odc:naming_conventions':
            'dea_c3',
            'odc:processing_datetime':
            '2020-04-04T23:33:10.644420Z',
            'landsat:landsat_scene_id':
            'LE70910862020095ASA00',
            'landsat:collection_number':
            1,
            'landsat:landsat_product_id':
            'LE07_L1TP_091086_20200404_20200501_01_T1',
            'landsat:collection_category':
            'T1'
        },
        'measurements': {
            'water': {
                'path': 'ga_ls_wo_3_091086_2020-04-04_final_water.tif'
            }
        }
    }

    # eo3 dataset document for the fractional-cover (FC) scene — same
    # acquisition (crs/grid/datetime) as the WO document above.
    fc_metadata = {
        'id': '41980746-4f17-5e0c-86a0-92cca8d3c99d',
        'crs': 'epsg:32655',
        'grids': {
            'default': {
                'shape': [7211, 8311],
                'transform':
                [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0]
            }
        },
        'label': 'ga_ls_fc_3_091086_2020-04-04_final',
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'product': {
            'name': 'ga_ls_fc_3'
        },
        'properties': {
            'title':
            'ga_ls_fc_3_091086_2020-04-04_final',
            'eo:gsd':
            30.0,
            'created':
            '2021-03-10T04:14:49.645196Z',
            'datetime':
            '2020-04-04T23:33:10.644420Z',
            'proj:epsg':
            32655,
            'proj:shape': [7211, 8311],
            'eo:platform':
            'landsat-7',
            'odc:product':
            'ga_ls_fc_3',
            'odc:producer':
            'ga.gov.au',
            'eo:instrument':
            'ETM',
            'eo:cloud_cover':
            44.870310145260326,
            'eo:sun_azimuth':
            49.20198554,
            'proj:transform':
            [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0],
            'landsat:wrs_row':
            86,
            'odc:file_format':
            'GeoTIFF',
            'odc:region_code':
            '091086',
            'dtr:end_datetime':
            '2020-04-04T23:33:24.461679Z',
            'eo:sun_elevation':
            32.7056476,
            'landsat:wrs_path':
            91,
            'dtr:start_datetime':
            '2020-04-04T23:32:56.662365Z',
            'odc:product_family':
            'fc',
            'odc:dataset_version':
            '2.5.0',
            'dea:dataset_maturity':
            'final',
            'odc:collection_number':
            3,
            'odc:naming_conventions':
            'dea_c3',
            'odc:processing_datetime':
            '2020-04-04T23:33:10.644420Z',
            'landsat:landsat_scene_id':
            'LE70910862020095ASA00',
            'landsat:collection_number':
            1,
            'landsat:landsat_product_id':
            'LE07_L1TP_091086_20200404_20200501_01_T1',
            'landsat:collection_category':
            'T1'
        },
        'measurements': {
            'bs': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_bs.tif'
            },
            'pv': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_pv.tif'
            },
            'ue': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_ue.tif'
            },
            'npv': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_npv.tif'
            }
        }
    }

    # paths get made absolute here
    # TODO: force paths to stay relative
    uris = [
        "s3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/091/086/2020/04/04/ga_ls_wo_3_091086_2020-04-04_final.stac-item.json"
    ]
    wo_ds = Dataset(wo_product, prep_eo3(wo_metadata), uris=uris)
    uris = [
        "s3://dea-public-data/derivative/ga_ls_fc_3/2-5-0/091/086/2020/04/04/ga_ls_fc_3_091086_2020-04-04_final.stac-item.json"
    ]
    fc_ds = Dataset(fc_product, prep_eo3(fc_metadata), uris=uris)

    # Fusing (with or without an explicit fused product) must yield the union
    # of both datasets' measurement paths.
    fused_ds = fuse_ds(wo_ds, fc_ds, fused_product)
    assert _get_msr_paths(fused_ds) == _get_msr_paths(fc_ds).union(
        _get_msr_paths(wo_ds))
    fused_ds = fuse_ds(wo_ds, fc_ds)
    assert _get_msr_paths(fused_ds) == _get_msr_paths(fc_ds).union(
        _get_msr_paths(wo_ds))

    # Mismatched acquisition datetime must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata["properties"]["datetime"] = '2020-04-03T23:33:10.644420Z'
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)

    # Mismatched CRS must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata["crs"] = "epsg:32656"
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)

    # Mismatched grid shape must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata['grids']['default']['shape'] = [7212, 8311]
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)

    # Mismatched label must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata['label'] += 'a'
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)
コード例 #21
0
def eo3_dataset_s2(eo3_metadata):
    """
    Build a sample Sentinel-2B L2A Dataset (eo3) for tests.

    :param eo3_metadata: the eo3 MetadataType to attach to the product
    :return: a Dataset built from an inline product and dataset document
    """
    # eo3 dataset document: three native grids (10 m default, 20 m, 60 m).
    ds_doc = {
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'id': '8b0e2770-5d4e-5238-8995-4aa91691ab85',
        'product': {'name': 's2b_msil2a'},
        'label': 'S2B_MSIL2A_20200101T070219_N0213_R120_T39LVG_20200101T091825',

        'crs': 'epsg:32739',
        'grids': {'g20m': {'shape': [5490, 5490],
                           'transform': [20, 0, 399960, 0, -20, 8700040, 0, 0, 1]},
                  'g60m': {'shape': [1830, 1830],
                           'transform': [60, 0, 399960, 0, -60, 8700040, 0, 0, 1]},
                  'default': {'shape': [10980, 10980],
                              'transform': [10, 0, 399960, 0, -10, 8700040, 0, 0, 1]}},
        'geometry': {'type': 'Polygon',
                     'coordinates': [[[509759.0000000001, 8590241.0],
                                      [399960.99999999977, 8590241.0],
                                      [399960.99999999977, 8700039.0],
                                      [509758.99999999965, 8700039.0],
                                      [509759.0000000001, 8590241.0]]]},
        'properties': {'eo:gsd': 10,
                       'datetime': '2020-01-01T07:02:54.188Z',
                       'eo:platform': 'sentinel-2b',
                       'eo:instrument': 'msi',
                       'eo:cloud_cover': 0,
                       'odc:file_format': 'GeoTIFF',
                       'odc:region_code': '39LVG',
                       'odc:processing_datetime': '2020-01-01T07:02:54.188Z'},

        # Bands without a 'grid' key use the 10 m 'default' grid.
        'measurements': {'red': {'path': 'B04.tif'},
                         'scl': {'grid': 'g20m', 'path': 'SCL.tif'},
                         'blue': {'path': 'B02.tif'},
                         'green': {'path': 'B03.tif'},
                         'nir_1': {'path': 'B08.tif'},
                         'nir_2': {'grid': 'g20m', 'path': 'B8A.tif'},
                         'swir_1': {'grid': 'g20m', 'path': 'B11.tif'},
                         'swir_2': {'grid': 'g20m', 'path': 'B12.tif'},
                         'red_edge_1': {'grid': 'g20m', 'path': 'B05.tif'},
                         'red_edge_2': {'grid': 'g20m', 'path': 'B06.tif'},
                         'red_edge_3': {'grid': 'g20m', 'path': 'B07.tif'},
                         'water_vapour': {'grid': 'g60m', 'path': 'B09.tif'},
                         'coastal_aerosol': {'grid': 'g60m', 'path': 'B01.tif'}},
        'lineage': {}}
    # Matching product definition; 'scl' carries Sen2Cor scene-classification flags.
    product_doc = {
        'name': 's2b_msil2a',
        'description': 'Sentinel-2B Level 2 COGs',
        'metadata_type': 'eo3',
        'metadata': {'product': {'name': 's2b_msil2a'}},
        'measurements':
        [{'name': 'coastal_aerosol', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_01', 'B01']},
         {'name': 'blue', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_02', 'B02']},
         {'name': 'green', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_03', 'B03']},
         {'name': 'red', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_04', 'B04']},
         {'name': 'red_edge_1', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_05', 'B05']},
         {'name': 'red_edge_2', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_06', 'B06']},
         {'name': 'red_edge_3', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_07', 'B07']},
         {'name': 'nir_1', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_08', 'B08']},
         {'name': 'nir_2', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_8a', 'B8A']},
         {'name': 'water_vapour', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_09', 'B09']},
         {'name': 'swir_1', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_11', 'B11']},
         {'name': 'swir_2', 'dtype': 'uint16', 'units': '1', 'nodata': 0, 'aliases': ['band_12', 'B12']},
         {'name': 'scl', 'dtype': 'uint8', 'units': '1', 'nodata': 0, 'aliases': ['mask', 'qa'],
          'flags_definition': {'sca': {'description': 'Sen2Cor Scene Classification',
                                       'bits': [0, 1, 2, 3, 4, 5, 6, 7],
                                       'values': {
                                           '0': 'nodata',
                                           '1': 'defective',
                                           '2': 'dark',
                                           '3': 'shadow',
                                           '4': 'vegetation',
                                           '5': 'bare',
                                           '6': 'water',
                                           '7': 'unclassified',
                                           '8': 'cloud medium probability',
                                           '9': 'cloud high probability',
                                           '10': 'thin cirrus',
                                           '11': 'snow or ice'}}}}]
    }

    return Dataset(DatasetType(eo3_metadata, product_doc), prep_eo3(ds_doc))
コード例 #22
0
ファイル: __init__.py プロジェクト: waynedou/datacube-core
def mk_sample_product(name,
                      description='Sample',
                      measurements=('red', 'green', 'blue'),
                      with_grid_spec=False,
                      storage=None,
                      metadata_type=None,
                      load=None):
    """Construct a sample :class:`DatasetType` for use in tests.

    :param name: product name
    :param description: product description
    :param measurements: iterable of measurement specs, each either a
        str (band name), a ``(name, dtype, nodata)`` tuple, or a dict
        merged over the common defaults
    :param with_grid_spec: when True and ``storage`` is None, attach a
        default EPSG:3577 storage/grid-spec section
    :param storage: optional ``storage:`` section for the definition
    :param metadata_type: optional MetadataType; a minimal 'eo' one is
        built when not supplied
    :param load: optional ``load:`` hints section (crs/resolution/align);
        needed by tests exercising ``load_hints()``
    """
    if storage is None and with_grid_spec is True:
        storage = {
            'crs': 'EPSG:3577',
            'resolution': {'x': 25, 'y': -25},
            'tile_size': {'x': 100000.0, 'y': 100000.0},
        }

    if metadata_type is None:
        metadata_type = MetadataType(
            {
                'name': 'eo',
                'description': 'Sample',
                'dataset': dict(
                    id=['id'],
                    label=['ga_label'],
                    creation_time=['creation_dt'],
                    measurements=['image', 'bands'],
                    sources=['lineage', 'source_datasets'],
                    format=['format', 'name'],
                    grid_spatial=['grid_spatial', 'projection'],
                ),
            },
            dataset_search_fields={
                'time': parse_search_field({
                    'type': 'datetime-range',
                    'min_offset': [['time']],
                    'max_offset': [['time']],
                }),
            })

    common = dict(dtype='int16', nodata=-999, units='1', aliases=[])

    def mk_measurement(m):
        # Normalise the three accepted measurement spec forms to a dict.
        if isinstance(m, str):
            return dict(name=m, **common)
        elif isinstance(m, tuple):
            # Renamed from `name` to avoid shadowing the product name.
            band_name, dtype, nodata = m
            m = common.copy()
            m.update(name=band_name, dtype=dtype, nodata=nodata)
            return m
        elif isinstance(m, dict):
            m_merged = common.copy()
            m_merged.update(m)
            return m_merged
        else:
            raise ValueError('Only support str|dict|(name, dtype, nodata)')

    measurements = [mk_measurement(m) for m in measurements]

    definition = dict(name=name,
                      description=description,
                      metadata_type=metadata_type.name,
                      metadata={},
                      measurements=measurements)

    if storage is not None:
        definition['storage'] = storage

    if load is not None:
        definition['load'] = load

    return DatasetType(metadata_type, definition)
コード例 #23
0
def test_product_load_hints():
    """The load: section drives load_hints()/default_* and is schema-checked."""
    prod = mk_sample_product('test_product',
                             load=dict(crs='epsg:3857',
                                       resolution={'x': 10, 'y': -10}))

    assert 'load' in prod.definition
    assert DatasetType.validate(prod.definition) is None

    raw = prod._extract_load_hints()
    assert raw['crs'] == geometry.CRS('epsg:3857')
    assert raw['resolution'] == (-10, 10)
    assert 'align' not in raw

    prod = mk_sample_product('test_product',
                             load=dict(crs='epsg:3857',
                                       align={'x': 5, 'y': 6},
                                       resolution={'x': 10, 'y': -10}))

    lh = prod.load_hints()
    assert lh['output_crs'] == geometry.CRS('epsg:3857')
    assert lh['resolution'] == (-10, 10)
    assert lh['align'] == (6, 5)
    assert prod.default_crs == geometry.CRS('epsg:3857')
    assert prod.default_resolution == (-10, 10)
    assert prod.default_align == (6, 5)

    prod = mk_sample_product('test_product',
                             load=dict(crs='epsg:4326',
                                       align={'longitude': 0.5, 'latitude': 0.6},
                                       resolution={'longitude': 1.2, 'latitude': -1.1}))

    lh = prod.load_hints()
    assert lh['output_crs'] == geometry.CRS('epsg:4326')
    assert lh['resolution'] == (-1.1, 1.2)
    assert lh['align'] == (0.6, 0.5)

    # Hints are computed once and cached on the product object.
    assert prod.load_hints() is prod.load_hints()

    # Both crs and resolution are compulsory inside load:
    for required_key in ('resolution', 'crs'):
        doc = deepcopy(prod.definition)
        assert DatasetType.validate(doc) is None

        del doc['load'][required_key]
        assert required_key not in doc['load']

        with pytest.raises(InvalidDocException):
            DatasetType.validate(doc)

    # A fully defined GridSpec must not leak into load hints.
    prod = mk_sample_product('test', with_grid_spec=True)
    assert prod.grid_spec is not None
    assert prod.load_hints() == {}

    # A partially defined storage: section acts as a fallback ...
    prod = mk_sample_product('test', storage=dict(
        crs='EPSG:3857',
        resolution={'x': 10, 'y': -10}))
    assert prod.grid_spec is None
    assert prod.default_resolution == (-10, 10)
    assert prod.default_crs == geometry.CRS('EPSG:3857')

    # ... but contributes nothing without a resolution.
    prod = mk_sample_product('test', storage=dict(
        crs='EPSG:3857'))
    assert prod.grid_spec is None
    assert prod.load_hints() == {}

    # Misspelled keys ('longtude') cause the hints to be ignored wholesale.
    prod = mk_sample_product('test_product',
                             load=dict(crs='epsg:4326',
                                       resolution={'longtude': 1.2, 'latitude': -1.1}))
    assert prod.load_hints() == {}
コード例 #24
0
# Minimal 'eo' metadata type used by the example dataset builder below.
_EXAMPLE_METADATA_TYPE = MetadataType(
    {
        'name': 'eo',
        'dataset': {
            'id': ['id'],
            'label': ['ga_label'],
            'creation_time': ['creation_dt'],
            'measurements': ['image', 'bands'],
            'sources': ['lineage', 'source_datasets'],
        },
    },
    dataset_search_fields={})

# Bare-bones product definition built on the metadata type above.
_EXAMPLE_DATASET_TYPE = DatasetType(
    _EXAMPLE_METADATA_TYPE,
    {'name': 'eo', 'description': "", 'metadata_type': 'eo', 'metadata': {}})


def _build_dataset(doc):
    """Recursively turn an 'eo' document (with lineage) into a Dataset."""
    lineage = doc['lineage']['source_datasets']
    sources = {}
    for src_name, src_doc in lineage.items():
        sources[src_name] = _build_dataset(src_doc)
    return Dataset(_EXAMPLE_DATASET_TYPE, doc,
                   uris=['file://test.zzz'],
                   sources=sources)

コード例 #25
0
def test_accepts_valid_measurements(valid_dataset_type_measurement):
    """A single known-good measurement entry must pass schema validation."""
    doc = deepcopy(only_mandatory_fields)
    doc['measurements'] = [valid_dataset_type_measurement]
    DatasetType.validate(doc)  # raises on failure, so the call is the assertion
コード例 #26
0
def eo3_dataset_s2(eo3_metadata):
    """Build a sample Sentinel-2B L2A EO3 :class:`Dataset` fixture.

    The dataset document models the three native Sentinel-2 resolutions
    as EO3 grids (default=10m, g20m, g60m); the product document defines
    the matching 13-band measurement set.

    :param eo3_metadata: the 'eo3' MetadataType to attach to the product
    """
    ds_doc = {
        "$schema": "https://schemas.opendatacube.org/dataset",
        "id": "8b0e2770-5d4e-5238-8995-4aa91691ab85",
        "product": {"name": "s2b_msil2a"},
        "label":
        "S2B_MSIL2A_20200101T070219_N0213_R120_T39LVG_20200101T091825",
        "crs": "epsg:32739",
        # Three native resolutions expressed as EO3 grids.
        "grids": {
            "g20m": {"shape": [5490, 5490],
                     "transform": [20, 0, 399960, 0, -20, 8700040, 0, 0, 1]},
            "g60m": {"shape": [1830, 1830],
                     "transform": [60, 0, 399960, 0, -60, 8700040, 0, 0, 1]},
            "default": {"shape": [10980, 10980],
                        "transform": [10, 0, 399960, 0, -10, 8700040, 0, 0, 1]},
        },
        "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [509759.0000000001, 8590241.0],
                [399960.99999999977, 8590241.0],
                [399960.99999999977, 8700039.0],
                [509758.99999999965, 8700039.0],
                [509759.0000000001, 8590241.0],
            ]],
        },
        "properties": {
            "eo:gsd": 10,
            "datetime": "2020-01-01T07:02:54.188Z",
            "eo:platform": "sentinel-2b",
            "eo:instrument": "msi",
            "eo:cloud_cover": 0,
            "odc:file_format": "GeoTIFF",
            "odc:region_code": "39LVG",
            "odc:processing_datetime": "2020-01-01T07:02:54.188Z",
        },
        # 10 m bands use the default grid; the rest point at g20m/g60m.
        "measurements": {
            "red": {"path": "B04.tif"},
            "scl": {"grid": "g20m", "path": "SCL.tif"},
            "blue": {"path": "B02.tif"},
            "green": {"path": "B03.tif"},
            "nir_1": {"path": "B08.tif"},
            "nir_2": {"grid": "g20m", "path": "B8A.tif"},
            "swir_1": {"grid": "g20m", "path": "B11.tif"},
            "swir_2": {"grid": "g20m", "path": "B12.tif"},
            "red_edge_1": {"grid": "g20m", "path": "B05.tif"},
            "red_edge_2": {"grid": "g20m", "path": "B06.tif"},
            "red_edge_3": {"grid": "g20m", "path": "B07.tif"},
            "water_vapour": {"grid": "g60m", "path": "B09.tif"},
            "coastal_aerosol": {"grid": "g60m", "path": "B01.tif"},
        },
        "lineage": {},
    }

    def _band(name, aliases, dtype="uint16", **extra):
        # All bands share units='1' and nodata=0; scl overrides the dtype
        # and additionally carries a flags_definition.
        return dict(name=name, dtype=dtype, units="1", nodata=0,
                    aliases=aliases, **extra)

    scl_flags = {
        "sca": {
            "description": "Sen2Cor Scene Classification",
            "bits": [0, 1, 2, 3, 4, 5, 6, 7],
            "values": {
                "0": "nodata",
                "1": "defective",
                "2": "dark",
                "3": "shadow",
                "4": "vegetation",
                "5": "bare",
                "6": "water",
                "7": "unclassified",
                "8": "cloud medium probability",
                "9": "cloud high probability",
                "10": "thin cirrus",
                "11": "snow or ice",
            },
        }
    }

    product_doc = {
        "name": "s2b_msil2a",
        "description": "Sentinel-2B Level 2 COGs",
        "metadata_type": "eo3",
        "metadata": {"product": {"name": "s2b_msil2a"}},
        "measurements": [
            _band("coastal_aerosol", ["band_01", "B01"]),
            _band("blue", ["band_02", "B02"]),
            _band("green", ["band_03", "B03"]),
            _band("red", ["band_04", "B04"]),
            _band("red_edge_1", ["band_05", "B05"]),
            _band("red_edge_2", ["band_06", "B06"]),
            _band("red_edge_3", ["band_07", "B07"]),
            _band("nir_1", ["band_08", "B08"]),
            _band("nir_2", ["band_8a", "B8A"]),
            _band("water_vapour", ["band_09", "B09"]),
            _band("swir_1", ["band_11", "B11"]),
            _band("swir_2", ["band_12", "B12"]),
            _band("scl", ["mask", "qa"], dtype="uint8",
                  flags_definition=scl_flags),
        ],
    }

    return Dataset(DatasetType(eo3_metadata, product_doc), prep_eo3(ds_doc))
コード例 #27
0
ファイル: test_virtual.py プロジェクト: zs856/datacube-core
def example_product(name):
    """Fabricate a minimal DatasetType named *name* for virtual-product tests.

    Returns None for names outside PRODUCT_LIST.  Products whose name
    contains '_pq_' expose a single pixelquality band; all others expose
    blue + green.  A fixed EPSG:3577 GridSpec is attached.
    """
    if name not in PRODUCT_LIST:
        return None

    blue = {'name': 'blue', 'dtype': 'int16', 'nodata': -999, 'units': '1'}
    green = {'name': 'green', 'dtype': 'int16', 'nodata': -999, 'units': '1',
             'aliases': ['verde']}

    # Bit assignments for the pixel-quality flag band.
    flags = {
        "cloud_acca": {"bits": 10, "values": {"0": "cloud", "1": "no_cloud"}},
        "contiguous": {"bits": 8, "values": {"0": False, "1": True}},
        "cloud_fmask": {"bits": 11, "values": {"0": "cloud", "1": "no_cloud"}},
        "nir_saturated": {"bits": 3, "values": {"0": True, "1": False}},
        "red_saturated": {"bits": 2, "values": {"0": True, "1": False}},
        "blue_saturated": {"bits": 0, "values": {"0": True, "1": False}},
        "green_saturated": {"bits": 1, "values": {"0": True, "1": False}},
        "swir1_saturated": {"bits": 4, "values": {"0": True, "1": False}},
        "swir2_saturated": {"bits": 7, "values": {"0": True, "1": False}},
        "cloud_shadow_acca": {"bits": 12,
                              "values": {"0": "cloud_shadow",
                                         "1": "no_cloud_shadow"}},
        "cloud_shadow_fmask": {"bits": 13,
                               "values": {"0": "cloud_shadow",
                                          "1": "no_cloud_shadow"}},
    }

    pixelquality = {'name': 'pixelquality', 'dtype': 'int16', 'nodata': 0,
                    'units': '1', 'flags_definition': flags}

    product = DatasetType(
        example_metadata_type(),
        {'name': name, 'description': "", 'metadata_type': 'eo',
         'metadata': {}})
    product.grid_spec = GridSpec(crs=geometry.CRS('EPSG:3577'),
                                 tile_size=(100000., 100000.),
                                 resolution=(-25, 25))
    # '_pq_' products carry only the pixel-quality band.
    bands = [pixelquality] if '_pq_' in name else [blue, green]
    product.definition = {'name': name, 'measurements': bands}
    return product
コード例 #28
0
def test_incomplete_dataset_type_invalid():
    """An empty document must be rejected outright."""
    with pytest.raises(InvalidDocException):
        DatasetType.validate({})
コード例 #29
0
def test_accepts_valid_docs(valid_dataset_type_update):
    """Mandatory fields plus a valid update must validate cleanly."""
    candidate = deepcopy(only_mandatory_fields)
    candidate.update(valid_dataset_type_update)
    DatasetType.validate(candidate)  # raises on failure
コード例 #30
0
def test_rejects_invalid_docs(invalid_dataset_type_update):
    """Applying an invalid update must make validation fail."""
    candidate = deepcopy(only_mandatory_fields)
    candidate.update(invalid_dataset_type_update)
    with pytest.raises(InvalidDocException):
        DatasetType.validate(candidate)
コード例 #31
0
def test_accepts_valid_docs(valid_dataset_type_update):
    """A document built from the mandatory fields plus a valid update passes."""
    merged = deepcopy(only_mandatory_fields)
    merged.update(valid_dataset_type_update)
    # Validation raises on failure, so simply calling it is the assertion.
    DatasetType.validate(merged)
コード例 #32
0
def test_incomplete_dataset_type_invalid():
    """Validating an empty document raises InvalidDocException."""
    empty_doc = {}
    with pytest.raises(InvalidDocException):
        DatasetType.validate(empty_doc)
コード例 #33
0
def test_rejects_invalid_measurements(invalid_dataset_type_measurement):
    """A bad measurement entry must fail schema validation."""
    doc = deepcopy(only_mandatory_fields)
    doc['measurements'] = {'10': invalid_dataset_type_measurement}
    with pytest.raises(InvalidDocException):
        DatasetType.validate(doc)
コード例 #34
0
def test_rejects_invalid_docs(invalid_dataset_type_update):
    """Merging an invalid update into a minimal doc must raise."""
    bad_doc = deepcopy(only_mandatory_fields)
    bad_doc.update(invalid_dataset_type_update)
    with pytest.raises(InvalidDocException):
        DatasetType.validate(bad_doc)
コード例 #35
0
def test_accepts_valid_measurements(valid_dataset_type_measurement):
    """One known-good measurement in the list keeps the doc valid."""
    candidate = deepcopy(only_mandatory_fields)
    candidate['measurements'] = [valid_dataset_type_measurement]
    DatasetType.validate(candidate)  # raises on failure
コード例 #36
0
def test_rejects_invalid_measurements(invalid_dataset_type_measurement):
    """An invalid measurement mapping must trigger InvalidDocException."""
    bad = deepcopy(only_mandatory_fields)
    bad['measurements'] = {'10': invalid_dataset_type_measurement}
    with pytest.raises(InvalidDocException):
        DatasetType.validate(bad)