Code example #1
def populate_index(dataset_loader, module_dea_index):
    """
    Index populated with example datasets. Assumes our tests won't modify the data!

    It's module-scoped as it's expensive to populate.
    """
    path, s2_product_doc = list(
        read_documents(TEST_DATA_DIR / "esa_s2_l2a.product.yaml"))[0]
    dataset_count = 0
    product_ = module_dea_index.products.from_doc(s2_product_doc)
    module_dea_index.products.add(product_)
    create_dataset = Doc2Dataset(module_dea_index)
    for _, s2_dataset_doc in read_documents(TEST_DATA_DIR /
                                            "s2_l2a-sample.yaml"):
        try:
            dataset, err = create_dataset(s2_dataset_doc,
                                          "file://example.com/test_dataset/")
            assert dataset is not None, err
            created = module_dea_index.datasets.add(dataset)
            assert created.type.name == "s2_l2a"
            dataset_count += 1
        except AttributeError as ae:
            assert dataset_count == 5
            print(ae)
    assert dataset_count == 5
    return module_dea_index
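
The fixture decorator is not part of the snippet above; a minimal sketch, assuming pytest and that dataset_loader and module_dea_index are themselves fixtures, of how a module-scoped wrapper could be declared:

import pytest

# Hypothetical wrapper: scope="module" populates the index once per test
# module, which matches the docstring's note that population is expensive.
@pytest.fixture(scope="module")
def populated_index(dataset_loader, module_dea_index):
    return populate_index(dataset_loader, module_dea_index)
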
Code example #2
def test_read_documents(sample_document_files):
    for filename, ndocs in sample_document_files:
        all_docs = list(read_documents(filename))
        assert len(all_docs) == ndocs

        for path, doc in all_docs:
            assert isinstance(doc, dict)
            assert isinstance(path, pathlib.Path)

        assert set(str(f) for f, _ in all_docs) == set([filename])

    for filename, ndocs in sample_document_files:
        all_docs = list(read_documents(filename, uri=True))
        assert len(all_docs) == ndocs

        for uri, doc in all_docs:
            assert isinstance(doc, dict)
            assert isinstance(uri, str)

        p = pathlib.Path(filename)
        if ndocs > 1:
            expect_uris = [p.as_uri() + '#part={}'.format(i) for i in range(ndocs)]
        else:
            expect_uris = [p.as_uri()]

        assert [f for f, _ in all_docs] == expect_uris
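
The test above pins down the read_documents contract: by default it yields (pathlib.Path, dict) pairs, and with uri=True it yields (uri_string, dict) pairs, adding a '#part=<index>' fragment when a YAML file contains several documents. A minimal usage sketch under that assumption, with a hypothetical local file datasets.yaml:

from datacube.utils import read_documents

# Default: (pathlib.Path, dict) pairs, one per YAML document in the file.
for path, doc in read_documents('datasets.yaml'):
    print(path, doc.get('id'))

# With uri=True: (uri, dict) pairs; multi-document files get '#part=<index>'.
for uri, doc in read_documents('datasets.yaml', uri=True):
    print(uri)
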
Code example #3
File: test_utils.py Project: eric-erki/datacube-core
def _read_documents_impl(sample_document_files):
    # Test case for returning native paths pointing to documents (pathlib.Path)
    for filepath, num_docs in sample_document_files:
        all_docs = list(read_documents(filepath))
        assert len(all_docs) == num_docs

        for path, doc in all_docs:
            assert isinstance(doc, dict)

        assert set(str(f) for f, _ in all_docs) == set([filepath])

    # Test case for returning URIs pointing to documents
    for filepath, num_docs in sample_document_files:
        all_docs = list(read_documents(filepath, uri=True))
        assert len(all_docs) == num_docs

        for uri, doc in all_docs:
            assert isinstance(doc, dict)
            assert isinstance(uri, str)

        url = as_url(filepath)
        if num_docs > 1:
            expect_uris = [
                as_url(url) + '#part={}'.format(i) for i in range(num_docs)
            ]
        else:
            expect_uris = [as_url(url)]

        assert [f for f, _ in all_docs] == expect_uris
Code example #4
File: system.py Project: senani/digitalearthau
def init_dea(index: Index,
             with_permissions: bool,
             log_header=print_header,
             log=print_):
    """
    Create or update a DEA-configured ODC instance.
    """
    log_header(f"ODC init of {index.url}")
    was_created = index.init_db(with_default_types=False,
                                with_permissions=with_permissions)

    if was_created:
        log('Created.')
    else:
        log('Updated.')

    log('Checking indexes/views.')
    index.metadata_types.check_field_indexes(
        allow_table_lock=True,
        rebuild_indexes=False,
        rebuild_views=True,
    )

    log_header('Checking DEA metadata types')
    # Add DEA metadata types, products.
    for _, md_type_def in read_documents(DEA_MD_TYPES):
        md = index.metadata_types.add(
            index.metadata_types.from_doc(md_type_def))
        log(f"{md.name}")

    log_header('Checking DEA products')
    for _, product_def in read_documents(*DEA_PRODUCTS_DIR.glob('*.yaml')):
        product = index.products.add_document(product_def)
        log(f"{product.name}")

    log_header('Checking DEA ingested definitions')

    for path in DEA_INGESTION_DIR.glob('*.yaml'):
        ingest_config = ingest.load_config_from_file(path)

        driver_name = ingest_config['storage']['driver']
        driver = storage_writer_by_name(driver_name)
        if driver is None:
            raise ValueError("No driver found for {}".format(driver_name))

        source_type, output_type = ingest.ensure_output_type(
            index, ingest_config, driver.format, allow_product_changes=True)
        log(f"{output_type.name:<20}\t\t← {source_type.name}")
Code example #5
File: dataset.py Project: sharat910/datacube-iirs
def index_cmd(index, match_rules, dtype, auto_match, dry_run, datasets):
    if not (match_rules or dtype or auto_match):
        _LOG.error('Must specify one of [--match-rules, --type, --auto-match]')
        return

    if match_rules:
        rules = load_rules_from_file(match_rules, index)
    else:
        assert dtype or auto_match
        rules = load_rules_from_types(index, dtype)

    if rules is None:
        return

    for dataset_path in datasets:
        metadata_path = get_metadata_path(Path(dataset_path))
        if not metadata_path or not metadata_path.exists():
            raise ValueError('No supported metadata docs found for dataset {}'.format(dataset_path))

        for metadata_path, metadata_doc in read_documents(metadata_path):
            uri = metadata_path.absolute().as_uri()

            try:
                dataset = match_dataset(metadata_doc, uri, rules)
            except RuntimeError as e:
                _LOG.error('Unable to create Dataset for %s: %s', uri, e)
                continue

            if not check_dataset_consistent(dataset):
                _LOG.error("Dataset measurements don't match it's type specification %s", dataset.id)
                continue

            _LOG.info('Matched %s', dataset)
            if not dry_run:
                index.datasets.add(dataset)
Code example #6
File: dataset.py Project: etsangsplk/datacube-core
def load_datasets(datasets, rules):
    for dataset_path in datasets:
        metadata_path = get_metadata_path(Path(dataset_path))
        if not metadata_path or not metadata_path.exists():
            _LOG.error('No supported metadata docs found for dataset %s', dataset_path)
            continue

        try:
            for metadata_path, metadata_doc in read_documents(metadata_path):
                uri = metadata_path.absolute().as_uri()

                try:
                    dataset = create_dataset(metadata_doc, uri, rules)
                except BadMatch as e:
                    _LOG.error('Unable to create Dataset for %s: %s', uri, e)
                    continue

                is_consistent, reason = check_dataset_consistent(dataset)
                if not is_consistent:
                    _LOG.error("Dataset %s inconsistency: %s", dataset.id, reason)
                    continue

                yield dataset
        except InvalidDocException:
            _LOG.error("Failed reading documents from %s", metadata_path)
            continue
Code example #7
File: __init__.py Project: waynedou/datacube-core
def load_dataset_definition(path):
    if not isinstance(path, pathlib.Path):
        path = pathlib.Path(path)

    fname = get_metadata_path(path)
    for _, doc in read_documents(fname):
        return SimpleDocNav(doc)
Code example #8
File: metadata.py Project: jeremyh/datacube-core
def update_metadata_types(index: Index, allow_unsafe: bool,
                          allow_exclusive_lock: bool, dry_run: bool,
                          files: List):
    """
    Update existing metadata types.

    An error will be thrown if a change is potentially unsafe.

    (An unsafe change is anything that may potentially make the metadata type
    incompatible with existing types of the same name)
    """
    for descriptor_path, parsed_doc in read_documents(*files):
        try:
            type_ = index.metadata_types.from_doc(parsed_doc)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid metadata type definition: %s', descriptor_path)
            continue

        if not dry_run:
            index.metadata_types.update(
                type_,
                allow_unsafe_updates=allow_unsafe,
                allow_table_lock=allow_exclusive_lock,
            )
            echo('Updated "%s"' % type_.name)
        else:
            can_update, safe_changes, unsafe_changes = index.metadata_types.can_update(
                type_, allow_unsafe_updates=allow_unsafe)
            if can_update:
                echo('Can update "%s": %s unsafe changes, %s safe changes' %
                     (type_.name, len(unsafe_changes), len(safe_changes)))
            else:
                echo('Cannot update "%s": %s unsafe changes, %s safe changes' %
                     (type_.name, len(unsafe_changes), len(safe_changes)))
Code example #9
File: ingest.py Project: Max-AR/datacube-core
def load_config_from_file(path):
    config_file = Path(path)
    _, config = next(read_documents(config_file))
    IngestorConfig.validate(config)
    config['filename'] = str(normalise_path(config_file))

    return config
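
Several of the examples below use the same idiom for single-document configuration files: read_documents is a generator, so next() takes the first (path, document) pair and any further documents in the file are ignored. A minimal sketch, assuming a hypothetical config.yaml:

from datacube.utils import read_documents

# Take only the first document; the path half of the pair is discarded.
_, config = next(read_documents('config.yaml'))
print(config.get('source_type'))
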
Code example #10
File: dataset.py Project: SAMoH-proj/datacube-core
def doc_path_stream(files, on_error, uri=True):
    for fname in files:
        try:
            for p, doc in read_documents(fname, uri=uri):
                yield p, SimpleDocNav(doc)
        except InvalidDocException as e:
            on_error(fname, e)
Code example #11
def add_cop_dem_product(dc: Datacube, product):
    if product in PRODUCTS.keys():
        product_uri = PRODUCTS[product]
    else:
        raise ValueError(f"Unknown product {product}")

    for _, doc in read_documents(product_uri):
        dc.index.products.add_document(doc)
    print(f"Product definition added for {product}")
Code example #12
def _path_dataset_ids(path: Path) -> Iterable[uuid.UUID]:
    for _, metadata_doc in read_documents(path):
        if metadata_doc is None:
            raise InvalidDocException("Empty document from path {}".format(path))

        if 'id' not in metadata_doc:
            raise InvalidDocException("No id in path metadata: {}".format(path))

        yield uuid.UUID(metadata_doc['id'])
Code example #13
File: main.py Project: augustinh22/agdc_statistics
def main(index, stats_config_file, qsub, runner, save_tasks, load_tasks,
         tile_index, tile_index_file, output_location, year, task_slice,
         batch):

    if qsub is not None and batch is not None:
        for i in range(batch):
            child = qsub.clone()
            child.reset_internal_args()
            child.add_internal_args('--task-slice', '{}::{}'.format(i, batch))
            click.echo(repr(child))
            exit_code, _ = child(auto=True, auto_clean=[('--batch', 1)])
            if exit_code != 0:
                return exit_code
        return 0

    elif qsub is not None:
        # TODO: verify config before calling qsub submit
        click.echo(repr(qsub))
        exit_code, _ = qsub(auto=True)
        return exit_code

    _log_setup()

    timer = MultiTimer().start('main')

    if len(tile_index) == 0:
        tile_index = None

    _, config = next(read_documents(stats_config_file))
    stats_schema(config)

    app = StatsApp.from_configuration_file(
        config, index, gather_tile_indexes(tile_index, tile_index_file),
        output_location, year)
    app.validate()

    if save_tasks:
        app.save_tasks_to_file(save_tasks)
        failed = 0
    elif load_tasks:
        successful, failed = app.run(runner,
                                     task_file=load_tasks,
                                     task_slice=task_slice)
    else:
        successful, failed = app.run(runner, task_slice=task_slice)

    timer.pause('main')
    _LOG.info('Stats processing completed in %s seconds.',
              timer.run_times['main'])

    if failed > 0:
        raise click.ClickException(
            '%s of %s tasks were not completed successfully.' %
            (failed, successful + failed))

    return 0
Code example #14
File: task_app.py Project: hkristen/datacube-core
def load_config(index, app_config_file, make_config, make_tasks, *args, **kwargs):
    app_config_path = Path(app_config_file)
    _, config = next(read_documents(app_config_path))
    config['app_config_file'] = app_config_path.name

    config = make_config(index, config, **kwargs)

    tasks = make_tasks(index, config, **kwargs)

    return config, iter(tasks)
Code example #15
def path_doc_stream(files, on_error, uri=True, raw=False):
    maybe_wrap = {True: lambda x: x, False: SimpleDocNav}[raw]

    for fname in files:
        try:
            for p, doc in read_documents(fname, uri=uri):
                yield p, maybe_wrap(doc)

        except InvalidDocException as e:
            on_error(fname, e)
Code example #16
File: main.py Project: sciencewhispher/Data_Cube_v2
def main(index, app_config, year, executor):
    _, config = next(read_documents(app_config))

    tasks = make_tasks(index, config)

    futures = [executor.submit(do_stats, task, config) for task in tasks]

    for future in executor.as_completed(futures):
        result = executor.result(future)
        print(result)
Code example #17
File: main.py Project: ceos-seo/Data_Cube_v2
def main(index, app_config, year, executor):
    _, config = next(read_documents(app_config))

    tasks = make_tasks(index, config)

    futures = [executor.submit(do_stats, task, config) for task in tasks]

    for future in executor.as_completed(futures):
        result = executor.result(future)
        print(result)
Code example #18
def read_document(path: Path) -> dict:
    """
    Read and parse exactly one document.
    """
    ds = list(read_documents(path))
    if len(ds) != 1:
        raise NotImplementedError("Expected one document to be in path %s" % path)

    _, doc = ds[0]
    return doc
Code example #19
def read_document(path: Path) -> dict:
    """
    Read and parse exactly one document.
    """
    ds = list(read_documents(path))
    if len(ds) != 1:
        raise ValueError(f"Expected only one document to be in path {path}")

    _, doc = ds[0]
    return doc
Code example #20
def _path_doc_stream(files, on_error, uri=True, raw=False):
    """See :func:`ui_path_doc_stream` for documentation"""
    maybe_wrap = identity if raw else SimpleDocNav

    for fname in files:
        try:
            for p, doc in read_documents(fname, uri=uri):
                yield p, maybe_wrap(doc)

        except InvalidDocException as e:
            on_error(fname, e)
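
A minimal sketch of consuming this stream, assuming an on_error callback that just logs the failing file and lets the stream continue (the file names are placeholders):

import logging

_LOG = logging.getLogger(__name__)

def _log_and_continue(fname, err):
    # Hypothetical handler: record the unreadable file, keep streaming the rest.
    _LOG.error("Failed to read %s: %s", fname, err)

for uri, nav in _path_doc_stream(["ds1.yaml", "ds2.yaml"], on_error=_log_and_continue):
    print(uri, nav.id)  # SimpleDocNav exposes the document's id
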
Code example #21
def check_dataset_metadata_in_storage_unit(nco, dataset_dir):
    assert len(nco.variables['dataset']) == 1  # 1 time slice
    stored_metadata = nco.variables['dataset'][0]
    if not isinstance(stored_metadata, str):
        stored_metadata = netCDF4.chartostring(stored_metadata)
        stored_metadata = str(np.char.decode(stored_metadata))
    ds_filename = dataset_dir / 'agdc-metadata.yaml'

    stored = yaml.safe_load(stored_metadata)
    [(_, original)] = read_documents(ds_filename)
    assert len(stored['lineage']['source_datasets']) == 1
    assert next(iter(stored['lineage']['source_datasets'].values())) == original
Code example #22
File: ingest.py Project: sharat910/datacube-iirs
def ingest_cmd(index, config, dry_run, executor):
    _, config = next(read_documents(Path(config)))
    source_type = index.datasets.types.get_by_name(config['source_type'])
    if not source_type:
        _LOG.error("Source DatasetType %s does not exist",
                   config['source_type'])
        return  # nothing to ingest without a source type

    output_type = morph_dataset_type(source_type, config)
    _LOG.info('Created DatasetType %s', output_type.name)
    output_type = index.datasets.types.add(output_type)

    datacube = Datacube(index=index)

    grid_spec = output_type.grid_spec
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    file_path_template = str(
        Path(config['location'], config['file_path_template']))

    bbox = BoundingBox(**config['ingestion_bounds'])
    tasks = find_diff(source_type, output_type, bbox, datacube)

    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
        #        print ("in ingest.py in ingest_word")
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(
            tile_index=tile_index,
            start_time=to_datetime(
                sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
            end_time=to_datetime(
                sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'],
                                   variable_params, Path(file_path))
        return nudatasets

    do_work(tasks, ingest_work, index, executor)
    temp = str(Path(config['location']))
    files_path = temp + "/cache"
    if not os.path.isfile(temp + "/archive"):
        os.system("mkdir " + temp + "/archive")
    print("Compressing files")
    compress(files_path)
Code example #23
def add_metadata_types(index, files):
    """
    Add or update metadata types in the index
    """
    for descriptor_path, parsed_doc in read_documents(*(Path(f)
                                                        for f in files)):
        try:
            type_ = index.metadata_types.from_doc(parsed_doc)
            index.metadata_types.add(type_)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid metadata type definition: %s', descriptor_path)
            continue
Code example #24
File: product.py Project: prasunkgupta/datacube-iirs
def add_dataset_types(index, files):
    """
    Add product types to the index
    """
    for descriptor_path, parsed_doc in read_documents(*(Path(f) for f in files)):
        try:
            type_ = index.datasets.types.from_doc(parsed_doc)
            index.datasets.types.add(type_)
            echo('Added "%s"' % type_.name)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid product definition: %s', descriptor_path)
            continue
Code example #25
File: metadata_type.py Project: nhoss2/datacube-core
def add_metadata_types(index, allow_exclusive_lock, files):
    # type: (Index, bool, list) -> None
    """
    Add or update metadata types in the index
    """
    for descriptor_path, parsed_doc in read_documents(*(Path(f) for f in files)):
        try:
            type_ = index.metadata_types.from_doc(parsed_doc)
            index.metadata_types.add(type_, allow_table_lock=allow_exclusive_lock)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid metadata type definition: %s', descriptor_path)
            continue
Code example #26
File: product.py Project: nikhil003/datacube-core
def add_products(index, allow_exclusive_lock, files):
    # type: (Index, bool, list) -> None
    """
    Add or update products in the generic index.
    """
    for descriptor_path, parsed_doc in read_documents(*files):
        try:
            type_ = index.products.from_doc(parsed_doc)
            index.products.add(type_, allow_table_lock=allow_exclusive_lock)
            echo('Added "%s"' % type_.name)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid product definition: %s', descriptor_path)
            sys.exit(1)
Code example #27
def add_dataset_types(index, files):
    """
    Add product types to the index
    """
    for descriptor_path, parsed_doc in read_documents(*(Path(f)
                                                        for f in files)):
        try:
            type_ = index.products.from_doc(parsed_doc)
            index.products.add(type_)
            echo('Added "%s"' % type_.name)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid product definition: %s', descriptor_path)
            continue
Code example #28
def update_dataset_types(index, allow_unsafe, allow_exclusive_lock, dry_run, files):
    # type: (Index, bool, bool, bool, list) -> None
    """
    Update existing products.

    An error will be thrown if a change is potentially unsafe.

    (An unsafe change is anything that may potentially make the product
    incompatible with existing datasets of that type)
    """
    failures = 0
    for descriptor_path, parsed_doc in read_documents(*(Path(f) for f in files)):
        try:
            type_ = index.products.from_doc(parsed_doc)
        except InvalidDocException as e:
            _LOG.exception(e)
            _LOG.error('Invalid product definition: %s', descriptor_path)
            failures += 1
            continue

        if not dry_run:
            try:
                index.products.update(
                    type_,
                    allow_unsafe_updates=allow_unsafe,
                    allow_table_lock=allow_exclusive_lock,
                )
                echo('Updated "%s"' % type_.name)
            except ValueError as e:
                echo('Failed to update "%s": %s' % (type_.name, e))
                failures += 1
        else:
            can_update, safe_changes, unsafe_changes = index.products.can_update(type_,
                                                                                 allow_unsafe_updates=allow_unsafe)

            for offset, old_val, new_val in safe_changes:
                echo('Safe change in %r %s from %r to %r' % (type_.name, _readable_offset(offset), old_val, new_val))

            for offset, old_val, new_val in unsafe_changes:
                echo('Unsafe change in %r %s from %r to %r' % (type_.name, _readable_offset(offset), old_val, new_val))

            if can_update:
                echo('Can update "%s": %s unsafe changes, %s safe changes' % (type_.name,
                                                                              len(unsafe_changes),
                                                                              len(safe_changes)))
            else:
                echo('Cannot update "%s": %s unsafe changes, %s safe changes' % (type_.name,
                                                                                 len(unsafe_changes),
                                                                                 len(safe_changes)))
    sys.exit(failures)
Code example #29
File: dataset.py Project: etsangsplk/datacube-core
def load_rules_from_file(filename, index):
    rules = next(read_documents(Path(filename)))[1]
    # TODO: verify schema

    for rule in rules:
        type_ = index.products.get_by_name(rule['type'])
        if not type_:
            _LOG.error('DatasetType %s does not exist', rule['type'])
            return
        if not changes.contains(type_.metadata_doc, rule['metadata']):
            _LOG.error('DatasetType %s can\'t be matched by its own rule', rule['type'])
            return
        rule['type'] = type_

    return rules
Code example #30
File: dataset.py Project: ceos-seo/Data_Cube_v2
def load_rules_from_file(filename, index):
    rules = next(read_documents(Path(filename)))[1]
    # TODO: verify schema

    for rule in rules:
        type_ = index.products.get_by_name(rule['type'])
        if not type_:
            _LOG.error('DatasetType %s does not exist', rule['type'])
            return
        if not contains(type_.metadata_doc, rule['metadata']):
            _LOG.error('DatasetType %s can\'t be matched by its own rule', rule['type'])
            return
        rule['type'] = type_

    return rules
Code example #31
def _test_read_docs_impl(sample_documents: Iterable[Tuple[str, int]]):
    # Test case for returning URIs pointing to documents
    for doc_url, num_docs in sample_documents:
        all_docs = list(read_documents(doc_url, uri=True))
        assert len(all_docs) == num_docs

        for uri, doc in all_docs:
            assert isinstance(doc, dict)
            assert isinstance(uri, str)

        url = as_url(doc_url)
        if num_docs > 1:
            expect_uris = [as_url(url) + '#part={}'.format(i) for i in range(num_docs)]
        else:
            expect_uris = [as_url(url)]

        assert [f for f, _ in all_docs] == expect_uris
Code example #32
File: ingest.py Project: prasunkgupta/datacube-iirs
def ingest_cmd(index, config, dry_run, executor):
    _, config = next(read_documents(Path(config)))
    source_type = index.datasets.types.get_by_name(config['source_type'])
    if not source_type:
        _LOG.error("Source DatasetType %s does not exist", config['source_type'])
        return  # nothing to ingest without a source type

    output_type = morph_dataset_type(source_type, config)
    _LOG.info('Created DatasetType %s', output_type.name)
    output_type = index.datasets.types.add(output_type)

    datacube = Datacube(index=index)

    grid_spec = output_type.grid_spec
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    file_path_template = str(Path(config['location'], config['file_path_template']))

    bbox = BoundingBox(**config['ingestion_bounds'])
    tasks = find_diff(source_type, output_type, bbox, datacube)

    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
#        print ("in ingest.py in ingest_word")
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(tile_index=tile_index,
                                              start_time=to_datetime(sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
                                              end_time=to_datetime(sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print ("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'], variable_params, Path(file_path))
        return nudatasets

    do_work(tasks, ingest_work, index, executor)
    temp = str(Path(config['location']))
    files_path = temp + "/cache"
    if not os.path.isfile(temp + "/archive"):
        os.system("mkdir " + temp + "/archive")
    print("Compressing files")
    compress(files_path)
Code example #33
def check_dataset_metadata_in_storage_unit(nco, dataset_dirs):
    """Check one of the NetCDF files metadata against the original
    metadata."""
    assert len(nco.variables['dataset']) == 1  # 1 time slice
    stored_metadata = netcdf_extract_string(nco.variables['dataset'][0])
    stored = yaml.safe_load(stored_metadata)

    assert 'lineage' in stored
    assert 'source_datasets' in stored['lineage']
    assert '0' in stored['lineage']['source_datasets']
    assert 'id' in stored['lineage']['source_datasets']['0']
    source_uuid = UUID(stored['lineage']['source_datasets']['0']['id'])
    assert source_uuid in dataset_dirs
    ds_filename = dataset_dirs[source_uuid] / 'agdc-metadata.yaml'
    [(_, original)] = read_documents(ds_filename)
    assert len(stored['lineage']['source_datasets']) == 1
    assert next(iter(stored['lineage']['source_datasets'].values())) == original
Code example #34
def _populate_from_dump(session_dea_index, expected_type: str,
                        dump_path: Path):
    ls8_nbar_scene = session_dea_index.products.get_by_name(expected_type)
    dataset_count = 0

    create_dataset = Doc2Dataset(session_dea_index)

    for _, doc in read_documents(dump_path):
        label = doc["ga_label"] if ("ga_label" in doc) else doc["id"]
        dataset, err = create_dataset(
            doc, f"file://example.com/test_dataset/{label}")
        assert dataset is not None, err
        created = session_dea_index.datasets.add(dataset)

        assert created.type.name == ls8_nbar_scene.name
        dataset_count += 1

    print(f"Populated {dataset_count} of {expected_type}")
    return dataset_count
Code example #35
File: ingest.py Project: ceos-seo/Data_Cube_v2
def load_config_from_file(index, config):
    config_name = Path(config).name
    _, config = next(read_documents(Path(config)))
    config['filename'] = config_name

    return config
Code example #36
def decorate(cls):
    cls.schema = next(iter(read_documents(SCHEMA_PATH / schema)))[1]
    cls.validate = classmethod(validate)
    return cls
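
This last snippet is presumably the inner function of a class-decorator factory that attaches a parsed schema document and a validate classmethod to a class. A hedged reconstruction of the surrounding factory it implies (the SCHEMA_PATH location and the validate body are assumptions):

from pathlib import Path

from datacube.utils import read_documents

SCHEMA_PATH = Path(__file__).parent / 'schemas'  # assumed location of schema files


def schema_validated(schema):
    """Hypothetical factory: decorate a class with the first document of the
    named schema file and a validate() classmethod."""
    def validate(cls, document):
        # Placeholder hook; a real implementation would check `document`
        # against cls.schema (e.g. with jsonschema).
        return document

    def decorate(cls):
        cls.schema = next(iter(read_documents(SCHEMA_PATH / schema)))[1]
        cls.validate = classmethod(validate)
        return cls

    return decorate
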