Example 1
def testdata_dir(tmpdir, ingest_configs):
    """Prepare a temporary data directory and ingestion configs for the end-to-end tests."""
    datadir = Path(str(tmpdir), 'data')
    datadir.mkdir()

    shutil.copytree(str(TEST_DATA), str(tmpdir / 'lbg'))

    for file in ingest_configs.values():
        prepare_test_ingestion_configuration(tmpdir, tmpdir, INGESTER_CONFIGS / file,
                                             mode='end2end')

    return tmpdir
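
Every example below leans on the prepare_test_ingestion_configuration helper from the test suite's conftest, which is not shown on this page. A minimal, hypothetical sketch of what it might do, assuming it copies the ingestion YAML into the temporary directory and redirects its output location there, would be:

import yaml
from pathlib import Path

def prepare_test_ingestion_configuration(tmpdir, output_dir, config_path, mode=None):
    # Hypothetical sketch; the real helper lives in the integration-test suite,
    # and `mode` selects test-specific tweaks that are ignored here.
    config = yaml.safe_load(Path(str(config_path)).read_text())
    # Redirect the ingestion output location into the temporary directory
    config['location'] = str(output_dir if output_dir is not None else tmpdir)
    # Write the adjusted copy next to the test data and return it
    target = Path(str(tmpdir)) / Path(str(config_path)).name
    target.write_text(yaml.safe_dump(config))
    return target, config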
Example 2
def test_double_ingestion(clirunner, index, tmpdir, ingest_configs):
    """
    Test for the case where ingestor does not need to create a new product,
    but should re-use an existing target product.

    """
    # Make a test ingestor configuration
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    # Create and index some example scene datasets
    dataset_paths = generate_test_scenes(tmpdir)
    for path in dataset_paths:
        index_dataset(path)

    # Ingest them
    clirunner(['ingest', '--config-file', str(config_path)])

    # Create and index some more scene datasets
    dataset_paths = generate_test_scenes(tmpdir)
    for path in dataset_paths:
        index_dataset(path)

    # Make sure that we can ingest the new scenes
    clirunner(['ingest', '--config-file', str(config_path)])
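
The clirunner fixture used above is also defined in the suite's conftest rather than in these snippets. A minimal sketch of such a fixture, assuming click's test runner and the datacube CLI entry point datacube.scripts.cli_app.cli, might look like:

import click.testing
import pytest
from datacube.scripts.cli_app import cli

@pytest.fixture
def clirunner():
    def run_cli(opts, expect_success=True):
        # Invoke the datacube CLI in-process and verify the exit code
        result = click.testing.CliRunner().invoke(cli, [str(opt) for opt in opts])
        if expect_success:
            assert result.exit_code == 0, result.output
        return result
    return run_cli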
Example 3
def test_s3_full_ingestion(clirunner, index, tmpdir, example_ls5_dataset_paths,
                           ingest_configs):
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']

    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')
    valid_uuids = []
    for uuid, example_ls5_dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        clirunner(['dataset', 'add', str(example_ls5_dataset_path)])

    ensure_datasets_are_indexed(index, valid_uuids)

    # TODO(csiro) Set time dimension when testing
    # config['storage']['tile_size']['time'] = 2

    result = clirunner(['ingest', '--config-file', str(config_path)])

    print(result.output)

    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))
    check_data_with_api(index, len(valid_uuids))
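
ensure_datasets_are_indexed is another shared helper. A plausible sketch, assuming it only checks that every UUID added via `datacube dataset add` is now queryable through the index, is:

def ensure_datasets_are_indexed(index, valid_uuids):
    # Every dataset added on the command line should be discoverable by UUID
    for uuid in valid_uuids:
        assert index.datasets.has(uuid), 'dataset %s was not indexed' % uuid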
Example 4
def test_double_ingestion(clirunner, index, tmpdir, ingest_configs,
                          example_ls5_dataset_paths):
    """
    Test for the case where ingestor does not need to create a new product,
    but should re-use an existing target product.

    """
    # Make a test ingestor configuration
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    def ingest_products():
        valid_uuids = []
        for uuid, ls5_dataset_path in example_ls5_dataset_paths.items():
            valid_uuids.append(uuid)
            index_dataset(ls5_dataset_path)

        # Ensure that datasets are actually indexed
        ensure_datasets_are_indexed(index, valid_uuids)

        # Ingest them
        clirunner(['ingest', '--config-file', str(config_path)])

        # Validate that the ingestion is working as expected
        datasets = index.datasets.search_eager(product='ls5_nbar_albers')
        assert len(datasets) > 0
        assert datasets[0].managed

        check_open_with_api(index, len(valid_uuids))
        check_data_with_api(index, len(valid_uuids))

        # NetCDF specific checks, based on the saved NetCDF file
        ds_path = str(datasets[0].local_path)
        with netCDF4.Dataset(ds_path) as nco:
            check_data_shape(nco)
            check_grid_mapping(nco)
            check_cf_compliance(nco)
            check_dataset_metadata_in_storage_unit(nco,
                                                   example_ls5_dataset_paths)
            check_attributes(nco, config['global_attributes'])

            name = config['measurements'][0]['name']
            check_attributes(nco[name], config['measurements'][0]['attrs'])
        check_open_with_xarray(ds_path)

    # Create, index, and ingest the example scene datasets
    ingest_products()

    ######################
    #  Double Ingestion  #
    ######################
    # Run the same ingestion again; the existing target product should be re-used
    ingest_products()
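
The NetCDF checks inside ingest_products rely on further helpers; check_open_with_xarray, for instance, presumably just verifies that the stored file opens cleanly. A sketch under that assumption:

import xarray

def check_open_with_xarray(file_path):
    # The ingested storage unit should be readable as an ordinary NetCDF file
    xarray.open_dataset(str(file_path)).close()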
Example 5
def test_process_all_ingest_jobs(clirunner, index, tmpdir,
                                 example_ls5_dataset_paths, ingest_configs):
    """
    Test for the case where ingestor processes upto `--queue-size` number of tasks and not all the available scenes
    """
    # Make a test ingestor configuration
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    # Number of scenes generated is 3 (as per the NUM_TIME_SLICES constant in conftest.py)
    # Set the queue size to process 2 tiles
    queue_size = 2
    valid_uuids = []
    for uuid, ls5_dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        index_dataset(ls5_dataset_path)

    # Ensure that datasets are actually indexed
    ensure_datasets_are_indexed(index, valid_uuids)

    # Ingest all scenes (though the queue size is 2, all 3 tiles will be ingested)
    clirunner([
        'ingest',
        '--config-file',
        str(config_path),
        '--queue-size',
        str(queue_size),
        '--allow-product-changes',
    ])

    # Validate that the ingestion is working as expected
    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))

    # NetCDF specific checks, based on the saved NetCDF file
    ds_path = str(datasets[0].local_path)
    with netCDF4.Dataset(ds_path) as nco:
        check_data_shape(nco)
        check_grid_mapping(nco)
        check_cf_compliance(nco)
        check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
        check_attributes(nco, config['global_attributes'])

        name = config['measurements'][0]['name']
        check_attributes(nco[name], config['measurements'][0]['attrs'])
    check_open_with_xarray(ds_path)
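
check_attributes compares the attributes requested in the ingestion config against what was actually written to the NetCDF file. A hedged sketch using the netCDF4 attribute API (the string coercion is an assumption to sidestep type differences):

def check_attributes(obj, attrs):
    # `obj` is a netCDF4 Dataset or Variable; every configured attribute
    # should have been written to it
    for name, value in attrs.items():
        assert name in obj.ncattrs()
        assert str(obj.getncattr(name)) == str(value)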
Example 6
def test_invalid_ingestor_config(clirunner, index, tmpdir, ingest_configs):
    """
    Test that the ingestor correctly identifies an invalid ingestor config file.

    Note: We do not need to test valid config files as that is covered by the existing
          ingestor tests.
    """
    config = PROJECT_ROOT / 'integration_tests/data/ingester/invalid_config.yaml'
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    clirunner(['ingest', '--config-file',
               str(config_path)],
              expect_success=False)
Example 7
def test_full_ingestion(clirunner, index, tmpdir, example_ls5_dataset_paths,
                        ingest_configs):
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')
    valid_uuids = []
    for uuid, example_ls5_dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        clirunner([
            'dataset',
            'add',
            str(example_ls5_dataset_path)
        ])

    ensure_datasets_are_indexed(index, valid_uuids)

    # TODO(csiro) Set time dimension when testing
    # config['storage']['tile_size']['time'] = 2

    clirunner([
        'ingest',
        '--config-file',
        str(config_path)
    ])

    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))
    check_data_with_api(index, len(valid_uuids))

    # NetCDF specific checks, based on the saved NetCDF file
    ds_path = str(datasets[0].local_path)
    with netCDF4.Dataset(ds_path) as nco:
        check_data_shape(nco)
        check_grid_mapping(nco)
        check_cf_compliance(nco)
        check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
        check_attributes(nco, config['global_attributes'])

        name = config['measurements'][0]['name']
        check_attributes(nco[name], config['measurements'][0]['attrs'])
    check_open_with_xarray(ds_path)
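
check_open_with_api and check_data_with_api exercise the ingested product through the Datacube Python API instead of the raw file. A minimal, hypothetical sketch (the expectation that each source scene contributes one time slice is an assumption):

import datacube

def check_open_with_api(index, num_time_slices):
    dc = datacube.Datacube(index=index)
    # With no spatial query this loads the whole (small) test extent
    data = dc.load(product='ls5_nbar_albers')
    assert 'time' in data.dims
    assert data.time.size == num_time_slices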
Example 8
def test_invalid_ingestor_config(clirunner, index, tmpdir):
    """
    Test that the ingestor correctly identifies an invalid ingestor config file.

    Note: We do not need to test valid config files as that is covered by the existing
          ingestor tests.
    """
    base = PROJECT_ROOT / 'integration_tests/data/ingester/'

    for cfg, err in (('invalid_config.yaml',
                      "'src_varname' is a required property"),
                     ('invalid_src_name.yaml',
                      'No such variable in the source product:')):
        config = base / cfg
        config_path, config = prepare_test_ingestion_configuration(
            tmpdir, None, config)

        result = clirunner(['ingest', '--config-file',
                            str(config_path)],
                           expect_success=False)

        assert result.exit_code != 0
        assert err in result.output
Example 9
def test_index_out_of_bound_error(clirunner, index, tmpdir,
                                  example_ls5_dataset_paths, ingest_configs):
    """
    Test ingestion when an indexed dataset has lost its location in the
    database (the locationless, index-out-of-bound scenario).
    """
    # Make a test ingestor configuration
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    # Set the queue size to process 5 tiles
    queue_size = 5
    valid_uuids = []
    for uuid, ls5_dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        index_dataset(ls5_dataset_path)

    # Ensure that datasets are actually indexed
    ensure_datasets_are_indexed(index, valid_uuids)

    # A locationless dataset arises in the database when the sync tool is run
    # (with the --update-location option) against a disk where the original
    # file has been removed and regenerated with a new dataset id.
    for indexed_uuid in valid_uuids:
        dc1 = datacube.Datacube(index=index)
        datasets = dc1.find_datasets(product='ls5_nbar_scene')
        try:
            # Remove the location from the index to simulate the locationless scenario
            res = dc1.index.datasets.remove_location(indexed_uuid, datasets[0].local_uri)
            assert res is True, "Failed to remove the location for %r" % (indexed_uuid,)
        except AttributeError:
            # Only one dataset exposes local_uri; ignore AttributeError for the rest
            pass

    # Ingest scenes with locationless dataset
    clirunner([
        'ingest',
        '--config-file',
        str(config_path),
        '--queue-size',
        str(queue_size),
        '--allow-product-changes',
    ])

    # Validate that the ingestion is working as expected
    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))

    # NetCDF specific checks, based on the saved NetCDF file
    ds_path = str(datasets[0].local_path)
    with netCDF4.Dataset(ds_path) as nco:
        check_data_shape(nco)
        check_grid_mapping(nco)
        check_cf_compliance(nco)
        check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
        check_attributes(nco, config['global_attributes'])

        name = config['measurements'][0]['name']
        check_attributes(nco[name], config['measurements'][0]['attrs'])
    check_open_with_xarray(ds_path)
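
For reference, the location bookkeeping used above is part of the regular index API: remove_location and add_location are symmetric, and both return whether anything actually changed. A small usage sketch with a hypothetical UUID:

import datacube

dc = datacube.Datacube()
ds = dc.index.datasets.get('00000000-0000-0000-0000-000000000000')  # hypothetical UUID
if ds is not None and ds.uris:
    uri = ds.uris[0]
    assert dc.index.datasets.remove_location(ds.id, uri)
    # Restore it so later queries can resolve the file again
    assert dc.index.datasets.add_location(ds.id, uri)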