def testdata_dir(tmpdir, ingest_configs):
    """Stage test data and ingestion configs under *tmpdir* and return it.

    Creates an (empty) ``data`` directory, copies the shared TEST_DATA tree
    into ``tmpdir/lbg``, and prepares every configured ingestion file in
    'end2end' mode.
    """
    data_dir = Path(str(tmpdir), 'data')
    data_dir.mkdir()
    shutil.copytree(str(TEST_DATA), str(tmpdir / 'lbg'))
    for config_name in ingest_configs.values():
        prepare_test_ingestion_configuration(tmpdir, tmpdir,
                                             INGESTER_CONFIGS / config_name,
                                             mode='end2end')
    return tmpdir
def test_double_ingestion(clirunner, index, tmpdir, ingest_configs):
    """
    Test for the case where ingestor does not need to create a new product,
    but should re-use an existing target product.

    NOTE(review): a later function in this module re-defines
    ``test_double_ingestion``, which shadows this one at import time —
    confirm whether both are meant to exist.
    """
    # Prepare a test ingestor configuration
    source_config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, source_config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    # Create and index an initial batch of example scene datasets, then ingest
    for scene_path in generate_test_scenes(tmpdir):
        index_dataset(scene_path)
    clirunner(['ingest', '--config-file', str(config_path)])

    # A second batch must ingest cleanly into the already-existing product
    for scene_path in generate_test_scenes(tmpdir):
        index_dataset(scene_path)
    clirunner(['ingest', '--config-file', str(config_path)])
def test_s3_full_ingestion(clirunner, index, tmpdir, example_ls5_dataset_paths, ingest_configs):
    """Index the example LS5 scenes, run a full ingest, and sanity-check the result."""
    source_config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, source_config, mode='fast_ingest')

    valid_uuids = []
    for uuid, dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        clirunner(['dataset', 'add', str(dataset_path)])

    ensure_datasets_are_indexed(index, valid_uuids)

    # TODO(csiro) Set time dimension when testing
    # config['storage']['tile_size']['time'] = 2

    result = clirunner(['ingest', '--config-file', str(config_path)])
    print(result.output)

    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))
    check_data_with_api(index, len(valid_uuids))
def test_double_ingestion(clirunner, index, tmpdir, ingest_configs, example_ls5_dataset_paths):
    """
    Test for the case where ingestor does not need to create a new product,
    but should re-use an existing target product.
    """
    # Prepare a test ingestor configuration
    source_config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, source_config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    def ingest_products():
        # Index every example scene, remembering its UUID
        valid_uuids = []
        for uuid, dataset_path in example_ls5_dataset_paths.items():
            valid_uuids.append(uuid)
            index_dataset(dataset_path)

        # Ensure that datasets are actually indexed before ingesting
        ensure_datasets_are_indexed(index, valid_uuids)

        # Ingest them
        clirunner(['ingest', '--config-file', str(config_path)])

        # Validate that the ingestion is working as expected
        datasets = index.datasets.search_eager(product='ls5_nbar_albers')
        assert len(datasets) > 0
        assert datasets[0].managed

        check_open_with_api(index, len(valid_uuids))
        check_data_with_api(index, len(valid_uuids))

        # NetCDF specific checks, based on the saved NetCDF file
        ds_path = str(datasets[0].local_path)
        with netCDF4.Dataset(ds_path) as nco:
            check_data_shape(nco)
            check_grid_mapping(nco)
            check_cf_compliance(nco)
            check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
            check_attributes(nco, config['global_attributes'])

            name = config['measurements'][0]['name']
            check_attributes(nco[name], config['measurements'][0]['attrs'])
        check_open_with_xarray(ds_path)

    # First pass: create the target product and ingest into it
    ingest_products()

    ######################
    #  Double Ingestion  #
    ######################
    # Second pass must re-use the existing target product
    ingest_products()
def test_process_all_ingest_jobs(clirunner, index, tmpdir, example_ls5_dataset_paths, ingest_configs):
    """
    Test for the case where ingestor processes upto `--queue-size` number of tasks
    and not all the available scenes
    """
    # Make a test ingestor configuration
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    # Number of scenes generated is 3 (as per NUM_TIME_SLICES const from conftest.py)
    # Set the queue size to process 2 tiles
    queue_size = 2
    valid_uuids = []
    for uuid, ls5_dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        index_dataset(ls5_dataset_path)

    # Ensure that datasets are actually indexed
    ensure_datasets_are_indexed(index, valid_uuids)

    # Ingest all scenes (Though the queue size is 2, all 3 tiles will be ingested)
    # FIX: CLI argv entries must be strings — pass str(queue_size), not the raw int
    clirunner([
        'ingest',
        '--config-file',
        str(config_path),
        '--queue-size',
        str(queue_size),
        '--allow-product-changes',
    ])

    # Validate that the ingestion is working as expected
    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))

    # NetCDF specific checks, based on the saved NetCDF file
    ds_path = str(datasets[0].local_path)
    with netCDF4.Dataset(ds_path) as nco:
        check_data_shape(nco)
        check_grid_mapping(nco)
        check_cf_compliance(nco)
        check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
        check_attributes(nco, config['global_attributes'])

        name = config['measurements'][0]['name']
        check_attributes(nco[name], config['measurements'][0]['attrs'])
    check_open_with_xarray(ds_path)
def test_invalid_ingestor_config(clirunner, index, tmpdir, ingest_configs):
    """
    Test that the ingestor correctly identifies an invalid ingestor config file.

    Note: We do not need to test valid config files as that is covered by
    the existing ingestor tests.

    NOTE(review): a later function in this module re-defines
    ``test_invalid_ingestor_config``, which shadows this one at import time —
    confirm whether both are meant to exist.
    """
    invalid_config = PROJECT_ROOT / 'integration_tests/data/ingester/invalid_config.yaml'
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, invalid_config, mode='fast_ingest')
    # The ingest command is expected to fail on the broken configuration
    clirunner(['ingest', '--config-file', str(config_path)],
              expect_success=False)
def test_full_ingestion(clirunner, index, tmpdir, example_ls5_dataset_paths, ingest_configs):
    """Index the example LS5 scenes, ingest, and verify index, API and NetCDF output."""
    source_config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, source_config, mode='fast_ingest')

    valid_uuids = []
    for uuid, dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        clirunner(['dataset', 'add', str(dataset_path)])

    ensure_datasets_are_indexed(index, valid_uuids)

    # TODO(csiro) Set time dimension when testing
    # config['storage']['tile_size']['time'] = 2

    clirunner(['ingest', '--config-file', str(config_path)])

    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))
    check_data_with_api(index, len(valid_uuids))

    # NetCDF specific checks, based on the saved NetCDF file
    ds_path = str(datasets[0].local_path)
    with netCDF4.Dataset(ds_path) as nco:
        check_data_shape(nco)
        check_grid_mapping(nco)
        check_cf_compliance(nco)
        check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
        check_attributes(nco, config['global_attributes'])

        name = config['measurements'][0]['name']
        check_attributes(nco[name], config['measurements'][0]['attrs'])
    check_open_with_xarray(ds_path)
def test_invalid_ingestor_config(clirunner, index, tmpdir):
    """
    Test that the ingestor correctly identifies an invalid ingestor config file.

    Note: We do not need to test valid config files as that is covered by
    the existing ingestor tests.
    """
    base = PROJECT_ROOT / 'integration_tests/data/ingester/'
    cases = (
        ('invalid_config.yaml', "'src_varname' is a required property"),
        ('invalid_src_name.yaml', 'No such variable in the source product:'),
    )
    for cfg_name, expected_err in cases:
        config_path, config = prepare_test_ingestion_configuration(
            tmpdir, None, base / cfg_name)
        result = clirunner(['ingest', '--config-file', str(config_path)],
                           expect_success=False)
        # Each broken config must fail and report its specific error
        assert result.exit_code != 0
        assert expected_err in result.output
def test_index_out_of_bound_error(clirunner, index, tmpdir, example_ls5_dataset_paths, ingest_configs):
    """
    Test for the case where ingestor processes upto `--queue-size` number of tasks
    and not all the available scenes
    """
    # Make a test ingestor configuration
    config = INGESTER_CONFIGS / ingest_configs['ls5_nbar_albers']
    config_path, config = prepare_test_ingestion_configuration(
        tmpdir, None, config, mode='fast_ingest')

    def index_dataset(path):
        return clirunner(['dataset', 'add', str(path)])

    # Set the queue size to process 5 tiles
    queue_size = 5
    valid_uuids = []
    for uuid, ls5_dataset_path in example_ls5_dataset_paths.items():
        valid_uuids.append(uuid)
        index_dataset(ls5_dataset_path)

    # Ensure that datasets are actually indexed
    ensure_datasets_are_indexed(index, valid_uuids)

    # Locationless scenario within database arises when we run the sync tool
    # (with --update-location option) on the disk where the actual file is
    # removed and regenerated again with new dataset id.
    for indexed_uuid in valid_uuids:
        dc1 = datacube.Datacube(index=index)
        datasets = dc1.find_datasets(product='ls5_nbar_scene')
        try:
            # Remove location from the index, to simulate indexed out of range scenario
            res = dc1.index.datasets.remove_location(indexed_uuid, datasets[0].local_uri)
        except AttributeError:
            # Do for one dataset, ignore any other attribute errors.
            # FIX: skip the assertion on this path — previously `res` could be
            # unbound (NameError) or stale from an earlier iteration here.
            continue
        assert res is True, "Error for %r.\noutput: %r" % (indexed_uuid, res)

    # Ingest scenes with locationless dataset
    # FIX: CLI argv entries must be strings — pass str(queue_size), not the raw int
    clirunner([
        'ingest',
        '--config-file',
        str(config_path),
        '--queue-size',
        str(queue_size),
        '--allow-product-changes',
    ])

    # Validate that the ingestion is working as expected
    datasets = index.datasets.search_eager(product='ls5_nbar_albers')
    assert len(datasets) > 0
    assert datasets[0].managed

    check_open_with_api(index, len(valid_uuids))

    # NetCDF specific checks, based on the saved NetCDF file
    ds_path = str(datasets[0].local_path)
    with netCDF4.Dataset(ds_path) as nco:
        check_data_shape(nco)
        check_grid_mapping(nco)
        check_cf_compliance(nco)
        check_dataset_metadata_in_storage_unit(nco, example_ls5_dataset_paths)
        check_attributes(nco, config['global_attributes'])

        name = config['measurements'][0]['name']
        check_attributes(nco[name], config['measurements'][0]['attrs'])
    check_open_with_xarray(ds_path)