def test_find_datasets_default_parameter(self):
    add_dataset(self.ctx, dataset=new_test_dataset(1))
    add_dataset(self.ctx, dataset=new_test_dataset(2))
    add_dataset(self.ctx, dataset=new_test_dataset(3))

    expr = None
    region = None
    time = None
    wdepth = None
    mtype = None
    wlmode = 'all'
    shallow = 'no'
    pmode = 'contains'
    pgroup = None
    pname = None
    offset = None
    count = None
    # noinspection PyTypeChecker
    result = find_datasets(self.ctx,
                           expr=expr,
                           region=region,
                           time=time,
                           wdepth=wdepth,
                           mtype=mtype,
                           wlmode=wlmode,
                           shallow=shallow,
                           pmode=pmode,
                           pgroup=pgroup,
                           pname=pname,
                           offset=offset,
                           count=count)
    self.assertIsInstance(result, DatasetQueryResult)
    self.assertEqual(3, result.total_count)

def test_local(self):
    self.add_path('my.zarr')
    writer = DatasetWriter('my.zarr', output_overwrite=False)
    ds = new_test_dataset(day=1)
    writer.write_dataset(ds)
    self.assertTrue(os.path.isdir('my.zarr'))

    # Writing again without overwrite must fail,
    # because the Zarr group already exists.
    ds = new_test_dataset(day=2)
    with self.assertRaises(zarr.errors.ContainsGroupError):
        writer.write_dataset(ds)

def test_update_dataset(self):
    dataset_ref = add_dataset(self.ctx, new_test_dataset(42))
    dataset_id = dataset_ref.id

    dataset_update = new_test_dataset(42)
    dataset_update.id = dataset_id
    dataset_update.path = "a/b/c/archive/x/x-01.csv"
    update_dataset(self.ctx, dataset=dataset_update)

    updated_dataset = get_dataset_by_id_strict(self.ctx, dataset_id)
    self.assertEqual(dataset_update, updated_dataset)

def test_find_datasets_with_geolocations(self):
    dataset = new_test_dataset(1)
    dataset.longitudes = [104, 105]
    dataset.latitudes = [22, 23]
    add_dataset(self.ctx, dataset=dataset)

    dataset = new_test_dataset(2)
    dataset.longitudes = [114, 115]
    dataset.latitudes = [32, 33]
    add_dataset(self.ctx, dataset=dataset)

    dataset = new_test_dataset(3)
    dataset.longitudes = [124, 125]
    dataset.latitudes = [42, 43]
    add_dataset(self.ctx, dataset=dataset)

    expr = None
    # Only dataset 2's points fall inside this bounding box.
    region = [110, 30, 120, 35]
    time = None
    wdepth = None
    mtype = None
    wlmode = 'all'
    shallow = 'no'
    pmode = 'contains'
    pgroup = None
    pname = None
    offset = None
    count = None
    geojson = True
    # noinspection PyTypeChecker
    result = find_datasets(self.ctx,
                           expr=expr,
                           region=region,
                           time=time,
                           wdepth=wdepth,
                           mtype=mtype,
                           wlmode=wlmode,
                           shallow=shallow,
                           pmode=pmode,
                           pgroup=pgroup,
                           pname=pname,
                           offset=offset,
                           count=count,
                           geojson=geojson)
    self.assertIsInstance(result, DatasetQueryResult)
    self.assertEqual(1, result.total_count)
    self.assertEqual(1, len(result.locations))
    ds_id = result.datasets[0].id
    self.assertEqual(
        "{'type':'FeatureCollection','features':["
        "{'type':'Feature','geometry':{'type':'Point','coordinates':[114,32]}},"
        "{'type':'Feature','geometry':{'type':'Point','coordinates':[115,33]}}]}",
        result.locations[ds_id])

def test_local_overwrite(self):
    self.add_path('my.zarr')
    writer = DatasetWriter('my.zarr', output_overwrite=False)
    ds = new_test_dataset(day=1)
    writer.write_dataset(ds)
    self.assertTrue(os.path.isdir('my.zarr'))

    writer = DatasetWriter('my.zarr', output_overwrite=True)
    ds = new_test_dataset(day=2)
    writer.write_dataset(ds)
    self.assertTrue(os.path.isdir('my.zarr'))

def test_add_dataset(self):
    dataset_1 = new_test_dataset(6)
    result_1 = add_dataset(self.ctx, dataset=dataset_1)
    self.assertIsInstance(result_1, DatasetRef)
    self.assertIsNotNone(result_1.id)
    self.assertEqual(dataset_1.path, result_1.path)

    dataset_2 = new_test_dataset(8)
    result_2 = add_dataset(self.ctx, dataset=dataset_2)
    self.assertIsInstance(result_2, DatasetRef)
    self.assertIsNotNone(result_2.id)
    self.assertNotEqual(result_1.id, result_2.id)
    self.assertEqual(dataset_2.path, result_2.path)

def test_rechunk_and_encodings_merged(self):
    ds = new_test_dataset(day=1)
    processor = DatasetProcessor(
        process_rechunk={
            'r_i32': dict(lon=8, lat=8),
            'lon': None,
            'lat': None
        },
        output_encoding={'r_i32': dict(compressor=None, fill_value=None)})
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsNot(ds, new_ds)
    self.assertEqual(
        {
            'r_f32': {'chunks': (1, 18, 36)},
            'r_i32': {'chunks': (1, 8, 8),
                      'compressor': None,
                      'fill_value': None},
            'r_ui16': {'chunks': (1, 18, 36)},
            'lon': {'chunks': (36,)},
            'lat': {'chunks': (18,)},
            'time': {'chunks': (1,)},
        },
        new_encoding)

def test_rechunk_with_input_and_single_chunks(self):
    # See https://github.com/bcdev/nc2zarr/issues/23
    ds = new_test_dataset(day=1, chunked=True)
    ds = ds.chunk(dict(lat=1000, lon=1000, time=1000))
    processor = DatasetProcessor(process_rechunk={
        'lon': None,
        'lat': None,
    })
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsNot(ds, new_ds)
    self.assertEqual(
        {
            'r_f32': {'chunks': (1, 18, 36)},
            'r_i32': {'chunks': (1, 18, 36)},
            'r_ui16': {'chunks': (1, 18, 36)},
            'lon': {'chunks': (36,)},
            'lat': {'chunks': (18,)},
            'time': {'chunks': (1,)}
        },
        new_encoding)

def test_rechunk_default(self):
    ds = new_test_dataset(day=1)
    processor = DatasetProcessor(
        process_rechunk={'*': dict(lon=8, lat=4, time=1)})
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsNot(ds, new_ds)
    self.assertEqual(
        {
            'r_f32': {'chunks': (1, 4, 8)},
            'r_i32': {'chunks': (1, 4, 8)},
            'r_ui16': {'chunks': (1, 4, 8)},
            'lon': {'chunks': (8,)},
            'lat': {'chunks': (4,)},
            'time': {'chunks': (1,)},
        },
        new_encoding)

def test_appending_vars_that_lack_append_dim(self):
    src_path_pat = 'src_{}.zarr'
    dst_path = 'my.zarr'
    self.add_path(dst_path)
    writer = DatasetWriter(dst_path,
                           output_overwrite=False,
                           input_decode_cf=False)
    n = 3
    for i in range(0, n):
        # 'field_names' has no 'time' dimension, so it cannot grow
        # along the append dimension on subsequent appends.
        field_names_values = np.full((3, 50), 0, dtype='S')
        field_names_values[0, 0] = np.array('A')
        field_names_values[1, 0] = np.array('B')
        field_names_values[2, 0] = np.array('C')
        src_dataset = new_test_dataset(day=i + 1)
        src_dataset = src_dataset.assign(
            field_names=xr.DataArray(field_names_values,
                                     dims=("fields", "field_name_length"))
        )
        src_path = src_path_pat.format(i)
        self.add_path(src_path)
        src_dataset.to_zarr(src_path)
        with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
            writer.write_dataset(src_dataset, append=i > 0)
    self.assertTimeSlicesOk(dst_path, src_path_pat, n)

def test_rechunk_with_lon_lat_time_unchunked(self):
    ds = new_test_dataset(day=1)
    processor = DatasetProcessor(process_rechunk={
        '*': dict(lon=8, lat=4, time=1),
        'lon': None,
        'lat': None,
        'time': 100
    })
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsNot(ds, new_ds)
    self.assertEqual(
        {
            'r_f32': {'chunks': (1, 4, 8)},
            'r_i32': {'chunks': (1, 4, 8)},
            'r_ui16': {'chunks': (1, 4, 8)},
            'lon': {'chunks': (36,)},
            'lat': {'chunks': (18,)},
            'time': {'chunks': (100,)},
        },
        new_encoding)

def test_local_dry_run_for_existing(self):
    self.add_path('my.zarr')
    ds = new_test_dataset(day=1)
    writer = DatasetWriter('my.zarr', output_overwrite=True)
    writer.write_dataset(ds)
    self.assertTrue(os.path.isdir('my.zarr'))

    # A dry run must leave the already existing output in place.
    writer = DatasetWriter('my.zarr', output_overwrite=True, dry_run=True)
    writer.write_dataset(ds)
    self.assertTrue(os.path.isdir('my.zarr'))

def test_rename(self):
    ds = new_test_dataset(day=1)
    self.assertIn('r_f32', ds)
    processor = DatasetProcessor(process_rename={'r_f32': 'bibo'})
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsInstance(new_ds, xr.Dataset)
    self.assertIn('bibo', new_ds)
    self.assertNotIn('r_f32', new_ds)
    self.assertEqual({}, new_encoding)

def test_delete_dataset(self):
    dataset_ref = add_dataset(self.ctx, new_test_dataset(42))
    dataset_id = dataset_ref.id
    dataset = get_dataset_by_id_strict(self.ctx, dataset_id)
    self.assertEqual(dataset_id, dataset.id)

    delete_dataset(self.ctx, dataset_id)
    # Deleting the same dataset a second time must fail.
    with self.assertRaises(WsResourceNotFoundError):
        delete_dataset(self.ctx, dataset_id)

def test_find_datasets_pgroup(self):
    dataset = new_test_dataset(1)
    dataset.groups = ["a"]
    add_dataset(self.ctx, dataset=dataset)

    dataset = new_test_dataset(2)
    dataset.groups = ["sal"]
    add_dataset(self.ctx, dataset=dataset)

    dataset = new_test_dataset(3)
    dataset.groups = ["Chl_a", "Chl_b"]
    add_dataset(self.ctx, dataset=dataset)

    expr = None
    region = None
    time = None
    wdepth = None
    mtype = None
    wlmode = 'all'
    shallow = 'no'
    pmode = 'contains'
    pgroup = ['sal']
    pname = None
    offset = None
    count = None
    # noinspection PyTypeChecker
    result = find_datasets(self.ctx,
                           expr=expr,
                           region=region,
                           time=time,
                           wdepth=wdepth,
                           mtype=mtype,
                           wlmode=wlmode,
                           shallow=shallow,
                           pmode=pmode,
                           pgroup=pgroup,
                           pname=pname,
                           offset=offset,
                           count=count)
    self.assertIsInstance(result, DatasetQueryResult)
    self.assertEqual(1, result.total_count)

def test_find_datasets_pname(self):
    dataset = new_test_dataset(1)
    dataset.attributes = ["But-fuco"]
    add_dataset(self.ctx, dataset=dataset)

    dataset = new_test_dataset(2)
    dataset.attributes = ["Hex-fuco"]
    add_dataset(self.ctx, dataset=dataset)

    dataset = new_test_dataset(3)
    dataset.attributes = ["Allo", "Diadino"]
    add_dataset(self.ctx, dataset=dataset)

    expr = None
    region = None
    time = None
    wdepth = None
    mtype = None
    wlmode = 'all'
    shallow = 'no'
    pmode = 'contains'
    pgroup = None
    pname = ['Allo', 'Diadino']
    offset = None
    count = None
    # noinspection PyTypeChecker
    result = find_datasets(self.ctx,
                           expr=expr,
                           region=region,
                           time=time,
                           wdepth=wdepth,
                           mtype=mtype,
                           wlmode=wlmode,
                           shallow=shallow,
                           pmode=pmode,
                           pgroup=pgroup,
                           pname=pname,
                           offset=offset,
                           count=count)
    self.assertIsInstance(result, DatasetQueryResult)
    self.assertEqual(1, result.total_count)

def test_get_dataset_by_id(self):
    dataset_id_1 = add_dataset(self.ctx, dataset=new_test_dataset(1)).id
    dataset_id_2 = add_dataset(self.ctx, dataset=new_test_dataset(2)).id
    dataset_id_3 = add_dataset(self.ctx, dataset=new_test_dataset(3)).id

    dataset_1 = get_dataset_by_id_strict(self.ctx, dataset_id_1)
    self.assertIsNotNone(dataset_1)
    self.assertEqual(dataset_id_1, dataset_1.id)

    dataset_2 = get_dataset_by_id_strict(self.ctx, dataset_id_2)
    self.assertIsNotNone(dataset_2)
    self.assertEqual(dataset_id_2, dataset_2.id)

    dataset_3 = get_dataset_by_id_strict(self.ctx, dataset_id_3)
    self.assertIsNotNone(dataset_3)
    self.assertEqual(dataset_id_3, dataset_3.id)

    with self.assertRaises(WsResourceNotFoundError):
        get_dataset_by_id_strict(self.ctx, "gnarz")

def test_rechunk_with_invalid_size(self):
    ds = new_test_dataset()
    processor = DatasetProcessor(process_rechunk={
        '*': {
            'lon': [1, 2, 3],
            'lat': 'input',
        },
    })
    with self.assertRaises(ValueError) as cm:
        processor.process_dataset(ds)
    self.assertEqual('invalid chunk size: [1, 2, 3]',
                     f'{cm.exception}')

def test_finalize_only_and_append(self):
    self.add_path('my.zarr')
    writer = DatasetWriter('my.zarr',
                           finalize_only=True,
                           output_append=True)
    ds = new_test_dataset(day=1)
    with self.assertRaises(RuntimeError) as e:
        writer.write_dataset(ds)
    self.assertEqual(('internal error: cannot write/append'
                      ' datasets when in finalize-only mode',),
                     e.exception.args)

def test_local_postprocessor(self):
    self.add_path('my.zarr')
    writer = DatasetWriter(
        'my.zarr',
        output_overwrite=False,
        output_custom_postprocessor='tests.test_writer:my_postprocessor')
    ds = new_test_dataset(day=1)
    self.assertNotIn('crs', ds)
    writer.write_dataset(ds)
    self.assertTrue(os.path.isdir('my.zarr'))
    # The custom postprocessor is expected to have added a 'crs' variable.
    with xr.open_zarr('my.zarr', consolidated=False) as ds:
        self.assertIn('crs', ds)

def test_finalize_updates_metadata(self):
    self.add_path('my.zarr')
    writer = DatasetWriter('my.zarr',
                           output_append=True,
                           output_metadata=dict(
                               comment='This dataset is a test.'))
    for i in range(3):
        ds = new_test_dataset(day=i + 1)
        writer.write_dataset(ds)
    # The extra metadata is only written on finalization.
    with xr.open_zarr('my.zarr', consolidated=False) as ds:
        self.assertNotIn('comment', ds.attrs)
    writer.finalize_dataset()
    with xr.open_zarr('my.zarr', consolidated=False) as ds:
        self.assertIn('comment', ds.attrs)
        self.assertEqual('This dataset is a test.', ds.attrs['comment'])

def test_finalize_adjusts_metadata_with_time_bnds(self):
    self.add_path('my.zarr')
    writer = DatasetWriter('my.zarr',
                           output_append=True,
                           output_adjust_metadata=True)
    for i in range(3):
        ds = new_test_dataset(day=i + 1, add_time_bnds=True)
        writer.write_dataset(ds)
    writer.finalize_dataset()
    with xr.open_zarr('my.zarr', consolidated=False) as ds:
        self.assertIn('time_coverage_start', ds.attrs)
        self.assertEqual('2020-12-01 09:30:00',
                         ds.attrs['time_coverage_start'])
        self.assertIn('time_coverage_end', ds.attrs)
        self.assertEqual('2020-12-03 10:30:00',
                         ds.attrs['time_coverage_end'])

def test_get_set_dataset_qc_info(self):
    dataset_ref = add_dataset(self.ctx, new_test_dataset(42))
    dataset_id = dataset_ref.id

    qc_info = get_dataset_qc_info(self.ctx, dataset_id)
    self.assertEqual(QcInfo(QC_STATUS_SUBMITTED), qc_info)

    expected_qc_info = QcInfo(QC_STATUS_VALIDATED)
    set_dataset_qc_info(self.ctx, dataset_id, expected_qc_info)
    qc_info = get_dataset_qc_info(self.ctx, dataset_id)
    self.assertEqual(expected_qc_info, qc_info)

    expected_qc_info = QcInfo(
        QC_STATUS_PUBLISHED,
        dict(by='Illaria',
             when="2019-02-01",
             doc_files=["qc-report.docx"]))
    set_dataset_qc_info(self.ctx, dataset_id, expected_qc_info)
    qc_info = get_dataset_qc_info(self.ctx, dataset_id)
    self.assertEqual(expected_qc_info, qc_info)

def test_append_with_input_decode_cf(self):
    src_path_pat = 'src_{}.zarr'
    dst_path = 'my.zarr'
    self.add_path(dst_path)
    writer = DatasetWriter(dst_path,
                           output_overwrite=False,
                           input_decode_cf=False)
    n = 3
    for i in range(0, n):
        src_dataset = new_test_dataset(day=i + 1)
        src_path = src_path_pat.format(i)
        self.add_path(src_path)
        src_dataset.to_zarr(src_path)
        with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
            writer.write_dataset(src_dataset, append=i > 0)
    self.assertTimeSlicesOk(dst_path, src_path_pat, n)

def test_finalize_only_and_consolidate_if_specified(self):
    self.add_path('my.zarr')
    ds = new_test_dataset(day=1)
    writer = DatasetWriter('my.zarr', output_overwrite=True)
    writer.write_dataset(ds)
    writer.finalize_dataset()
    self.assertTrue(os.path.isdir('my.zarr'))
    self.assertFalse(os.path.isfile('my.zarr/.zmetadata'))

    writer = DatasetWriter('my.zarr',
                           output_consolidated=True,
                           finalize_only=True)
    writer.finalize_dataset()
    self.assertTrue(os.path.isdir('my.zarr'))
    self.assertTrue(os.path.isfile('my.zarr/.zmetadata'))
    with open('my.zarr/.zmetadata') as fp:
        metadata = json.load(fp)
    self.assertIn('metadata', metadata)
    self.assertEqual({}, metadata['metadata'].get('.zattrs'))

def test_finalize_adjusts_metadata(self):
    self.add_path('my.zarr')
    writer = DatasetWriter('my.zarr',
                           output_append=True,
                           output_adjust_metadata=True,
                           input_paths=['a.nc', 'z.zarr', 'b.nc'])
    for i in range(3):
        ds = new_test_dataset(day=i + 1)
        writer.write_dataset(ds)
    # The adjusted metadata only appears after finalization.
    with xr.open_zarr('my.zarr', consolidated=False) as ds:
        self.assertNotIn('history', ds.attrs)
        self.assertNotIn('source', ds.attrs)
        self.assertNotIn('time_coverage_start', ds.attrs)
        self.assertNotIn('time_coverage_end', ds.attrs)
    writer.finalize_dataset()
    with xr.open_zarr('my.zarr', consolidated=False) as ds:
        self.assertIn('history', ds.attrs)
        self.assertIn('source', ds.attrs)
        # Note that the Zarr input 'z.zarr' is not listed in 'source'.
        self.assertEqual('a.nc, b.nc', ds.attrs['source'])
        self.assertIn('time_coverage_start', ds.attrs)
        self.assertEqual('2020-12-01 10:00:00',
                         ds.attrs['time_coverage_start'])
        self.assertIn('time_coverage_end', ds.attrs)
        self.assertEqual('2020-12-03 10:00:00',
                         ds.attrs['time_coverage_end'])

def test_append_with_input_decode_cf_xarray(self):
    src_path_pat = 'src_{}.zarr'
    dst_path = 'my.zarr'
    self.add_path(dst_path)
    n = 3
    for i in range(0, n):
        src_dataset = new_test_dataset(day=i + 1)
        src_path = src_path_pat.format(i)
        self.add_path(src_path)
        src_dataset.to_zarr(src_path)
        with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
            if i == 0:
                src_dataset.to_zarr(dst_path, mode='w-')
            else:
                # Hack: decode the dataset, then strip all encodings and
                # attributes so that xarray does not re-encode the decoded
                # values when appending.
                src_dataset = xr.decode_cf(src_dataset)
                for var_name in src_dataset.variables:
                    src_dataset[var_name].encoding = {}
                    src_dataset[var_name].attrs = {}
                src_dataset.to_zarr(dst_path, append_dim='time')
    self.assertTimeSlicesOk(dst_path, src_path_pat, n)

def test_rechunk_all_unchunked_except_time(self):
    ds = new_test_dataset(day=1)
    processor = DatasetProcessor(
        process_rechunk={
            '*': {
                'lon': None,
                'lat': None,
                'time': 1
            },
            'lon': None,
            'lat': None,
            'time': 128
        })
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsNot(ds, new_ds)
    self.assertEqual(
        {
            'r_f32': {'chunks': (1, 18, 36)},
            'r_i32': {'chunks': (1, 18, 36)},
            'r_ui16': {'chunks': (1, 18, 36)},
            'lon': {'chunks': (36,)},
            'lat': {'chunks': (18,)},
            'time': {'chunks': (128,)}
        },
        new_encoding)

def test_rechunk_with_input(self):
    ds = new_test_dataset(day=1, chunked=True)
    processor = DatasetProcessor(
        process_rechunk={
            '*': {
                'lon': 'input',
                'lat': 'input',
                'time': 1
            },
            'lon': None,
            'lat': None,
            'time': 128
        })
    new_ds, new_encoding = processor.process_dataset(ds)
    self.assertIsNot(ds, new_ds)
    self.assertEqual(
        {
            'r_f32': {'chunks': (1, 9, 18)},
            'r_i32': {'chunks': (1, 9, 18)},
            'r_ui16': {'chunks': (1, 9, 18)},
            'lon': {'chunks': (36,)},
            'lat': {'chunks': (18,)},
            'time': {'chunks': (128,)}
        },
        new_encoding)

def test_validate_dataset(self):
    dataset = new_test_dataset(11)
    dataset.id = None
    result = validate_dataset(self.ctx, dataset=dataset)
    expected_result = DatasetValidationResult("OK", [])
    self.assertEqual(expected_result, result)