Example #1
    def test_find_datasets_default_parameter(self):
        add_dataset(self.ctx, dataset=new_test_dataset(1))
        add_dataset(self.ctx, dataset=new_test_dataset(2))
        add_dataset(self.ctx, dataset=new_test_dataset(3))

        expr = None
        region = None
        time = None
        wdepth = None
        mtype = None
        wlmode = 'all'
        shallow = 'no'
        pmode = 'contains'
        pgroup = None
        pname = None
        offset = None
        count = None

        # noinspection PyTypeChecker
        result = find_datasets(self.ctx,
                               expr=expr,
                               region=region,
                               time=time,
                               wdepth=wdepth,
                               mtype=mtype,
                               wlmode=wlmode,
                               shallow=shallow,
                               pmode=pmode,
                               pgroup=pgroup,
                               pname=pname,
                               offset=offset,
                               count=count)

        self.assertIsInstance(result, DatasetQueryResult)
        self.assertEqual(3, result.total_count)
Example #2
    def test_local(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr', output_overwrite=False)
        ds = new_test_dataset(day=1)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))

        ds = new_test_dataset(day=2)
        with self.assertRaises(zarr.errors.ContainsGroupError):
            writer.write_dataset(ds)
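With output_overwrite=False, the second write fails because zarr refuses to create a group at a path where one already exists; Example #5 below shows the same sequence succeeding with output_overwrite=True.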
Example #3
    def test_update_dataset(self):
        dataset_ref = add_dataset(self.ctx, new_test_dataset(42))
        dataset_id = dataset_ref.id
        dataset_update = new_test_dataset(42)
        dataset_update.id = dataset_id
        dataset_update.path = "a/b/c/archive/x/x-01.csv"
        update_dataset(self.ctx, dataset=dataset_update)

        updated_dataset = get_dataset_by_id_strict(self.ctx, dataset_id)
        self.assertEqual(dataset_update, updated_dataset)
Example #4
    def test_find_datasets_with_geolocations(self):
        dataset = new_test_dataset(1)
        dataset.longitudes = [104, 105]
        dataset.latitudes = [22, 23]
        add_dataset(self.ctx, dataset=dataset)

        dataset = new_test_dataset(2)
        dataset.longitudes = [114, 115]
        dataset.latitudes = [32, 33]
        add_dataset(self.ctx, dataset=dataset)

        dataset = new_test_dataset(3)
        dataset.longitudes = [124, 125]
        dataset.latitudes = [42, 43]
        add_dataset(self.ctx, dataset=dataset)

        expr = None
        region = [110, 30, 120, 35]
        time = None
        wdepth = None
        mtype = None
        wlmode = 'all'
        shallow = 'no'
        pmode = 'contains'
        pgroup = None
        pname = None
        offset = None
        count = None
        geojson = True

        # noinspection PyTypeChecker
        result = find_datasets(self.ctx,
                               expr=expr,
                               region=region,
                               time=time,
                               wdepth=wdepth,
                               mtype=mtype,
                               wlmode=wlmode,
                               shallow=shallow,
                               pmode=pmode,
                               pgroup=pgroup,
                               pname=pname,
                               offset=offset,
                               count=count,
                               geojson=geojson)

        self.assertIsInstance(result, DatasetQueryResult)
        self.assertEqual(1, result.total_count)
        self.assertEqual(1, len(result.locations))
        ds_id = result.datasets[0].id
        self.assertEqual(
            "{'type':'FeatureCollection','features':["
            "{'type':'Feature','geometry':{'type':'Point','coordinates':[114,32]}},"
            "{'type':'Feature','geometry':{'type':'Point','coordinates':[115,33]}}]}",
            result.locations[ds_id])
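Here region is a [lon_min, lat_min, lon_max, lat_max] bounding box: only the second dataset's points (longitudes 114/115, latitudes 32/33) fall inside [110, 30, 120, 35], so exactly one dataset and its GeoJSON feature collection are returned.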
Example #5
    def test_local_overwrite(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr', output_overwrite=False)
        ds = new_test_dataset(day=1)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))

        writer = DatasetWriter('my.zarr', output_overwrite=True)
        ds = new_test_dataset(day=2)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))
Example #6
    def test_add_dataset(self):
        dataset_1 = new_test_dataset(6)
        result_1 = add_dataset(self.ctx, dataset=dataset_1)
        self.assertIsInstance(result_1, DatasetRef)
        self.assertIsNotNone(result_1.id)
        self.assertEqual(dataset_1.path, result_1.path)

        dataset_2 = new_test_dataset(8)
        result_2 = add_dataset(self.ctx, dataset=dataset_2)
        self.assertIsInstance(result_2, DatasetRef)
        self.assertIsNotNone(result_2.id)
        self.assertNotEqual(result_1.id, result_2.id)
        self.assertEqual(dataset_2.path, result_2.path)
Example #7
    def test_rechunk_and_encodings_merged(self):
        ds = new_test_dataset(day=1)
        processor = DatasetProcessor(
            process_rechunk={
                'r_i32': dict(lon=8, lat=8),
                'lon': None,
                'lat': None
            },
            output_encoding={'r_i32': dict(compressor=None, fill_value=None)})
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsNot(ds, new_ds)
        self.assertEqual(
            {
                'r_f32': {'chunks': (1, 18, 36)},
                'r_i32': {
                    'chunks': (1, 8, 8),
                    'compressor': None,
                    'fill_value': None
                },
                'r_ui16': {'chunks': (1, 18, 36)},
                'lon': {'chunks': (36,)},
                'lat': {'chunks': (18,)},
                'time': {'chunks': (1,)},
            }, new_encoding)
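The DatasetProcessor and DatasetWriter examples all rely on a test helper that is not shown here. Below is a minimal sketch of what new_test_dataset might look like, inferred only from what these tests assert (a 1×18×36 time/lat/lon grid with variables r_f32, r_i32 and r_ui16, input chunks of lat=9, lon=18 when chunked=True, and 2020-12 timestamps); the real helper may differ, and the add_time_bnds keyword used in a later example suggests further options:

import numpy as np
import pandas as pd
import xarray as xr

def new_test_dataset(day: int = 1, chunked: bool = False) -> xr.Dataset:
    # Hypothetical reconstruction: 1 time step, 18 latitudes, 36 longitudes.
    shape = 1, 18, 36
    dims = 'time', 'lat', 'lon'
    ds = xr.Dataset(
        data_vars=dict(
            r_f32=xr.DataArray(np.zeros(shape, dtype='float32'), dims=dims),
            r_i32=xr.DataArray(np.zeros(shape, dtype='int32'), dims=dims),
            r_ui16=xr.DataArray(np.zeros(shape, dtype='uint16'), dims=dims),
        ),
        coords=dict(
            lon=np.linspace(-175.0, 175.0, 36),
            lat=np.linspace(-85.0, 85.0, 18),
            time=[pd.Timestamp(f'2020-12-{day:02d} 10:00:00')],
        ),
    )
    # chunked=True yields the lat=9, lon=18 input chunks that
    # test_rechunk_with_input expects to be preserved.
    return ds.chunk(dict(time=1, lat=9, lon=18)) if chunked else ds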
Example #8
    def test_rechunk_with_input_and_single_chunks(self):
        # See https://github.com/bcdev/nc2zarr/issues/23
        ds = new_test_dataset(day=1, chunked=True)
        ds = ds.chunk(dict(lat=1000, lon=1000, time=1000))
        processor = DatasetProcessor(process_rechunk={
            'lon': None,
            'lat': None,
        })
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsNot(ds, new_ds)
        self.assertEqual(
            {
                'r_f32': {'chunks': (1, 18, 36)},
                'r_i32': {'chunks': (1, 18, 36)},
                'r_ui16': {'chunks': (1, 18, 36)},
                'lon': {'chunks': (36,)},
                'lat': {'chunks': (18,)},
                'time': {'chunks': (1,)}
            }, new_encoding)
Example #9
    def test_rechunk_default(self):
        ds = new_test_dataset(day=1)
        processor = DatasetProcessor(
            process_rechunk={'*': dict(lon=8, lat=4, time=1)})
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsNot(ds, new_ds)
        self.assertEqual(
            {
                'r_f32': {'chunks': (1, 4, 8)},
                'r_i32': {'chunks': (1, 4, 8)},
                'r_ui16': {'chunks': (1, 4, 8)},
                'lon': {'chunks': (8,)},
                'lat': {'chunks': (4,)},
                'time': {'chunks': (1,)},
            }, new_encoding)
Example #10
    def test_appending_vars_that_lack_append_dim(self):

        src_path_pat = 'src_{}.zarr'
        dst_path = 'my.zarr'
        self.add_path(dst_path)

        writer = DatasetWriter(dst_path, output_overwrite=False,
                               input_decode_cf=False)

        n = 3
        for i in range(0, n):
            field_names_values = np.full((3, 50), 0, dtype='S')
            field_names_values[0, 0] = np.array('A')
            field_names_values[1, 0] = np.array('B')
            field_names_values[2, 0] = np.array('C')

            src_dataset = new_test_dataset(day=i + 1)
            src_dataset = src_dataset.assign(
                field_names=xr.DataArray(field_names_values,
                                         dims=("fields", "field_name_length"))
            )
            src_path = src_path_pat.format(i)
            self.add_path(src_path)
            src_dataset.to_zarr(src_path)
            with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
                writer.write_dataset(src_dataset, append=i > 0)

        self.assertTimeSlicesOk(dst_path, src_path_pat, n)
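The variable field_names deliberately lacks the time dimension being appended along, so the writer presumably has to leave it untouched on subsequent appends rather than trying to concatenate it.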
Example #11
    def test_rechunk_with_lon_lat_time_unchunked(self):
        ds = new_test_dataset(day=1)
        processor = DatasetProcessor(process_rechunk={
            '*': dict(lon=8, lat=4, time=1),
            'lon': None,
            'lat': None,
            'time': 100
        })
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsNot(ds, new_ds)
        self.assertEqual(
            {
                'r_f32': {'chunks': (1, 4, 8)},
                'r_i32': {'chunks': (1, 4, 8)},
                'r_ui16': {'chunks': (1, 4, 8)},
                'lon': {'chunks': (36,)},
                'lat': {'chunks': (18,)},
                'time': {'chunks': (100,)},
            }, new_encoding)
Example #12
    def test_local_dry_run_for_existing(self):
        self.add_path('my.zarr')
        ds = new_test_dataset(day=1)
        writer = DatasetWriter('my.zarr', output_overwrite=True)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))
        writer = DatasetWriter('my.zarr', output_overwrite=True, dry_run=True)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))
Example #13
    def test_rename(self):
        ds = new_test_dataset(day=1)
        self.assertIn('r_f32', ds)
        processor = DatasetProcessor(process_rename={'r_f32': 'bibo'})
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsInstance(new_ds, xr.Dataset)
        self.assertIn('bibo', new_ds)
        self.assertNotIn('r_f32', new_ds)
        self.assertEqual({}, new_encoding)
Example #14
    def test_delete_dataset(self):
        dataset_ref = add_dataset(self.ctx, new_test_dataset(42))
        dataset_id = dataset_ref.id
        dataset = get_dataset_by_id_strict(self.ctx, dataset_id)
        self.assertEqual(dataset_id, dataset.id)
        delete_dataset(self.ctx, dataset_id)

        with self.assertRaises(WsResourceNotFoundError):
            delete_dataset(self.ctx, dataset_id)
Example #15
    def test_find_datasets_pgroup(self):
        dataset = new_test_dataset(1)
        dataset.groups = ["a"]
        add_dataset(self.ctx, dataset=dataset)

        dataset = new_test_dataset(2)
        dataset.groups = ["sal"]
        add_dataset(self.ctx, dataset=dataset)

        dataset = new_test_dataset(3)
        dataset.groups = ["Chl_a", "Chl_b"]
        add_dataset(self.ctx, dataset=dataset)

        expr = None
        region = None
        time = None
        wdepth = None
        mtype = None
        wlmode = 'all'
        shallow = 'no'
        pmode = 'contains'
        pgroup = ['sal']
        pname = None
        offset = None
        count = None

        # noinspection PyTypeChecker
        result = find_datasets(self.ctx,
                               expr=expr,
                               region=region,
                               time=time,
                               wdepth=wdepth,
                               mtype=mtype,
                               wlmode=wlmode,
                               shallow=shallow,
                               pmode=pmode,
                               pgroup=pgroup,
                               pname=pname,
                               offset=offset,
                               count=count)

        self.assertIsInstance(result, DatasetQueryResult)
        self.assertEqual(1, result.total_count)
Example #16
    def test_find_datasets_pname(self):
        dataset = new_test_dataset(1)
        dataset.attributes = ["But-fuco"]
        add_dataset(self.ctx, dataset=dataset)

        dataset = new_test_dataset(2)
        dataset.attributes = ["Hex-fuco"]
        add_dataset(self.ctx, dataset=dataset)

        dataset = new_test_dataset(3)
        dataset.attributes = ["Allo", "Diadino"]
        add_dataset(self.ctx, dataset=dataset)

        expr = None
        region = None
        time = None
        wdepth = None
        mtype = None
        wlmode = 'all'
        shallow = 'no'
        pmode = 'contains'
        pgroup = None
        pname = ['Allo', 'Diadino']
        offset = None
        count = None

        # noinspection PyTypeChecker
        result = find_datasets(self.ctx,
                               expr=expr,
                               region=region,
                               time=time,
                               wdepth=wdepth,
                               mtype=mtype,
                               wlmode=wlmode,
                               shallow=shallow,
                               pmode=pmode,
                               pgroup=pgroup,
                               pname=pname,
                               offset=offset,
                               count=count)

        self.assertIsInstance(result, DatasetQueryResult)
        self.assertEqual(1, result.total_count)
Example #17
    def test_get_dataset_by_id(self):
        dataset_id_1 = add_dataset(self.ctx, dataset=new_test_dataset(1)).id
        dataset_id_2 = add_dataset(self.ctx, dataset=new_test_dataset(2)).id
        dataset_id_3 = add_dataset(self.ctx, dataset=new_test_dataset(3)).id

        dataset_1 = get_dataset_by_id_strict(self.ctx, dataset_id_1)
        self.assertIsNotNone(dataset_1)
        self.assertEqual(dataset_id_1, dataset_1.id)

        dataset_2 = get_dataset_by_id_strict(self.ctx, dataset_id_2)
        self.assertIsNotNone(dataset_2)
        self.assertEqual(dataset_id_2, dataset_2.id)

        dataset_3 = get_dataset_by_id_strict(self.ctx, dataset_id_3)
        self.assertIsNotNone(dataset_3)
        self.assertEqual(dataset_id_3, dataset_3.id)

        with self.assertRaises(WsResourceNotFoundError):
            get_dataset_by_id_strict(self.ctx, "gnarz")
Example #18
    def test_rechunk_with_invalid_size(self):
        ds = new_test_dataset()
        processor = DatasetProcessor(process_rechunk={
            '*': {
                'lon': [1, 2, 3],
                'lat': 'input',
            },
        })
        with self.assertRaises(ValueError) as cm:
            processor.process_dataset(ds)
        self.assertEqual('invalid chunk size: [1, 2, 3]', f'{cm.exception}')
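Taken together with the rechunk examples above, a chunk size in process_rechunk appears to be either an int, None (one full-size chunk for the dimension), or the string 'input' (keep the input's chunking); anything else, such as the list here, raises a ValueError.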
Example #19
    def test_finalize_only_and_append(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr',
                               finalize_only=True,
                               output_append=True)

        ds = new_test_dataset(day=1)
        with self.assertRaises(RuntimeError) as e:
            writer.write_dataset(ds)
        self.assertEqual(('internal error: cannot write/append'
                          ' datasets when in finalize-only mode',),
                         e.exception.args)
Example #20
    def test_local_postprocessor(self):
        self.add_path('my.zarr')
        writer = DatasetWriter(
            'my.zarr',
            output_overwrite=False,
            output_custom_postprocessor='tests.test_writer:my_postprocessor')
        ds = new_test_dataset(day=1)
        self.assertNotIn('crs', ds)

        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertIn('crs', ds)
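The string 'tests.test_writer:my_postprocessor' names the module and function the writer imports. A plausible sketch of such a postprocessor, hypothetical and inferred only from the assertions above (it must take and return an xr.Dataset and add a 'crs' variable):

import xarray as xr

def my_postprocessor(ds: xr.Dataset) -> xr.Dataset:
    # Attach a scalar 'crs' variable; the test only checks that it appears.
    crs = xr.DataArray(0, attrs=dict(grid_mapping_name='latitude_longitude'))
    return ds.assign(crs=crs)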
Example #21
    def test_finalize_updates_metadata(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr',
                               output_append=True,
                               output_metadata=dict(
                                   comment='This dataset is a test.'))
        for i in range(3):
            ds = new_test_dataset(day=i + 1)
            writer.write_dataset(ds)
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertNotIn('comment', ds.attrs)
        writer.finalize_dataset()
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertIn('comment', ds.attrs)
            self.assertEqual('This dataset is a test.', ds.attrs['comment'])
Example #22
    def test_finalize_adjusts_metadata_with_time_bnds(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr', output_append=True,
                               output_adjust_metadata=True)
        for i in range(3):
            ds = new_test_dataset(day=i + 1, add_time_bnds=True)
            writer.write_dataset(ds)
        writer.finalize_dataset()
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertIn('time_coverage_start', ds.attrs)
            self.assertEqual('2020-12-01 09:30:00',
                             ds.attrs['time_coverage_start'])
            self.assertIn('time_coverage_end', ds.attrs)
            self.assertEqual('2020-12-03 10:30:00',
                             ds.attrs['time_coverage_end'])
Example #23
    def test_get_set_dataset_qc_info(self):
        dataset_ref = add_dataset(self.ctx, new_test_dataset(42))
        dataset_id = dataset_ref.id

        qc_info = get_dataset_qc_info(self.ctx, dataset_id)
        self.assertEqual(QcInfo(QC_STATUS_SUBMITTED), qc_info)

        expected_qc_info = QcInfo(QC_STATUS_VALIDATED)
        set_dataset_qc_info(self.ctx, dataset_id, expected_qc_info)
        qc_info = get_dataset_qc_info(self.ctx, dataset_id)
        self.assertEqual(expected_qc_info, qc_info)

        expected_qc_info = QcInfo(
            QC_STATUS_PUBLISHED,
            dict(by='Illaria', when="2019-02-01",
                 doc_files=["qc-report.docx"]))
        set_dataset_qc_info(self.ctx, dataset_id, expected_qc_info)
        qc_info = get_dataset_qc_info(self.ctx, dataset_id)
        self.assertEqual(expected_qc_info, qc_info)
Example #24
    def test_append_with_input_decode_cf(self):

        src_path_pat = 'src_{}.zarr'
        dst_path = 'my.zarr'
        self.add_path(dst_path)

        writer = DatasetWriter(dst_path, output_overwrite=False,
                               input_decode_cf=False)

        n = 3
        for i in range(0, n):
            src_dataset = new_test_dataset(day=i + 1)
            src_path = src_path_pat.format(i)
            self.add_path(src_path)
            src_dataset.to_zarr(src_path)
            with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
                writer.write_dataset(src_dataset, append=i > 0)

        self.assertTimeSlicesOk(dst_path, src_path_pat, n)
Example #25
    def test_finalize_only_and_consolidate_if_specified(self):
        self.add_path('my.zarr')
        ds = new_test_dataset(day=1)
        writer = DatasetWriter('my.zarr',
                               output_overwrite=True)
        writer.write_dataset(ds)
        writer.finalize_dataset()
        self.assertTrue(os.path.isdir('my.zarr'))
        self.assertFalse(os.path.isfile('my.zarr/.zmetadata'))
        writer = DatasetWriter('my.zarr',
                               output_consolidated=True,
                               finalize_only=True)
        writer.finalize_dataset()
        self.assertTrue(os.path.isdir('my.zarr'))
        self.assertTrue(os.path.isfile('my.zarr/.zmetadata'))
        with open('my.zarr/.zmetadata') as fp:
            metadata = json.load(fp)
        self.assertIn('metadata', metadata)
        self.assertEqual({}, metadata['metadata'].get('.zattrs'))
Example #26
    def test_finalize_adjusts_metadata(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr',
                               output_append=True,
                               output_adjust_metadata=True,
                               input_paths=['a.nc', 'z.zarr', 'b.nc'])
        for i in range(3):
            ds = new_test_dataset(day=i + 1)
            writer.write_dataset(ds)
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertNotIn('history', ds.attrs)
            self.assertNotIn('source', ds.attrs)
            self.assertNotIn('time_coverage_start', ds.attrs)
            self.assertNotIn('time_coverage_end', ds.attrs)
        writer.finalize_dataset()
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertIn('history', ds.attrs)
            self.assertIn('source', ds.attrs)
            self.assertEqual('a.nc, b.nc', ds.attrs['source'])
            self.assertIn('time_coverage_start', ds.attrs)
            self.assertEqual('2020-12-01 10:00:00',
                             ds.attrs['time_coverage_start'])
            self.assertIn('time_coverage_end', ds.attrs)
            self.assertEqual('2020-12-03 10:00:00',
                             ds.attrs['time_coverage_end'])
Example #27
    def test_append_with_input_decode_cf_xarray(self):

        src_path_pat = 'src_{}.zarr'
        dst_path = 'my.zarr'
        self.add_path(dst_path)

        n = 3
        for i in range(0, n):
            src_dataset = new_test_dataset(day=i + 1)
            src_path = src_path_pat.format(i)
            self.add_path(src_path)
            src_dataset.to_zarr(src_path)
            with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
                if i == 0:
                    src_dataset.to_zarr(dst_path, mode='w-')
                else:
                    # Hack:
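                    # Decode the values, then drop per-variable encoding and
                    # attributes so that to_zarr does not apply CF encoding a
                    # second time when appending along 'time'.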
                    src_dataset = xr.decode_cf(src_dataset)
                    for var_name in src_dataset.variables:
                        src_dataset[var_name].encoding = {}
                        src_dataset[var_name].attrs = {}
                    src_dataset.to_zarr(dst_path, append_dim='time')

        self.assertTimeSlicesOk(dst_path, src_path_pat, n)
Example #28
    def test_rechunk_all_unchunked_except_time(self):
        ds = new_test_dataset(day=1)
        processor = DatasetProcessor(
            process_rechunk={
                '*': {
                    'lon': None,
                    'lat': None,
                    'time': 1
                },
                'lon': None,
                'lat': None,
                'time': 128
            })
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsNot(ds, new_ds)
        self.assertEqual(
            {
                'r_f32': {'chunks': (1, 18, 36)},
                'r_i32': {'chunks': (1, 18, 36)},
                'r_ui16': {'chunks': (1, 18, 36)},
                'lon': {'chunks': (36,)},
                'lat': {'chunks': (18,)},
                'time': {'chunks': (128,)}
            }, new_encoding)
Example #29
    def test_rechunk_with_input(self):
        ds = new_test_dataset(day=1, chunked=True)
        processor = DatasetProcessor(
            process_rechunk={
                '*': {
                    'lon': 'input',
                    'lat': 'input',
                    'time': 1
                },
                'lon': None,
                'lat': None,
                'time': 128
            })
        new_ds, new_encoding = processor.process_dataset(ds)
        self.assertIsNot(ds, new_ds)
        self.assertEqual(
            {
                'r_f32': {'chunks': (1, 9, 18)},
                'r_i32': {'chunks': (1, 9, 18)},
                'r_ui16': {'chunks': (1, 9, 18)},
                'lon': {'chunks': (36,)},
                'lat': {'chunks': (18,)},
                'time': {'chunks': (128,)}
            }, new_encoding)
Example #30
    def test_validate_dataset(self):
        dataset = new_test_dataset(11)
        dataset.id = None
        result = validate_dataset(self.ctx, dataset=dataset)
        expected_result = DatasetValidationResult("OK", [])
        self.assertEqual(expected_result, result)