Example #1
0
    def test_appending_vars_that_lack_append_dim(self):
        """Append slices carrying a variable without the append dimension.

        Every source dataset gets an extra ``field_names`` variable with
        dims ``("fields", "field_name_length")``. The slices are written
        to the target one after the other and the outcome is verified with
        ``assertTimeSlicesOk``.
        """
        source_pattern = 'src_{}.zarr'
        target_path = 'my.zarr'
        slice_count = 3
        self.add_path(target_path)

        writer = DatasetWriter(target_path,
                               output_overwrite=False,
                               input_decode_cf=False)

        for index in range(slice_count):
            # Byte-string table; the first column of each row gets a letter.
            names = np.full((3, 50), 0, dtype='S')
            for row, letter in enumerate('ABC'):
                names[row, 0] = np.array(letter)

            field_names = xr.DataArray(names,
                                       dims=("fields", "field_name_length"))
            dataset = new_test_dataset(day=index + 1).assign(
                field_names=field_names
            )

            source_path = source_pattern.format(index)
            self.add_path(source_path)
            dataset.to_zarr(source_path)

            # Re-open without CF decoding; ``append`` is False only for
            # the very first slice.
            with xr.open_zarr(source_path, decode_cf=False) as raw_slice:
                writer.write_dataset(raw_slice, append=index > 0)

        self.assertTimeSlicesOk(target_path, source_pattern, slice_count)
Example #2
0
 def test_local_dry_run_for_existing(self):
     """A dry run on an already written target leaves the directory in place."""
     target = 'my.zarr'
     self.add_path(target)
     dataset = new_test_dataset(day=1)

     # Regular write first, so the target exists on disk.
     real_writer = DatasetWriter(target, output_overwrite=True)
     real_writer.write_dataset(dataset)
     self.assertTrue(os.path.isdir(target))

     # Same settings plus ``dry_run``: the directory must still exist.
     dry_writer = DatasetWriter(target, output_overwrite=True, dry_run=True)
     dry_writer.write_dataset(dataset)
     self.assertTrue(os.path.isdir(target))
Example #3
0
    def test_local(self):
        """Write once locally; a second write without overwrite must fail.

        The second ``write_dataset`` call is expected to raise
        ``zarr.errors.ContainsGroupError``.
        """
        target = 'my.zarr'
        self.add_path(target)
        writer = DatasetWriter(target, output_overwrite=False)

        first_day = new_test_dataset(day=1)
        writer.write_dataset(first_day)
        self.assertTrue(os.path.isdir(target))

        second_day = new_test_dataset(day=2)
        with self.assertRaises(zarr.errors.ContainsGroupError):
            writer.write_dataset(second_day)
Example #4
0
    def test_local_overwrite(self):
        """Write a target, then write it again with ``output_overwrite=True``."""
        target = 'my.zarr'
        self.add_path(target)

        # Initial write with overwriting disabled.
        initial_writer = DatasetWriter(target, output_overwrite=False)
        first_day = new_test_dataset(day=1)
        initial_writer.write_dataset(first_day)
        self.assertTrue(os.path.isdir(target))

        # Second write through a fresh writer that is allowed to overwrite.
        overwriting_writer = DatasetWriter(target, output_overwrite=True)
        second_day = new_test_dataset(day=2)
        overwriting_writer.write_dataset(second_day)
        self.assertTrue(os.path.isdir(target))
Example #5
0
 def test_append_to_non_increasing_append_mode_all(self):
     """Append with ``AppendMode.all`` to a target whose dates are unordered.

     There is no explicit assertion: the test passes as long as
     ``write_dataset`` does not raise.
     """
     target = "my.zarr"
     self.add_path(target)

     initial_dates = ["2001-01-01", "2001-01-03", "2001-01-02"]
     appended_dates = ["2001-01-04", "2001-01-05", "2001-01-06"]
     initial, appended = new_append_test_datasets(initial_dates,
                                                  appended_dates)
     initial.to_zarr(target)

     writer = DatasetWriter(target,
                            output_append=True,
                            output_append_dim="t",
                            output_append_mode=AppendMode.all)
     writer.write_dataset(appended)
Example #6
0
    def test_finalize_only_and_append(self):
        """Writing through a finalize-only writer raises ``RuntimeError``.

        The exception's ``args`` are compared against the exact message.
        """
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr', finalize_only=True, output_append=True)
        dataset = new_test_dataset(day=1)

        expected_args = ('internal error: cannot write/append'
                         ' datasets when in finalize-only mode',)
        with self.assertRaises(RuntimeError) as raised:
            writer.write_dataset(dataset)
        self.assertEqual(expected_args, raised.exception.args)
Example #7
0
    def test_local_postprocessor(self):
        """The written dataset contains ``crs`` although the input does not.

        The writer is configured with the custom postprocessor
        ``tests.test_writer:my_postprocessor`` (presumably the function
        that adds ``crs`` -- it is defined outside this method).
        """
        target = 'my.zarr'
        self.add_path(target)
        writer = DatasetWriter(
            target,
            output_overwrite=False,
            output_custom_postprocessor='tests.test_writer:my_postprocessor',
        )

        source = new_test_dataset(day=1)
        # Precondition: the input itself has no 'crs' entry.
        self.assertNotIn('crs', source)

        writer.write_dataset(source)
        self.assertTrue(os.path.isdir(target))
        with xr.open_zarr(target, consolidated=False) as written:
            self.assertIn('crs', written)
Example #8
0
 def test_finalize_updates_metadata(self):
     """``output_metadata`` appears in the target only after finalizing."""
     target = 'my.zarr'
     comment = 'This dataset is a test.'
     self.add_path(target)
     writer = DatasetWriter(target,
                            output_append=True,
                            output_metadata=dict(comment=comment))

     for day in range(1, 4):
         writer.write_dataset(new_test_dataset(day=day))

     # Before finalizing, the attribute is not there yet.
     with xr.open_zarr(target, consolidated=False) as before:
         self.assertNotIn('comment', before.attrs)

     writer.finalize_dataset()

     with xr.open_zarr(target, consolidated=False) as after:
         self.assertIn('comment', after.attrs)
         self.assertEqual(comment, after.attrs['comment'])
Example #9
0
 def test_append_non_increasing_append_newer(self):
     """Appending unordered dates with ``AppendMode.newer`` raises.

     A ``ValueError`` matching "must be increasing" is expected from
     ``write_dataset``.
     """
     target = "my.zarr"
     self.add_path(target)

     initial_dates = ["2001-01-01", "2001-01-02", "2001-01-03"]
     appended_dates = ["2001-01-05", "2001-01-04", "2001-01-03", "2001-02-02"]
     initial, appended = new_append_test_datasets(initial_dates,
                                                  appended_dates)
     initial.to_zarr(target)

     writer = DatasetWriter(target,
                            output_append=True,
                            output_append_dim="t",
                            output_append_mode=AppendMode.newer)
     with pytest.raises(ValueError, match="must be increasing"):
         writer.write_dataset(appended)
Example #10
0
 def test_append_overlapping_forbid_overlap(self):
     """Overlapping dates with ``AppendMode.no_overlap`` raise ``ValueError``.

     Both creating the writer and writing happen inside the ``raises``
     block, so an error matching "may not overlap" from either step
     satisfies the test.
     """
     target = "my.zarr"
     self.add_path(target)

     initial_dates = ["2001-01-01", "2001-01-02", "2001-01-03"]
     appended_dates = ["2001-01-02", "2001-01-03", "2001-01-04"]
     initial, appended = new_append_test_datasets(initial_dates,
                                                  appended_dates)
     initial.to_zarr(target)

     with pytest.raises(ValueError, match="may not overlap"):
         writer = DatasetWriter(target,
                                output_append=True,
                                output_append_dim="t",
                                output_append_mode=AppendMode.no_overlap)
         writer.write_dataset(appended)
Example #11
0
 def test_finalize_adjusts_metadata_with_time_bnds(self):
     """Finalizing sets time coverage attributes for data with time bounds.

     Three datasets created with ``add_time_bnds=True`` are written, then
     ``time_coverage_start`` and ``time_coverage_end`` are checked against
     fixed expected strings.
     """
     target = 'my.zarr'
     self.add_path(target)
     writer = DatasetWriter(target,
                            output_append=True,
                            output_adjust_metadata=True)

     for day in range(1, 4):
         writer.write_dataset(new_test_dataset(day=day, add_time_bnds=True))
     writer.finalize_dataset()

     with xr.open_zarr(target, consolidated=False) as result:
         attrs = result.attrs
         self.assertIn('time_coverage_start', attrs)
         self.assertEqual('2020-12-01 09:30:00',
                          attrs['time_coverage_start'])
         self.assertIn('time_coverage_end', attrs)
         self.assertEqual('2020-12-03 10:30:00',
                          attrs['time_coverage_end'])
Example #12
0
    def test_append_with_input_decode_cf(self):
        """Append slices that were opened with ``decode_cf=False``.

        Each source dataset is stored as Zarr, re-opened undecoded and
        passed to a writer created with ``input_decode_cf=False``. The
        outcome is verified with ``assertTimeSlicesOk``.
        """
        source_pattern = 'src_{}.zarr'
        target_path = 'my.zarr'
        slice_count = 3
        self.add_path(target_path)

        writer = DatasetWriter(target_path,
                               output_overwrite=False,
                               input_decode_cf=False)

        for index in range(slice_count):
            source_path = source_pattern.format(index)
            dataset = new_test_dataset(day=index + 1)
            self.add_path(source_path)
            dataset.to_zarr(source_path)

            # ``append`` is False only for the very first slice.
            with xr.open_zarr(source_path, decode_cf=False) as raw_slice:
                writer.write_dataset(raw_slice, append=index > 0)

        self.assertTimeSlicesOk(target_path, source_pattern, slice_count)
Example #13
0
 def test_append_overlapping_append_newer(self):
     """Append overlapping dates with ``AppendMode.newer``.

     Runs once without and once with consolidated metadata. The expected
     ``t`` axis holds every date exactly once: the three dates already in
     the target followed by the two appended dates that lie after them.
     """
     for consolidated in False, True:
         with self.subTest(consolidated=consolidated):
             dst_path = "my.zarr"
             self.add_path(dst_path)
             ds1, ds2 = new_append_test_datasets(
                 ["2001-01-01", "2001-01-02", "2001-01-03"],
                 ["2001-01-02", "2001-01-03", "2001-01-04", "2001-02-05"]
             )
             ds1.to_zarr(dst_path, consolidated=consolidated)
             w = DatasetWriter(dst_path, output_append=True,
                               output_append_dim="t",
                               output_append_mode=AppendMode.newer,
                               output_consolidated=consolidated)
             w.write_dataset(ds2)
             expected = np.array(["2001-01-01", "2001-01-02", "2001-01-03",
                                  "2001-01-04", "2001-02-05"],
                                 dtype="datetime64[ns]")
             # Open through a context manager so the dataset is closed
             # even when the assertion fails.
             with xr.open_zarr(dst_path, consolidated=consolidated) as ds3:
                 np.testing.assert_equal(expected, ds3.t.data)
Example #14
0
 def test_finalize_only_and_consolidate_if_specified(self):
     """A finalize-only writer with ``output_consolidated=True`` consolidates.

     After a plain write and finalize there is no ``.zmetadata`` file.
     After finalizing again through a finalize-only writer that requests
     consolidation, the file exists and its ``.zattrs`` entry is empty.
     """
     target = 'my.zarr'
     self.add_path(target)
     dataset = new_test_dataset(day=1)

     # Step 1: ordinary write and finalize, no consolidation requested.
     plain_writer = DatasetWriter(target, output_overwrite=True)
     plain_writer.write_dataset(dataset)
     plain_writer.finalize_dataset()
     self.assertTrue(os.path.isdir(target))
     self.assertFalse(os.path.isfile('my.zarr/.zmetadata'))

     # Step 2: finalize only, this time asking for consolidated metadata.
     finalizer = DatasetWriter(target,
                               output_consolidated=True,
                               finalize_only=True)
     finalizer.finalize_dataset()
     self.assertTrue(os.path.isdir(target))
     self.assertTrue(os.path.isfile('my.zarr/.zmetadata'))

     with open('my.zarr/.zmetadata') as stream:
         consolidated = json.load(stream)
     self.assertIn('metadata', consolidated)
     root_attrs = consolidated['metadata'].get('.zattrs')
     self.assertEqual({}, root_attrs)
Example #15
0
 def test_append_overlapping_replace(self):
     """Append overlapping dates with ``AppendMode.replace``.

     The expected ``t`` axis is the sorted union of both date lists. The
     expected ``v`` values at ``x=0, y=0`` are 1 for every date of the
     appended dataset and 0 for the dates found only in the initial one
     (presumably the two test datasets are filled with 0 and 1
     respectively -- they are created outside this method).
     """
     dst_path = "my.zarr"
     self.add_path(dst_path)
     ds1, ds2 = new_append_test_datasets(
         ["2001-01-01", "2001-01-02", "2001-01-03", "2001-01-05"],
         ["2001-01-02", "2001-01-03", "2001-01-04", "2001-01-06"]
     )
     ds1.to_zarr(dst_path)
     w = DatasetWriter(dst_path, output_append=True,
                       output_append_dim="t",
                       output_append_mode=AppendMode.replace,
                       output_consolidated=False)
     w.write_dataset(ds2)
     # Open through a context manager so the dataset is closed even
     # when one of the assertions fails.
     with xr.open_zarr(dst_path, consolidated=False) as ds3:
         np.testing.assert_equal(
             np.array(["2001-01-01", "2001-01-02", "2001-01-03",
                       "2001-01-04", "2001-01-05", "2001-01-06"],
                      dtype="datetime64[ns]"), ds3.t.data)
         np.testing.assert_equal(
             np.array([0, 1, 1, 1, 0, 1]),
             ds3.v.isel(x=0, y=0)
         )
Example #16
0
 def test_finalize_adjusts_metadata(self):
     """Adjusted metadata attributes appear only after finalizing.

     With ``output_adjust_metadata=True``, the attributes ``history``,
     ``source``, ``time_coverage_start`` and ``time_coverage_end`` are
     absent after writing and present after ``finalize_dataset``.
     """
     target = 'my.zarr'
     adjusted_keys = ('history', 'source',
                      'time_coverage_start', 'time_coverage_end')
     self.add_path(target)
     writer = DatasetWriter(target,
                            output_append=True,
                            output_adjust_metadata=True,
                            input_paths=['a.nc', 'z.zarr', 'b.nc'])

     for day in range(1, 4):
         writer.write_dataset(new_test_dataset(day=day))

     # None of the adjusted attributes may exist before finalizing.
     with xr.open_zarr(target, consolidated=False) as before:
         for key in adjusted_keys:
             self.assertNotIn(key, before.attrs)

     writer.finalize_dataset()

     with xr.open_zarr(target, consolidated=False) as after:
         attrs = after.attrs
         self.assertIn('history', attrs)
         self.assertIn('source', attrs)
         self.assertEqual('a.nc, b.nc', attrs['source'])
         self.assertIn('time_coverage_start', attrs)
         self.assertEqual('2020-12-01 10:00:00', attrs['time_coverage_start'])
         self.assertIn('time_coverage_end', attrs)
         self.assertEqual('2020-12-03 10:00:00', attrs['time_coverage_end'])
Example #17
0
 def test_local_dry_run(self):
     """A dry-run write must not create the target directory."""
     self.add_path('my.zarr')
     writer = DatasetWriter('my.zarr', dry_run=True)
     ds = new_test_dataset(day=1)
     writer.write_dataset(ds)
     # Check the path the writer was actually given. Asserting on any
     # other path would pass regardless of what the dry run did.
     self.assertFalse(os.path.isdir('my.zarr'))
Example #18
0
 def test_aws_s3_with_unknown_bucket(self):
     """Writing to an S3 URL with a random bucket name raises.

     The bucket name embeds a fresh UUID. Any exception type is accepted.
     """
     dataset = new_test_dataset(day=1)
     target_url = f's3://my{uuid.uuid4()}/my.zarr'
     writer = DatasetWriter(target_url)
     # The failure is expected; the call is made mainly for the sake
     # of test coverage.
     with self.assertRaises(Exception):
         writer.write_dataset(dataset)