def test_PP_WGDOS_UNPACKING(self):
    f = cf.read(self.ppfilename)[0]

    self.assertTrue(
        f.minimum() > 221.71, 'Bad unpacking of WGDOS packed data'
    )
    self.assertTrue(
        f.maximum() < 310.45, 'Bad unpacking of WGDOS packed data'
    )

    array = f.array

    for chunksize in self.chunk_sizes:
        cf.chunksize(chunksize)
        f = cf.read(self.ppfilename)[0]
        for fmt in ('NETCDF4', 'CFA4'):
            # print(fmt)
            # f.dump()
            # print(repr(f.dtype))
            # print(f._FillValue)
            # print(type(f._FillValue))
            # f._FillValue = numpy.array(f._FillValue, dtype='float32')
            cf.write(f, tmpfile, fmt=fmt)
            g = cf.read(tmpfile)[0]

            self.assertTrue(
                (f.array == array).all(),
                'Bad unpacking of PP WGDOS packed data'
            )
            self.assertTrue(
                f.equals(g, verbose=2),
                'Bad writing/reading. fmt=' + fmt
            )

    cf.chunksize(self.original_chunksize)
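# Note (added): 221.71 and 310.45 bracket the plausible extremes, in kelvin,
# of the surface-temperature field in wgdos_packed.pp, so a minimum or
# maximum outside that range indicates corrupt WGDOS unpacking rather than
# a rounding-tolerance issue.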
def test_Field_regridc(self):
    self.assertFalse(cf.regrid_logging())
    with cf.atol(1e-11):
        for chunksize in self.chunk_sizes:
            self.assertFalse(cf.regrid_logging())
            with cf.chunksize(chunksize):
                f1 = cf.read(self.filename7)[0]
                f2 = cf.read(self.filename8)[0]
                f3 = cf.read(self.filename9)[0]
                self.assertTrue(
                    f3.equals(f1.regridc(f2, axes="T", method="linear")),
                    "destination=time series, CHUNKSIZE={}".format(chunksize),
                )

                f4 = cf.read(self.filename1)[0]
                f5 = cf.read(self.filename2)[0]
                f6 = cf.read(self.filename10)[0]
                self.assertTrue(
                    f6.equals(
                        f4.regridc(f5, axes=("X", "Y"), method="conservative")
                    ),
                    "destination=global Field, CHUNKSIZE={}".format(chunksize),
                )
                self.assertTrue(
                    f6.equals(
                        f4.regridc(f5, axes=("X", "Y"), method="conservative")
                    ),
                    "destination=global Field, CHUNKSIZE={}".format(chunksize),
                )

                dst = {"X": f5.dim("X"), "Y": f5.dim("Y")}
                self.assertTrue(
                    f6.equals(
                        f4.regridc(dst, axes=("X", "Y"), method="conservative")
                    ),
                    "destination=global dict, CHUNKSIZE={}".format(chunksize),
                )
                self.assertTrue(
                    f6.equals(
                        f4.regridc(dst, axes=("X", "Y"), method="conservative")
                    ),
                    "destination=global dict, CHUNKSIZE={}".format(chunksize),
                )
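# Note (added): `regridc` performs Cartesian regridding over the axes named
# by `axes` (a time series for "T", a plane for ("X", "Y")), in contrast to
# `regrids`, which regrids between spherical lat/lon domains. The
# destination may be another field or, as with `dst` above, a dict of
# dimension coordinates.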
class PartitionTest(unittest.TestCase):
    filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "test_file.nc"
    )

    chunk_sizes = (17, 34, 300, 100000)[::-1]
    original_chunksize = cf.chunksize()

    test_only = []

    def test_Partition(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return
def setUp(self):
    self.ppfilename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'wgdos_packed.pp'
    )

    self.new_table = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'new_STASH_to_CF.txt'
    )
    text_file = open(self.new_table, 'w')
    text_file.write(
        '1!24!SURFACE TEMPERATURE AFTER TIMESTEP !Pa!!!NEW_NAME!!'
    )
    text_file.close()

    self.chunk_sizes = (100000, 300, 34)
    self.original_chunksize = cf.chunksize()
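# A minimal tearDown sketch (an addition, not part of the original fixture),
# assuming the temporary STASH table written by setUp should not outlive the
# test and that the global chunksize should be restored afterwards:
def tearDown(self):
    cf.chunksize(self.original_chunksize)
    try:
        os.remove(self.new_table)  # hypothetical cleanup of setUp's file
    except OSError:
        pass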
def test_aliases(self):
    self.assertEqual(cf.log_level(), cf.LOG_LEVEL())
    self.assertEqual(cf.free_memory(), cf.FREE_MEMORY())
    self.assertEqual(cf.free_memory_factor(), cf.FREE_MEMORY_FACTOR())
    self.assertEqual(cf.fm_threshold(), cf.FM_THRESHOLD())
    self.assertEqual(cf.total_memory(), cf.TOTAL_MEMORY())
    self.assertEqual(cf.regrid_logging(), cf.REGRID_LOGGING())
    self.assertEqual(cf.relaxed_identities(), cf.RELAXED_IDENTITIES())
    self.assertEqual(cf.tempdir(), cf.TEMPDIR())
    self.assertEqual(cf.chunksize(), cf.CHUNKSIZE())
    self.assertEqual(cf.set_performance(), cf.SET_PERFORMANCE())
    self.assertEqual(cf.of_fraction(), cf.OF_FRACTION())
    self.assertEqual(
        cf.collapse_parallel_mode(), cf.COLLAPSE_PARALLEL_MODE()
    )
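# Note (added): the upper-case forms are presumably the older constant-style
# names kept as backwards-compatible aliases of the lower-case functions;
# the test pins each pair to the same value.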
def test_write_datatype(self):
    for chunksize in self.chunk_sizes:
        with cf.chunksize(chunksize):
            f = cf.read(self.filename)[0]
            self.assertEqual(f.dtype, numpy.dtype(float))
            cf.write(
                f,
                tmpfile,
                fmt="NETCDF4",
                datatype={numpy.dtype(float): numpy.dtype("float32")},
            )
            g = cf.read(tmpfile)[0]
            self.assertEqual(
                g.dtype,
                numpy.dtype("float32"),
                "datatype read in is " + str(g.dtype),
            )

    # Keyword single
    f = cf.read(self.filename)[0]
    self.assertEqual(f.dtype, numpy.dtype(float))
    cf.write(f, tmpfile, fmt="NETCDF4", single=True)
    g = cf.read(tmpfile)[0]
    self.assertEqual(
        g.dtype,
        numpy.dtype("float32"),
        "datatype read in is " + str(g.dtype),
    )

    # Keyword double
    f = g
    self.assertEqual(f.dtype, numpy.dtype("float32"))
    cf.write(f, tmpfile2, fmt="NETCDF4", double=True)
    g = cf.read(tmpfile2)[0]
    self.assertEqual(
        g.dtype, numpy.dtype(float), "datatype read in is " + str(g.dtype)
    )

    for single in (True, False):
        for double in (True, False):
            with self.assertRaises(Exception):
                cf.write(g, double=double, single=single)

    datatype = {numpy.dtype(float): numpy.dtype("float32")}
    with self.assertRaises(Exception):
        cf.write(g, datatype=datatype, single=True)
    with self.assertRaises(Exception):
        cf.write(g, datatype=datatype, double=True)
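# Note (added): `single` and `double` are shorthands for writing all
# floating-point data as 32-bit or 64-bit respectively, so they conflict
# with each other and with an explicit `datatype` mapping; that is why
# every such combination above must raise.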
def test_Field_regrids(self):
    self.assertFalse(cf.regrid_logging())
    with cf.atol(1e-12):
        for chunksize in self.chunk_sizes:
            with cf.chunksize(chunksize):
                f1 = cf.read(self.filename1)[0]
                f2 = cf.read(self.filename2)[0]
                f3 = cf.read(self.filename3)[0]
                f4 = cf.read(self.filename4)[0]
                f5 = cf.read(self.filename5)[0]

                r = f1.regrids(f2, "conservative")
                self.assertTrue(
                    f3.equals(r),
                    "destination=global Field, CHUNKSIZE={}".format(
                        chunksize
                    ),
                )

                dst = {"longitude": f2.dim("X"), "latitude": f2.dim("Y")}
                r = f1.regrids(dst, "conservative", dst_cyclic=True)
                self.assertTrue(
                    f3.equals(r),
                    "destination=global dict, CHUNKSIZE={}".format(
                        chunksize
                    ),
                )

                r = f1.regrids(dst, method="conservative", dst_cyclic=True)
                self.assertTrue(
                    f3.equals(r),
                    "destination=global dict, CHUNKSIZE={}".format(
                        chunksize
                    ),
                )

                # Regrid global to regional rotated pole
                r = f1.regrids(f5, method="linear")
                self.assertTrue(
                    f4.equals(r, verbose=3),
                    "destination=regional Field, CHUNKSIZE={}".format(
                        chunksize
                    ),
                )

    f6 = cf.read(self.filename6)[0]
    with self.assertRaises(Exception):
        f1.regridc(f6, axes="T", method="linear")
def test_read_write_netCDF4_compress_shuffle(self):
    for chunksize in self.chunk_sizes:
        with cf.chunksize(chunksize):
            f = cf.read(self.filename)[0]
            for fmt in ("NETCDF4", "NETCDF4_CLASSIC", "CFA4"):
                cf.write(f, tmpfile, fmt=fmt, compress=1, shuffle=True)
                g = cf.read(tmpfile)[0]
                self.assertTrue(
                    f.equals(g, verbose=2),
                    f"Bad read/write with lossless compression: {fmt}",
                )
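# Note (added): `compress` sets the netCDF4 deflate level and `shuffle`
# enables the byte-shuffle filter; both are lossless, which is why the
# round-trip equality check above must hold exactly.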
def test_read_write_format(self):
    cf.write(self.f1, tmpfile)
    for chunksize in self.chunk_sizes:
        with cf.chunksize(chunksize):
            for fmt in self.netcdf3_fmts + ["CFA"]:
                f = cf.read(tmpfile)[0]

                cf.write(f, tmpfile2, fmt=fmt)
                g = cf.read(tmpfile2, verbose=0)
                self.assertEqual(len(g), 1)
                g = g[0]

                self.assertTrue(
                    f.equals(g, verbose=1),
                    f"Bad read/write of format {fmt!r}",
                )
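# Note (added): `self.f1` and `self.netcdf3_fmts` are assumed to be defined
# elsewhere on the test class, as an example field and the list of
# "NETCDF3_*" format strings respectively.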
if not outcome.wasSuccessful():
    exit(1)  # else is zero for success as standard

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "-d",
        "--doctest",
        dest="doctest",
        action="store_true",
        help="run only the doctest tests",
    )
    args = parser.parse_args()

    original_chunksize = cf.chunksize()

    print("--------------------")
    print("CF-PYTHON TEST SUITE")
    print("--------------------")
    print("Run date:", datetime.datetime.now())
    cf.environment()
    print("")
    print("Running tests from", os.path.abspath(os.curdir))

    if args.doctest:
        print("Note: running only doctest tests\n")
        run_doctests_only()
    else:
        print("")

    cf.chunksize(original_chunksize)
class aggregateTest(unittest.TestCase):
    filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "test_file.nc"
    )
    file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "file.nc")
    file2 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "file2.nc"
    )

    chunk_sizes = (100000, 300, 34)
    original_chunksize = cf.chunksize()

    def test_basic_aggregate(self):
        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)

            f = cf.read(self.filename, squeeze=True)[0]

            g = cf.FieldList(f[0])
            g.append(f[1:3])
            g.append(f[3])
            g[-1].flip(0, inplace=True)
            g.append(f[4:7])
            g[-1].flip(0, inplace=True)
            g.extend([f[i] for i in range(7, f.shape[0])])

            g0 = g.copy()
            self.assertTrue(g.equals(g0, verbose=2), "g != g0")

            with warnings.catch_warnings():
                # Suppress noise throughout the test fixture from:
                #
                # ~/cf-python/cf/__init__.py:1459: FutureWarning: elementwise
                # comparison failed; returning scalar instead, but in the
                # future will perform elementwise comparison
                #
                # TODO: it is not clear where the above emerges from, e.g.
                # since the __init__ file referenced does not have that many
                # lines. It seems like this warning arises from NumPy
                # comparisons done at some point in (only) some aggregate
                # calls (see e.g.
                # https://github.com/numpy/numpy/issues/6784).
                warnings.filterwarnings("ignore", category=FutureWarning)
                h = cf.aggregate(g, verbose=2)

            self.assertEqual(len(h), 1)
            self.assertEqual(
                h[0].shape,
                (10, 9),
                "h[0].shape = " + repr(h[0].shape) + " != (10, 9)",
            )
            self.assertTrue(
                g.equals(g0, verbose=2), "g != itself after aggregation"
            )
            self.assertTrue(h[0].equals(f, verbose=2), "h[0] != f")

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=FutureWarning)
                i = cf.aggregate(g, verbose=2)

            self.assertTrue(
                i.equals(h, verbose=2), "The second aggregation != the first"
            )
            self.assertTrue(
                g.equals(g0, verbose=2),
                "g != itself after the second aggregation",
            )

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=FutureWarning)
                i = cf.aggregate(g, verbose=2, axes="grid_latitude")

            self.assertTrue(
                i.equals(h, verbose=2), "The third aggregation != the first"
            )
            self.assertTrue(
                g.equals(g0, verbose=2),
                "g != itself after the third aggregation",
            )
            self.assertEqual(
                i[0].shape, (10, 9), "i[0].shape is " + repr(i[0].shape)
            )

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=FutureWarning)
                i = cf.aggregate(
                    g,
                    verbose=2,
                    axes="grid_latitude",
                    donotchecknonaggregatingaxes=1,
                )

            self.assertTrue(
                i.equals(h, verbose=2), "The fourth aggregation != the first"
            )
            self.assertTrue(
                g.equals(g0, verbose=2),
                "g != itself after the fourth aggregation",
            )
            self.assertEqual(
                i[0].shape, (10, 9), "i[0].shape is " + repr(i[0].shape)
            )

            q, t = cf.read(self.file)
            c = cf.read(self.file2)[0]

            d = cf.aggregate([c, t], verbose=1, relaxed_identities=True)
            e = cf.aggregate([t, c], verbose=1, relaxed_identities=True)

            self.assertEqual(len(d), 1)
            self.assertEqual(len(e), 1)
            self.assertEqual(d[0].shape, (3,) + t.shape)
            self.assertTrue(d[0].equals(e[0], verbose=2))

            x = cf.read(["file.nc", "file2.nc"], aggregate=False)
            self.assertEqual(len(x), 3)

            x = cf.read(
                ["file.nc", "file2.nc"],
                aggregate={"relaxed_identities": True},
            )
            self.assertEqual(len(x), 2)

            del t.standard_name
            del c.standard_name
            x = cf.aggregate([c, t], verbose=1)
            self.assertEqual(len(x), 2)

            t.long_name = "qwerty"
            c.long_name = "qwerty"
            x = cf.aggregate([c, t], field_identity="long_name")
            self.assertEqual(len(x), 1)

        cf.chunksize(self.original_chunksize)

    def test_aggregate_exist_equal_ignore_opts(self):
        # TODO: extend the option-checking coverage so all options and all
        # reasonable combinations of them are tested. For now, this is
        # testing options that previously errored due to a bug.
        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)

            f = cf.read(self.filename, squeeze=True)[0]

            # Use f as-is: simple test that aggregate works and does not
            # change anything with the given options:
            g = cf.aggregate(f, exist_all=True)[0]
            self.assertEqual(g, f)
            h = cf.aggregate(f, equal_all=True)[0]
            self.assertEqual(h, f)

            with self.assertRaises(ValueError):  # contradictory options
                cf.aggregate(f, exist_all=True, equal_all=True)

        cf.chunksize(self.original_chunksize)

    def test_aggregate_verbosity(self):
        f0 = cf.example_field(0)
        f1 = cf.example_field(1)

        detail_header = "DETAIL:cf.aggregate:STRUCTURAL SIGNATURE:"
        debug_header = "DEBUG:cf.aggregate:COMPLETE AGGREGATION METADATA:"

        # 'DEBUG' (-1) verbosity should output both log message headers...
        with self.assertLogs(level="NOTSET") as catch:
            cf.aggregate([f0, f1], verbose=-1)

            for header in (detail_header, debug_header):
                self.assertTrue(
                    any(
                        log_item.startswith(header)
                        for log_item in catch.output
                    ),
                    "No log entry begins with '{}'".format(header),
                )

        # ...but with 'DETAIL' (3), should get only the detail-level one.
        with self.assertLogs(level="NOTSET") as catch:
            cf.aggregate([f0, f1], verbose=3)

            self.assertTrue(
                any(
                    log_item.startswith(detail_header)
                    for log_item in catch.output
                ),
                "No log entry begins with '{}'".format(detail_header),
            )
            self.assertFalse(
                any(
                    log_item.startswith(debug_header)
                    for log_item in catch.output
                ),
                "A log entry begins with '{}' but should not".format(
                    debug_header
                ),
            )

        # ...and neither should emerge at the 'WARNING' (1) level.
        with self.assertLogs(level="NOTSET") as catch:
            logger.warning(
                "Dummy message to log something at warning level so that "
                "'assertLogs' does not error when no log messages emerge."
            )
            # Note: can use assertNoLogs in Python 3.10 to avoid this, see:
            # https://bugs.python.org/issue39385
            cf.aggregate([f0, f1], verbose=1)

            for header in (detail_header, debug_header):
                self.assertFalse(
                    any(
                        log_item.startswith(header)
                        for log_item in catch.output
                    ),
                    "A log entry begins with '{}' but should not".format(
                        header
                    ),
                )
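# A minimal round-trip sketch (an addition, not part of the original suite):
# aggregating the pieces of a split field should recover the original,
# which is the property the tests above rely on.
def _aggregate_roundtrip_sketch():
    f = cf.example_field(0)
    pieces = [f[0:2], f[2:]]  # split along the first domain axis
    g = cf.aggregate(pieces)
    assert len(g) == 1 and g[0].equals(f)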
def test_Field_regrids(self):
    if self.test_only and inspect.stack()[0][3] not in self.test_only:
        return

    self.assertFalse(cf.regrid_logging())
    with cf.atol(1e-12):
        for chunksize in self.chunk_sizes:
            with cf.chunksize(chunksize):
                f1 = cf.read(self.filename1)[0]
                f2 = cf.read(self.filename2)[0]
                f3 = cf.read(self.filename3)[0]

                r = f1.regrids(f2, "conservative")
                self.assertTrue(
                    f3.equals(r),
                    "destination=global Field, CHUNKSIZE={}".format(
                        chunksize),
                )
                r = f1.regrids(f2, method="conservative")
                self.assertTrue(
                    f3.equals(r),
                    "destination=global Field, CHUNKSIZE={}".format(
                        chunksize),
                )

                dst = {"longitude": f2.dim("X"), "latitude": f2.dim("Y")}
                r = f1.regrids(dst, "conservative", dst_cyclic=True)
                self.assertTrue(
                    f3.equals(r),
                    "destination=global dict, CHUNKSIZE={}".format(
                        chunksize),
                )
                r = f1.regrids(dst, method="conservative", dst_cyclic=True)
                self.assertTrue(
                    f3.equals(r),
                    "destination=global dict, CHUNKSIZE={}".format(
                        chunksize),
                )

                f4 = cf.read(self.filename4)[0]
                f5 = cf.read(self.filename5)[0]

                r = f1.regrids(f5, "linear")
                self.assertTrue(
                    f4.equals(r),
                    "destination=regional Field, CHUNKSIZE={}".format(
                        chunksize),
                )
                r = f1.regrids(f5, method="linear")
                self.assertTrue(
                    f4.equals(r),
                    "destination=regional Field, CHUNKSIZE={}".format(
                        chunksize),
                )
        # --- End: for

    f6 = cf.read(self.filename6)[0]
    with self.assertRaises(Exception):
        f1.regridc(f6, axes="T", method="linear")
def test_GATHERING_create(self):
    # Define the gathered values
    gathered_array = numpy.array(
        [[280, 282.5, 281], [279, 278, 277.5]], dtype="float32"
    )
    # Define the list array values
    list_array = [1, 4, 5]

    # Initialise the list variable
    list_variable = cf.List(data=cf.Data(list_array))

    # Initialise the gathered array object
    array = cf.GatheredArray(
        compressed_array=cf.Data(gathered_array),
        compressed_dimension=1,
        shape=(2, 3, 2),
        size=12,
        ndim=3,
        list_variable=list_variable,
    )

    # Create the field construct with the domain axes and the
    # gathered array
    tas = cf.Field(
        properties={"standard_name": "air_temperature", "units": "K"}
    )

    # Create the domain axis constructs for the uncompressed array
    T = tas.set_construct(cf.DomainAxis(2))
    Y = tas.set_construct(cf.DomainAxis(3))
    X = tas.set_construct(cf.DomainAxis(2))

    uncompressed_array = numpy.ma.masked_array(
        data=[
            [[1, 280.0], [1, 1], [282.5, 281.0]],
            [[1, 279.0], [1, 1], [278.0, 277.5]],
        ],
        mask=[
            [[True, False], [True, True], [False, False]],
            [[True, False], [True, True], [False, False]],
        ],
        fill_value=1e20,
        dtype="float32",
    )

    for chunksize in (1000000,):
        cf.chunksize(chunksize)
        message = "chunksize=" + str(chunksize)

        # Set the data for the field
        tas.set_data(cf.Data(array), axes=[T, Y, X])

        self.assertTrue(
            (tas.data.array == uncompressed_array).all(), message
        )
        self.assertEqual(
            tas.data.get_compression_type(), "gathered", message
        )
        self.assertTrue(
            (
                tas.data.compressed_array
                == numpy.array(
                    [[280.0, 282.5, 281.0], [279.0, 278.0, 277.5]],
                    dtype="float32",
                )
            ).all(),
            message,
        )
        self.assertTrue(
            (
                tas.data.get_list().data.array == numpy.array([1, 4, 5])
            ).all(),
            message,
        )
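# A minimal uncompression sketch (an addition, not part of the original
# suite): the list variable holds indices into the flattened (Y, X) = (3, 2)
# plane, and each gathered value is scattered to its index, with every
# unlisted position masked; this reproduces the uncompressed_array above.
def _gathering_uncompression_sketch():
    gathered = numpy.array(
        [[280, 282.5, 281], [279, 278, 277.5]], dtype="float32"
    )
    indices = [1, 4, 5]
    full = numpy.ma.masked_all((2, 6), dtype="float32")
    full[:, indices] = gathered  # scatter along the compressed dimension
    return full.reshape(2, 3, 2)  # recover the (T, Y, X) array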
class read_writeTest(unittest.TestCase):
    filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "test_file.nc"
    )
    broken_bounds = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "broken_bounds.cdl"
    )
    string_filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "string_char.nc"
    )

    chunk_sizes = (100000, 300)
    original_chunksize = cf.chunksize()

    test_only = []
    # test_only = ['NOTHING!!!!!']
    # test_only = ['test_write_filename']
    # test_only = ['test_read_write_unlimited']
    # test_only = ['test_write_datatype']
    # test_only = ['test_read_directory']
    # test_only = ['test_read_string']
    # test_only = ['test_read_write_netCDF4_compress_shuffle']

    def test_write_filename(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        tmpfiles.append(tmpfile)

        f = cf.example_field(0)
        a = f.array

        cf.write(f, tmpfile)
        g = cf.read(tmpfile)

        with self.assertRaises(Exception):
            cf.write(g, tmpfile)

        self.assertTrue((a == g[0].array).all())

    def test_read_mask(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        f = cf.example_field(0)

        N = f.size

        f.data[1, 1] = cf.masked
        f.data[2, 2] = cf.masked

        f.del_property("_FillValue", None)
        f.del_property("missing_value", None)

        cf.write(f, tmpfile)

        g = cf.read(tmpfile)[0]
        self.assertEqual(numpy.ma.count(g.data.array), N - 2)

        g = cf.read(tmpfile, mask=False)[0]
        self.assertEqual(numpy.ma.count(g.data.array), N)

        g.apply_masking(inplace=True)
        self.assertEqual(numpy.ma.count(g.data.array), N - 2)

        f.set_property("_FillValue", 999)
        f.set_property("missing_value", -111)
        cf.write(f, tmpfile)

        g = cf.read(tmpfile)[0]
        self.assertEqual(numpy.ma.count(g.data.array), N - 2)

        g = cf.read(tmpfile, mask=False)[0]
        self.assertEqual(numpy.ma.count(g.data.array), N)

        g.apply_masking(inplace=True)
        self.assertEqual(numpy.ma.count(g.data.array), N - 2)

    def test_read_directory(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        pwd = os.getcwd() + "/"

        dir = "dir_" + inspect.stack()[0][3]
        try:
            os.mkdir(dir)
        except FileExistsError:
            pass
        except Exception:
            raise ValueError("Cannot mkdir {}{}".format(pwd, dir))

        f = "test_file2.nc"
        try:
            os.symlink(pwd + f, pwd + dir + "/" + f)
        except FileExistsError:
            pass

        subdir = dir + "/subdir"
        try:
            os.mkdir(subdir)
        except FileExistsError:
            pass
        except Exception:
            raise ValueError("Cannot mkdir {}{}".format(pwd, subdir))

        for f in ("test_file3.nc", "test_file.nc"):
            try:
                os.symlink(pwd + f, pwd + subdir + "/" + f)
            except FileExistsError:
                pass
        # --- End: for

        f = cf.read(dir, aggregate=False)
        self.assertEqual(len(f), 1, f)

        f = cf.read(dir, recursive=True, aggregate=False)
        self.assertEqual(len(f), 3, f)

        f = cf.read([dir, subdir], aggregate=False)
        self.assertEqual(len(f), 3, f)

        f = cf.read([subdir, dir], aggregate=False)
        self.assertEqual(len(f), 3, f)

        f = cf.read([dir, subdir], recursive=True, aggregate=False)
        self.assertEqual(len(f), 5, f)

        f = cf.read(subdir, aggregate=False)
        self.assertEqual(len(f), 2, f)

        f = cf.read(subdir, recursive=True, aggregate=False)
        self.assertEqual(len(f), 2, f)

        shutil.rmtree(dir)

    def test_read_select(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        # select on field list
        f = cf.read(self.filename, select="eastward_wind")
        g = cf.read(self.filename)
        self.assertTrue(
            f.equals(g, verbose=2), "Bad read with select keyword"
        )

    def test_read_squeeze(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        # select on field list
        cf.read(self.filename, squeeze=True)
        cf.read(self.filename, unsqueeze=True)
        with self.assertRaises(Exception):
            cf.read(self.filename, unsqueeze=True, squeeze=True)

    def test_read_aggregate(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        cf.read(self.filename, aggregate=True)
        cf.read(self.filename, aggregate=False)
        cf.read(self.filename, aggregate={})

    def test_read_extra(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        # Test field keyword of cf.read
        filename = self.filename

        f = cf.read(filename)
        self.assertEqual(len(f), 1, "\n" + str(f))

        f = cf.read(filename, extra=["auxiliary_coordinate"])
        self.assertEqual(len(f), 4, "\n" + str(f))

        f = cf.read(filename, extra="cell_measure")
        self.assertEqual(len(f), 2, "\n" + str(f))

        f = cf.read(filename, extra=["field_ancillary"])
        self.assertEqual(len(f), 5, "\n" + str(f))

        f = cf.read(filename, extra="domain_ancillary", verbose=0)
        self.assertEqual(len(f), 4, "\n" + str(f))

        f = cf.read(
            filename, extra=["field_ancillary", "auxiliary_coordinate"]
        )
        self.assertEqual(len(f), 8, "\n" + str(f))

        self.assertEqual(
            len(
                cf.read(
                    filename,
                    extra=["domain_ancillary", "auxiliary_coordinate"],
                )
            ),
            7,
        )

        f = cf.read(
            filename,
            extra=[
                "domain_ancillary", "cell_measure", "auxiliary_coordinate"
            ],
        )
        self.assertEqual(len(f), 8, "\n" + str(f))

        f = cf.read(
            filename,
            extra=(
                "field_ancillary",
                "dimension_coordinate",
                "cell_measure",
                "auxiliary_coordinate",
                "domain_ancillary",
            ),
        )
        self.assertEqual(len(f), 15, "\n" + str(f))

    def test_read_write_format(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)
            for fmt in (
                "NETCDF3_CLASSIC",
                "NETCDF3_64BIT",
                "NETCDF3_64BIT_OFFSET",
                "NETCDF3_64BIT_DATA",
                "NETCDF4",
                "NETCDF4_CLASSIC",
                "CFA",
            ):
                # print (fmt, string)
                f = cf.read(self.filename)[0]
                f0 = f.copy()

                cf.write(f, tmpfile, fmt=fmt)
                g = cf.read(tmpfile, verbose=0)
                self.assertEqual(len(g), 1, "g = " + repr(g))

                g0 = g[0]
                self.assertTrue(
                    f0.equals(g0, verbose=1),
                    "Bad read/write of format {!r}".format(fmt),
                )

    def test_read_write_netCDF4_compress_shuffle(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        tmpfiles.append(tmpfile)

        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)
            f = cf.read(self.filename)[0]
            for fmt in ("NETCDF4", "NETCDF4_CLASSIC", "CFA4"):
                for shuffle in (True,):
                    for compress in (1,):  # range(10):
                        cf.write(
                            f,
                            tmpfile,
                            fmt=fmt,
                            compress=compress,
                            shuffle=shuffle,
                        )
                        g = cf.read(tmpfile)[0]
                        self.assertTrue(
                            f.equals(g, verbose=2),
                            "Bad read/write with lossless compression: "
                            "{0}, {1}, {2}".format(fmt, compress, shuffle),
                        )
        # --- End: for
        cf.chunksize(self.original_chunksize)

    def test_write_datatype(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        tmpfiles.append(tmpfile)

        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)
            f = cf.read(self.filename)[0]
            self.assertEqual(f.dtype, numpy.dtype(float))
            cf.write(
                f,
                tmpfile,
                fmt="NETCDF4",
                datatype={numpy.dtype(float): numpy.dtype("float32")},
            )
            g = cf.read(tmpfile)[0]
            self.assertEqual(
                g.dtype,
                numpy.dtype("float32"),
                "datatype read in is " + str(g.dtype),
            )

        cf.chunksize(self.original_chunksize)

        # Keyword single
        f = cf.read(self.filename)[0]
        self.assertEqual(f.dtype, numpy.dtype(float))
        cf.write(f, tmpfile, fmt="NETCDF4", single=True)
        g = cf.read(tmpfile)[0]
        self.assertEqual(
            g.dtype,
            numpy.dtype("float32"),
            "datatype read in is " + str(g.dtype),
        )

        tmpfiles.append(tmpfile2)

        # Keyword double
        f = g
        self.assertEqual(f.dtype, numpy.dtype("float32"))
        cf.write(f, tmpfile2, fmt="NETCDF4", double=True)
        g = cf.read(tmpfile2)[0]
        self.assertEqual(
            g.dtype, numpy.dtype(float), "datatype read in is " + str(g.dtype)
        )

        for single in (True, False):
            for double in (True, False):
                with self.assertRaises(Exception):
                    _ = cf.write(g, double=double, single=single)
        # --- End: for

        datatype = {numpy.dtype(float): numpy.dtype("float32")}
        with self.assertRaises(Exception):
            _ = cf.write(g, datatype=datatype, single=True)
        with self.assertRaises(Exception):
            _ = cf.write(g, datatype=datatype, double=True)

    def test_write_reference_datetime(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        for reference_datetime in ("1751-2-3", "1492-12-30"):
            for chunksize in self.chunk_sizes:
                cf.chunksize(chunksize)
                f = cf.read(self.filename)[0]
                t = cf.DimensionCoordinate(
                    data=cf.Data([123], "days since 1750-1-1")
                )
                t.standard_name = "time"
                axisT = f.set_construct(cf.DomainAxis(1))
                f.set_construct(t, axes=[axisT])
                cf.write(
                    f,
                    tmpfile,
                    fmt="NETCDF4",
                    reference_datetime=reference_datetime,
                )

                g = cf.read(tmpfile)[0]
                t = g.dimension_coordinate("T")
                self.assertEqual(
                    t.Units,
                    cf.Units("days since " + reference_datetime),
                    (
                        "Units written were "
                        + repr(t.Units.reftime)
                        + " not "
                        + repr(reference_datetime)
                    ),
                )
        # --- End: for
        cf.chunksize(self.original_chunksize)

    def test_read_write_unlimited(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        for fmt in ("NETCDF4", "NETCDF3_CLASSIC"):
            f = cf.read(self.filename)[0]
            f.domain_axes["domainaxis0"].nc_set_unlimited(True)
            cf.write(f, tmpfile, fmt=fmt)

            f = cf.read(tmpfile)[0]
            self.assertTrue(f.domain_axes["domainaxis0"].nc_is_unlimited())

        fmt = "NETCDF4"
        f = cf.read(self.filename)[0]
        f.domain_axes["domainaxis0"].nc_set_unlimited(True)
        f.domain_axes["domainaxis2"].nc_set_unlimited(True)
        cf.write(f, tmpfile, fmt=fmt)

        f = cf.read(tmpfile)[0]
        self.assertTrue(f.domain_axes["domainaxis0"].nc_is_unlimited())
        self.assertTrue(f.domain_axes["domainaxis2"].nc_is_unlimited())

    def test_read_pp(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        p = cf.read("wgdos_packed.pp")[0]

        p0 = cf.read(
            "wgdos_packed.pp",
            um={
                "fmt": "PP",
                "endian": "big",
                "word_size": 4,
                "version": 4.5,
                "height_at_top_of_model": 23423.65,
            },
        )[0]

        self.assertTrue(p.equals(p0, verbose=2))

    def test_read_CDL(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        subprocess.run(
            " ".join(["ncdump", self.filename, ">", tmpfile]),
            shell=True,
            check=True,
        )

        # For the cases of '-h' and '-c', i.e. only header info or
        # coordinates, notably no data, take two cases each: one where
        # there is sufficient info from the metadata to map to fields, and
        # one where there isn't:

        # 1. Sufficient metadata, so should be read in successfully
        subprocess.run(
            " ".join(["ncdump", "-h", self.filename, ">", tmpfileh]),
            shell=True,
            check=True,
        )
        subprocess.run(
            " ".join(["ncdump", "-c", self.filename, ">", tmpfilec]),
            shell=True,
            check=True,
        )

        # 2. Insufficient metadata, so should error with a message as such
        geometry_1_file = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "geometry_1.nc"
        )
        subprocess.run(
            " ".join(["ncdump", "-h", geometry_1_file, ">", tmpfileh2]),
            shell=True,
            check=True,
        )
        subprocess.run(
            " ".join(["ncdump", "-c", geometry_1_file, ">", tmpfilec2]),
            shell=True,
            check=True,
        )

        f0 = cf.read(self.filename)[0]

        # Case (1) as above, so read in and check the fields are as
        # should be
        f = cf.read(tmpfile)[0]
        _ = cf.read(tmpfileh)[0]
        c = cf.read(tmpfilec)[0]

        # Case (2) as above, so the right error should be raised on read
        with self.assertRaises(ValueError):
            cf.read(tmpfileh2)[0]
        with self.assertRaises(ValueError):
            cf.read(tmpfilec2)[0]

        self.assertTrue(f0.equals(f, verbose=2))
        self.assertTrue(
            f.construct("grid_latitude").equals(
                c.construct("grid_latitude"), verbose=2
            )
        )
        self.assertTrue(
            f0.construct("grid_latitude").equals(
                c.construct("grid_latitude"), verbose=2
            )
        )

        with self.assertRaises(Exception):
            _ = cf.read("test_read_write.py")

    def test_read_write_string(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        f = cf.read(self.string_filename)

        n = int(len(f) / 2)

        for i in range(0, n):
            j = i + n
            self.assertTrue(
                f[i].data.equals(f[j].data, verbose=1),
                "{!r} {!r}".format(f[i], f[j]),
            )
            self.assertTrue(
                f[j].data.equals(f[i].data, verbose=1),
                "{!r} {!r}".format(f[j], f[i]),
            )

        f0 = cf.read(self.string_filename)
        for string0 in (True, False):
            for fmt0 in ("NETCDF4", "NETCDF3_CLASSIC"):
                cf.write(f0, tmpfile0, fmt=fmt0, string=string0)

                for string1 in (True, False):
                    for fmt1 in ("NETCDF4", "NETCDF3_CLASSIC"):
                        cf.write(f0, tmpfile1, fmt=fmt1, string=string1)

                        for i, j in zip(
                            cf.read(tmpfile1), cf.read(tmpfile0)
                        ):
                            self.assertTrue(i.equals(j, verbose=1))
        # --- End: for

    def test_read_broken_bounds(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        f = cf.read(self.broken_bounds, verbose=0)
        self.assertEqual(len(f), 2)
def test_GENERAL(self):
    # Save original chunksize
    original_chunksize = cf.chunksize()
    cf.chunksize(60)

    g = self.f.squeeze()
    f = self.f.copy()

    c = cf.set([0, 3, 4, 5])
    _ = f == c

    # +, -, *, /, **
    h = g.copy()
    h **= 2
    h **= 0.5
    h.standard_name = g.standard_name
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h *= 10
    h /= 10.0
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h += 1
    h -= 1
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h = h ** 2.0
    h = h ** 0.5
    h.standard_name = g.standard_name
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h = h * 10
    h = h / 10.0
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h = h + 1
    h = h - 1
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))

    # flip, expand_dims, squeeze and remove_axes
    h = g.copy()
    h.flip((1, 0), inplace=True)
    h.flip((1, 0), inplace=True)
    h.flip(0, inplace=True)
    h.flip(1, inplace=True)
    h.flip([0, 1], inplace=True)
    self.assertTrue(g.equals(h, verbose=2))

    # Access the field's data as a numpy array
    g.array
    g.item("latitude").array
    g.item("longitude").array

    # Subspace the field
    g[..., 2:5].array
    g[9::-4, ...].array
    h = g[(slice(None, None, -1),) * g.ndim]
    h = h[(slice(None, None, -1),) * h.ndim]
    self.assertTrue(g.equals(h, verbose=2))

    # Indices for a subspace defined by coordinates
    f.indices()
    f.indices(grid_latitude=cf.lt(5), grid_longitude=27)
    f.indices(
        grid_latitude=cf.lt(5),
        grid_longitude=27,
        atmosphere_hybrid_height_coordinate=1.5,
    )

    # Subspace the field
    g.subspace(
        grid_latitude=cf.lt(5),
        grid_longitude=27,
        atmosphere_hybrid_height_coordinate=1.5,
    )

    # Create list of fields
    fl = cf.FieldList([g, g, g, g])

    # Write a list of fields to disk
    cf.write((f, fl), tmpfile)
    cf.write(fl, tmpfile)

    # Read a list of fields from disk
    fl = cf.read(tmpfile, squeeze=True)
    for f in fl:
        try:
            del f.history
        except AttributeError:
            pass

    # Access the last field in the list
    x = fl[-1]

    # Access the data of the last field in the list
    x = fl[-1].array

    # Modify the last field in the list
    fl[-1] *= -1
    x = fl[-1].array

    # Changing units
    fl[-1].units = "mm.s-1"
    x = fl[-1].array

    # Combine fields not in place
    g = fl[-1] - fl[-1]
    x = g.array

    # Combine field with a size 1 Data object
    g += cf.Data([[[[[1.5]]]]], "cm.s-1")
    x = g.array

    # Setting of (un)masked elements with where()
    g[::2, 1::2] = numpy.ma.masked
    g.data.to_memory(1)
    g.where(True, 99)
    g.data.to_memory(1)
    g.where(g.mask, 2)
    g.data.to_memory(1)

    g[slice(None, None, 2), slice(1, None, 2)] = cf.masked
    g.data.to_memory(1)
    g.where(g.mask, [[-1]])
    g.data.to_memory(1)
    g.where(True, cf.Data(0, None))
    g.data.to_memory(1)

    h = g[:3, :4]
    h.where(True, -1)
    h[0, 2] = 2
    h.transpose([1, 0], inplace=True)
    h.flip([1, 0], inplace=True)
    g[slice(None, 3), slice(None, 4)] = h

    h = g[:3, :4]
    h[...] = -1
    h[0, 2] = 2
    g[slice(None, 3), slice(None, 4)] = h

    # Make sure all partitions' data are in temporary files
    g.data.to_disk()

    # Push partitions' data from temporary files into memory
    g.data.to_memory(regardless=True)
    g.data.to_disk()

    # Iterate through array values
    for x in f.data.flat():
        pass

    # Reset chunk size
    cf.chunksize(original_chunksize)

    # Move Data partitions to disk
    f.data.to_disk()

    cf.chunksize(original_chunksize)

    f.transpose(inplace=True)
    f.flip(inplace=True)

    cf.write(f, "delme.nc")
    f = cf.read("delme.nc")[0]
    cf.write(f, "delme.nca", fmt="CFA4")
    g = cf.read("delme.nca")[0]

    b = f[:, 0:6, :]
    c = f[:, 6:, :]
    cf.aggregate([b, c], verbose=2)[0]

    # Remove temporary files
    cf.data.partition._remove_temporary_files()

    cf.chunksize(original_chunksize)
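# Note (added): `to_disk` moves each data partition to a temporary file and
# `to_memory` pulls partitions back, with `regardless=True` presumably
# forcing the move even when memory is tight, so the sequence above
# exercises both directions before the temporary files are removed.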
class RegridTest(unittest.TestCase):
    filename1 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file1.nc'
    )
    filename2 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file2.nc'
    )
    filename3 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file3.nc'
    )
    filename4 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file4.nc'
    )
    filename5 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'test_file3.nc'
    )
    filename6 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'test_file2.nc'
    )
    filename7 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file5.nc'
    )
    filename8 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file6.nc'
    )
    filename9 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file7.nc'
    )
    filename10 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'regrid_file8.nc'
    )

    # f1 = cf.read(filename1)[0]
    # f2 = cf.read(filename2)[0]
    # f3 = cf.read(filename3)[0]
    # f4 = cf.read(filename4)[0]
    # f5 = cf.read(filename5)[0]
    # f6 = cf.read(filename6)[0]
    # f7 = cf.read(filename7)[0]
    # f8 = cf.read(filename8)[0]
    # f9 = cf.read(filename9)[0]
    # f10 = cf.read(filename10)[0]

    chunk_sizes = (300, 10000, 100000)[::-1]
    original_chunksize = cf.chunksize()

    test_only = []
    # test_only = ('NOTHING!!!!!',)
    # test_only = ('test_Field_regrids',)
    # test_only = ('test_Field_regridc',)
    # test_only = ('test_Field_section',)
    # test_only = ('test_Data_section',)

    @unittest.skipUnless(cf._found_ESMF, "Requires esmf package.")
    def test_Field_regrids(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        # Set tolerance for array equality
        original_atol = cf.atol(1e-12)

        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)

            f1 = cf.read(self.filename1)[0]
            f2 = cf.read(self.filename2)[0]
            f3 = cf.read(self.filename3)[0]

            r = f1.regrids(f2, 'conservative')
            self.assertTrue(
                f3.equals(r, verbose=2),
                'destination = global Field, CHUNKSIZE = %s' % chunksize
            )
            r = f1.regrids(f2, method='conservative')
            self.assertTrue(
                f3.equals(r, verbose=2),
                'destination = global Field, CHUNKSIZE = %s' % chunksize
            )

            dst = {'longitude': f2.dim('X'), 'latitude': f2.dim('Y')}
            r = f1.regrids(dst, 'conservative', dst_cyclic=True)
            self.assertTrue(
                f3.equals(r, verbose=2),
                'destination = global dict, CHUNKSIZE = %s' % chunksize
            )
            r = f1.regrids(dst, method='conservative', dst_cyclic=True)
            self.assertTrue(
                f3.equals(r, verbose=2),
                'destination = global dict, CHUNKSIZE = %s' % chunksize
            )

            f4 = cf.read(self.filename4)[0]
            f5 = cf.read(self.filename5)[0]

            r = f1.regrids(f5, 'linear')
            self.assertTrue(
                f4.equals(r, verbose=2),
                'destination = regional Field, CHUNKSIZE = %s' % chunksize
            )
            r = f1.regrids(f5, method='linear')
            self.assertTrue(
                f4.equals(r, verbose=2),
                'destination = regional Field, CHUNKSIZE = %s' % chunksize
            )
        # --- End: for
        cf.chunksize(self.original_chunksize)

        f6 = cf.read(self.filename6)[0]
        with self.assertRaises(Exception):
            f1.regridc(f6, axes='T', method='linear')

        cf.atol(original_atol)

    @unittest.skipUnless(cf._found_ESMF, "Requires esmf package.")
    def test_Field_regridc(self):
        if self.test_only and inspect.stack()[0][3] not in self.test_only:
            return

        original_atol = cf.atol(1e-12)

        for chunksize in self.chunk_sizes:
            cf.chunksize(chunksize)

            f1 = cf.read(self.filename7)[0]
            f2 = cf.read(self.filename8)[0]
            f3 = cf.read(self.filename9)[0]
            self.assertTrue(
                f3.equals(
                    f1.regridc(f2, axes='T', method='linear'), verbose=2
                ),
                'destination = time series, CHUNKSIZE = %s' % chunksize
            )

            f4 = cf.read(self.filename1)[0]
            f5 = cf.read(self.filename2)[0]
            f6 = cf.read(self.filename10)[0]
            self.assertTrue(
                f6.equals(
                    f4.regridc(f5, axes=('X', 'Y'), method='conservative'),
                    verbose=2
                ),
                'destination = global Field, CHUNKSIZE = %s' % chunksize
            )
            self.assertTrue(
                f6.equals(
                    f4.regridc(f5, axes=('X', 'Y'), method='conservative'),
                    verbose=2
                ),
                'destination = global Field, CHUNKSIZE = %s' % chunksize
            )

            dst = {'X': f5.dim('X'), 'Y': f5.dim('Y')}
            self.assertTrue(
                f6.equals(
                    f4.regridc(dst, axes=('X', 'Y'), method='conservative'),
                    verbose=2
                ),
                'destination = global dict, CHUNKSIZE = %s' % chunksize
            )
            self.assertTrue(
                f6.equals(
                    f4.regridc(dst, axes=('X', 'Y'), method='conservative'),
                    verbose=2
                ),
                'destination = global dict, CHUNKSIZE = %s' % chunksize
            )
        # --- End: for
        cf.chunksize(self.original_chunksize)

        cf.atol(original_atol)