def test_aggregate_bad_units(self):
    """Check that fields with incompatible units do not aggregate."""
    source = cf.read(self.filename, squeeze=True)[0]
    parts = cf.FieldList(source[0])
    parts.append(source[1:])

    # With consistent units the parts collapse into a single field
    aggregated = cf.aggregate(parts)
    self.assertEqual(len(aggregated), 1)

    # Give the parts mutually invalid units: aggregation must now
    # keep them separate
    parts[0].override_units(cf.Units("apples!"), inplace=True)
    parts[1].override_units(cf.Units("oranges!"), inplace=True)
    aggregated = cf.aggregate(parts)
    self.assertEqual(len(aggregated), 2)
def test_aggregate_domain(self):
    """Check that two complementary domains aggregate to one."""
    field = cf.example_field(0)
    lower_part = field[0:3].domain
    upper_part = field[3:].domain

    result = cf.aggregate([lower_part, upper_part])
    self.assertEqual(len(result), 1, result)
def test_EXTERNAL_AGGREGATE(self):
    """Aggregation of fields carrying an external cell measure."""
    if self.test_only and inspect.stack()[0][3] not in self.test_only:
        return

    # Read the parent file without its external companion file; the
    # first field is the one under test
    f = cf.read(self.parent_file, verbose=0)[0]
    measure_name = "measure:area"

    # Slice f along longitude into three pieces (3 x 3 = 9 points) so
    # the aggregated result can be compared with the original f.
    # Every piece keeps the external-variable cell measure.
    parts = [f[:, :3], f[:, 3:6], f[:, 6:]]

    agg = cf.aggregate(parts)
    self.assertEqual(len(agg), 1)

    # The cell measure sourced from the external variable must have
    # been dropped from the aggregate
    self.assertFalse(agg[0].cell_measures())

    # The aggregate should equal the original once the measure is
    # removed from the latter
    expected = f.copy()
    expected.del_construct(measure_name)
    self.assertEqual(agg[0], expected)

    # Aggregation must leave the measure intact on every input
    for part in parts:
        cell_measure = part.constructs.filter_by_identity(
            "measure:area").value()
        self.assertTrue(cell_measure.nc_get_external())

    # Drop the measure from the middle piece: all parts should still
    # aggregate down to one field without the external measure,
    # rather than 2 -> 1 plus a leftover = 2 fields.
    parts[1].del_construct(measure_name)
    agg = cf.aggregate(parts)
    self.assertEqual(len(agg), 1)
    self.assertFalse(agg[0].cell_measures())

    # The measure must be neither removed from, nor added to, any
    # of the inputs
    for part in (parts[0], parts[2]):
        cm = part.constructs.filter_by_identity("measure:area").value()
        self.assertTrue(cm.nc_get_external())

    self.assertFalse(parts[1].cell_measures())
def test_aggregate_exist_equal_ignore_opts(self):
    """Exercise the 'exist_all'/'equal_all' aggregate options."""
    # TODO: widen the option coverage so that every option, and all
    # sensible combinations of them, are exercised. For now this
    # covers options that used to error because of a bug.
    for chunksize in self.chunk_sizes:
        cf.chunksize(chunksize)
        f = cf.read(self.filename, squeeze=True)[0]

        # Use f unmodified: a basic check that aggregate succeeds and
        # leaves the field untouched under each option:
        g = cf.aggregate(f, exist_all=True)[0]
        self.assertEqual(g, f)

        h = cf.aggregate(f, equal_all=True)[0]
        self.assertEqual(h, f)

        # The two options contradict one another
        with self.assertRaises(ValueError):
            cf.aggregate(f, exist_all=True, equal_all=True)

    cf.chunksize(self.original_chunksize)
def test_aggregate_dimension(self):
    """Test the promotion of property to axis."""
    f = cf.example_field(0)
    g = f.copy()

    # Distinguish the two copies only by a 'sim' property
    f.set_property("sim", "r1i1p1f1")
    g.set_property("sim", "r2i1p1f1")

    self.assertFalse(len(f.auxiliary_coordinates()))

    # Promoting 'sim' to an axis should merge the two fields into one
    # carrying a single new auxiliary coordinate for that axis
    result = cf.aggregate([f, g], dimension="sim")
    self.assertEqual(len(result), 1)

    merged = result[0]
    self.assertEqual(len(merged.auxiliary_coordinates()), 1)
def test_aggregate_verbosity(self):
    """Check which log headers `aggregate` emits per verbosity level."""
    for chunksize in self.chunk_sizes:
        f0 = cf.example_field(0)
        f1 = cf.example_field(1)

        detail_header = "DETAIL:cf.aggregate:STRUCTURAL SIGNATURE:"
        debug_header = "DEBUG:cf.aggregate:COMPLETE AGGREGATION METADATA:"

        # 'DEBUG' (-1) verbosity should output both log message headers...
        with self.assertLogs(level="NOTSET") as caught:
            cf.aggregate([f0, f1], verbose=-1)
            for header in (detail_header, debug_header):
                seen = any(
                    entry.startswith(header) for entry in caught.output
                )
                self.assertTrue(
                    seen,
                    "No log entry begins with '{}'".format(header),
                )

        # ...but with 'DETAIL' (3), should get only the detail-level one.
        with self.assertLogs(level="NOTSET") as caught:
            cf.aggregate([f0, f1], verbose=3)
            detail_seen = any(
                entry.startswith(detail_header) for entry in caught.output
            )
            self.assertTrue(
                detail_seen,
                "No log entry begins with '{}'".format(detail_header),
            )
            debug_seen = any(
                entry.startswith(debug_header) for entry in caught.output
            )
            self.assertFalse(
                debug_seen,
                "A log entry begins with '{}' but should not".format(
                    debug_header),
            )

        # and neither should emerge at the 'WARNING' (1) level.
        with self.assertLogs(level="NOTSET") as caught:
            logger.warning(
                "Dummy message to log something at warning level so that "
                "'assertLog' does not error when no logs messages emerge.")
            # Note: can use assertNoLogs in Python 3.10 to avoid this, see:
            # https://bugs.python.org/issue39385
            cf.aggregate([f0, f1], verbose=1)
            for header in (detail_header, debug_header):
                seen = any(
                    entry.startswith(header) for entry in caught.output
                )
                self.assertFalse(
                    seen,
                    "A log entry begins with '{}' but should not".format(
                        header),
                )
# Relative vorticity from wind components.
# NOTE(review): 'u' and 'v' are defined earlier in this script — not
# visible here; presumably horizontal wind fields. TODO confirm.
zeta = cf.relative_vorticity(u, v)
print(zeta)
print(zeta.array.round(8))

print("\n**Aggregation**\n")

# Split a field into four longitude/time pieces, write each piece to
# its own file, then check that reading the pieces back (with and
# without on-read aggregation) recovers the original field.
a = cf.read('air_temperature.nc')[0]
a
a_parts = [a[0, :, 0:30], a[0, :, 30:96], a[1, :, 0:30], a[1, :, 30:96]]
a_parts
for i, f in enumerate(a_parts):
    cf.write(f, str(i) + '_air_temperature.nc')

# Read with default aggregation vs. aggregate=False followed by an
# explicit cf.aggregate call — the results should compare equal
x = cf.read('[0-3]_air_temperature.nc')
y = cf.read('[0-3]_air_temperature.nc', aggregate=False)
z = cf.aggregate(y)
x
z
x.equals(z)

# Aggregating the in-memory parts directly should give the same field
x = cf.aggregate(a_parts)
x

# Transposing and changing the units of one part must not prevent
# aggregation: cf.aggregate handles such consistent differences
a_parts[1].transpose(inplace=True)
a_parts[1].units = 'degreesC'
a_parts
z = cf.aggregate(a_parts)
z
x.equals(z)

print("\n**Compression**\n")

# Read a contiguous ragged-array (compressed) dataset
h = cf.read('contiguous.nc')[0]
def test_basic_aggregate(self):
    """Basic aggregation of split (and partly flipped) field pieces.

    Splits a field into a mixture of slices, flips some of them,
    and checks that `cf.aggregate` reconstructs the original field
    under several option combinations, without mutating its inputs.
    Also checks aggregation across two files with relaxed/derived
    field identities.
    """
    for chunksize in self.chunk_sizes:
        cf.chunksize(chunksize)

        f = cf.read(self.filename, squeeze=True)[0]

        # Build a FieldList of pieces of f, flipping two of them so
        # that aggregation has real reordering work to do
        g = cf.FieldList(f[0])
        g.append(f[1:3])
        g.append(f[3])
        g[-1].flip(0, inplace=True)
        g.append(f[4:7])
        g[-1].flip(0, inplace=True)
        g.extend([f[i] for i in range(7, f.shape[0])])

        g0 = g.copy()
        self.assertTrue(g.equals(g0, verbose=2), "g != g0")

        with warnings.catch_warnings():
            # Suppress noise throughout the test fixture from:
            #
            # ~/cf-python/cf/__init__.py:1459: FutureWarning: elementwise
            # comparison failed; returning scalar instead, but in the
            # future will perform elementwise comparison
            #
            # TODO: it is not clear where the above emerges from, e.g.
            # since __init__ file ref'd does not have that many lines.
            # It seems like this warning arises from NumPy comparisons
            # done at some point in (only) some aggregate calls (see e.g:
            # https://github.com/numpy/numpy/issues/6784).
            warnings.filterwarnings("ignore", category=FutureWarning)
            h = cf.aggregate(g, verbose=2)

        self.assertEqual(len(h), 1)
        self.assertEqual(
            h[0].shape,
            (10, 9),
            "h[0].shape = " + repr(h[0].shape) + " != (10, 9)",
        )
        # Aggregation must not mutate its inputs
        self.assertTrue(
            g.equals(g0, verbose=2), "g != itself after aggregation"
        )
        self.assertTrue(h[0].equals(f, verbose=2), "h[0] != f")

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=FutureWarning)
            i = cf.aggregate(g, verbose=2)

        self.assertTrue(
            i.equals(h, verbose=2), "The second aggregation != the first"
        )
        self.assertTrue(
            g.equals(g0, verbose=2),
            "g != itself after the second aggregation",
        )

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=FutureWarning)
            i = cf.aggregate(g, verbose=2, axes="grid_latitude")

        self.assertTrue(
            i.equals(h, verbose=2), "The third aggregation != the first"
        )
        # Fixed typo in the assertion message ("g !=itself")
        self.assertTrue(
            g.equals(g0, verbose=2),
            "g != itself after the third aggregation",
        )
        self.assertEqual(
            i[0].shape, (10, 9), "i[0].shape is " + repr(i[0].shape)
        )

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=FutureWarning)
            i = cf.aggregate(
                g,
                verbose=2,
                axes="grid_latitude",
                donotchecknonaggregatingaxes=1,
            )

        self.assertTrue(
            i.equals(h, verbose=2), "The fourth aggregation != the first"
        )
        self.assertTrue(
            g.equals(g0, verbose=2),
            "g != itself after the fourth aggregation",
        )
        self.assertEqual(
            i[0].shape, (10, 9), "i[0].shape is " + repr(i[0].shape)
        )

        # Aggregation across fields read from two different files,
        # relying on relaxed identities since standard names differ
        q, t = cf.read(self.file)
        c = cf.read(self.file2)[0]

        d = cf.aggregate([c, t], verbose=1, relaxed_identities=True)
        e = cf.aggregate([t, c], verbose=1, relaxed_identities=True)

        self.assertEqual(len(d), 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(d[0].shape, (3,) + t.shape)
        self.assertTrue(d[0].equals(e[0], verbose=2))

        # Same behaviour when aggregation is driven from cf.read
        x = cf.read(["file.nc", "file2.nc"], aggregate=False)
        self.assertEqual(len(x), 3)

        x = cf.read(
            ["file.nc", "file2.nc"],
            aggregate={"relaxed_identities": True},
        )
        self.assertEqual(len(x), 2)

        # With no standard names at all the fields must stay apart...
        del t.standard_name
        del c.standard_name
        x = cf.aggregate([c, t], verbose=1)
        self.assertEqual(len(x), 2)

        # ...unless identity is taken from a shared 'long_name'
        t.long_name = "qwerty"
        c.long_name = "qwerty"
        x = cf.aggregate([c, t], field_identity="long_name")
        self.assertEqual(len(x), 1)

    cf.chunksize(self.original_chunksize)
def test_write_netcdf_mode(self):
    """Test the `mode` parameter to `write`, notably append mode."""
    g = cf.read(self.filename)  # note 'g' has one field

    # Test special case #1: attempt to append fields with groups
    # (other than 'root') which should be forbidden. Using fmt="NETCDF4"
    # since it is the only format where groups are allowed.
    #
    # Note: this is not the most natural test to do first, but putting
    # it before the rest reduces spurious seg faults for me, so...
    g[0].nc_set_variable_groups(["forecast", "model"])
    cf.write(g, tmpfile, fmt="NETCDF4", mode="w")  # 1. overwrite to wipe
    f = cf.read(tmpfile)
    with self.assertRaises(ValueError):
        cf.write(g[0], tmpfile, fmt="NETCDF4", mode="a")

    # Test special case #2: attempt to append fields with contradictory
    # featureType to the original file:
    g[0].nc_clear_variable_groups()
    g[0].nc_set_global_attribute("featureType", "profile")
    cf.write(
        g,
        tmpfile,
        fmt="NETCDF4",
        mode="w",
        global_attributes=("featureType", "profile"),
    )  # 1. overwrite to wipe
    h = cf.example_field(3)
    h.nc_set_global_attribute("featureType", "timeSeries")
    with self.assertRaises(ValueError):
        cf.write(h, tmpfile, fmt="NETCDF4", mode="a")

    # Now remove featureType attribute for subsquent tests:
    g_attrs = g[0].nc_clear_global_attributes()
    del g_attrs["featureType"]
    g[0].nc_set_global_attributes(g_attrs)

    # Set a non-trivial (i.e. not only 'Conventions') global attribute to
    # make the global attribute testing more robust:
    add_global_attr = ["remark", "A global comment."]
    original_global_attrs = g[0].nc_global_attributes()
    original_global_attrs[add_global_attr[0]] = None  # -> None on fields
    g[0].nc_set_global_attribute(*add_global_attr)

    # First test a bad mode value:
    with self.assertRaises(ValueError):
        cf.write(g[0], tmpfile, mode="g")

    g_copy = g.copy()

    for fmt in self.netcdf_fmts:  # test over all netCDF 3 and 4 formats
        # Other tests cover write as default mode (i.e. test with no mode
        # argument); here test explicit provision of 'w' as argument:
        cf.write(
            g,
            tmpfile,
            fmt=fmt,
            mode="w",
            global_attributes=add_global_attr,
        )
        f = cf.read(tmpfile)
        new_length = 1  # since 1 == len(g)
        self.assertEqual(len(f), new_length)
        # Ignore as 'remark' should be 'None' on the field as tested below
        self.assertTrue(f[0].equals(g[0], ignore_properties=["remark"]))
        self.assertEqual(
            f[0].nc_global_attributes(), original_global_attrs
        )

        # Main aspect of this test: testing the append mode ('a'): now
        # append all other example fields, to check a diverse variety.
        for ex_field_n, ex_field in enumerate(cf.example_fields()):
            # Note: after Issue #141, this skip can be removed.
            if ex_field_n == 1:
                continue

            # Skip since "RuntimeError: Can't create variable in
            # NETCDF4_CLASSIC file from (2) (NetCDF: Attempting netcdf-4
            # operation on strict nc3 netcdf-4 file)" i.e. not possible.
            if fmt == "NETCDF4_CLASSIC" and ex_field_n in (6, 7):
                continue

            # Skip since "Can't write int64 data from <Count: (2) > to a
            # NETCDF3_CLASSIC file" causes a ValueError i.e. not possible.
            # Note: can remove this when Issue #140 is closed.
            if fmt in self.netcdf3_fmts and ex_field_n == 6:
                continue

            cf.write(ex_field, tmpfile, fmt=fmt, mode="a")
            f = cf.read(tmpfile)

            if ex_field_n == 5:  # another special case
                # The n=2 and n=5 example fields for cf-python aggregate
                # down to one field, e.g. for b as n=2 and c as n=5:
                # >>> c.equals(b, verbose=-1)
                # Data: Different shapes: (118, 5, 8) != (36, 5, 8)
                # Field: Different data
                # False
                # >>> a = cf.aggregate([b, c])
                # >>> a
                # [<CF Field: air_potential_temperature(
                #  time(154), latitude(5), longitude(8)) K>]
                #
                # therefore need to check FL length hasn't changed and
                # (further below) that n=2,5 aggregated field is present.
                pass  # i.e. new_length should remain the same as before
            else:
                new_length += 1  # should be exactly one more field now
            self.assertEqual(len(f), new_length)

            if ex_field_n == 5:
                ex_n2_and_n5_aggregated = cf.aggregate(
                    [cf.example_field(2), cf.example_field(5)]
                )[0]
                self.assertTrue(
                    any(
                        [
                            ex_n2_and_n5_aggregated.equals(
                                file_field,
                                ignore_properties=[
                                    "comment",
                                    "featureType",
                                    "remark",
                                ],
                            )
                            for file_field in f
                        ]
                    )
                )
            else:
                # Can't guarantee order of fields created during append op.
                # so check new field is *somewhere* in read-in fieldlist
                self.assertTrue(
                    any(
                        [
                            ex_field.equals(
                                file_field,
                                ignore_properties=[
                                    "comment",
                                    "featureType",
                                    "remark",
                                ],
                            )
                            for file_field in f
                        ]
                    )
                )

            for file_field in f:
                self.assertEqual(
                    file_field.nc_global_attributes(),
                    original_global_attrs,
                )

        # Now do the same test, but appending all of the example fields in
        # one operation rather than one at a time, to check that it works.
        cf.write(g, tmpfile, fmt=fmt, mode="w")  # 1. overwrite to wipe
        append_ex_fields = cf.example_fields()
        del append_ex_fields[1]  # note: can remove after Issue #141 closed
        # Note: can remove this del when Issue #140 is closed:
        if fmt in self.netcdf3_fmts:
            del append_ex_fields[5]  # n=6 ex_field, minus 1 for above del
        # BUG FIX: this previously used 'fmt in "NETCDF4_CLASSIC"', a
        # substring test that is also True for fmt == "NETCDF4", so the
        # NETCDF4 format was wrongly truncated too. Use equality, to
        # match the 'fmt == "NETCDF4_CLASSIC"' check in the loop above.
        if fmt == "NETCDF4_CLASSIC":
            # Remove n=6 and =7 for reasons as given above (del => minus 1)
            append_ex_fields = append_ex_fields[:5]

        # Equals len(append_ex_fields), + 1 [for original 'g'] and -1 [for
        # field n=5 which aggregates to one with n=2] => + 1 - 1 = + 0:
        overall_length = len(append_ex_fields)

        cf.write(
            append_ex_fields, tmpfile, fmt=fmt, mode="a"
        )  # 2. now append
        f = cf.read(tmpfile)
        self.assertEqual(len(f), overall_length)

        # Also test the mode="r+" alias for mode="a".
        cf.write(g, tmpfile, fmt=fmt, mode="w")  # 1. overwrite to wipe
        cf.write(
            append_ex_fields, tmpfile, fmt=fmt, mode="r+"
        )  # 2. now append
        f = cf.read(tmpfile)
        self.assertEqual(len(f), overall_length)

        # The appended fields themselves are now known to be correct,
        # but we also need to check that any coordinates that are
        # equal across different fields have been shared in the
        # source netCDF, rather than written in separately.
        #
        # Note that the coordinates that are shared across the set of
        # all example fields plus the field 'g' from the contents of
        # the original file (self.filename) are as follows:
        #
        # 1. Example fields n=0 and n=1 share:
        #    <DimensionCoordinate: time(1) days since 2018-12-01 >
        # 2. Example fields n=0, n=2 and n=5 share:
        #    <DimensionCoordinate: latitude(5) degrees_north> and
        #    <DimensionCoordinate: longitude(8) degrees_east>
        # 3. Example fields n=2 and n=5 share:
        #    <DimensionCoordinate: air_pressure(1) hPa>
        # 4. The original file field ('g') and example field n=1 share:
        #    <AuxiliaryCoordinate: latitude(10, 9) degrees_N>,
        #    <AuxiliaryCoordinate: longitude(9, 10) degrees_E>,
        #    <Dimension...: atmosphere_hybrid_height_coordinate(1) >,
        #    <DimensionCoordinate: grid_latitude(10) degrees>,
        #    <DimensionCoordinate: grid_longitude(9) degrees> and
        #    <DimensionCoordinate: time(1) days since 2018-12-01 >
        #
        # Therefore we check all of those coordinates for singularity,
        # i.e. the same underlying netCDF variables, in turn.

        # But first, since the order of the fields appended isn't
        # guaranteed, we must find the mapping of the example fields to
        # their position in the read-in FieldList.
        f = cf.read(tmpfile)

        # Element at index N gives position of example field n=N in file
        file_field_order = []
        for ex_field in cf.example_fields():
            position = [
                f.index(file_field)
                for file_field in f
                if ex_field.equals(
                    file_field,
                    ignore_properties=["comment", "featureType", "remark"],
                )
            ]
            if not position:
                position = [None]  # to record skipped example fields
            file_field_order.append(position[0])

        equal_coors = {
            ((0, "dimensioncoordinate2"), (1, "dimensioncoordinate3")),
            ((0, "dimensioncoordinate0"), (2, "dimensioncoordinate1")),
            ((0, "dimensioncoordinate1"), (2, "dimensioncoordinate2")),
            ((0, "dimensioncoordinate0"), (5, "dimensioncoordinate1")),
            ((0, "dimensioncoordinate1"), (5, "dimensioncoordinate2")),
            ((2, "dimensioncoordinate3"), (5, "dimensioncoordinate3")),
        }
        for coor_1, coor_2 in equal_coors:
            ex_field_1_position, c_1 = coor_1
            ex_field_2_position, c_2 = coor_2
            # Now map the appropriate example field to the file FieldList
            f_1 = file_field_order[ex_field_1_position]
            f_2 = file_field_order[ex_field_2_position]
            # None for fields skipped in test, distinguish from falsy 0
            if f_1 is None or f_2 is None:
                continue
            self.assertEqual(
                f[f_1]
                .constructs()
                .filter_by_identity(c_1)
                .value()
                .nc_get_variable(),
                f[f_2]
                .constructs()
                .filter_by_identity(c_2)
                .value()
                .nc_get_variable(),
            )

        # Note: after Issue #141, the block below should be un-commented.
        #
        # # The original file field 'g' must be at the remaining position:
        # rem_position = list(set(
        #     range(len(f))).difference(set(file_field_order)))[0]
        # # In the final cases, it is easier to remove the one differing
        # # coordinate to get the equal coordinates that should be shared:
        # original_field_coors = dict(f[rem_position].coordinates())
        # ex_field_1_coors = dict(f[file_field_order[1]].coordinates())
        # for orig_coor, ex_1_coor in zip(
        #         original_field_coors.values(), ex_field_1_coors.values()):
        #     # The 'auxiliarycoordinate2' construct differs for both, so
        #     # skip that but otherwise the two fields have the same coors:
        #     if orig_coor.identity == "auxiliarycoordinate2":
        #         continue
        #     self.assertEqual(
        #         orig_coor.nc_get_variable(),
        #         ex_1_coor.nc_get_variable(),
        #     )

        # Check behaviour when append identical fields, as an edge case:
        cf.write(g, tmpfile, fmt=fmt, mode="w")  # 1. overwrite to wipe
        cf.write(g_copy, tmpfile, fmt=fmt, mode="a")  # 2. now append
        f = cf.read(tmpfile)
        self.assertEqual(len(f), 2 * len(g))
        self.assertTrue(
            any(
                [
                    file_field.equals(g[0], ignore_properties=["remark"])
                    for file_field in f
                ]
            )
        )
        self.assertEqual(
            f[0].nc_global_attributes(), original_global_attrs
        )
def test_GENERAL(self):
    """Broad smoke test: arithmetic, flipping, subspacing, masking,
    partition to-disk/to-memory round-trips, write/read and
    aggregation on the fixture field."""
    # Save original chunksize
    original_chunksize = cf.chunksize()
    cf.chunksize(60)

    g = self.f.squeeze()
    f = self.f.copy()

    # Comparison of a field against a cf.set of values
    c = cf.set([0, 3, 4, 5])
    _ = f == c

    # +, -, *, /, **
    # Each in-place op is paired with its inverse, so h should remain
    # numerically equal to g after every pair
    h = g.copy()
    h **= 2
    h **= 0.5
    h.standard_name = g.standard_name
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h *= 10
    h /= 10.0
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h += 1
    h -= 1
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    # Same round-trips with the non-in-place operator forms
    h = h ** 2.0
    h = h ** 0.5
    h.standard_name = g.standard_name
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h = h * 10
    h = h / 10.0
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))
    h = h + 1
    h = h - 1
    self.assertTrue(g.data.allclose(h.data), repr(g.array - h.array))

    # flip, expand_dims, squeeze and remove_axes
    # An even number of flips on each axis must restore the original
    h = g.copy()
    h.flip((1, 0), inplace=True)
    h.flip((1, 0), inplace=True)
    h.flip(0, inplace=True)
    h.flip(1, inplace=True)
    h.flip([0, 1], inplace=True)
    self.assertTrue(g.equals(h, verbose=2))

    # Access the field's data as a numpy array
    g.array
    g.item("latitude").array
    g.item("longitude").array

    # Subspace the field
    g[..., 2:5].array
    g[9::-4, ...].array
    # Double reversal along every axis must restore the original
    h = g[(slice(None, None, -1),) * g.ndim]
    h = h[(slice(None, None, -1),) * h.ndim]
    self.assertTrue(g.equals(h, verbose=2))

    # Indices for a subspace defined by coordinates
    f.indices()
    f.indices(grid_latitude=cf.lt(5), grid_longitude=27)
    f.indices(
        grid_latitude=cf.lt(5),
        grid_longitude=27,
        atmosphere_hybrid_height_coordinate=1.5,
    )

    # Subspace the field
    g.subspace(
        grid_latitude=cf.lt(5),
        grid_longitude=27,
        atmosphere_hybrid_height_coordinate=1.5,
    )

    # Create list of fields
    fl = cf.FieldList([g, g, g, g])

    # Write a list of fields to disk
    cf.write((f, fl), tmpfile)
    cf.write(fl, tmpfile)

    # Read a list of fields from disk
    fl = cf.read(tmpfile, squeeze=True)
    for f in fl:
        try:
            del f.history
        except AttributeError:
            pass

    # Access the last field in the list
    x = fl[-1]

    # Access the data of the last field in the list
    x = fl[-1].array

    # Modify the last field in the list
    fl[-1] *= -1
    x = fl[-1].array

    # Changing units
    fl[-1].units = "mm.s-1"
    x = fl[-1].array

    # Combine fields not in place
    g = fl[-1] - fl[-1]
    x = g.array

    # Combine field with a size 1 Data object
    g += cf.Data([[[[[1.5]]]]], "cm.s-1")
    x = g.array

    # Setting of (un)masked elements with where()
    g[::2, 1::2] = numpy.ma.masked
    g.data.to_memory(1)
    g.where(True, 99)
    g.data.to_memory(1)
    g.where(g.mask, 2)
    g.data.to_memory(1)
    g[slice(None, None, 2), slice(1, None, 2)] = cf.masked
    g.data.to_memory(1)
    g.where(g.mask, [[-1]])
    g.data.to_memory(1)
    g.where(True, cf.Data(0, None))
    g.data.to_memory(1)

    # Assignment of a transposed-and-flipped subspace back into g
    h = g[:3, :4]
    h.where(True, -1)
    h[0, 2] = 2
    h.transpose([1, 0], inplace=True)
    h.flip([1, 0], inplace=True)
    g[slice(None, 3), slice(None, 4)] = h

    # Plain subspace assignment
    h = g[:3, :4]
    h[...] = -1
    h[0, 2] = 2
    g[slice(None, 3), slice(None, 4)] = h

    # Make sure all partitions' data are in temporary files
    g.data.to_disk()

    # Push partitions' data from temporary files into memory
    g.data.to_memory(regardless=True)
    g.data.to_disk()

    # Iterate through array values
    for x in f.data.flat():
        pass

    # Reset chunk size
    cf.chunksize(original_chunksize)

    # Move Data partitions to disk
    f.data.to_disk()

    cf.chunksize(original_chunksize)

    f.transpose(inplace=True)
    f.flip(inplace=True)

    # Round-trip through netCDF and CFA formats, then aggregate two
    # complementary slices of the re-read field
    cf.write(f, "delme.nc")
    f = cf.read("delme.nc")[0]
    cf.write(f, "delme.nca", fmt="CFA4")
    g = cf.read("delme.nca")[0]

    b = f[:, 0:6, :]
    c = f[:, 6:, :]
    cf.aggregate([b, c], verbose=2)[0]

    # Remove temporary files
    cf.data.partition._remove_temporary_files()

    cf.chunksize(original_chunksize)
# NOTE(review): 'a' and 'b' are defined earlier in this script — not
# visible here; presumably fields with a time ('T') coordinate.
print(b)
print(a.coordinate('T').bounds[-1].dtarray)
print(b.coordinate('T').bounds[-1].dtarray)

print("\n**Aggregation**\n")

# Split a field into four pieces, write each to its own file, then
# check that reading them back (with and without on-read aggregation)
# gives equal results
a = cf.read('air_temperature.nc')[0]
a
a_parts = [a[0, :, 0:30], a[0, :, 30:], a[1, :, 0:30], a[1, :, 30:]]
a_parts
for i, f in enumerate(a_parts):
    cf.write(f, str(i) + '_air_temperature.nc')

x = cf.read('[0-3]_air_temperature.nc')
y = cf.read('[0-3]_air_temperature.nc', aggregate=False)
z = cf.aggregate(y)
x.equals(z)

print("\n**Compression**\n")

# Inspect a contiguous ragged-array (compressed) dataset: its
# uncompressed view, underlying compressed array and count variable
h = cf.read('contiguous.nc')[0]
print(h)
print(h.array)
h.data.get_compression_type()
print(h.data.compressed_array)
count_variable = h.data.get_count()
count_variable
print(count_variable.array)

# Subspacing uncompresses on the fly: station 2's values
station2 = h[1]
station2
print(station2.array)