def __init__(self, model_file_name, ref_file_name=None, max_cache_size=None):
    """Open the requested NetCDF files and set up an empty variable cache.

    :param model_file_name: path of the model file; nothing is opened when
        this is None.
    :param ref_file_name: optional path of a separate reference file; it is
        opened (before the model file) only when given.
    :param max_cache_size: upper bound used by the cache bookkeeping;
        defaults to ``sys.maxsize`` when None.
    """
    if ref_file_name is not None:
        self.__reference_file = NetCDFFacade(ref_file_name)
    if model_file_name is not None:
        self.__model_file = NetCDFFacade(model_file_name)

    # Fall back to the largest practical value when no limit was requested.
    if max_cache_size is None:
        max_cache_size = sys.maxsize
    self.max_cache_size = max_cache_size

    # Names of the variables currently cached, and their accumulated size.
    self.cached_list = []
    self.current_memory = 0
class Data(object):
    """Cached access to model and reference variables stored in NetCDF files.

    Model data always comes from the model file. Reference data comes from a
    separate reference file when one was given to the constructor ("split"
    reference data), and from the model file otherwise.

    Variables that are read are kept as instance attributes named after the
    variable and tracked in ``cached_list``; ``current_memory`` holds the
    accumulated size of the cached variables as reported by
    ``compute_variable_size``.
    """

    def __init__(self, model_file_name, ref_file_name=None, max_cache_size=None):
        """Open the requested files and set up an empty cache.

        :param model_file_name: path of the model file; nothing is opened
            when this is None.
        :param ref_file_name: optional path of a separate reference file.
        :param max_cache_size: upper bound used by ``ensure_memory``;
            defaults to ``sys.maxsize`` when None.
        """
        if ref_file_name is not None:
            self.__reference_file = NetCDFFacade(ref_file_name)
        if model_file_name is not None:
            self.__model_file = NetCDFFacade(model_file_name)
        self.max_cache_size = max_cache_size if max_cache_size is not None else sys.maxsize
        self.cached_list = []
        self.current_memory = 0

    def model_vars(self):
        """Return the model variables of the model file (looked up once)."""
        if not hasattr(self, 'model_variables'):
            self.model_variables = self.__model_file.get_model_variables()
        return self.model_variables

    def close(self):
        """Close the model file and, if present, the reference file."""
        self.__model_file.close()
        if self.is_ref_data_split():
            self.__reference_file.close()

    def ref_vars(self):
        """Return the reference variables of all opened files (looked up once)."""
        if hasattr(self, 'reference_variables'):
            return self.reference_variables
        reference_variables = self.__model_file.get_reference_variables()
        if self.is_ref_data_split():
            reference_variables.extend(self.__reference_file.get_reference_variables())
        self.reference_variables = reference_variables
        return reference_variables

    def has_model_dimension(self, dimension_name):
        """Return whether the model file has the given dimension."""
        return self.__model_file.has_model_dimension(dimension_name)

    def reference_coordinate_variables(self):
        """Return the reference coordinate variables of all opened files."""
        variables = self.__model_file.get_ref_coordinate_variables()
        if self.is_ref_data_split():
            variables.extend(self.__reference_file.get_ref_coordinate_variables())
        return variables

    def model_dim_size(self, dim_name):
        """Return the size of a dimension of the model file."""
        return self.dim_size(self.__model_file, dim_name)

    def ref_dim_size(self, dim_name):
        """Return the size of a dimension of the file holding reference data."""
        if self.is_ref_data_split():
            return self.dim_size(self.__reference_file, dim_name)
        return self.dim_size(self.__model_file, dim_name)

    def dim_size(self, ncfile, dim_name):
        """Return the size of dimension ``dim_name`` in ``ncfile``."""
        return ncfile.get_dim_size(dim_name)

    def __dimension_string(self, ncfile, variable_name):
        """Return the dimension string of ``variable_name`` in ``ncfile``."""
        return ncfile.get_dimension_string(variable_name)

    def is_ref_data_split(self):
        """Return True when a separate reference file was opened."""
        # '_Data__reference_file' is the name-mangled form of the private
        # attribute that __init__ only sets when a reference file is given.
        return hasattr(self, '_Data__reference_file')

    def reference_records_count(self, dimension_profile):
        """Count the reference values whose variables match a dimension set.

        For every reference variable whose set of dimension names equals
        ``dimension_profile``, the product of the sizes of those dimensions
        is added to the result.

        :param dimension_profile: set of dimension names to match.
        :return: the accumulated count, 0 if there are no reference variables.
        """
        ref_vars = self.ref_vars()
        if not ref_vars:
            return 0
        ncfile = self.__reference_file if self.is_ref_data_split() else self.__model_file
        dim_size = 0
        for var in ref_vars:
            dimension_set = set(self.__dimension_string(ncfile, var).split(' '))
            if dimension_profile == dimension_set:
                temp = 1
                for dim in dimension_profile:
                    temp *= self.dim_size(ncfile, dim)
                dim_size += temp
        return dim_size

    def get_reference_dimensions(self, variable_name=None):
        """Return the dimensions reported by the file holding reference data."""
        ncfile = self.__reference_file if self.is_ref_data_split() else self.__model_file
        return ncfile.get_dimensions(variable_name)

    def get_model_dimensions(self, variable_name=None):
        """Return the dimensions reported by the model file (memoised per name)."""
        if not hasattr(self, 'model_dimensions'):
            self.model_dimensions = {}
        if variable_name not in self.model_dimensions:
            self.model_dimensions[variable_name] = self.__model_file.get_dimensions(variable_name)
        return self.model_dimensions[variable_name]

    def read_model(self, variable_name, origin=None):
        """Read a variable from the model file; see ``__read``."""
        return self.__read(self.__model_file, variable_name, origin)

    def read_reference(self, variable_name, origin=None):
        """Read a variable from the file holding reference data; see ``__read``."""
        ncfile = self.__reference_file if self.is_ref_data_split() else self.__model_file
        return self.__read(ncfile, variable_name, origin)

    def find_item_to_delete(self):
        """Remove and return the name of the largest cached variable."""
        self.cached_list = sorted(self.cached_list, key=self.compute_variable_size, reverse=True)
        return self.cached_list.pop(0)

    def get_current_cache_size(self):
        """Return the summed size of all variables listed in the cache."""
        return sum(self.compute_variable_size(variable_name) for variable_name in self.cached_list)

    def ensure_memory(self, variable_size):
        """Evict cached variables until ``variable_size`` more fits the limit.

        Largest variables are evicted first. Eviction stops when the cache is
        empty, even if the requested size still exceeds ``max_cache_size``.
        """
        will_cache_overflow = self.max_cache_size <= self.current_memory + variable_size
        while will_cache_overflow and len(self.cached_list) > 0:
            var_to_delete = self.find_item_to_delete()
            self.current_memory -= self.compute_variable_size(var_to_delete)
            logging.debug('Deleting variable \'%s\' from cache.' % var_to_delete)
            self.__delattr__(var_to_delete)
            will_cache_overflow = self.max_cache_size <= self.current_memory + variable_size
        if not os.name == 'nt':
            logging.debug('Memory in use after \'ensure_memory\' called: %.2f MB' % (mem() / 1024))

    def __read(self, ncfile, variable_name, origin=None):
        """Return a variable's data, reading it fully into the cache if needed.

        :param ncfile: file to read from when the variable is not cached.
        :param variable_name: name of the variable.
        :param origin: optional index sequence; when given, only the value at
            that position is returned.
        :return: a masked array of the whole variable when ``origin`` is
            None, otherwise the single value at ``origin``.
        """
        if not self.__is_cached(variable_name):
            # The size is only needed on a cache miss, so it is looked up
            # once here and reused for the memory bookkeeping below.
            variable_size = self.compute_variable_size(variable_name)
            self.ensure_memory(variable_size)
            logging.debug('Reading variable \'%s\' fully into cache.' % variable_name)
            if not os.name == 'nt':
                logging.debug('Memory in use before reading variable %s fully into cache: %.2f MB' % (variable_name, mem() / 1024))
            variable = ncfile.get_variable(variable_name)
            # The cached data lives in an instance attribute named after the variable.
            self.__setattr__(variable_name, variable[:])
            self.cached_list.append(variable_name)
            self.current_memory += variable_size
            if not os.name == 'nt':
                logging.debug('Memory in use after reading variable %s fully into cache: %.2f MB' % (variable_name, mem() / 1024))
        if origin is None:
            return ma.array(self.__getattribute__(variable_name))
        return self.get_data(origin, variable_name)

    def get_data(self, origin, variable_name):
        """Read single pixel from origin.

        The variable must already be cached as an instance attribute.
        """
        return self.__getattribute__(variable_name)[tuple(origin)]

    def __is_cached(self, variable_name):
        """Return whether ``variable_name`` is listed in the cache."""
        return variable_name in self.cached_list

    def __find_model_variable_name(self, possible_names, standard_name):
        """Find a model variable by candidate names or by ``standard_name``.

        :raises ValueError: if neither lookup finds a variable.
        """
        for name in possible_names:
            if self.__model_file.get_variable(name) is not None:
                return name
        # NOTE(review): this fallback expects get_coordinate_variables() to
        # yield objects with a ``name`` attribute and the facade to offer an
        # ``attribute`` method -- confirm against NetCDFFacade.
        for var in self.__model_file.get_coordinate_variables():
            if self.__model_file.attribute(var.name, 'standard_name') == standard_name:
                return var.name
        raise ValueError('Unable to find \'%s\'-variable.' % standard_name)

    def find_model_latitude_variable_name(self):
        """Return the name of the model's latitude variable."""
        return self.__find_model_variable_name(['lat', 'latitude'], 'latitude')

    def find_model_longitude_variable_name(self):
        """Return the name of the model's longitude variable."""
        return self.__find_model_variable_name(['lon', 'longitude'], 'longitude')

    def unit(self, variable_name):
        """Return the unit of a variable, preferring the model file.

        :return: the unit found in the model file, else the one found in the
            reference file, else None.
        :raises ValueError: if the variable is in none of the opened files.
        """
        is_not_in_ref_file = not self.is_ref_data_split() or self.__reference_file.get_variable(variable_name) is None
        is_not_in_model_file = self.__model_file.get_variable(variable_name) is None
        if is_not_in_model_file and is_not_in_ref_file:
            raise ValueError('Variable \'%s\' not found.' % variable_name)
        model_unit = utils.get_unit(self.__model_file, variable_name)
        if model_unit:
            return model_unit
        if self.is_ref_data_split():
            ref_unit = utils.get_unit(self.__reference_file, variable_name)
            if ref_unit:
                return ref_unit
        return None

    def compute_variable_size(self, variable_name):
        """Return the size of a variable computed from its shape and item size.

        The model file is searched first, then the reference file.

        :raises ValueError: if no opened file contains the variable.
        """
        variable = self.__model_file.get_variable(variable_name)
        if variable is None and self.is_ref_data_split():
            variable = self.__reference_file.get_variable(variable_name)
        if variable is None:
            raise ValueError('No variable found with name \'%s\'' % variable_name)
        return compute_array_size(variable.shape, variable.dtype.itemsize)

    def has_one_dim_ref_var(self):
        """Return True if any reference variable has exactly one dimension."""
        return any(len(self.get_reference_dimensions(var)) == 1 for var in self.ref_vars())

    def get_values(self, ref_name, model_name):
        """Return the matching unmasked values of a reference/model pair.

        Both variables are sliced as computed by ``get_slices``; every
        position masked in either of them is dropped from both.

        :return: tuple ``(reference_values, model_values)`` of compressed
            one-dimensional arrays.
        """
        model_values_slices, ref_values_slices = self.get_slices(model_name, ref_name)
        # NumPy requires a tuple for multidimensional indexing; indexing with
        # a list of slices is deprecated and no longer selects per axis.
        model_values = self.read_model(model_name)[tuple(model_values_slices)]
        reference_values = self.read_reference(ref_name)[tuple(ref_values_slices)]
        # The slices are views onto the cached arrays. Assigning to their
        # ``mask`` would write the combined mask back into the cache, so new
        # masked arrays are built instead.
        combined_mask = ma.getmaskarray(reference_values) | ma.getmaskarray(model_values)
        logging.debug('Compressing ref-variable %s' % ref_name)
        reference_values = ma.array(ma.getdata(reference_values), mask=combined_mask).compressed()
        logging.debug('Compressing model variable %s' % model_name)
        model_values = ma.array(ma.getdata(model_values), mask=combined_mask).compressed()
        return reference_values, model_values

    def get_slices(self, model_name, ref_name):
        """Compute per-dimension slices aligning a model and a reference variable.

        Dimensions that differ between the two variables are reduced to a
        single index: the nearest matching position for the model variable
        and position 0 for the reference variable.

        :return: tuple ``(model_values_slices, ref_values_slices)`` of lists
            of ``slice`` objects, one per dimension.
        """
        differing_dim_names = self.get_differing_dim_names(model_name, ref_name)
        differing_model_dimension_var_indices = self.__get_differing_model_dimension_var_indices(differing_dim_names)
        model_values_slices = []
        for dim in self.get_model_dimensions(model_name):
            if dim in differing_dim_names:
                index = differing_model_dimension_var_indices[dim]
                model_values_slices.append(slice(index, index + 1))
            else:
                model_values_slices.append(slice(None))
        ref_values_slices = []
        for dim in self.get_reference_dimensions(ref_name):
            if dim in differing_dim_names.values():
                ref_values_slices.append(slice(0, 1))
            else:
                ref_values_slices.append(slice(0, self.ref_dim_size(dim)))
        return model_values_slices, ref_values_slices

    def get_differing_dim_names(self, model_var, ref_var):
        """Map each model dimension name to the differing reference name.

        :return: dict ``{model_dim: ref_dim}`` for every position where the
            names differ; empty when the dimension strings are equal.
        :raises ValueError: if the variables have different dimension counts.
        """
        dim_names = {}
        model_dims = self.__dimension_string(self.__model_file, model_var)
        ref_file = self.__reference_file if self.is_ref_data_split() else self.__model_file
        ref_dims = self.__dimension_string(ref_file, ref_var)
        if model_dims == ref_dims:
            return dim_names
        model_dims_list = model_dims.split(' ')
        ref_dims_list = ref_dims.split(' ')
        if not len(model_dims_list) == len(ref_dims_list):
            raise ValueError('model and gridded ref variable need to have identical dimension count')
        for model_dim, ref_dim in zip(model_dims_list, ref_dims_list):
            if not ref_dim == model_dim:
                dim_names[model_dim] = ref_dim
        return dim_names

    def __get_differing_model_dimension_var_indices(self, differing_dim_names):
        """Return, per differing model dimension, the index nearest to the reference value."""
        differing_model_dimension_var_indices = {}
        for differing_model_dim, differing_ref_dim in differing_dim_names.items():
            differing_dim_model_values = self.read_model(differing_model_dim)
            # we're assuming here that such differing dimensions for ref data have only a single value
            differing_dim_ref_value = self.read_reference(differing_ref_dim, (0, ))
            dim_var_index = self.__get_dimension_var_index(differing_dim_model_values, differing_dim_ref_value)
            differing_model_dimension_var_indices[differing_model_dim] = dim_var_index
        return differing_model_dimension_var_indices

    def __get_dimension_var_index(self, dim_values, ref_value):
        """Return the index of the value in ``dim_values`` nearest to ``ref_value``.

        The first index wins on ties; -1 is returned for empty input.
        """
        # find out grid position where differing dimension variables are nearest
        min_delta = float('inf')
        index = -1
        for loop_index, model_dim_value in enumerate(dim_values):
            current_delta = abs(model_dim_value - ref_value)
            if current_delta < min_delta:
                min_delta = current_delta
                index = loop_index
        return index
def test_get_gridded_reference_variables(self):
    """The gridded test file exposes exactly one reference variable, 'Ref_chl'."""
    filename = 'resources/ogs_test_smaller.nc'
    netcdf = NetCDFFacade(self.path + filename)
    # This facade is opened inside the test itself, so it has to be closed
    # here; addCleanup also runs when an assertion below fails.
    self.addCleanup(netcdf.close)
    reference_variables = netcdf.get_reference_variables()
    self.assertEqual(1, len(reference_variables))
    self.assertEqual('Ref_chl', reference_variables[0])
def setUp(self):
    """Open the shared test resource before each test.

    ``self.path`` is the parent of the directory containing this file,
    written with a trailing slash so resource names can be appended.
    """
    this_dir = os.path.dirname(os.path.realpath(__file__))
    self.path = this_dir + '/../'
    self.netcdf = NetCDFFacade(self.path + 'resources/test.nc')
class NetCDFFacade_test(unittest.TestCase):
    """Tests for NetCDFFacade, run against the files below 'resources/'."""

    def setUp(self):
        """Open 'resources/test.nc', the file used by most tests."""
        self.path = os.path.dirname(os.path.realpath(__file__)) + '/../'
        filename = 'resources/test.nc'
        self.netcdf = NetCDFFacade(self.path + filename)

    def tearDown(self):
        """Close the file opened in setUp."""
        self.netcdf.close()

    def test_get_dim_size(self):
        self.assertEqual(2, self.netcdf.get_dim_size("time"))
        self.assertEqual(2, self.netcdf.get_dim_size("depth"))
        self.assertEqual(2, self.netcdf.get_dim_size("lat"))
        self.assertEqual(4, self.netcdf.get_dim_size("lon"))

    def test_get_global_attribute_value(self):
        self.assertEqual("some title", self.netcdf.get_global_attribute("title"))
        self.assertEqual("institution code", self.netcdf.get_global_attribute("institution"))
        self.assertEqual("links to references", self.netcdf.get_global_attribute("references"))
        self.assertEqual("method of production", self.netcdf.get_global_attribute("source"))
        self.assertEqual("CF-1.6", self.netcdf.get_global_attribute("Conventions"))
        self.assertEqual("audit trail", self.netcdf.get_global_attribute("history"))
        self.assertEqual("comment", self.netcdf.get_global_attribute("comment"))

    def test_get_variable_attribute(self):
        self.assertEqual("longitude", self.netcdf.get_variable_attribute("lon", "long_name"))
        self.assertAlmostEqual(-180.0, self.netcdf.get_variable_attribute("lon", "valid_min"), 5)

    def test_get_dimension_string(self):
        self.assertEqual("lon", self.netcdf.get_dimension_string("lon"))
        self.assertEqual("lat", self.netcdf.get_dimension_string("lat"))
        self.assertEqual("time", self.netcdf.get_dimension_string("time"))
        self.assertEqual("time depth lat lon", self.netcdf.get_dimension_string("chl"))

    def test_get_dim_length(self):
        self.assertEqual(2, self.netcdf.get_dim_length("chl", 0))
        self.assertEqual(2, self.netcdf.get_dim_length("chl", 1))
        self.assertEqual(2, self.netcdf.get_dim_length("chl", 2))
        self.assertEqual(4, self.netcdf.get_dim_length("chl", 3))

    def test_get_data_via_origin_and_shape(self):
        assert_array_equal(
            array([[[[0.1111]]]], dtype='float32'),
            self.netcdf.get_data("chl", [0, 0, 0, 0], [1, 1, 1, 1]))

        assert_array_equal(
            array([
                [
                    [[0.1111, 0.2111],
                     [0.1121, 0.2121]],
                    [[0.1112, 0.2112],
                     [0.1122, 0.2122]]
                ],
                [
                    [[0.1113, 0.2113],
                     [0.1123, 0.2123]],
                    [[0.1114, 0.2114],
                     [0.1124, 0.2124]]
                ]
            ], dtype='float32'),
            self.netcdf.get_data("chl", [0, 0, 0, 0], [2, 2, 2, 2]))

        assert_array_equal(
            array([
                [
                    [[0.2111, 0.1211],
                     [0.2121, 0.1221]],
                ],
            ], dtype='float32'),
            self.netcdf.get_data("chl", [0, 0, 0, 1], [1, 1, 2, 2]))

    def test_get_dimensions(self):
        assert_array_equal(["time", "depth", "lat", "lon", "record_num"], self.netcdf.get_dimensions())
        self.assertEqual(4, len(self.netcdf.get_dimensions('chl')))

    def test_get_model_variables(self):
        assert_array_equal(['chl', 'sst'], self.netcdf.get_model_variables())

    def test_get_reference_variables(self):
        assert_array_equal(['chl_ref'], self.netcdf.get_reference_variables())

    def test_get_gridded_reference_variables(self):
        filename = 'resources/ogs_test_smaller.nc'
        netcdf = NetCDFFacade(self.path + filename)
        # tearDown only closes self.netcdf, so this extra file is closed
        # through addCleanup, which also runs when an assertion fails.
        self.addCleanup(netcdf.close)
        reference_variables = netcdf.get_reference_variables()
        self.assertEqual(1, len(reference_variables))
        self.assertEqual('Ref_chl', reference_variables[0])

    def test_get_reference_variable(self):
        self.assertIsNone(self.netcdf.get_reference_variable('sst_ref'))
        self.assertIsNotNone(self.netcdf.get_reference_variable('chl_ref'))

    def test_read_variable_fully(self):
        fullyReadChl = self.netcdf.read_variable_fully('chl')
        assert_array_equal(
            array(
                [[[[0.1111, 0.2111, 0.1211, 0.2211],
                   [0.1121, 0.2121, 0.1221, 0.2221]],
                  [[0.1112, 0.2112, 0.1212, 0.2212],
                   [0.1122, 0.2122, 0.1222, 0.2222]]],
                 [[[0.1113, 0.2113, 0.1213, 0.2213],
                   [0.1123, 0.2123, 0.1223, 0.2223]],
                  [[0.1114, 0.2114, 0.1214, 0.2214],
                   [0.1124, 0.2124, 0.1224, 0.2224]]]],
                dtype='float32'),
            fullyReadChl)

    def test_get_variable_size(self):
        self.assertEqual(2, self.netcdf.get_variable_size('lat'))
        self.assertEqual(4, self.netcdf.get_variable_size('lon'))
        self.assertEqual(2, self.netcdf.get_variable_size('time'))
        self.assertEqual(32, self.netcdf.get_variable_size('chl'))
        self.assertEqual(32, self.netcdf.get_variable_size('sst'))
        self.assertEqual(3, self.netcdf.get_variable_size('chl_ref'))

    def test_get_coordinate_variables(self):
        coordinate_variables = self.netcdf.get_coordinate_variables()
        self.assertEqual(4, len(coordinate_variables))
        self.assertIn('lat', coordinate_variables)
        self.assertIn('lon', coordinate_variables)
        self.assertIn('time', coordinate_variables)
        self.assertIn('depth', coordinate_variables)

    def test_get_ref_coordinate_variables(self):
        ref_coordinate_variables = self.netcdf.get_ref_coordinate_variables()
        self.assertEqual(4, len(ref_coordinate_variables))
        self.assertEqual('time_ref', ref_coordinate_variables[0])
        self.assertEqual('depth_ref', ref_coordinate_variables[1])
        self.assertEqual('lat_ref', ref_coordinate_variables[2])
        self.assertEqual('lon_ref', ref_coordinate_variables[3])

    def test_get_ref_coordinate_variables_empty(self):
        netcdf = NetCDFFacade(self.path + 'resources/test_without_records.nc')
        # tearDown only closes self.netcdf, so this extra file is closed
        # through addCleanup, which also runs when an assertion fails.
        self.addCleanup(netcdf.close)
        self.assertEqual(0, len(netcdf.get_reference_variables()))
        ref_coordinate_variables = netcdf.get_ref_coordinate_variables()
        self.assertEqual(0, len(ref_coordinate_variables))

    def test_has_model_dimension(self):
        self.assertTrue(self.netcdf.has_model_dimension('time'))
        self.assertTrue(self.netcdf.has_model_dimension('depth'))
        self.assertFalse(self.netcdf.has_model_dimension('kaesemauken'))

    def test_change_variable_values(self):
        chl = self.netcdf.get_variable('chl')
        self.assertAlmostEqual(0.1111, chl[:][0][0][0][0])
        # NOTE(review): presumably documents that assigning through chained
        # indexing leaves the stored values untouched -- the same value is
        # expected before and after the assignment.
        chl[0][0][0][0] = nan
        self.assertAlmostEqual(0.1111, chl[:][0][0][0][0])
def test_get_ref_coordinate_variables_empty(self):
    """A file without records has no reference or reference coordinate variables."""
    netcdf = NetCDFFacade(self.path + 'resources/test_without_records.nc')
    # This facade is opened inside the test itself, so it has to be closed
    # here; addCleanup also runs when an assertion below fails.
    self.addCleanup(netcdf.close)
    self.assertEqual(0, len(netcdf.get_reference_variables()))
    ref_coordinate_variables = netcdf.get_ref_coordinate_variables()
    self.assertEqual(0, len(ref_coordinate_variables))
class NetCDFFacade_test(unittest.TestCase):
    """Tests for NetCDFFacade, run against the files below 'resources/'."""

    def setUp(self):
        """Open 'resources/test.nc', the file used by most tests."""
        self.path = os.path.dirname(os.path.realpath(__file__)) + '/../'
        filename = 'resources/test.nc'
        self.netcdf = NetCDFFacade(self.path + filename)

    def tearDown(self):
        """Close the file opened in setUp."""
        self.netcdf.close()

    def test_get_dim_size(self):
        self.assertEqual(2, self.netcdf.get_dim_size("time"))
        self.assertEqual(2, self.netcdf.get_dim_size("depth"))
        self.assertEqual(2, self.netcdf.get_dim_size("lat"))
        self.assertEqual(4, self.netcdf.get_dim_size("lon"))

    def test_get_global_attribute_value(self):
        self.assertEqual("some title",
                         self.netcdf.get_global_attribute("title"))
        self.assertEqual("institution code",
                         self.netcdf.get_global_attribute("institution"))
        self.assertEqual("links to references",
                         self.netcdf.get_global_attribute("references"))
        self.assertEqual("method of production",
                         self.netcdf.get_global_attribute("source"))
        self.assertEqual("CF-1.6",
                         self.netcdf.get_global_attribute("Conventions"))
        self.assertEqual("audit trail",
                         self.netcdf.get_global_attribute("history"))
        self.assertEqual("comment",
                         self.netcdf.get_global_attribute("comment"))

    def test_get_variable_attribute(self):
        self.assertEqual(
            "longitude",
            self.netcdf.get_variable_attribute("lon", "long_name"))
        self.assertAlmostEqual(
            -180.0,
            self.netcdf.get_variable_attribute("lon", "valid_min"), 5)

    def test_get_dimension_string(self):
        self.assertEqual("lon", self.netcdf.get_dimension_string("lon"))
        self.assertEqual("lat", self.netcdf.get_dimension_string("lat"))
        self.assertEqual("time", self.netcdf.get_dimension_string("time"))
        self.assertEqual("time depth lat lon",
                         self.netcdf.get_dimension_string("chl"))

    def test_get_dim_length(self):
        self.assertEqual(2, self.netcdf.get_dim_length("chl", 0))
        self.assertEqual(2, self.netcdf.get_dim_length("chl", 1))
        self.assertEqual(2, self.netcdf.get_dim_length("chl", 2))
        self.assertEqual(4, self.netcdf.get_dim_length("chl", 3))

    def test_get_data_via_origin_and_shape(self):
        assert_array_equal(
            array([[[[0.1111]]]], dtype='float32'),
            self.netcdf.get_data("chl", [0, 0, 0, 0], [1, 1, 1, 1]))

        assert_array_equal(
            array([[[[0.1111, 0.2111], [0.1121, 0.2121]],
                    [[0.1112, 0.2112], [0.1122, 0.2122]]],
                   [[[0.1113, 0.2113], [0.1123, 0.2123]],
                    [[0.1114, 0.2114], [0.1124, 0.2124]]]],
                  dtype='float32'),
            self.netcdf.get_data("chl", [0, 0, 0, 0], [2, 2, 2, 2]))

        assert_array_equal(
            array([
                [
                    [[0.2111, 0.1211], [0.2121, 0.1221]],
                ],
            ], dtype='float32'),
            self.netcdf.get_data("chl", [0, 0, 0, 1], [1, 1, 2, 2]))

    def test_get_dimensions(self):
        assert_array_equal(["time", "depth", "lat", "lon", "record_num"],
                           self.netcdf.get_dimensions())
        self.assertEqual(4, len(self.netcdf.get_dimensions('chl')))

    def test_get_model_variables(self):
        assert_array_equal(['chl', 'sst'], self.netcdf.get_model_variables())

    def test_get_reference_variables(self):
        assert_array_equal(['chl_ref'],
                           self.netcdf.get_reference_variables())

    def test_get_gridded_reference_variables(self):
        filename = 'resources/ogs_test_smaller.nc'
        netcdf = NetCDFFacade(self.path + filename)
        # tearDown only closes self.netcdf, so this extra file is closed
        # through addCleanup, which also runs when an assertion fails.
        self.addCleanup(netcdf.close)
        reference_variables = netcdf.get_reference_variables()
        self.assertEqual(1, len(reference_variables))
        self.assertEqual('Ref_chl', reference_variables[0])

    def test_get_reference_variable(self):
        self.assertIsNone(self.netcdf.get_reference_variable('sst_ref'))
        self.assertIsNotNone(self.netcdf.get_reference_variable('chl_ref'))

    def test_read_variable_fully(self):
        fullyReadChl = self.netcdf.read_variable_fully('chl')
        assert_array_equal(
            array([[[[0.1111, 0.2111, 0.1211, 0.2211],
                     [0.1121, 0.2121, 0.1221, 0.2221]],
                    [[0.1112, 0.2112, 0.1212, 0.2212],
                     [0.1122, 0.2122, 0.1222, 0.2222]]],
                   [[[0.1113, 0.2113, 0.1213, 0.2213],
                     [0.1123, 0.2123, 0.1223, 0.2223]],
                    [[0.1114, 0.2114, 0.1214, 0.2214],
                     [0.1124, 0.2124, 0.1224, 0.2224]]]],
                  dtype='float32'),
            fullyReadChl)

    def test_get_variable_size(self):
        self.assertEqual(2, self.netcdf.get_variable_size('lat'))
        self.assertEqual(4, self.netcdf.get_variable_size('lon'))
        self.assertEqual(2, self.netcdf.get_variable_size('time'))
        self.assertEqual(32, self.netcdf.get_variable_size('chl'))
        self.assertEqual(32, self.netcdf.get_variable_size('sst'))
        self.assertEqual(3, self.netcdf.get_variable_size('chl_ref'))

    def test_get_coordinate_variables(self):
        coordinate_variables = self.netcdf.get_coordinate_variables()
        self.assertEqual(4, len(coordinate_variables))
        self.assertIn('lat', coordinate_variables)
        self.assertIn('lon', coordinate_variables)
        self.assertIn('time', coordinate_variables)
        self.assertIn('depth', coordinate_variables)

    def test_get_ref_coordinate_variables(self):
        ref_coordinate_variables = self.netcdf.get_ref_coordinate_variables()
        self.assertEqual(4, len(ref_coordinate_variables))
        self.assertEqual('time_ref', ref_coordinate_variables[0])
        self.assertEqual('depth_ref', ref_coordinate_variables[1])
        self.assertEqual('lat_ref', ref_coordinate_variables[2])
        self.assertEqual('lon_ref', ref_coordinate_variables[3])

    def test_get_ref_coordinate_variables_empty(self):
        netcdf = NetCDFFacade(self.path + 'resources/test_without_records.nc')
        # tearDown only closes self.netcdf, so this extra file is closed
        # through addCleanup, which also runs when an assertion fails.
        self.addCleanup(netcdf.close)
        self.assertEqual(0, len(netcdf.get_reference_variables()))
        ref_coordinate_variables = netcdf.get_ref_coordinate_variables()
        self.assertEqual(0, len(ref_coordinate_variables))

    def test_has_model_dimension(self):
        self.assertTrue(self.netcdf.has_model_dimension('time'))
        self.assertTrue(self.netcdf.has_model_dimension('depth'))
        self.assertFalse(self.netcdf.has_model_dimension('kaesemauken'))

    def test_change_variable_values(self):
        chl = self.netcdf.get_variable('chl')
        self.assertAlmostEqual(0.1111, chl[:][0][0][0][0])
        # NOTE(review): presumably documents that assigning through chained
        # indexing leaves the stored values untouched -- the same value is
        # expected before and after the assignment.
        chl[0][0][0][0] = nan
        self.assertAlmostEqual(0.1111, chl[:][0][0][0][0])
class Data(object):
    """Cached access to model and reference variables stored in NetCDF files.

    Model data always comes from the model file. Reference data comes from a
    separate reference file when one was given to the constructor ("split"
    reference data), and from the model file otherwise.

    Variables that are read are kept as instance attributes named after the
    variable and tracked in ``cached_list``; ``current_memory`` holds the
    accumulated size of the cached variables as reported by
    ``compute_variable_size``.
    """

    def __init__(self,
                 model_file_name,
                 ref_file_name=None,
                 max_cache_size=None):
        """Open the requested files and set up an empty cache.

        :param model_file_name: path of the model file; nothing is opened
            when this is None.
        :param ref_file_name: optional path of a separate reference file.
        :param max_cache_size: upper bound used by ``ensure_memory``;
            defaults to ``sys.maxsize`` when None.
        """
        if ref_file_name is not None:
            self.__reference_file = NetCDFFacade(ref_file_name)
        if model_file_name is not None:
            self.__model_file = NetCDFFacade(model_file_name)
        self.max_cache_size = max_cache_size if max_cache_size is not None else sys.maxsize
        self.cached_list = []
        self.current_memory = 0

    def model_vars(self):
        """Return the model variables of the model file (looked up once)."""
        if not hasattr(self, 'model_variables'):
            self.model_variables = self.__model_file.get_model_variables()
        return self.model_variables

    def close(self):
        """Close the model file and, if present, the reference file."""
        self.__model_file.close()
        if self.is_ref_data_split():
            self.__reference_file.close()

    def ref_vars(self):
        """Return the reference variables of all opened files (looked up once)."""
        if hasattr(self, 'reference_variables'):
            return self.reference_variables
        reference_variables = self.__model_file.get_reference_variables()
        if self.is_ref_data_split():
            reference_variables.extend(
                self.__reference_file.get_reference_variables())
        self.reference_variables = reference_variables
        return reference_variables

    def has_model_dimension(self, dimension_name):
        """Return whether the model file has the given dimension."""
        return self.__model_file.has_model_dimension(dimension_name)

    def reference_coordinate_variables(self):
        """Return the reference coordinate variables of all opened files."""
        variables = self.__model_file.get_ref_coordinate_variables()
        if self.is_ref_data_split():
            variables.extend(
                self.__reference_file.get_ref_coordinate_variables())
        return variables

    def model_dim_size(self, dim_name):
        """Return the size of a dimension of the model file."""
        return self.dim_size(self.__model_file, dim_name)

    def ref_dim_size(self, dim_name):
        """Return the size of a dimension of the file holding reference data."""
        if self.is_ref_data_split():
            return self.dim_size(self.__reference_file, dim_name)
        return self.dim_size(self.__model_file, dim_name)

    def dim_size(self, ncfile, dim_name):
        """Return the size of dimension ``dim_name`` in ``ncfile``."""
        return ncfile.get_dim_size(dim_name)

    def __dimension_string(self, ncfile, variable_name):
        """Return the dimension string of ``variable_name`` in ``ncfile``."""
        return ncfile.get_dimension_string(variable_name)

    def is_ref_data_split(self):
        """Return True when a separate reference file was opened."""
        # '_Data__reference_file' is the name-mangled form of the private
        # attribute that __init__ only sets when a reference file is given.
        return hasattr(self, '_Data__reference_file')

    def reference_records_count(self, dimension_profile):
        """Count the reference values whose variables match a dimension set.

        For every reference variable whose set of dimension names equals
        ``dimension_profile``, the product of the sizes of those dimensions
        is added to the result.

        :param dimension_profile: set of dimension names to match.
        :return: the accumulated count, 0 if there are no reference variables.
        """
        ref_vars = self.ref_vars()
        if not ref_vars:
            return 0
        if self.is_ref_data_split():
            ncfile = self.__reference_file
        else:
            ncfile = self.__model_file
        dim_size = 0
        for var in ref_vars:
            dimensions = self.__dimension_string(ncfile, var).split(' ')
            dimension_set = set(dimensions)
            if dimension_profile == dimension_set:
                temp = 1
                for dim in dimension_profile:
                    temp *= self.dim_size(ncfile, dim)
                dim_size += temp
        return dim_size

    def get_reference_dimensions(self, variable_name=None):
        """Return the dimensions reported by the file holding reference data."""
        if self.is_ref_data_split():
            ncfile = self.__reference_file
        else:
            ncfile = self.__model_file
        return ncfile.get_dimensions(variable_name)

    def get_model_dimensions(self, variable_name=None):
        """Return the dimensions reported by the model file (memoised per name)."""
        if not hasattr(self, 'model_dimensions'):
            self.model_dimensions = {}
        if variable_name not in self.model_dimensions:
            self.model_dimensions[
                variable_name] = self.__model_file.get_dimensions(
                    variable_name)
        return self.model_dimensions[variable_name]

    def read_model(self, variable_name, origin=None):
        """Read a variable from the model file; see ``__read``."""
        return self.__read(self.__model_file, variable_name, origin)

    def read_reference(self, variable_name, origin=None):
        """Read a variable from the file holding reference data; see ``__read``."""
        if self.is_ref_data_split():
            ncfile = self.__reference_file
        else:
            ncfile = self.__model_file
        return self.__read(ncfile, variable_name, origin)

    def find_item_to_delete(self):
        """Remove and return the name of the largest cached variable."""
        self.cached_list = sorted(self.cached_list,
                                  key=self.compute_variable_size,
                                  reverse=True)
        return self.cached_list.pop(0)

    def get_current_cache_size(self):
        """Return the summed size of all variables listed in the cache."""
        return sum(
            self.compute_variable_size(variable_name)
            for variable_name in self.cached_list)

    def ensure_memory(self, variable_size):
        """Evict cached variables until ``variable_size`` more fits the limit.

        Largest variables are evicted first. Eviction stops when the cache is
        empty, even if the requested size still exceeds ``max_cache_size``.
        """
        will_cache_overflow = self.max_cache_size <= self.current_memory + variable_size
        while will_cache_overflow and len(self.cached_list) > 0:
            var_to_delete = self.find_item_to_delete()
            self.current_memory -= self.compute_variable_size(var_to_delete)
            logging.debug('Deleting variable \'%s\' from cache.' %
                          var_to_delete)
            self.__delattr__(var_to_delete)
            will_cache_overflow = self.max_cache_size <= self.current_memory + variable_size
        if not os.name == 'nt':
            logging.debug(
                'Memory in use after \'ensure_memory\' called: %.2f MB' %
                (mem() / 1024))

    def __read(self, ncfile, variable_name, origin=None):
        """Return a variable's data, reading it fully into the cache if needed.

        :param ncfile: file to read from when the variable is not cached.
        :param variable_name: name of the variable.
        :param origin: optional index sequence; when given, only the value at
            that position is returned.
        :return: a masked array of the whole variable when ``origin`` is
            None, otherwise the single value at ``origin``.
        """
        if not self.__is_cached(variable_name):
            # The size is only needed on a cache miss, so it is looked up
            # once here and reused for the memory bookkeeping below.
            variable_size = self.compute_variable_size(variable_name)
            self.ensure_memory(variable_size)
            logging.debug('Reading variable \'%s\' fully into cache.' %
                          variable_name)
            if not os.name == 'nt':
                logging.debug(
                    'Memory in use before reading variable %s fully into cache: %.2f MB'
                    % (variable_name, mem() / 1024))
            variable = ncfile.get_variable(variable_name)
            # The cached data lives in an instance attribute named after the variable.
            self.__setattr__(variable_name, variable[:])
            self.cached_list.append(variable_name)
            self.current_memory += variable_size
            if not os.name == 'nt':
                logging.debug(
                    'Memory in use after reading variable %s fully into cache: %.2f MB'
                    % (variable_name, mem() / 1024))
        if origin is None:
            return ma.array(self.__getattribute__(variable_name))
        return self.get_data(origin, variable_name)

    def get_data(self, origin, variable_name):
        """Read single pixel from origin.

        The variable must already be cached as an instance attribute.
        """
        return self.__getattribute__(variable_name)[tuple(origin)]

    def __is_cached(self, variable_name):
        """Return whether ``variable_name`` is listed in the cache."""
        return variable_name in self.cached_list

    def __find_model_variable_name(self, possible_names, standard_name):
        """Find a model variable by candidate names or by ``standard_name``.

        :raises ValueError: if neither lookup finds a variable.
        """
        for name in possible_names:
            if self.__model_file.get_variable(name) is not None:
                return name
        # NOTE(review): this fallback expects get_coordinate_variables() to
        # yield objects with a ``name`` attribute and the facade to offer an
        # ``attribute`` method -- confirm against NetCDFFacade.
        for var in self.__model_file.get_coordinate_variables():
            if self.__model_file.attribute(var.name,
                                           'standard_name') == standard_name:
                return var.name
        raise ValueError('Unable to find \'%s\'-variable.' % standard_name)

    def find_model_latitude_variable_name(self):
        """Return the name of the model's latitude variable."""
        return self.__find_model_variable_name(['lat', 'latitude'],
                                               'latitude')

    def find_model_longitude_variable_name(self):
        """Return the name of the model's longitude variable."""
        return self.__find_model_variable_name(['lon', 'longitude'],
                                               'longitude')

    def unit(self, variable_name):
        """Return the unit of a variable, preferring the model file.

        :return: the unit found in the model file, else the one found in the
            reference file, else None.
        :raises ValueError: if the variable is in none of the opened files.
        """
        is_not_in_ref_file = (
            not self.is_ref_data_split()
            or self.__reference_file.get_variable(variable_name) is None)
        is_not_in_model_file = self.__model_file.get_variable(
            variable_name) is None
        if is_not_in_model_file and is_not_in_ref_file:
            raise ValueError('Variable \'%s\' not found.' % variable_name)
        model_unit = utils.get_unit(self.__model_file, variable_name)
        if model_unit:
            return model_unit
        if self.is_ref_data_split():
            ref_unit = utils.get_unit(self.__reference_file, variable_name)
            if ref_unit:
                return ref_unit
        return None

    def compute_variable_size(self, variable_name):
        """Return the size of a variable computed from its shape and item size.

        The model file is searched first, then the reference file.

        :raises ValueError: if no opened file contains the variable.
        """
        variable = self.__model_file.get_variable(variable_name)
        if variable is None and self.is_ref_data_split():
            variable = self.__reference_file.get_variable(variable_name)
        if variable is None:
            raise ValueError('No variable found with name \'%s\'' %
                             variable_name)
        return compute_array_size(variable.shape, variable.dtype.itemsize)

    def has_one_dim_ref_var(self):
        """Return True if any reference variable has exactly one dimension."""
        return any(
            len(self.get_reference_dimensions(var)) == 1
            for var in self.ref_vars())

    def get_values(self, ref_name, model_name):
        """Return the matching unmasked values of a reference/model pair.

        Both variables are sliced as computed by ``get_slices``; every
        position masked in either of them is dropped from both.

        :return: tuple ``(reference_values, model_values)`` of compressed
            one-dimensional arrays.
        """
        model_values_slices, ref_values_slices = self.get_slices(
            model_name, ref_name)
        # NumPy requires a tuple for multidimensional indexing; indexing with
        # a list of slices is deprecated and no longer selects per axis.
        model_values = self.read_model(model_name)[tuple(model_values_slices)]
        reference_values = self.read_reference(ref_name)[tuple(
            ref_values_slices)]
        # The slices are views onto the cached arrays. Assigning to their
        # ``mask`` would write the combined mask back into the cache, so new
        # masked arrays are built instead.
        combined_mask = ma.getmaskarray(reference_values) | ma.getmaskarray(
            model_values)
        logging.debug('Compressing ref-variable %s' % ref_name)
        reference_values = ma.array(ma.getdata(reference_values),
                                    mask=combined_mask).compressed()
        logging.debug('Compressing model variable %s' % model_name)
        model_values = ma.array(ma.getdata(model_values),
                                mask=combined_mask).compressed()
        return reference_values, model_values

    def get_slices(self, model_name, ref_name):
        """Compute per-dimension slices aligning a model and a reference variable.

        Dimensions that differ between the two variables are reduced to a
        single index: the nearest matching position for the model variable
        and position 0 for the reference variable.

        :return: tuple ``(model_values_slices, ref_values_slices)`` of lists
            of ``slice`` objects, one per dimension.
        """
        differing_dim_names = self.get_differing_dim_names(
            model_name, ref_name)
        differing_model_dimension_var_indices = self.__get_differing_model_dimension_var_indices(
            differing_dim_names)
        model_values_slices = []
        for dim in self.get_model_dimensions(model_name):
            if dim in differing_dim_names:
                index = differing_model_dimension_var_indices[dim]
                model_values_slices.append(slice(index, index + 1))
            else:
                model_values_slices.append(slice(None))
        ref_values_slices = []
        for dim in self.get_reference_dimensions(ref_name):
            if dim in differing_dim_names.values():
                ref_values_slices.append(slice(0, 1))
            else:
                ref_values_slices.append(slice(0, self.ref_dim_size(dim)))
        return model_values_slices, ref_values_slices

    def get_differing_dim_names(self, model_var, ref_var):
        """Map each model dimension name to the differing reference name.

        :return: dict ``{model_dim: ref_dim}`` for every position where the
            names differ; empty when the dimension strings are equal.
        :raises ValueError: if the variables have different dimension counts.
        """
        dim_names = {}
        model_dims = self.__dimension_string(self.__model_file, model_var)
        if self.is_ref_data_split():
            ref_file = self.__reference_file
        else:
            ref_file = self.__model_file
        ref_dims = self.__dimension_string(ref_file, ref_var)
        if model_dims == ref_dims:
            return dim_names
        model_dims_list = model_dims.split(' ')
        ref_dims_list = ref_dims.split(' ')
        if not len(model_dims_list) == len(ref_dims_list):
            raise ValueError(
                'model and gridded ref variable need to have identical dimension count'
            )
        for model_dim, ref_dim in zip(model_dims_list, ref_dims_list):
            if not ref_dim == model_dim:
                dim_names[model_dim] = ref_dim
        return dim_names

    def __get_differing_model_dimension_var_indices(self, differing_dim_names):
        """Return, per differing model dimension, the index nearest to the reference value."""
        differing_model_dimension_var_indices = {}
        for differing_model_dim, differing_ref_dim in differing_dim_names.items():
            differing_dim_model_values = self.read_model(differing_model_dim)
            # we're assuming here that such differing dimensions for ref data have only a single value
            differing_dim_ref_value = self.read_reference(
                differing_ref_dim, (0, ))
            dim_var_index = self.__get_dimension_var_index(
                differing_dim_model_values, differing_dim_ref_value)
            differing_model_dimension_var_indices[
                differing_model_dim] = dim_var_index
        return differing_model_dimension_var_indices

    def __get_dimension_var_index(self, dim_values, ref_value):
        """Return the index of the value in ``dim_values`` nearest to ``ref_value``.

        The first index wins on ties; -1 is returned for empty input.
        """
        # find out grid position where differing dimension variables are nearest
        min_delta = float('inf')
        index = -1
        for loop_index, model_dim_value in enumerate(dim_values):
            current_delta = abs(model_dim_value - ref_value)
            if current_delta < min_delta:
                min_delta = current_delta
                index = loop_index
        return index