def get_variable_metadata_from_request_dataset(driver, variable):
    """Return the source metadata dictionary for *variable*.

    :param driver: Driver object providing ``metadata_source`` for the lookup.
    :param variable: Variable whose group and source name locate its metadata.
    :raises VariableMissingMetadataError: If the group metadata has no entry
     for the variable's source name.
    :rtype: dict
    """
    group_variables = get_group(driver.metadata_source, variable.group, has_root=False)['variables']
    source_name = variable._source_name
    try:
        return group_variables[source_name]
    except KeyError:
        raise VariableMissingMetadataError(source_name)
def test_create_dist(self):
    """Test distribution creation from flat and nested netCDF group/dimension layouts."""

    def _create_dimensions_(ds, k):
        # Create up to two dimensions on the dataset/group per the keyword combination.
        if k.dim_count > 0:
            ds.createDimension('one', 1)
            if k.dim_count == 2:
                ds.createDimension('two', 2)

    kwds = dict(dim_count=[0, 1, 2], nested=[False, True])
    for k in self.iter_product_keywords(kwds):
        path = self.get_temporary_file_path('{}.nc'.format(k.dim_count))
        with self.nc_scope(path, 'w') as ds:
            _create_dimensions_(ds, k)
            if k.nested:
                # Build a nested group tree; note the repeated group name 'nest1' at two depths.
                group1 = ds.createGroup('nest1')
                _create_dimensions_(group1, k)
                group2 = group1.createGroup('nest2')
                _create_dimensions_(group2, k)
                group3 = group2.createGroup('nest1')
                _create_dimensions_(group3, k)
                group3a = group2.createGroup('nest3')
                _create_dimensions_(group3a, k)
                # Extra dimension only on the deepest 'nest1' group.
                group3.createDimension('outlier', 4)

        rd = RequestDataset(uri=path)
        driver = DriverNetcdf(rd)
        actual = driver.create_dist().mapping

        # All dimensions are not distributed.
        for keyseq in iter_all_group_keys(actual[MPI_RANK]):
            group = get_group(actual[MPI_RANK], keyseq)
            for dim in list(group['dimensions'].values()):
                self.assertFalse(dim.dist)

        if k.dim_count == 0 and k.nested:
            # With no created dimensions, only the 'outlier' dimension should appear.
            desired = {None: {'variables': {}, 'dimensions': {}, 'groups': {
                u'nest1': {'variables': {}, 'dimensions': {}, 'groups': {
                    u'nest2': {'variables': {}, 'dimensions': {}, 'groups': {
                        u'nest3': {'variables': {}, 'dimensions': {}, 'groups': {}},
                        u'nest1': {'variables': {}, 'dimensions': {
                            u'outlier': Dimension(name='outlier', size=4, size_current=4, dist=False,
                                                  is_empty=False, src_idx='auto')}, 'groups': {}}}}}}}}}
            self.assertEqual(actual[MPI_RANK], desired)

        if k.dim_count == 2 and k.nested:
            self.assertIsNotNone(driver.metadata_source['groups']['nest1']['groups']['nest2'])
            # Only the group-key structure is compared below, not dimension contents.
            two_dimensions = [Dimension(name='one', size=1, size_current=1),
                              Dimension(name='two', size=2, size_current=2)]
            nest1 = {'dimensions': two_dimensions, 'groups': {}}
            template = deepcopy(nest1)
            nest1['groups']['nest2'] = deepcopy(template)
            nest1['groups']['nest2']['groups']['nest1'] = deepcopy(template)
            nest1['groups']['nest2']['groups']['nest3'] = deepcopy(template)
            nest1['groups']['nest2']['groups']['nest1']['dimensions'].append(Dimension('outlier', 4))
            desired = {None: {'dimensions': two_dimensions, 'groups': {'nest1': nest1}}}
            groups_actual = list(iter_all_group_keys((actual[MPI_RANK])))
            groups_desired = list(iter_all_group_keys(desired))
            self.assertEqual(groups_actual, groups_desired)
def create_dist(self, metadata=None):
    """
    Create a distribution from global metadata. In general, this should not be overloaded by subclasses.

    :param dict metadata: Global metadata to use for creating a distribution.
    :rtype: :class:`ocgis.OcgDist`
    """
    ompi = OcgDist(size=vm.size, ranks=vm.ranks)

    # Convert metadata into a grouping consistent with the MPI dimensions.
    if metadata is None:
        metadata = self.metadata_source
    # Root the metadata under the None key so group iteration is uniform.
    metadata = {None: metadata}

    for group_index in iter_all_group_keys(metadata):
        group_meta = get_group(metadata, group_index)

        # Add the dimensions to the distribution object.
        dimensions = self.create_dimensions(group_meta)

        # Only build a distribution if the group has more than one dimension.
        if len(dimensions) == 0:
            # Ensure the (empty) group is still registered on the distribution.
            _ = ompi.get_group(group=group_index)
        else:
            for dimension_name, dimension_meta in list(group_meta['dimensions'].items()):
                target_dimension = dimensions[dimension_name]
                # Default to non-distributed when metadata carries no 'dist' flag.
                target_dimension.dist = group_meta['dimensions'][dimension_name].get('dist', False)
                ompi.add_dimension(target_dimension, group=group_index)

            try:
                dimension_map = self.rd.dimension_map.get_group(group_index)
            except DimensionMapError:
                # Likely a user-provided dimension map.
                continue

            # dimension_map = get_group(self.rd.dimension_map, group_index, has_root=False)
            distributed_dimension_name = self.get_distributed_dimension_name(
                dimension_map, group_meta['dimensions'], decomp_type=self.rd.decomp_type)
            # Allow no distributed dimensions to be returned.
            if distributed_dimension_name is not None:
                # Flag the chosen dimension as distributed on every rank's copy.
                for target_rank in range(ompi.size):
                    distributed_dimension = ompi.get_dimension(distributed_dimension_name, group=group_index,
                                                               rank=target_rank)
                    distributed_dimension.dist = True

    ompi.update_dimension_bounds()
    return ompi
def create_dist(self, metadata=None):
    """
    Create a distribution from global metadata. In general, this should not be overloaded by subclasses.

    :param dict metadata: Global metadata to use for creating a distribution.
    :rtype: :class:`ocgis.OcgDist`
    """
    dist = OcgDist(size=vm.size, ranks=vm.ranks)

    # Convert metadata into a grouping consistent with the MPI dimensions.
    source_meta = self.metadata_source if metadata is None else metadata
    rooted_meta = {None: source_meta}

    for group_key in iter_all_group_keys(rooted_meta):
        meta = get_group(rooted_meta, group_key)

        # Add the dimensions to the distribution object.
        created_dims = self.create_dimensions(meta)

        # Only build a distribution if the group has more than one dimension.
        if len(created_dims) == 0:
            _ = dist.get_group(group=group_key)
            continue

        dim_meta_map = meta['dimensions']
        for dname in list(dim_meta_map.keys()):
            target = created_dims[dname]
            target.dist = dim_meta_map[dname].get('dist', False)
            dist.add_dimension(target, group=group_key)

        try:
            dmap = self.rd.dimension_map.get_group(group_key)
        except DimensionMapError:
            # Likely a user-provided dimension map.
            continue

        dist_dim_name = self.get_distributed_dimension_name(dmap, dim_meta_map,
                                                            decomp_type=self.rd.decomp_type)
        # Allow no distributed dimensions to be returned.
        if dist_dim_name is not None:
            for rank in range(dist.size):
                dist.get_dimension(dist_dim_name, group=group_key, rank=rank).dist = True

    dist.update_dimension_bounds()
    return dist
def get_dist(self):
    """
    Create a distribution (dimensions and variables) from the driver's source metadata.

    :rtype: :class:`ocgis.OcgDist`
    """
    ompi = OcgDist(size=vm.size, ranks=vm.ranks)
    # ompi = OcgDist()

    # Convert metadata into a grouping consistent with the MPI dimensions.
    metadata = {None: self.metadata_source}

    for group_index in iter_all_group_keys(metadata):
        group_meta = get_group(metadata, group_index)

        # Add the dimensions to the distribution object.
        dimensions = self._get_dimensions_main_(group_meta)
        for dimension_name, dimension_meta in list(group_meta['dimensions'].items()):
            target_dimension = find_dimension_in_sequence(dimension_name, dimensions)
            # Default to non-distributed when metadata carries no 'dist' flag.
            target_dimension.dist = group_meta['dimensions'][dimension_name].get('dist', False)
            ompi.add_dimension(target_dimension, group=group_index)

        try:
            dimension_map = self.rd.dimension_map.get_group(group_index)
        except DimensionMapError:
            # Likely a user-provided dimension map.
            continue

        # dimension_map = get_group(self.rd.dimension_map, group_index, has_root=False)
        distributed_dimension_name = self.get_distributed_dimension_name(dimension_map, group_meta['dimensions'])
        # Allow no distributed dimensions to be returned.
        if distributed_dimension_name is not None:
            # Flag the chosen dimension as distributed on every rank's copy.
            for target_rank in range(ompi.size):
                distributed_dimension = ompi.get_dimension(distributed_dimension_name, group=group_index,
                                                           rank=target_rank)
                distributed_dimension.dist = True

        # Add the variables to the distribution object.
        for variable_name, variable_meta in list(group_meta['variables'].items()):
            ompi.add_variable(variable_name, dimensions=variable_meta['dimensions'], group=group_index)

    # tdk: this will have to be moved to account for slicing
    ompi.update_dimension_bounds()
    return ompi
def init_variable_using_metadata_for_netcdf(variable, metadata):
    """Initialize a variable's data type, fill value, and attributes from netCDF source metadata.

    :param variable: Target variable to initialize; its private ``_dtype``, ``_fill_value``,
     and ``_attrs`` members are updated in place.
    :param dict metadata: Group-structured source metadata containing a ``'variables'`` mapping.
    """
    source = get_group(metadata, variable.group, has_root=False)
    desired_name = variable.source_name
    var = source['variables'][desired_name]

    if vm.is_null:
        variable.convert_to_empty()
    else:
        # Update data type and fill value.
        if is_auto_dtype(variable._dtype) or var.get('dtype_packed') is not None:
            var_dtype = var['dtype']
            desired_dtype = deepcopy(var_dtype)
            if isinstance(var_dtype, VLType):
                desired_dtype = ObjectType(var_dtype)
            # Bug fix: use .get() instead of direct indexing so a metadata entry
            # without a 'dtype_packed' key (reached via is_auto_dtype) does not
            # raise KeyError. Matches the .get() access used everywhere else here.
            elif var.get('dtype_packed') is not None:
                desired_dtype = deepcopy(var['dtype_packed'])
            variable._dtype = desired_dtype

        if variable._fill_value == 'auto':
            if var.get('fill_value_packed') is not None:
                desired_fill_value = var['fill_value_packed']
            else:
                desired_fill_value = var.get('fill_value')
            variable._fill_value = deepcopy(desired_fill_value)

        variable_attrs = variable._attrs
        # Offset and scale factors are not supported by OCGIS. The data is unpacked when written to a new output file.
        # TODO: Consider supporting offset and scale factors for write operations.
        exclude = ['add_offset', 'scale_factor']
        for k, v in list(var.get('attrs', {}).items()):
            if k in exclude:
                continue
            # Do not overwrite attributes already set on the variable.
            if k not in variable_attrs:
                variable_attrs[k] = deepcopy(v)

        # The conform units to value should be the default units value. Units will be converted on variable load.
        conform_units_to = var.get('conform_units_to')
        if conform_units_to is not None:
            variable_attrs['units'] = conform_units_to
def init_variable_using_metadata_for_netcdf(variable, metadata):
    """Initialize a variable's data type, fill value, and attributes from netCDF source metadata.

    :param variable: Target variable to initialize; its private ``_dtype``, ``_fill_value``,
     and ``_attrs`` members are updated in place.
    :param dict metadata: Group-structured source metadata containing a ``'variables'`` mapping.
    """
    source = get_group(metadata, variable.group, has_root=False)
    desired_name = variable.source_name
    var = source['variables'][desired_name]

    if vm.is_null:
        variable.convert_to_empty()
    else:
        # Update data type and fill value.
        if is_auto_dtype(variable._dtype) or var.get('dtype_packed') is not None:
            var_dtype = var['dtype']
            desired_dtype = deepcopy(var_dtype)
            if isinstance(var_dtype, VLType):
                desired_dtype = ObjectType(var_dtype)
            # Bug fix: use .get() instead of direct indexing so a metadata entry
            # without a 'dtype_packed' key (reached via is_auto_dtype) does not
            # raise KeyError. Matches the .get() access used everywhere else here.
            elif var.get('dtype_packed') is not None:
                desired_dtype = deepcopy(var['dtype_packed'])
            variable._dtype = desired_dtype

        if variable._fill_value == 'auto':
            if var.get('fill_value_packed') is not None:
                desired_fill_value = var['fill_value_packed']
            else:
                desired_fill_value = var.get('fill_value')
            variable._fill_value = deepcopy(desired_fill_value)

        variable_attrs = variable._attrs
        # Offset and scale factors are not supported by OCGIS. The data is unpacked when written to a new output file.
        # TODO: Consider supporting offset and scale factors for write operations.
        exclude = ['add_offset', 'scale_factor']
        for k, v in list(var.get('attrs', {}).items()):
            if k in exclude:
                continue
            # Do not overwrite attributes already set on the variable.
            if k not in variable_attrs:
                variable_attrs[k] = deepcopy(v)

        # The conform units to value should be the default units value. Units will be converted on variable load.
        conform_units_to = var.get('conform_units_to')
        if conform_units_to is not None:
            variable_attrs['units'] = conform_units_to
def iter_group_keys(ddict, keyseq):
    """Yield a key sequence for each child group directly under *keyseq*.

    Each yielded value is a deep copy of *keyseq* with the child group's
    name appended, leaving the caller's sequence untouched.
    """
    children = get_group(ddict, keyseq).get('groups', {})
    for child_name in children:
        yield deepcopy(keyseq) + [child_name]
def get_group_metadata(group_index, metadata, has_root=False):
    """Return the metadata dictionary for the group identified by *group_index*.

    Thin convenience wrapper around :func:`get_group` with the argument
    order swapped.
    """
    group_meta = get_group(metadata, group_index, has_root=has_root)
    return group_meta
def iter_groups(self, rank=MPI_RANK):
    """Yield ``(group_key, group_data)`` pairs for every group in *rank*'s mapping."""
    from ocgis.driver.base import iter_all_group_keys

    rank_mapping = self.mapping[rank]
    for current_key in iter_all_group_keys(rank_mapping):
        yield current_key, get_group(rank_mapping, current_key)
def test_get_dist(self):
    """Test distribution retrieval from flat and nested netCDF group/dimension layouts."""

    def _create_dimensions_(ds, k):
        # Create up to two dimensions on the dataset/group per the keyword combination.
        if k.dim_count > 0:
            ds.createDimension('one', 1)
            if k.dim_count == 2:
                ds.createDimension('two', 2)

    kwds = dict(dim_count=[0, 1, 2], nested=[False, True])
    for k in self.iter_product_keywords(kwds):
        path = self.get_temporary_file_path('{}.nc'.format(k.dim_count))
        with self.nc_scope(path, 'w') as ds:
            _create_dimensions_(ds, k)
            if k.nested:
                # Build a nested group tree; note the repeated group name 'nest1' at two depths.
                group1 = ds.createGroup('nest1')
                _create_dimensions_(group1, k)
                group2 = group1.createGroup('nest2')
                _create_dimensions_(group2, k)
                group3 = group2.createGroup('nest1')
                _create_dimensions_(group3, k)
                group3a = group2.createGroup('nest3')
                _create_dimensions_(group3a, k)
                # Extra dimension only on the deepest 'nest1' group.
                group3.createDimension('outlier', 4)

        rd = RequestDataset(uri=path)
        driver = DriverNetcdf(rd)
        actual = driver.get_dist().mapping

        # All dimensions are not distributed.
        for keyseq in iter_all_group_keys(actual[MPI_RANK]):
            group = get_group(actual[MPI_RANK], keyseq)
            for dim in list(group['dimensions'].values()):
                self.assertFalse(dim.dist)

        if k.dim_count == 0 and k.nested:
            # With no created dimensions, only the 'outlier' dimension should appear.
            desired = {
                None: {
                    'variables': {},
                    'dimensions': {},
                    'groups': {
                        'nest1': {
                            'variables': {},
                            'dimensions': {},
                            'groups': {
                                'nest2': {
                                    'variables': {},
                                    'dimensions': {},
                                    'groups': {
                                        'nest1': {
                                            'variables': {},
                                            'dimensions': {
                                                'outlier': Dimension(name='outlier', size=4, size_current=4,
                                                                     dist=False, src_idx='auto')
                                            },
                                            'groups': {}
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            self.assertEqual(actual[MPI_RANK], desired)

        if k.dim_count == 2 and k.nested:
            self.assertIsNotNone(driver.metadata_source['groups']['nest1']['groups']['nest2'])
            # Only the group-key structure is compared below, not dimension contents.
            two_dimensions = [
                Dimension(name='one', size=1, size_current=1),
                Dimension(name='two', size=2, size_current=2)
            ]
            nest1 = {'dimensions': two_dimensions, 'groups': {}}
            template = deepcopy(nest1)
            nest1['groups']['nest2'] = deepcopy(template)
            nest1['groups']['nest2']['groups']['nest1'] = deepcopy(template)
            nest1['groups']['nest2']['groups']['nest3'] = deepcopy(template)
            nest1['groups']['nest2']['groups']['nest1']['dimensions'].append(Dimension('outlier', 4))
            desired = {
                None: {
                    'dimensions': two_dimensions,
                    'groups': {
                        'nest1': nest1
                    }
                }
            }
            groups_actual = list(iter_all_group_keys((actual[MPI_RANK])))
            groups_desired = list(iter_all_group_keys(desired))
            self.assertEqual(groups_actual, groups_desired)