def _write_variable_collection_main_(cls, field, opened_or_path, write_mode, **kwargs):
    """Write a field to a vector GIS target (Fiona-backed).

    :param field: The field to write. Must be a :class:`~ocgis.collection.field.Field`.
    :param opened_or_path: Either an opened Fiona object or a path to the output file.
    :param write_mode: An MPI write mode constant controlling template/fill behavior.
    :raises ValueError: If ``field`` is not a ``Field`` or it has no geometry variable.
    """
    from ocgis.collection.field import Field

    if not isinstance(field, Field):
        raise ValueError('Only fields may be written to vector GIS formats.')

    fiona_crs = kwargs.get('crs')
    fiona_schema = kwargs.get('fiona_schema')
    fiona_driver = kwargs.get('fiona_driver', 'ESRI Shapefile')
    iter_kwargs = kwargs.pop('iter_kwargs', {})
    iter_kwargs[KeywordArgument.DRIVER] = cls

    # This finds the geometry variable used in the iterator. Need for the general geometry type that may not be
    # determined using the record iterator.
    geom_variable = field.geom
    if geom_variable is None:
        raise ValueError('A geometry variable is required for writing to vector GIS formats.')

    # Open the output Fiona object using overloaded values or values determined at call-time.
    if not cls.inquire_opened_state(opened_or_path):
        if fiona_crs is None:
            if field.crs is not None:
                fiona_crs = field.crs.value
        _, archetype_record = next(field.iter(**iter_kwargs))
        archetype_record = format_record_for_fiona(fiona_driver, archetype_record)
        if fiona_schema is None:
            fiona_schema = get_fiona_schema(geom_variable.geom_type, archetype_record)
    else:
        fiona_schema = opened_or_path.schema
        fiona_crs = opened_or_path.crs
        fiona_driver = opened_or_path.driver

    # The Fiona GeoJSON driver does not support update.
    if fiona_driver == 'GeoJSON':
        mode = 'w'
    else:
        mode = 'a'

    # Write the template file.
    if fiona_driver != 'GeoJSON':
        if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
            with driver_scope(cls, opened_or_path=opened_or_path, mode='w', driver=fiona_driver,
                              crs=fiona_crs, schema=fiona_schema) as _:
                pass

    # Write data on each rank to the file.
    if write_mode != MPIWriteMode.TEMPLATE:
        for rank_to_write in vm.ranks:
            if vm.rank == rank_to_write:
                with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, driver=fiona_driver,
                                  crs=fiona_crs, schema=fiona_schema) as sink:
                    itr = field.iter(**iter_kwargs)
                    write_records_to_fiona(sink, itr, fiona_driver)
            # NOTE(review): barrier placement inside the rank loop serializes the writes -- confirm against VCS.
            vm.barrier()
def _get_metadata_main_(self):
    """Build the OCGIS metadata dictionary from the vector data source.

    :return: Metadata with ``dimensions``, ``variables``, ``groups``, and a geometry
     variable entry keyed by ``VariableName.GEOMETRY_VARIABLE``.
    :rtype: dict
    """
    with driver_scope(self) as data:
        m = data.sc.get_meta(path=self.rd.uri, driver_kwargs=self.rd.driver_kwargs)
        geom_dimension_name = DimensionName.GEOMETRY_DIMENSION
        m['dimensions'] = {geom_dimension_name: {'size': len(data), 'name': geom_dimension_name}}
        m['variables'] = OrderedDict()
        # Groups are not currently supported in vector formats but metadata expects groups.
        m['groups'] = OrderedDict()
        # Map each schema property to a variable entry on the geometry dimension.
        for p, d in list(m['schema']['properties'].items()):
            d = get_dtype_from_fiona_type(d)
            m['variables'][p] = {'dimensions': (geom_dimension_name,),
                                 'dtype': d,
                                 'name': p,
                                 'attrs': OrderedDict()}
        # Geometries are stored as Python objects.
        m[VariableName.GEOMETRY_VARIABLE] = {'dimensions': (geom_dimension_name,),
                                             'dtype': object,
                                             'name': geom_dimension_name,
                                             'attrs': OrderedDict()}
    return m
def get_variable_collection(self, **kwargs):
    """Read the full variable collection from the data source.

    :keyword uid: Optional unique identifier passed through to the collection reader.
    :return: The variable collection read from the opened source.
    """
    with driver_scope(self) as ds:
        ret = read_from_collection(ds, self.rd, parent=None, uid=kwargs.pop('uid', None))
    return ret
def get_value_from_request_dataset(variable):
    """Load a variable's value from its backing request dataset.

    :param variable: The source-backed variable whose value should be read.
    :raises PayloadProtectedError: If the variable's payload is protected.
    :return: The value read from the source dataset.
    """
    if variable.protected:
        raise PayloadProtectedError(variable.name)
    rd = variable._request_dataset
    with driver_scope(rd.driver) as source:
        # Descend into the source group hierarchy if the variable lives in a subgroup.
        if variable.group is not None:
            for vg in variable.group:
                if vg is None:
                    continue
                else:
                    source = source.groups[vg]
        desired_name = variable.source_name or rd.variable
        # Reference the variable in the source dataset.
        ncvar = source.variables[desired_name]
        # Allow multi-unit time values for temporal variables.
        if isinstance(variable, TemporalVariable) and isinstance(source, MFDataset) and rd.format_time:
            # MFTime may fail if time_bnds do not have a calendar attribute.
            # Use rd.dimension_map.set_bounds('time', None) to disable indexing on time_bnds.
            # Pass explicit target units/calendar so MFTime can normalize files whose time
            # coordinates use differing units (consistent with the sibling implementation).
            try:
                ncvar = MFTime(ncvar, units=variable.units, calendar=variable.calendar)
            except TypeError:
                # Older versions of netcdf4-python do not support the calendar argument.
                ncvar = MFTime(ncvar, units=variable.units)
        ret = get_variable_value(ncvar, variable.dimensions)
    return ret
def get_value_from_request_dataset(variable):
    """Read a source-backed variable's value from its request dataset.

    :param variable: The variable to fill from source.
    :raises PayloadProtectedError: If the variable's payload is protected.
    :return: The value read from the source dataset.
    """
    if variable.protected:
        raise PayloadProtectedError(variable.name)
    rd = variable._request_dataset
    with driver_scope(rd.driver) as source:
        # Walk down into nested groups when the variable is not at the dataset root.
        if variable.group is not None:
            for vg in variable.group:
                if vg is None:
                    continue
                else:
                    source = source.groups[vg]
        desired_name = variable.source_name or rd.variable
        # Reference the variable in the source dataset.
        ncvar = source.variables[desired_name]
        # Allow multi-unit time values for temporal variables.
        if isinstance(variable, TemporalVariable) and isinstance(source, MFDataset) and rd.format_time:
            # MFTime may fail if time_bnds do not have a calendar attribute.
            # Use rd.dimension_map.set_bounds('time', None) to disable indexing on time_bnds.
            try:
                ncvar = MFTime(ncvar, units=variable.units, calendar=variable.calendar)
            except TypeError:
                # Older versions of netcdf4-python do not support the calendar argument.
                ncvar = MFTime(ncvar, units=variable.units)
        ret = get_variable_value(ncvar, variable.dimensions)
    return ret
def test_system_file_geodatabase(self, m_RequestDataset):
    """Test driver keyword arguments make their way to the iterator."""
    driver_kwargs = {'feature_class': 'my_features'}
    m_RequestDataset.driver_kwargs = driver_kwargs
    m_RequestDataset.opened = None
    uri = 'a/file/geodatabase'
    m_RequestDataset.uri = uri
    driver = DriverVector(m_RequestDataset)
    driver.inquire_opened_state = mock.Mock(return_value=False)
    with driver_scope(driver) as gci:
        self.assertIsInstance(gci, GeomCabinetIterator)
        # Stub out the cabinet internals so iteration reaches the features call.
        gci.sc._get_path_by_key_or_direct_path_ = mock.Mock(return_value=uri)
        gci.sc.get_meta = mock.Mock(return_value={})
        gci.sc._get_features_object_ = mock.Mock(spec=GeomCabinet._get_features_object_, return_value=[])
        with self.assertRaises(ValueError):
            for _ in gci:
                pass
        # The driver kwargs must be forwarded to the features accessor.
        gci.sc._get_features_object_.assert_called_once_with(None,
                                                             driver_kwargs={'feature_class': 'my_features'},
                                                             select_sql_where=None,
                                                             select_uid=None,
                                                             uid=None)
def get_variable_value(self, variable, as_geometry_iterator=False):
    """Load values for ``variable`` (and its parent's variables) from the vector source.

    :param variable: The source-backed variable whose iteration dimension drives the read.
    :param bool as_geometry_iterator: If ``True``, return a generator of geometries only.
    :raises ValueError: If the iteration dimension has no source index.
    :return: A generator of geometries, or a mapping of variable name to value array.
    """
    # Iteration is always based on source indices.
    iteration_dimension = variable.dimensions[0]
    src_idx = iteration_dimension._src_idx
    if src_idx is None:
        raise ValueError("Iteration dimension must have a source index.")
    else:
        if iteration_dimension._src_idx_type == SourceIndexType.BOUNDS:
            src_idx = slice(*src_idx)
    # For vector formats based on loading via iteration, it makes sense to load all values with a single pass.
    with driver_scope(self, slc=src_idx) as g:
        if as_geometry_iterator:
            return (row['geom'] for row in g)
        ret = {}
        if variable.parent is None:
            ret[variable.name] = np.zeros(variable.shape, dtype=variable.dtype)
            for idx, row in enumerate(g):
                ret[variable.name][idx] = row['properties'][variable.name]
        else:
            ret = {}
            # Initialize the variable data as zero arrays.
            for v in list(variable.parent.values()):
                if not isinstance(v, CoordinateReferenceSystem):
                    ret[v.name] = np.ma.array(np.zeros(v.shape, dtype=v.dtype), mask=False)
            # Fill those arrays.
            for idx, row in enumerate(g):
                for dv in list(variable.parent.values()):
                    if isinstance(dv, (CoordinateReferenceSystem, GeometryVariable)):
                        continue
                    dv = dv.name
                    try:
                        ret[dv][idx] = row['properties'][dv]
                    except TypeError:
                        # Property value may be none. Set the data to masked if this is the case.
                        if row['properties'][dv] is None:
                            ret[dv].mask[idx] = True
                        else:
                            raise
                try:
                    ret[constants.VariableName.GEOMETRY_VARIABLE][idx] = row['geom']
                except KeyError:
                    pass
            # Only supply a mask if something is actually masked. Otherwise, remove the mask.
            is_masked = any([v.mask.any() for v in ret.values()])
            if not is_masked:
                for k, v in ret.items():
                    ret[k] = v.data
    return ret
def test_system_driver_kwargs(self, m_RequestDataset, m_MFDataset, m_Dataset):
    """Test driver keyword arguments are forwarded to the netCDF open calls."""
    # Single-URI case should route through Dataset with the driver kwargs.
    m_RequestDataset.driver_kwargs = {'clobber': False}
    m_RequestDataset.opened = None
    uri = 'a/path/foo.nc'
    m_RequestDataset.uri = uri
    driver = DriverNetcdf(m_RequestDataset)
    driver.inquire_opened_state = mock.Mock(return_value=False)
    with driver_scope(driver) as _:
        m_Dataset.assert_called_once_with(uri, mode='r', clobber=False)

    # Multi-URI case should route through MFDataset with the driver kwargs.
    m_RequestDataset.driver_kwargs = {'aggdim': 'not_time'}
    m_RequestDataset.opened = None
    uri = ['a/path/foo1.nc', 'a/path/foo2.nc']
    m_RequestDataset.uri = uri
    with driver_scope(driver) as _:
        m_MFDataset.assert_called_once_with(uri, aggdim='not_time')
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """Write a variable collection to CSV, coordinating header and per-rank appends.

    :param vc: The variable collection to write. Must not be empty.
    :param opened_or_path: Opened file object or path for the CSV target.
    :param write_mode: MPI write mode constant controlling template/fill behavior.
    """
    raise_if_empty(vc)
    iter_kwargs = kwargs.pop(KeywordArgument.ITER_KWARGS, {})
    # Field names come from the first record yielded by the collection iterator.
    fieldnames = list(six.next(vc.iter(**iter_kwargs))[1].keys())

    # Rank zero writes the header unless we are only filling an existing file.
    if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
        with driver_scope(cls, opened_or_path, mode='w') as opened:
            writer = csv.DictWriter(opened, fieldnames)
            writer.writeheader()

    # Each rank appends its records in turn unless this is a template-only write.
    if write_mode != MPIWriteMode.TEMPLATE:
        for current_rank_write in vm.ranks:
            if vm.rank == current_rank_write:
                with driver_scope(cls, opened_or_path, mode='a') as opened:
                    writer = csv.DictWriter(opened, fieldnames)
                    for _, record in vc.iter(**iter_kwargs):
                        writer.writerow(record)
            # NOTE(review): barrier inside the rank loop serializes appends -- confirm placement.
            vm.barrier()
def test_driver_scope(self):
    """Driver kwargs on the request dataset are forwarded to the driver's open call."""
    ocgis_driver = self.fixture_abstract_driver()
    ocgis_driver.rd.opened = None
    ocgis_driver.open = mock.Mock(return_value='opened dataset')
    ocgis_driver.close = mock.Mock(return_value='closed dataset')
    desired = {'my': 'driver arg'}
    ocgis_driver.rd.driver_kwargs = desired
    ocgis_driver.inquire_opened_state = mock.Mock(return_value=False)
    with driver_scope(ocgis_driver) as _:
        ocgis_driver.open.assert_called_once_with(my='driver arg', mode='r', rd=ocgis_driver.rd,
                                                  uri=ocgis_driver.rd.uri)
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """Write a variable collection to a netCDF target, coordinating across MPI ranks.

    :param vc: The variable collection to write.
    :param opened_or_path: Opened dataset or path for the netCDF target.
    :param write_mode: MPI write mode constant (must not be ``None``).
    """
    assert write_mode is not None
    dataset_kwargs = kwargs.get('dataset_kwargs', {})
    variable_kwargs = kwargs.get('variable_kwargs', {})

    # When filling a dataset, we use append mode.
    if write_mode == MPIWriteMode.FILL:
        mode = 'a'
    else:
        mode = 'w'

    # For an asynchronous write, treat everything like a single rank.
    if write_mode == MPIWriteMode.ASYNCHRONOUS:
        possible_ranks = [0]
    else:
        possible_ranks = vm.ranks

    # Write the data on each rank.
    for idx, rank_to_write in enumerate(possible_ranks):
        # The template write only occurs on the first rank.
        if write_mode == MPIWriteMode.TEMPLATE and rank_to_write != 0:
            pass
        # If this is not a template write, fill the data.
        elif write_mode == MPIWriteMode.ASYNCHRONOUS or vm.rank == rank_to_write:
            with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, **dataset_kwargs) as dataset:
                # Write global attributes if we are not filling data.
                if write_mode != MPIWriteMode.FILL:
                    vc.write_attributes_to_netcdf_object(dataset)
                # This is the main variable write loop.
                variables_to_write = get_variables_to_write(vc)
                for variable in variables_to_write:
                    # Load the variable's data before orphaning. The variable needs its parent to know which
                    # group it is in.
                    variable.load()
                    # Call the individual variable write method in fill mode. Orphaning is required as a
                    # variable will attempt to write its parent first.
                    with orphaned(variable, keep_dimensions=True):
                        variable.write(dataset, write_mode=write_mode, **variable_kwargs)
                # Recurse the children.
                for child in list(vc.children.values()):
                    if write_mode != MPIWriteMode.FILL:
                        group = nc.Group(dataset, child.name)
                    else:
                        group = dataset.groups[child.name]
                    child.write(group, write_mode=write_mode, **kwargs)
                dataset.sync()
        # NOTE(review): barrier inside the rank loop serializes the writes -- confirm placement.
        vm.barrier()
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """Write ``vc`` to a CSV target: header from rank zero, then serialized per-rank appends."""
    raise_if_empty(vc)
    iter_kwargs = kwargs.pop(KeywordArgument.ITER_KWARGS, {})
    # The first yielded record determines the CSV column names.
    fieldnames = list(six.next(vc.iter(**iter_kwargs))[1].keys())

    # Header/template write happens once, on rank zero, unless filling.
    if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
        with driver_scope(cls, opened_or_path, mode='w') as opened:
            writer = csv.DictWriter(opened, fieldnames)
            writer.writeheader()

    # Data rows are appended by each rank in order (skipped for template-only writes).
    if write_mode != MPIWriteMode.TEMPLATE:
        for current_rank_write in vm.ranks:
            if vm.rank == current_rank_write:
                with driver_scope(cls, opened_or_path, mode='a') as opened:
                    writer = csv.DictWriter(opened, fieldnames)
                    for _, record in vc.iter(**iter_kwargs):
                        writer.writerow(record)
            # NOTE(review): barrier inside the rank loop serializes appends -- confirm placement.
            vm.barrier()
def get_variable_value(self, variable, as_geometry_iterator=False):
    """Read values for ``variable`` (or all of its parent's variables) from the vector source.

    :param variable: Source-backed variable; its first dimension's source index drives iteration.
    :param bool as_geometry_iterator: If ``True``, yield geometries lazily instead of loading arrays.
    :raises ValueError: If the iteration dimension lacks a source index.
    :return: Geometry generator or mapping of variable name to loaded array.
    """
    # Iteration is always based on source indices.
    iteration_dimension = variable.dimensions[0]
    src_idx = iteration_dimension._src_idx
    if src_idx is None:
        raise ValueError("Iteration dimension must have a source index.")
    else:
        if iteration_dimension._src_idx_type == SourceIndexType.BOUNDS:
            src_idx = slice(*src_idx)
    # For vector formats based on loading via iteration, it makes sense to load all values with a single pass.
    with driver_scope(self, slc=src_idx) as g:
        if as_geometry_iterator:
            return (row['geom'] for row in g)
        ret = {}
        if variable.parent is None:
            ret[variable.name] = np.zeros(variable.shape, dtype=variable.dtype)
            for idx, row in enumerate(g):
                ret[variable.name][idx] = row['properties'][variable.name]
        else:
            ret = {}
            # Initialize the variable data as zero arrays.
            for v in list(variable.parent.values()):
                if not isinstance(v, CoordinateReferenceSystem):
                    ret[v.name] = np.ma.array(np.zeros(v.shape, dtype=v.dtype), mask=False)
            # Fill those arrays.
            for idx, row in enumerate(g):
                for dv in list(variable.parent.values()):
                    if isinstance(dv, (CoordinateReferenceSystem, GeometryVariable)):
                        continue
                    dv = dv.name
                    try:
                        ret[dv][idx] = row['properties'][dv]
                    except TypeError:
                        # Property value may be none. Set the data to masked if this is the case.
                        if row['properties'][dv] is None:
                            ret[dv].mask[idx] = True
                        else:
                            raise
                try:
                    ret[constants.VariableName.GEOMETRY_VARIABLE][idx] = row['geom']
                except KeyError:
                    pass
            # Only supply a mask if something is actually masked. Otherwise, remove the mask.
            is_masked = any([v.mask.any() for v in ret.values()])
            if not is_masked:
                for k, v in ret.items():
                    ret[k] = v.data
    return ret
def test_driver_scope(self):
    """Opening through driver_scope forwards the request dataset's driver kwargs."""
    ocgis_driver = self.fixture_abstract_driver()
    ocgis_driver.rd.opened = None
    ocgis_driver.open = mock.Mock(return_value='opened dataset')
    ocgis_driver.close = mock.Mock(return_value='closed dataset')
    desired = {'my': 'driver arg'}
    ocgis_driver.rd.driver_kwargs = desired
    ocgis_driver.inquire_opened_state = mock.Mock(return_value=False)
    with driver_scope(ocgis_driver) as _:
        ocgis_driver.open.assert_called_once_with(my='driver arg', mode='r', rd=ocgis_driver.rd,
                                                  uri=ocgis_driver.rd.uri)
def _get_metadata_main_(self):
    """Derive metadata (variables plus a record-count dimension) from a CSV source.

    :return: Metadata dictionary with ``variables`` and ``dimensions`` entries.
    :rtype: dict
    """
    with driver_scope(self) as f:
        meta = {}
        # Get variable names assuming headers are always on the first row.
        reader = csv.reader(f)
        variable_names = six.next(reader)
        # Fill in variable and dimension metadata.
        meta['variables'] = OrderedDict()
        meta['dimensions'] = OrderedDict()
        for varname in variable_names:
            meta['variables'][varname] = {'name': varname, 'dtype': object, 'dimensions': ('n_records',)}
        # The header line has been consumed, so counting remaining lines yields the record count.
        meta['dimensions']['n_records'] = {'name': 'n_records', 'size': sum(1 for _ in f)}
    return meta
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """Write ``vc`` to a netCDF target with MPI rank coordination."""
    assert write_mode is not None
    dataset_kwargs = kwargs.get('dataset_kwargs', {})
    variable_kwargs = kwargs.get('variable_kwargs', {})

    # When filling a dataset, we use append mode.
    if write_mode == MPIWriteMode.FILL:
        mode = 'a'
    else:
        mode = 'w'

    # For an asynchronous write, treat everything like a single rank.
    if write_mode == MPIWriteMode.ASYNCHRONOUS:
        possible_ranks = [0]
    else:
        possible_ranks = vm.ranks

    # Write the data on each rank.
    for idx, rank_to_write in enumerate(possible_ranks):
        # The template write only occurs on the first rank.
        if write_mode == MPIWriteMode.TEMPLATE and rank_to_write != 0:
            pass
        # If this is not a template write, fill the data.
        elif write_mode == MPIWriteMode.ASYNCHRONOUS or vm.rank == rank_to_write:
            with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, **dataset_kwargs) as dataset:
                # Write global attributes if we are not filling data.
                if write_mode != MPIWriteMode.FILL:
                    vc.write_attributes_to_netcdf_object(dataset)
                # This is the main variable write loop.
                variables_to_write = get_variables_to_write(vc)
                for variable in variables_to_write:
                    # Load the variable's data before orphaning. The variable needs its parent to know which
                    # group it is in.
                    variable.load()
                    # Call the individual variable write method in fill mode. Orphaning is required as a
                    # variable will attempt to write its parent first.
                    with orphaned(variable, keep_dimensions=True):
                        variable.write(dataset, write_mode=write_mode, **variable_kwargs)
                # Recurse the children.
                for child in list(vc.children.values()):
                    if write_mode != MPIWriteMode.FILL:
                        group = nc.Group(dataset, child.name)
                    else:
                        group = dataset.groups[child.name]
                    child.write(group, write_mode=write_mode, **kwargs)
                dataset.sync()
        # NOTE(review): barrier inside the rank loop serializes the writes -- confirm placement.
        vm.barrier()
def _get_metadata_main_(self):
    """Assemble OCGIS metadata for a vector source from the cabinet's schema.

    :return: Metadata dictionary with dimensions, variables, groups, and geometry entry.
    :rtype: dict
    """
    with driver_scope(self) as data:
        m = data.sc.get_meta(path=self.rd.uri, driver_kwargs=self.rd.driver_kwargs)
        geom_dimension_name = DimensionName.GEOMETRY_DIMENSION
        m['dimensions'] = {geom_dimension_name: {'size': len(data), 'name': geom_dimension_name}}
        m['variables'] = OrderedDict()
        # Groups are not currently supported in vector formats but metadata expects groups.
        m['groups'] = OrderedDict()
        # Translate each Fiona schema property into a variable entry.
        for p, d in list(m['schema']['properties'].items()):
            d = get_dtype_from_fiona_type(d)
            m['variables'][p] = {'dimensions': (geom_dimension_name,), 'dtype': d, 'name': p,
                                 'attrs': OrderedDict()}
        # Geometry values are held as Python objects.
        m[VariableName.GEOMETRY_VARIABLE] = {'dimensions': (geom_dimension_name,), 'dtype': object,
                                             'name': geom_dimension_name, 'attrs': OrderedDict()}
    return m
def _get_metadata_main_(self):
    """Collect CSV metadata: header-derived variables and a record-count dimension."""
    with driver_scope(self) as f:
        meta = {}
        # Get variable names assuming headers are always on the first row.
        reader = csv.reader(f)
        variable_names = six.next(reader)
        # Fill in variable and dimension metadata.
        meta['variables'] = OrderedDict()
        meta['dimensions'] = OrderedDict()
        for varname in variable_names:
            meta['variables'][varname] = {'name': varname,
                                          'dtype': object,
                                          'dimensions': ('n_records',)}
        # Header already consumed above; remaining lines are the data records.
        meta['dimensions']['n_records'] = {'name': 'n_records',
                                           'size': sum(1 for _ in f)}
    return meta
def get_variable_value(self, variable):
    """Read values for ``variable`` (and its siblings) from the CSV source.

    :param variable: The variable to fill; its first dimension's local bounds select rows.
    :return: The loaded value of ``variable``.
    """
    # For CSV files, it makes sense to load all variables from source simultaneously.
    if variable.parent is None:
        to_load = [variable]
    else:
        to_load = list(variable.parent.values())
    with driver_scope(self) as f:
        reader = csv.DictReader(f)
        bounds_local = variable.dimensions[0].bounds_local
        for idx, row in enumerate(reader):
            # Skip rows before this rank's local bounds; stop once past them.
            if idx < bounds_local[0]:
                continue
            else:
                if idx >= bounds_local[1]:
                    break
                for tl in to_load:
                    if not tl.has_allocated_value:
                        tl.allocate_value()
                    tl.get_value()[idx - bounds_local[0]] = row[tl.name]
    return variable.get_value()
def test_system_file_geodatabase(self, m_RequestDataset):
    """Test driver keyword arguments make their way to the iterator."""
    driver_kwargs = {'feature_class': 'my_features'}
    m_RequestDataset.driver_kwargs = driver_kwargs
    m_RequestDataset.opened = None
    uri = 'a/file/geodatabase'
    m_RequestDataset.uri = uri
    driver = DriverVector(m_RequestDataset)
    driver.inquire_opened_state = mock.Mock(return_value=False)
    with driver_scope(driver) as gci:
        self.assertIsInstance(gci, GeomCabinetIterator)
        # Replace cabinet internals with mocks so iteration reaches the features accessor.
        gci.sc._get_path_by_key_or_direct_path_ = mock.Mock(return_value=uri)
        gci.sc.get_meta = mock.Mock(return_value={})
        gci.sc._get_features_object_ = mock.Mock(spec=GeomCabinet._get_features_object_,
                                                 return_value=[])
        with self.assertRaises(ValueError):
            for _ in gci:
                pass
        # Verify the driver kwargs are forwarded verbatim.
        gci.sc._get_features_object_.assert_called_once_with(None,
                                                             driver_kwargs={'feature_class': 'my_features'},
                                                             select_sql_where=None, select_uid=None,
                                                             uid=None)
def get_variable_value(self, variable):
    """Load ``variable``'s value (and any siblings) in one pass over the CSV source."""
    # For CSV files, it makes sense to load all variables from source simultaneously.
    if variable.parent is None:
        to_load = [variable]
    else:
        to_load = list(variable.parent.values())
    with driver_scope(self) as f:
        reader = csv.DictReader(f)
        bounds_local = variable.dimensions[0].bounds_local
        for idx, row in enumerate(reader):
            # Only rows within this rank's local bounds are loaded.
            if idx < bounds_local[0]:
                continue
            else:
                if idx >= bounds_local[1]:
                    break
                for tl in to_load:
                    if not tl.has_allocated_value:
                        tl.allocate_value()
                    tl.get_value()[idx - bounds_local[0]] = row[tl.name]
    return variable.get_value()
def remove_netcdf_attribute(filename, variable_name, attr_name):
    """Remove attribute ``attr_name`` from variable ``variable_name`` in a netCDF file.

    :param str filename: Path to the netCDF file (opened in append mode).
    :param str variable_name: Name of the target variable.
    :param str attr_name: Name of the attribute to delete.
    """
    with driver_scope(DriverNetcdf, opened_or_path=filename, mode='a') as ds:
        var = ds[variable_name]
        var.delncattr(attr_name)
def _get_metadata_main_(self):
    """Open the data source and return its parsed metadata."""
    with driver_scope(self) as ds:
        ret = parse_metadata(ds)
    return ret
def _get_metadata_main_(self):
    """Parse and return metadata from the opened data source."""
    with driver_scope(self) as ds:
        ret = parse_metadata(ds)
    return ret
def remove_netcdf_attribute(filename, variable_name, attr_name):
    """Delete ``attr_name`` from ``variable_name`` in the netCDF file at ``filename``."""
    with driver_scope(DriverNetcdf, opened_or_path=filename, mode='a') as ds:
        var = ds[variable_name]
        var.delncattr(attr_name)
def _write_variable_collection_main_(cls, field, opened_or_path, write_mode, **kwargs):
    """Write a field to a vector GIS target using Fiona.

    :param field: The field to write. Must be a :class:`~ocgis.collection.field.Field`.
    :param opened_or_path: Opened Fiona object or output file path.
    :param write_mode: MPI write mode constant controlling template/fill behavior.
    :raises ValueError: If ``field`` is not a ``Field`` or lacks a geometry variable.
    """
    from ocgis.collection.field import Field

    if not isinstance(field, Field):
        raise ValueError('Only fields may be written to vector GIS formats.')

    fiona_crs = kwargs.get('crs')
    fiona_schema = kwargs.get('fiona_schema')
    fiona_driver = kwargs.get('fiona_driver', 'ESRI Shapefile')
    iter_kwargs = kwargs.pop('iter_kwargs', {})
    iter_kwargs[KeywordArgument.DRIVER] = cls

    # This finds the geometry variable used in the iterator. Need for the general geometry type that may not be
    # determined using the record iterator.
    geom_variable = field.geom
    if geom_variable is None:
        raise ValueError('A geometry variable is required for writing to vector GIS formats.')

    # Open the output Fiona object using overloaded values or values determined at call-time.
    if not cls.inquire_opened_state(opened_or_path):
        if fiona_crs is None:
            if field.crs is not None:
                fiona_crs = field.crs.value
        _, archetype_record = next(field.iter(**iter_kwargs))
        archetype_record = format_record_for_fiona(fiona_driver, archetype_record)
        if fiona_schema is None:
            fiona_schema = get_fiona_schema(geom_variable.geom_type, archetype_record)
    else:
        fiona_schema = opened_or_path.schema
        fiona_crs = opened_or_path.crs
        fiona_driver = opened_or_path.driver

    # The Fiona GeoJSON driver does not support update.
    if fiona_driver == 'GeoJSON':
        mode = 'w'
    else:
        mode = 'a'

    # Write the template file.
    if fiona_driver != 'GeoJSON':
        if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
            with driver_scope(cls, opened_or_path=opened_or_path, mode='w', driver=fiona_driver,
                              crs=fiona_crs, schema=fiona_schema) as _:
                pass

    # Write data on each rank to the file.
    if write_mode != MPIWriteMode.TEMPLATE:
        for rank_to_write in vm.ranks:
            if vm.rank == rank_to_write:
                with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, driver=fiona_driver,
                                  crs=fiona_crs, schema=fiona_schema) as sink:
                    itr = field.iter(**iter_kwargs)
                    write_records_to_fiona(sink, itr, fiona_driver)
            # NOTE(review): barrier inside the rank loop serializes the writes -- confirm against VCS.
            vm.barrier()