def test_find_missval():
    grid = GridType("my_grid")
    assert find_missval(grid) is None
    grid.attributes["missing_value"] = np.array([9999])
    assert find_missval(grid) == 9999
    # _FillValue should take precedence over missing_value
    grid.attributes["_FillValue"] = np.array([0])
    assert find_missval(grid) == 0
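# find_missval itself is not shown here; the sketch below is an assumed
# implementation that merely satisfies the test above: _FillValue takes
# precedence over missing_value, and a one-element array is unwrapped to a
# scalar. The real function may differ.
import numpy as np

def find_missval(var):
    """Return a variable's missing value, or None if it has none."""
    missval = None
    if 'missing_value' in var.attributes:
        missval = np.asarray(var.attributes['missing_value']).item()
    if '_FillValue' in var.attributes:
        # _FillValue wins over missing_value.
        missval = np.asarray(var.attributes['_FillValue']).item()
    return missval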
def simple_grid_dataset():
    """
    @brief Create a simple dap grid dataset
    Just use the pydap interface - passing dicts does not make sense here.
    """
    # Convert metadata and data to a dap dataset
    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='grid')
    data = numpy.arange(24.)
    data.shape = (4, 2, 3)
    # The name in the dictionary must match the name in the basetype
    g['a'] = BaseType(name='a', data=data, shape=data.shape, type=Float32,
                      dimensions=('time', 'x', 'y'))
    g['time'] = BaseType(name='time', data=numpy.arange(4.), shape=(4,),
                         type=Float64)
    g['x'] = BaseType(name='x', data=numpy.arange(2.), shape=(2,),
                      type=Float64)
    g['y'] = BaseType(name='y', data=numpy.arange(3.), shape=(3,),
                      type=Float64)
    ds[g.name] = g
    return ds
def stream(self):
    sz = 10
    time = numpy.arange(float(self.index), float(self.index + sz))
    self.index += sz
    data = numpy.arange(float(sz))
    for ind in range(sz):
        data[ind] = numpy.random.random()

    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='Time Series')
    # The name in the dictionary must match the name in the basetype.
    # Note: dimensions must be a tuple; ('time') is just a string.
    g['timeseries'] = BaseType(name='timeseries', data=data, shape=data.shape,
                               type=Float32, dimensions=('time',))
    g['time'] = BaseType(name='time', data=time, shape=(sz,), type=Float32)
    ds[g.name] = g

    msg = dap_tools.ds2dap_msg(ds)
    yield self.send(self.deliver, 'data', msg.encode())
def mean(dataset, var, axis=0):
    """Calculate the mean of an array along a given axis.

    The input variable should be either a ``GridType`` or ``BaseType``. The
    function will return an object of the same type with the mean applied.
    """
    if not isinstance(var, (GridType, BaseType)):
        raise ConstraintExpressionError(
            'Function "mean" should be used on an array or grid.')

    axis = int(axis)
    dims = tuple(dim for i, dim in enumerate(var.dimensions) if i != axis)

    # process basetype
    if isinstance(var, BaseType):
        return BaseType(
            name=var.name, data=np.mean(var.data[:], axis=axis),
            dimensions=dims, attributes=var.attributes)

    # process grid: average the array, then copy the remaining maps
    out = GridType(name=var.name, attributes=var.attributes)
    out[var.array.name] = BaseType(
        name=var.array.name, data=np.mean(var.array.data[:], axis=axis),
        dimensions=dims, attributes=var.array.attributes)
    for dim in dims:
        out[dim] = BaseType(name=dim, data=var[dim].data[:],
                            dimensions=(dim,), attributes=var[dim].attributes)
    return out
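# A hedged usage sketch for mean() above. pydap's server-side functions
# receive the full dataset as their first argument; mean() never touches it,
# so None is passed in this standalone sketch. The grid and its dimension
# names are illustrative.
import numpy as np
from pydap.model import GridType, BaseType

grid = GridType("example")
grid["a"] = BaseType("a", data=np.arange(30 * 50).reshape(30, 50),
                     dimensions=("x", "y"))
grid["x"] = BaseType("x", data=np.arange(30))
grid["y"] = BaseType("y", data=np.arange(50))

averaged = mean(None, grid, axis=0)  # collapse the first ("x") axis
print(averaged["a"].shape)           # (50,)
print(list(averaged.keys()))         # ['a', 'y']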
def gridtype_example():
    """Create a simple grid."""
    example = GridType("example")
    example["a"] = BaseType("a", data=np.arange(30 * 50).reshape(30, 50))
    example["x"] = BaseType("x", data=np.arange(30))
    example["y"] = BaseType("y", data=np.arange(50))
    return example
def setUp(self):
    sz = 12
    time = numpy.arange(float(0), float(sz))
    data = numpy.arange(float(sz))
    for ind in range(sz):
        data[ind] = numpy.random.random()

    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='TimeSeries')
    # The name in the dictionary must match the name in the basetype.
    # Note: dimensions must be a tuple; ('time') is just a string.
    g['timeseries'] = BaseType(name='timeseries', data=data, shape=data.shape,
                               type=Float32, dimensions=('time',))
    g['time'] = BaseType(name='time', data=time, shape=(sz,), type=Float32)
    ds[g.name] = g

    self.ds1 = ds
    self.tc = timeseries_consumer.TimeseriesConsumer()
    yield self.tc.plc_init()
def setUp(self):
    """Create a simple grid."""
    example = GridType("example")
    example["a"] = BaseType("a", data=np.arange(30 * 50).reshape(30, 50))
    example["x"] = BaseType("x", data=np.arange(30))
    example["y"] = BaseType("y", data=np.arange(50))
    self.example = example
def grid(self):
    """Parse a DAP grid, returning a ``GridType``."""
    grid = GridType('nameless')
    self.consume('grid')
    self.consume('{')

    self.consume('array')
    self.consume(':')
    array = self.base()
    grid[array.name] = array

    self.consume('maps')
    self.consume(':')
    while not self.peek('}'):
        var = self.base()
        grid[var.name] = var
    self.consume('}')

    grid.name = quote(self.consume('[^;]+'))
    self.consume(';')
    return grid
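# For reference, a hedged example of the DDS text the grid() method above
# consumes; variable names and sizes are made up, and exact whitespace and
# case handling depend on the tokenizer:
DDS_GRID = """\
Grid {
    Array:
        Float32 a[time = 4][x = 2][y = 3];
    Maps:
        Float64 time[time = 4];
        Float64 x[x = 2];
        Float64 y[y = 3];
} a;
"""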
def demo_dataset():
    """
    @brief Example methods for creating a dataset
    http://pydap.org/developer.html#the-dap-data-model
    """
    # Create a dataset object
    ds = DatasetType(name='Mine')

    # Add some attributes
    ds.attributes['history'] = 'David made a dataset'
    ds.attributes['conventions'] = 'OOIs special format'

    # Create some data and put it in a variable.
    # The shape must match the six data values below.
    varname = 'var1'
    data = (1, 2, 3, 4, 5, 8)
    shape = (6,)
    type = Int32
    dims = ('time',)
    attributes = {'long_name': 'long variable name one'}
    ds[varname] = BaseType(name=varname, data=data, shape=shape,
                           dimensions=dims, type=type, attributes=attributes)

    # Now make a grid data object
    g = GridType(name='g')
    data = numpy.arange(6.)
    data.shape = (2, 3)
    # The name in the dictionary must match the name in the basetype
    g['a'] = BaseType(name='a', data=data, shape=data.shape, type=Float32,
                      dimensions=('x', 'y'))
    g['x'] = BaseType(name='x', data=numpy.arange(2.), shape=(2,),
                      type=Float64)
    g['y'] = BaseType(name='y', data=numpy.arange(3.), shape=(3,),
                      type=Float64)
    ds[g.name] = g
    return ds
def make_grid(self, response, name, data, time_data, attrs, time_attrs,
              dims, ttype):
    grid = GridType(name=name)
    grid[name] = BaseType(name=name, data=data, type=ttype,
                          attributes=attrs, dimensions=dims,
                          shape=data.shape)
    grid[dims[0]] = BaseType(name=dims[0], data=time_data,
                             type=time_data.dtype.char,
                             attributes=time_attrs, dimensions=dims,
                             shape=time_data.shape)
    return grid
def __init__(self, filepath):
    BaseHandler.__init__(self)
    self.filepath = filepath
    try:
        with netcdf_file(self.filepath, 'r') as source:
            self.additional_headers.append(
                ('Last-modified',
                 formatdate(time.mktime(
                     time.localtime(os.stat(filepath)[ST_MTIME])))))

            # shortcuts
            vars = source.variables
            dims = source.dimensions

            # build dataset
            name = os.path.split(filepath)[1]
            self.dataset = DatasetType(
                name, attributes=dict(NC_GLOBAL=attrs(source)))
            for dim in dims:
                if dims[dim] is None:
                    self.dataset.attributes['DODS_EXTRA'] = {
                        'Unlimited_Dimension': dim,
                    }
                    break

            # add grids
            grids = [var for var in vars if var not in dims]
            for grid in grids:
                self.dataset[grid] = GridType(grid, attrs(vars[grid]))
                # add array
                self.dataset[grid][grid] = BaseType(
                    grid, LazyVariable(source, grid, grid, self.filepath),
                    vars[grid].dimensions, attrs(vars[grid]))
                # add maps
                for dim in vars[grid].dimensions:
                    self.dataset[grid][dim] = BaseType(
                        dim, vars[dim][:], None, attrs(vars[dim]))

            # add dims
            for dim in dims:
                self.dataset[dim] = BaseType(
                    dim, vars[dim][:], None, attrs(vars[dim]))
    except Exception as exc:
        # The bare `raise` that preceded this message made it unreachable;
        # raise the intended handler error instead.
        message = 'Unable to open file %s: %s' % (filepath, exc)
        raise OpenFileError(message)
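# Hypothetical usage, assuming this __init__ belongs to a handler class
# named NetCDFHandler (as in pydap.handlers.netcdf); the file path is
# illustrative:
handler = NetCDFHandler('data/example.nc')
print(handler.dataset.name)          # the file's basename
print(list(handler.dataset.keys()))  # grids followed by dimension arrays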
def add_variables(dataset, h5, level=0):
    assert type(h5) in (h5py.File, h5py.Group, h5py.Dataset)
    name = h5.name.lstrip('/')
    attrs = process_attrs(h5.attrs)

    # struct
    if type(h5) in (h5py.File, h5py.Group):
        foo = StructureType(name, attributes=attrs)
        name = foo.name
        dataset[name] = foo
        for bar in h5.values():
            add_variables(dataset[name], bar, level + 1)
        return

    # Recursion base cases
    rank = len(h5.shape)

    # basetype
    if rank == 0:
        dataset[name] = BaseType(name, data=Hdf5Data(h5), dimensions=(),
                                 attributes=attrs)
    # sequence?
    # elif rank == 1:
    #     dataset[name] = SequenceType(name, data=h5, attributes=h5.attrs)
    # grid
    elif is_gridded(h5):
        parent = dataset[name] = GridType(name, attributes=attrs)
        dims = tuple(d.values()[0].name.lstrip('/') for d in h5.dims)
        logger.debug("DIMENSIONS: {}".format(dims))
        # Add the main variable
        parent[name] = BaseType(name, data=Hdf5Data(h5), dimensions=dims,
                                attributes=attrs)
        # ... and all of its dimensions
        for dim in h5.dims:
            # Why would dims have more than one h5py.Dataset?
            add_variables(parent, dim[0], level + 1)
    # BaseType
    else:
        dataset[name] = BaseType(name, data=Hdf5Data(h5), attributes=attrs)
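# is_gridded() is referenced above but not defined in this snippet. A
# minimal sketch consistent with the handler's use of HDF5 dimension scales
# might look like this (an assumption, not the actual helper):
def is_gridded(h5):
    """Treat an h5py dataset as a grid when it has nonzero rank and every
    dimension has at least one dimension scale attached."""
    return len(h5.shape) > 0 and all(len(dim) > 0 for dim in h5.dims)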
def handle_dds(self, coverage, dataset, fields):
    cov = coverage
    try:
        time_name = coverage.temporal_parameter_name
        time_context = coverage.get_parameter_context(time_name)
        time_attrs = self.get_attrs(cov, time_name)
        time_base = BaseType(time_name, type=self.dap_type(time_context),
                             attributes=time_attrs, dimensions=(time_name,),
                             shape=(coverage.num_timesteps,))
        dataset[time_name] = time_base
    except Exception:
        log.exception('Problem reading cov %s', str(cov))
        raise  # Can't do much without time

    for var in fields:
        while var:
            name, slice_ = var.pop(0)
            name = urllib.unquote(name)
            if name == time_name:
                continue  # Already added to the dataset
            try:
                grid = GridType(name=name)
                context = coverage.get_parameter_context(name)
                attrs = self.get_attrs(cov, name)
                grid[name] = BaseType(name=name,
                                      type=self.dap_type(context),
                                      attributes=attrs,
                                      dimensions=(time_name,),
                                      shape=(coverage.num_timesteps,))
                grid[cov.temporal_parameter_name] = time_base
                dataset[name] = grid
            except Exception:
                log.exception('Problem reading cov %s', str(cov))
                continue
    return dataset
("float", 1000.0), ])) SimpleStructure['types']['b'] = BaseType('b', np.array(0, np.byte)) SimpleStructure['types']['i32'] = BaseType('i32', np.array(1, np.int32)) SimpleStructure['types']['ui32'] = BaseType('ui32', np.array(0, np.uint32)) SimpleStructure['types']['i16'] = BaseType('i16', np.array(0, np.int16)) SimpleStructure['types']['ui16'] = BaseType('ui16', np.array(0, np.uint16)) SimpleStructure['types']['f32'] = BaseType('f32', np.array(0.0, np.float32)) SimpleStructure['types']['f64'] = BaseType('f64', np.array(1000., np.float64)) SimpleStructure['types']['s'] = BaseType( 's', np.array("This is a data test string (pass 0).")) SimpleStructure['types']['u'] = BaseType('u', np.array("http://www.dods.org")) # test grid rain = DatasetType('test') rain['rain'] = GridType('rain') rain['rain']['rain'] = BaseType('rain', np.arange(6).reshape(2, 3), dimensions=('y', 'x')) rain['rain']['x'] = BaseType('x', np.arange(3), units='degrees_east') rain['rain']['y'] = BaseType('y', np.arange(2), units='degrees_north') # test for ``bounds`` function bounds = DatasetType('test') bounds['sequence'] = SequenceType('sequence') bounds['sequence']['lon'] = BaseType('lon', axis='X') bounds['sequence']['lat'] = BaseType('lat', axis='Y') bounds['sequence']['depth'] = BaseType('depth', axis='Z') bounds['sequence']['time'] = BaseType('time', axis='T', units="days since 1970-01-01")
SimpleStructure['types']['ub'] = BaseType('ub', np.array(10, np.ubyte))
SimpleStructure['types']['i32'] = BaseType('i32', np.array(-10, np.int32))
SimpleStructure['types']['ui32'] = BaseType('ui32', np.array(10, np.uint32))
SimpleStructure['types']['i16'] = BaseType('i16', np.array(-10, np.int16))
SimpleStructure['types']['ui16'] = BaseType('ui16', np.array(10, np.uint16))
SimpleStructure['types']['f32'] = BaseType('f32', np.array(100.0, np.float32))
SimpleStructure['types']['f64'] = BaseType('f64', np.array(1000., np.float64))
SimpleStructure['types']['s'] = BaseType(
    's', np.array("This is a data test string (pass 0)."))
SimpleStructure['types']['u'] = BaseType('u', np.array("http://www.dods.org"))
SimpleStructure['types']['U'] = BaseType('U', np.array(u"test unicode",
                                                       np.unicode))

# test grid
rain = DatasetType('test')
rain['rain'] = GridType('rain')
rain['rain']['rain'] = BaseType(
    'rain', np.arange(6).reshape(2, 3), dimensions=('y', 'x'))
rain['rain']['x'] = BaseType('x', np.arange(3), units='degrees_east')
rain['rain']['y'] = BaseType('y', np.arange(2), units='degrees_north')

# test for ``bounds`` function
bounds = DatasetType('test')
bounds['sequence'] = SequenceType('sequence')
bounds['sequence']['lon'] = BaseType('lon', axis='X')
bounds['sequence']['lat'] = BaseType('lat', axis='Y')
bounds['sequence']['depth'] = BaseType('depth', axis='Z')
bounds['sequence']['time'] = BaseType('time', axis='T',
                                      units="days since 1970-01-01")
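# For orientation, a hedged sketch of how the 'rain' fixture above is
# typically accessed through pydap's GridType API:
print(rain['rain'].array.shape)  # (2, 3): the data array
print(list(rain['rain'].maps))   # ['x', 'y']: the map vectors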
def dataset_from_str(name, buffer_str, mtime, directory='.',
                     buffer_cache={}, dataset_cache={}, mtimes={},
                     known_infiles={}):
    from fstd2nc import Buffer
    from fstd2nc.mixins import _var_type, _axis_type, _dim_type
    from pydap.model import DatasetType, GridType, BaseType
    from os.path import basename, getmtime
    import numpy as np
    from collections import OrderedDict
    from datetime import datetime
    from argparse import ArgumentParser
    from os import chdir, path
    import shlex
    from glob import glob

    # Set the directory (to properly evaluate relative paths).
    chdir(directory)

    # Parse the arguments from the string.
    parser = ArgumentParser()
    parser.add_argument('infile', nargs='+')
    Buffer._cmdline_args(parser)
    buffer_args = shlex.split(buffer_str)
    buffer_args = parser.parse_args(buffer_args)
    buffer_args = vars(buffer_args)
    infiles = buffer_args.pop('infile')

    # Apply wildcard expansion to filenames.
    infiles = [f for filepattern in infiles
               for f in sorted(glob(filepattern)) or [filepattern]]
    # Make sure the filenames are strings (not unicode).
    infiles = list(map(str, infiles))

    # Look at modification times of individual files.
    mtime = max(map(getmtime, infiles))

    # Return a cached version of the dataset if nothing about the file(s)
    # has changed since last time.
    if (name in dataset_cache and mtime <= mtimes[name]
            and known_infiles[name] == infiles):
        return dataset_cache[name]
    mtimes[name] = mtime
    known_infiles[name] = infiles

    # Construct an fstd2nc Buffer object with the decoded FST data.
    buf = Buffer(infiles, **buffer_args)
    # Save a reference to the Buffer so the file reference(s) remain valid.
    buffer_cache[name] = buf

    # Get global metadata, and add a history entry to it (the original
    # assignment here clobbered the metadata just fetched).
    global_metadata = buf._metadata.get('global', {})
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    history = timestamp + ": %s via Pydap+fstd2dap" % path.basename(name)
    global_metadata['history'] = history

    # Construct a pydap Dataset object.
    dataset = DatasetType(name=path.basename(name),
                          attributes=dict(NC_GLOBAL=global_metadata))
    # Save this so it can be immediately returned next time it's requested.
    dataset_cache[name] = dataset

    # Split into vars / dims.
    buf = list(buf)
    variables = OrderedDict((var.name, var) for var in buf
                            if not isinstance(var, (_axis_type, _dim_type)))
    dims = OrderedDict((var.name, var) for var in buf
                       if isinstance(var, (_axis_type, _dim_type)))

    # Based loosely on pydap's builtin netcdf handler.
    for var in variables.values():
        # Add grids.
        dataset[var.name] = GridType(var.name, var.atts)
        # Add array.
        dataset[var.name][var.name] = BaseType(var.name, var.array,
                                               var.dims, var.atts)
        # Add maps.
        for dim in var.dims:
            if dim not in dims:
                continue
            if hasattr(dims[dim], 'array'):
                array = dims[dim].array
            else:
                # Add "dummy" dimensions (or they're not interpreted
                # properly by some clients like Panoply).
                array = np.arange(len(dims[dim]))
            atts = dims[dim].atts if hasattr(dims[dim], 'atts') else {}
            dataset[var.name][dim] = BaseType(dim, array, None, atts)

    for dim in dims.values():
        if hasattr(dim, 'array'):
            array = dim.array
        else:
            # Add "dummy" dimensions (or they're not interpreted properly
            # by some clients like Panoply).
            array = np.arange(len(dim))
        atts = dim.atts if hasattr(dim, 'atts') else {}
        dataset[dim.name] = BaseType(dim.name, array, None, atts)
        # Handle unlimited dimension.
        if dim.name == 'time':
            dataset.attributes['DODS_EXTRA'] = {
                'Unlimited_Dimension': dim.name,
            }

    return dataset
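# Hypothetical invocation of dataset_from_str(); the name, file pattern, and
# directory are illustrative. buffer_str carries the input file pattern plus
# any fstd2nc command-line options, parsed by Buffer._cmdline_args above;
# mtime is recomputed internally from the matched files.
ds = dataset_from_str('model.fst', 'model_2023*.fst', mtime=0,
                      directory='/data/fst')
print(list(ds.keys()))  # variables (as grids) followed by dimensions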