def setUp(self):
    # create dataset
    self.dataset = DatasetType("test")
    self.dataset["foo["] = BaseType("foo[", np.array(1))

    # create WSGI app
    self.app = BaseHandler(self.dataset)
def demo_dataset():
    """
    @brief Example methods for creating a dataset
    http://pydap.org/developer.html#the-dap-data-model
    """
    # Create a dataset object
    ds = DatasetType(name='Mine')

    # Add some attributes
    ds.attributes['history'] = 'David made a dataset'
    ds.attributes['conventions'] = 'OOIs special format'

    # Create some data and put it in a variable
    varname = 'var1'
    data = (1, 2, 3, 4, 5, 8)
    shape = (6,)  # must match the length of data
    type = Int32
    dims = ('time',)
    attributes = {'long_name': 'long variable name one'}
    ds[varname] = BaseType(name=varname, data=data, shape=shape,
                           dimensions=dims, type=type,
                           attributes=attributes)

    # Now make a grid data object
    g = GridType(name='g')
    data = numpy.arange(6.)
    data.shape = (2, 3)
    # The name in the dictionary must match the name in the basetype
    g['a'] = BaseType(name='a', data=data, shape=data.shape,
                      type=Float32, dimensions=('x', 'y'))
    g['x'] = BaseType(name='x', data=numpy.arange(2.), shape=(2,),
                      type=Float64)
    g['y'] = BaseType(name='y', data=numpy.arange(3.), shape=(3,),
                      type=Float64)
    ds[g.name] = g
    return ds
class TestQuote(unittest.TestCase):
    def setUp(self):
        # create dataset
        self.dataset = DatasetType("test")
        self.dataset["foo["] = BaseType("foo[", np.array(1))

        # create WSGI app
        self.app = BaseHandler(self.dataset)

    def test_name(self):
        """Check that the name was properly escaped."""
        self.assertEqual(list(self.dataset.keys()), ["foo%5B"])

    def test_dds(self):
        text = Request.blank("/.dds").get_response(self.app).text
        self.assertEqual(text, """Dataset {
    Int32 foo%5B;
} test;
""")

    def test_request(self):
        text = Request.blank("/.dds?foo%255B").get_response(self.app).text
        self.assertEqual(text, """Dataset {
    Int32 foo%5B;
} test;
""")

    def test_client(self):
        dataset = open_url("http://localhost:8001/", application=self.app)
        self.assertEqual(list(self.dataset.keys()), ["foo%5B"])
        self.assertEqual(dataset["foo["].name, "foo%5B")
        self.assertEqual(dataset["foo%5B"][0], 1)
def test_netcdf(sequence_type_data):
    """
    Test that a dataset can be converted to a netCDF object
    via to_netcdf().
    """
    TestDataset = DatasetType('Test')
    TestDataset['float'] = BaseType('float', np.array(1, dtype=np.float32))

    with TestDataset.to_netcdf() as ds:
        assert 'float' in ds.variables
        assert ds['float'].dtype == np.float32
        assert ds['float'][:] == np.array(1, dtype=np.float32)
def parse(self):
    """Parse the DDS, returning a dataset."""
    dataset = DatasetType('nameless')

    self.consume('dataset')
    self.consume('{')
    while not self.peek('}'):
        var = self.declaration()
        dataset[var.name] = var
    self.consume('}')

    dataset.name = quote(self.consume('[^;]+'))
    dataset._set_id(dataset.name)
    self.consume(';')

    return dataset
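# Usage sketch for the parser above (hedged: assuming it is
# DDSParser.parse from pydap.parsers.dds, whose module-level
# build_dataset() wraps it). It consumes DDS text of this shape and
# returns a DatasetType:
from pydap.parsers.dds import build_dataset

dds = """Dataset {
    Int32 b;
} a;
"""
dataset = build_dataset(dds)
assert dataset.name == 'a'
assert list(dataset.keys()) == ['b']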
def setUp(self):
    # create dataset
    dataset = DatasetType("test")
    dataset["a.b"] = BaseType("a.b", np.array(1))

    # create WSGI app
    self.app = BaseHandler(dataset)
def parse_constraints(self, environ):
    base = os.path.split(self.filepath)
    coverage = SimplexCoverage.load(base[0], base[1], mode='r')

    last_modified = formatdate(time.mktime(
        time.localtime(os.stat(self.filepath)[ST_MTIME])))
    environ['pydap.headers'].append(('Last-modified', last_modified))

    atts = {}
    atts['title'] = coverage.name
    dataset = DatasetType(coverage.name)  # , attributes=atts)
    fields, queries = environ['pydap.ce']
    response = environ['pydap.response']
    queries = filter(bool, queries)  # fix for older version of pydap

    all_vars = coverage.list_parameters()

    fill_index = -1
    if response == "dods":
        time_context = coverage.get_parameter_context(
            coverage.temporal_parameter_name)
        time_fill_value = time_context.fill_value
        time_data = coverage.get_parameter_values(
            coverage.temporal_parameter_name)
        try:
            fill_index = np.where(time_data == time_fill_value)[0][0]
        except IndexError:
            pass

    # If no fields have been explicitly requested, or if the sequence
    # has been requested directly, return all variables.
    if not fields:
        fields = [[(name, ())] for name in all_vars]

    dataset = self.get_dataset(coverage, fields, fill_index, dataset,
                               response)
    return dataset
def stream(self):
    sz = 10
    time = numpy.arange(float(self.index), float(self.index + sz))
    self.index += sz
    data = numpy.arange(float(sz))
    for ind in range(sz):
        data[ind] = numpy.random.random()

    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='Time Series')
    # The name in the dictionary must match the name in the basetype
    g['timeseries'] = BaseType(name='timeseries', data=data,
                               shape=data.shape, type=Float32,
                               dimensions=('time',))
    g['time'] = BaseType(name='time', data=time, shape=(sz,),
                         type=Float32)
    ds[g.name] = g

    msg = dap_tools.ds2dap_msg(ds)
    yield self.send(self.deliver, 'data', msg.encode())
def test_get_var(self):
    """Test that the id is returned properly."""
    dataset = DatasetType("a")
    dataset["b"] = StructureType("b")
    dataset["b"]["c"] = BaseType("c")
    self.assertEqual(get_var(dataset, 'b.c'), dataset['b']['c'])
def test_DatasetType_id():
    """Test that the dataset id is not propagated."""
    dataset = DatasetType("dataset")
    child = BaseType("child")
    child.id = "error"
    dataset["child"] = child
    assert child.id == "child"
def test_id(self):
    """Test that the dataset id is not propagated."""
    dataset = DatasetType("dataset")
    child = BaseType("child")
    child.id = "error"
    dataset["child"] = child
    self.assertEqual(child.id, "child")
def setUp(self):
    sz = 12
    time = numpy.arange(float(0), float(sz))
    data = numpy.arange(float(sz))
    for ind in range(sz):
        data[ind] = numpy.random.random()

    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='TimeSeries')
    # The name in the dictionary must match the name in the basetype
    g['timeseries'] = BaseType(name='timeseries', data=data,
                               shape=data.shape, type=Float32,
                               dimensions=('time',))
    g['time'] = BaseType(name='time', data=time, shape=(sz,),
                         type=Float32)
    ds[g.name] = g
    self.ds1 = ds

    self.tc = timeseries_consumer.TimeseriesConsumer()
    yield self.tc.plc_init()
def simple_grid_dataset():
    """
    @brief Create a simple dap grid dataset
    Just use the pydap interface - passing dicts does not make sense here.
    """
    # Convert metadata and data to a dap dataset
    ds = DatasetType(name='SimpleGridData')

    g = GridType(name='grid')
    data = numpy.arange(24.)
    data.shape = (4, 2, 3)
    # The name in the dictionary must match the name in the basetype
    g['a'] = BaseType(name='a', data=data, shape=data.shape,
                      type=Float32, dimensions=('time', 'x', 'y'))
    g['time'] = BaseType(name='time', data=numpy.arange(4.), shape=(4,),
                         type=Float64)
    g['x'] = BaseType(name='x', data=numpy.arange(2.), shape=(2,),
                      type=Float64)
    g['y'] = BaseType(name='y', data=numpy.arange(3.), shape=(3,),
                      type=Float64)
    ds[g.name] = g
    return ds
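# Usage sketch for simple_grid_dataset() above. In the dap model a
# GridType holds the main array under the grid's own name plus one map
# per dimension, all reachable by key:
ds = simple_grid_dataset()
grid = ds['grid']
print(grid['a'].data.shape)  # (4, 2, 3): the data array
print(grid['time'].data)     # [0. 1. 2. 3.]: the 'time' dimension map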
def parse_constraints(self, environ):
    base, data_product_id = os.path.split(self.filepath)
    coverage = self.get_coverage(data_product_id)

    last_modified = formatdate(time.mktime(
        time.localtime(os.stat(self.filepath)[ST_MTIME])))
    environ['pydap.headers'].append(('Last-modified', last_modified))

    atts = {}
    atts['title'] = coverage.name
    dataset = DatasetType(coverage.name)  # , attributes=atts)
    response = environ['pydap.response']

    if response == 'dods':
        query_string = environ['QUERY_STRING']
        fields, slices, selectors = self.parse_query_string(query_string)
    elif response in ('dds', 'das'):
        fields = []  # All fields
        slices = []
        selectors = []

    all_vars = coverage.list_parameters()

    if not fields:
        fields = all_vars

    if response == "dods":
        dataset = self.get_dataset(coverage, fields, slices, selectors,
                                   dataset, response)
    elif response in ('dds', 'das'):
        self.handle_dds(coverage, dataset, fields)
    return dataset
def setUp(self):
    """Create a WSGI app with array data"""
    dataset = DatasetType("test")
    self.original_data = np.array([["This ", "is "], ["a ", "test"]],
                                  dtype='<U5')
    dataset["s"] = BaseType("s", self.original_data)
    self.app = BaseHandler(dataset)

    self.data = DAPHandler("http://localhost:8001/", self.app).dataset.s
def setUp(self):
    """Create a WSGI app with array data"""
    dataset = DatasetType("test")
    dataset["s"] = BaseType("s", np.array(["one", "two", "three"]))
    self.app = BaseHandler(dataset)

    self.data = BaseProxy(
        "http://localhost:8001/", "s", np.dtype("|S5"), (3,),
        application=self.app)
def setUp(self):
    """Create a WSGI app with array data"""
    dataset = DatasetType("test")
    data = np.array("This is a test", dtype='S')
    dataset["s"] = BaseType("s", data)
    self.app = BaseHandler(dataset)

    self.data = BaseProxy(
        "http://localhost:8001/", "s", np.dtype("|S14"), (),
        application=self.app)
def test_fix_projection(self):
    """Test a dataset that can use the shorthand notation."""
    dataset = DatasetType("a")
    dataset["b"] = StructureType("b")
    dataset["b"]["c"] = BaseType("c")

    projection = [[("c", ())]]
    self.assertEqual(
        fix_shorthand(projection, dataset),
        [[('b', ()), ('c', ())]])
def test_conflict(self):
    """Test a dataset with conflicting short names."""
    dataset = DatasetType("a")
    dataset["b"] = StructureType("b")
    dataset["b"]["c"] = BaseType("c")
    dataset["d"] = StructureType("d")
    dataset["d"]["c"] = BaseType("c")

    projection = [[("c", ())]]
    with self.assertRaises(ConstraintExpressionError):
        fix_shorthand(projection, dataset)
def simple_object(simple_array):
    # add sequence and children for each column
    name = 'nameless'
    dataset = DatasetType(name)
    seq = dataset['sequence'] = SequenceType('sequence')
    for var in simple_array.dtype.names:
        seq[var] = BaseType(var)

    obj = IterData([(0, 1, 10.), (1, 2, 20.), (2, 3, 30.), (3, 4, 40.),
                    (4, 5, 50.), (5, 6, 60.), (6, 7, 70.), (7, 8, 80.)],
                   seq)
    return obj
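# Usage sketch for simple_object() above (hedged: assuming a
# simple_array with the 'byte', 'int' and 'float' columns used by the
# VerySimpleSequence fixture; IterData comes from pydap.handlers.lib).
import numpy as np

simple_array = np.zeros(0, dtype=[('byte', 'b'), ('int', 'i4'),
                                  ('float', 'f4')])
obj = simple_object(simple_array)
for byte, int_, float_ in obj:
    print(byte, int_, float_)  # rows of the inline data: (0, 1, 10.0), ...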
def nested_object(nested_data):
    name = 'nameless'
    dataset = DatasetType(name)
    seq = dataset['nested'] = SequenceType('nested')
    for var in ['a', 'b', 'c']:
        seq[var] = BaseType(var)
    seq['d'] = SequenceType('d')
    for var in ['e', 'f', 'g']:
        seq['d'][var] = BaseType(var)

    nested = IterData(nested_data, seq)
    return nested
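# Usage sketch for nested_object() above (hedged: the row layout is
# inferred from the template, where 'd' is a nested sequence after the
# flat columns a, b and c, so each row carries its own list of
# (e, f, g) tuples; the values here are made up for illustration).
nested_data = [
    (1, 1, 1, [(10, 11, 12), (21, 22, 23)]),
    (2, 4, 4, [(15, 16, 17)]),
]
nested = nested_object(nested_data)
for a, b, c, d in nested:
    print(a, b, c, list(d))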
def apply_projection(projection, dataset):
    """Apply a given projection to a dataset.

    This function builds and returns a new dataset by adding those
    variables that were requested on the projection.

    """
    out = DatasetType(name=dataset.name, attributes=dataset.attributes)

    # first collect all the variables
    for p in projection:
        target, template = out, dataset
        for i, (name, slice_) in enumerate(p):
            candidate = template[name]

            # add variable to target
            if isinstance(candidate, StructureType):
                if name not in target.keys():
                    if i < len(p) - 1:
                        # if there are more children to add we need to clear
                        # the candidate so it has only explicitly added
                        # children; also, Grids are degenerated into
                        # Structures
                        if isinstance(candidate, GridType):
                            candidate = StructureType(
                                candidate.name, candidate.attributes)
                        candidate._keys = []
                    target[name] = candidate
                target, template = target[name], template[name]
            else:
                target[name] = candidate

    # fix sequence data to include only variables that are in the sequence
    for seq in walk(out, SequenceType):
        seq.data = get_var(dataset, seq.id)[tuple(seq.keys())].data

    # apply slices
    for p in projection:
        target = out
        for name, slice_ in p:
            target, parent = target[name], target

            if slice_:
                if isinstance(target, BaseType):
                    target.data = target[slice_]
                elif isinstance(target, SequenceType):
                    parent[name] = target[slice_[0]]
                elif isinstance(target, GridType):
                    parent[name] = target[slice_]
                else:
                    raise ConstraintExpressionError("Invalid projection!")

    return out
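# Usage sketch for apply_projection() above (hedged: the projection is
# built by hand here; in pydap it normally comes from parsing a
# constraint-expression string with pydap.parsers.parse_ce).
import numpy as np
from pydap.model import DatasetType, StructureType, BaseType

dataset = DatasetType("a")
dataset["b"] = StructureType("b")
dataset["b"]["c"] = BaseType("c", np.arange(10))

# request only b.c, sliced to its first five values
projection = [[("b", ()), ("c", (slice(0, 5),))]]
out = apply_projection(projection, dataset)
print(out["b"]["c"].data)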
def test_open(sequence_type_data):
    """Test that LocalTestServerSSL works properly"""
    TestDataset = DatasetType('Test')
    TestDataset['sequence'] = sequence_type_data
    with LocalTestServerSSL(BaseHandler(TestDataset)) as server:
        dataset = open_url(server.url)
        seq = dataset['sequence']
        retrieved_data = [line for line in seq]

    np.testing.assert_array_equal(
        np.array(retrieved_data, dtype=sequence_type_data.data.dtype),
        np.array(sequence_type_data.data[:],
                 dtype=sequence_type_data.data.dtype))
def test_timeout(sequence_type_data):
    """Test that timeout works properly"""
    TestDataset = DatasetType('Test')
    TestDataset['sequence'] = sequence_type_data
    TestDataset['byte'] = BaseType('byte', 0)
    application = BaseHandler(TestDataset)

    # Explicitly add latency on the devel server
    # to guarantee that it times out
    def wrap_mocker(func):
        def mock_add_latency(*args, **kwargs):
            time.sleep(1e-1)
            return func(*args, **kwargs)
        return mock_add_latency

    application = wrap_mocker(application)

    with LocalTestServer(application) as server:
        url = ("http://0.0.0.0:%s/" % server.port)

        # test open_url
        assert open_url(url) == TestDataset
        with pytest.raises(HTTPError) as e:
            open_url(url, timeout=1e-5)
        assert 'Timeout' in str(e)

        # test open_dods
        with pytest.raises(HTTPError) as e:
            open_dods(url + '.dods?sequence', timeout=1e-5)
        assert 'Timeout' in str(e)

        # test sequenceproxy
        dataset = open_url(url)
        seq = dataset['sequence']
        assert isinstance(seq.data, SequenceProxy)
        # Change the timeout of the sequence proxy:
        seq.data.timeout = 1e-5
        with pytest.raises(HTTPError) as e:
            next(seq.iterdata())
        assert 'Timeout' in str(e)

        # test baseproxy:
        dat = dataset['byte']
        assert isinstance(dat.data, BaseProxy)
        # Change the timeout of the base proxy:
        dat.data.timeout = 1e-5
        with pytest.raises(HTTPError) as e:
            dat[:]
        assert 'Timeout' in str(e)
def __init__(self, filepath):
    BaseHandler.__init__(self)
    self.filepath = filepath
    try:
        with netcdf_file(self.filepath, 'r') as source:
            self.additional_headers.append(
                ('Last-modified', formatdate(time.mktime(
                    time.localtime(os.stat(filepath)[ST_MTIME])))))

            # shortcuts
            vars = source.variables
            dims = source.dimensions

            # build dataset
            name = os.path.split(filepath)[1]
            self.dataset = DatasetType(
                name, attributes=dict(NC_GLOBAL=attrs(source)))
            for dim in dims:
                if dims[dim] is None:
                    self.dataset.attributes['DODS_EXTRA'] = {
                        'Unlimited_Dimension': dim,
                    }
                    break

            # add grids
            grids = [var for var in vars if var not in dims]
            for grid in grids:
                self.dataset[grid] = GridType(grid, attrs(vars[grid]))
                # add array
                self.dataset[grid][grid] = BaseType(
                    grid, LazyVariable(source, grid, grid, self.filepath),
                    vars[grid].dimensions, attrs(vars[grid]))
                # add maps
                for dim in vars[grid].dimensions:
                    self.dataset[grid][dim] = BaseType(
                        dim, vars[dim][:], None, attrs(vars[dim]))

            # add dims
            for dim in dims:
                self.dataset[dim] = BaseType(dim, vars[dim][:], None,
                                             attrs(vars[dim]))
    except Exception as exc:
        message = 'Unable to open file %s: %s' % (filepath, exc)
        raise OpenFileError(message)
def _make_model(with_attributes=False, with_data=False):
    def maybe(attributes):
        return attributes if with_attributes else None

    model = DatasetType(
        name='dataset_name',
        attributes=maybe(attributes('ds', 1))
    )
    model['sequence_name'] = SequenceType(
        name='sequence_name',
        attributes=maybe(attributes('seq', 2))
    )
    for name in ['a', 'b', 'c']:
        model['sequence_name'][name] = BaseType(
            name=name,
            attributes=maybe(attributes(name, 2))
        )
    return model
def simple_dataset(metadata, data):
    """
    @brief Create a simple dap dataset object from dictionary content
    See test_daptools to see the input structure
    """
    # Convert metadata and data to a dap dataset
    ds = DatasetType(name=metadata['DataSet Name'])

    for varname, atts in metadata['variables'].items():
        var = BaseType(name=varname,
                       data=data[varname],
                       shape=(len(data[varname]),),
                       dimensions=(varname,),
                       type=Int32,
                       attributes=atts)
        ds[varname] = var
    return ds
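# Usage sketch for simple_dataset() above (hedged: the 'DataSet Name'
# and 'variables' keys are taken from the function body; the variable
# names and attribute values here are made up for illustration).
metadata = {
    'DataSet Name': 'SimpleData',
    'variables': {
        'time': {'long_name': 'time of measurement'},
        'height': {'long_name': 'sensor height', 'units': 'm'},
    },
}
data = {
    'time': [0, 1, 2, 3],
    'height': [10, 12, 15, 19],
}
ds = simple_dataset(metadata, data)
print(list(ds.keys()))  # ['time', 'height']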
def test_verify_open_url(sequence_type_data):
    """Test that open_url raises the correct SSLError"""
    warnings.simplefilter("always")

    TestDataset = DatasetType('Test')
    TestDataset['sequence'] = sequence_type_data
    TestDataset['byte'] = BaseType('byte', 0)
    application = BaseHandler(TestDataset)

    with LocalTestServerSSL(application, ssl_context='adhoc') as server:
        try:
            open_url(server.url, verify=False, session=requests.Session())
        except (ssl.SSLError, requests.exceptions.SSLError):
            pytest.fail("SSLError should not be raised.")

        with pytest.raises(requests.exceptions.SSLError):
            open_url(server.url, session=requests.Session())

        if not (sys.version_info >= (3, 0) and
                sys.version_info < (3, 4, 4)):
            # verify is disabled by default for python 3 before 3.4.4:
            with pytest.raises(requests.exceptions.SSLError):
                open_url(server.url)
class HDF5Handler(BaseHandler):

    extensions = re.compile(r"^.*(\.nc4?|\.h(df)?[45]?)$", re.IGNORECASE)

    def __init__(self, filepath):
        BaseHandler.__init__(self)

        try:
            self.fp = h5py.File(filepath, 'r')
        except Exception as exc:
            message = 'Unable to open file %s: %s' % (filepath, exc)
            raise OpenFileError(message)

        self.additional_headers.append(('Last-modified', (formatdate(
            time.mktime(time.localtime(os.stat(filepath)[ST_MTIME]))))))

        attrs = {'NC_GLOBAL': process_attrs(self.fp.attrs)}

        unlim = find_unlimited(self.fp)
        if len(unlim) > 1:
            raise Exception(
                "Found %d unlimited dimensions %s, "
                "but DAP supports no more than one." % (len(unlim), unlim))
        elif len(unlim) == 1:
            attrs.update({'DODS_EXTRA': {'Unlimited_Dimension': unlim.pop()}})

        # build dataset
        name = os.path.split(filepath)[1]
        self.dataset = DatasetType(name, attributes=attrs)

        def is_gridded(dst):
            return sum([len(dim) for dim in dst.dims]) > 0

        def add_variables(dataset, h5, level=0):
            assert type(h5) in (h5py.File, h5py.Group, h5py.Dataset)
            name = h5.name.lstrip('/')
            attrs = process_attrs(h5.attrs)

            # struct
            if type(h5) in (h5py.File, h5py.Group):
                foo = StructureType(name, attributes=attrs)
                name = foo.name
                dataset[name] = foo
                for bar in h5.values():
                    add_variables(dataset[name], bar, level + 1)
                return

            # Recursion base cases
            rank = len(h5.shape)

            # basetype
            if rank == 0:
                dataset[name] = BaseType(name, data=Hdf5Data(h5),
                                         dimensions=(), attributes=attrs)
            # sequence?
            # elif rank == 1:
            #     dataset[name] = SequenceType(name, data=h5,
            #                                  attributes=h5.attrs)
            # grid
            elif is_gridded(h5):
                parent = dataset[name] = GridType(name, attributes=attrs)
                dims = tuple([list(d.values())[0].name.lstrip('/')
                              for d in h5.dims])
                logger.debug("DIMENSIONS: {}".format(dims))
                # Add the main variable
                parent[name] = BaseType(name, data=Hdf5Data(h5),
                                        dimensions=dims, attributes=attrs)
                # and all of the dimensions
                for dim in h5.dims:
                    add_variables(parent, dim[0], level + 1)
                    # Why would dims have more than one h5py.Dataset?
            # BaseType
            else:
                dataset[name] = BaseType(name, data=Hdf5Data(h5),
                                         attributes=attrs)

        for varname in self.fp:
            add_variables(self.dataset, self.fp[varname])
import sys

if sys.version_info < (2, 7):  # pragma: no cover
    from ordereddict import OrderedDict
else:
    from collections import OrderedDict

import numpy as np

from pydap.model import (DatasetType, BaseType,
                         SequenceType, GridType,
                         StructureType)
from pydap.handlers.lib import IterData
from pydap.client import open_file


# A very simple sequence: flat and with no strings. This sequence can be
# mapped directly to a Numpy structured array, and can be easily encoded
# and decoded in the DAP spec.
VerySimpleSequence = DatasetType("VerySimpleSequence")
VerySimpleSequence["sequence"] = SequenceType("sequence")
VerySimpleSequence["sequence"]["byte"] = BaseType("byte")
VerySimpleSequence["sequence"]["int"] = BaseType("int")
VerySimpleSequence["sequence"]["float"] = BaseType("float")
VerySimpleSequence["sequence"].data = np.array([
    (0, 1, 10.),
    (1, 2, 20.),
    (2, 3, 30.),
    (3, 4, 40.),
    (4, 5, 50.),
    (5, 6, 60.),
    (6, 7, 70.),
    (7, 8, 80.),
    # field names must match the sequence children defined above
], dtype=[('byte', 'b'), ('int', 'i4'), ('float', 'f4')])
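# Usage sketch for VerySimpleSequence above (hedged: iterdata() assumes
# a pydap version where SequenceType exposes it; older versions iterate
# the sequence object directly).
for row in VerySimpleSequence['sequence'].iterdata():
    print(row)  # (0, 1, 10.0), (1, 2, 20.0), ...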
def parse_constraints(self, environ):
    base = os.path.split(self.filepath)
    coverage = SimplexCoverage.load(base[0], base[1])

    atts = {}
    atts['title'] = coverage.name
    dataset = DatasetType(coverage.name)  # , attributes=atts)
    fields, queries = environ['pydap.ce']
    queries = filter(bool, queries)  # fix for older version of pydap

    all_vars = coverage.list_parameters()

    # drop object-typed parameters, which cannot be encoded
    t = []
    for param in all_vars:
        value_encoding = coverage.get_parameter_context(
            param).param_type.value_encoding
        if numpy.dtype(value_encoding).char == 'O':
            t.append(param)
    for i in t:
        all_vars.remove(i)

    time_context = coverage.get_parameter_context(
        coverage.temporal_parameter_name)
    time_fill_value = time_context.fill_value
    time_data = coverage.get_parameter_values(
        coverage.temporal_parameter_name)
    fill_index = -1
    try:
        fill_index = numpy.where(time_data == time_fill_value)[0][0]
    except IndexError:
        pass

    # If no fields have been explicitly requested, or if the sequence
    # has been requested directly, return all variables.
    if not fields:
        fields = [[(name, ())] for name in all_vars]

    for var in fields:
        target = dataset
        while var:
            name, slice_ = var.pop(0)
            covname = urllib.unquote(name)
            param = coverage.get_parameter(covname)

            # need to truncate slice here in case time has fill values
            if len(slice_) == 0 and fill_index >= 0:
                slice_ = slice(0, fill_index, 1)

            if len(slice_) == 1:
                slice_ = slice_[0]

                if fill_index > slice_.start:
                    continue

                if fill_index > slice_.stop:
                    # slice objects are immutable; rebuild with the
                    # truncated stop
                    slice_ = slice(slice_.start, fill_index, slice_.step)

            if param.is_coordinate or target is not dataset:
                target[name] = get_var(coverage, name, slice_)
            elif var:
                target.setdefault(
                    name, StructureType(
                        name=name,
                        attributes={
                            'units':
                            coverage.get_parameter_context(name).uom}))
                target = target[name]
            else:
                # return grid
                grid = target[name] = GridType(name=name)
                grid[name] = get_var(coverage, name, slice_)
                dim = coverage.temporal_parameter_name
                grid[dim] = get_var(coverage, dim, slice_)

    dataset._set_id()
    dataset.close = coverage.close
    return dataset