def test_StructureType_set_data():
    """Assigning ``data`` on a structure hands one value to each child."""
    struct = StructureType("var", value=42, one="1")
    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)

    struct.data = [10, 20]

    assert struct["one"].data == 10
    assert struct["two"].data == 20
def test_set_data(self):
    """Assigning ``data`` on a structure hands one value to each child."""
    struct = StructureType("var", value=42, one="1")
    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)

    struct.data = [10, 20]

    self.assertEqual(struct["one"].data, 10)
    self.assertEqual(struct["two"].data, 20)
def test_delitem(self):
    """Test item deletion.

    ``keys()`` is materialised with ``list`` before comparison so the
    assertion holds whether the structure returns a plain list or a
    mapping-style keys view.
    """
    var = StructureType("var")
    var["one"] = BaseType("one")
    self.assertEqual(list(var.keys()), ['one'])

    del var["one"]
    self.assertEqual(list(var.keys()), [])
def _sequencetype(var):
    """Yield the binary blocks that encode a sequence variable.

    Every record is preceded by ``START_OF_SEQUENCE`` and a single
    ``END_OF_SEQUENCE`` is yielded after the last record.

    When all children are ``BaseType`` the record is packed in one go through
    a NumPy structured array.  String children are written as a big-endian
    unsigned int length (``'>I'``) followed by the value stored in a field
    whose width is the length rounded up to a multiple of four.  Otherwise
    each record is assigned to a template ``StructureType`` and encoded with
    ``dods``.
    """
    # a flat array can be processed one record (or more?) at a time
    if all(isinstance(child, BaseType) for child in var.children()):
        types = []
        position = 0
        for child in var.children():
            if child.dtype.char in 'SU':
                types.append('>I')  # string length as int
                # string padded to 4n; the ``{position}`` placeholder is
                # filled per record once the actual length is known
                types.append('|S{%s}' % position)
                position += 1
            else:
                types.append(typemap[child.dtype.char])
        template = ','.join(types)
        dtype = template
        strings = position > 0

        # array initialization is costly, so we keep a cache here; this will
        # be inefficient only if there are many strings of different length
        cache = {}

        for record in var:
            yield START_OF_SEQUENCE

            if strings:
                out = []
                padded = []
                for value in record:
                    if isinstance(value, string_types):
                        # empty strings are encoded with length 1
                        length = len(value) or 1
                        out.append(length)
                        padded.append(length + (-length % 4))
                    out.append(value)
                record = out
                dtype = template.format(*padded)

            if dtype not in cache:
                cache[dtype] = np.zeros((1,), dtype=dtype)
            cache[dtype][:] = tuple(record)
            # ``tostring`` is a deprecated alias that recent NumPy releases
            # no longer provide; ``tobytes`` returns the same bytes.
            yield cache[dtype].tobytes()

        yield END_OF_SEQUENCE

    # nested array, need to process individually
    else:
        # create a template structure
        struct = StructureType(var.name)
        for name in var.keys():
            struct[name] = copy.copy(var[name])

        for record in var:
            yield START_OF_SEQUENCE
            struct.data = record
            for block in dods(struct):
                yield block
        yield END_OF_SEQUENCE
def _sequencetype(var):
    """Yield the binary blocks that encode a sequence variable.

    Every record is preceded by ``START_OF_SEQUENCE`` and a single
    ``END_OF_SEQUENCE`` is yielded after the last record.

    When all children are ``BaseType`` the record is packed in one go through
    a NumPy structured array.  String children are written as a big-endian
    unsigned int length (``'>I'``) followed by the value stored in a field
    whose width is the length rounded up to a multiple of four.  Otherwise
    each record is assigned to a template ``StructureType`` and encoded with
    ``dods``.
    """
    # a flat array can be processed one record (or more?) at a time
    if all(isinstance(child, BaseType) for child in var.children()):
        types = []
        position = 0
        for child in var.children():
            if child.dtype.char in 'SU':
                types.append('>I')  # string length as int
                # string padded to 4n; the ``{position}`` placeholder is
                # filled per record once the actual length is known
                types.append('|S{%s}' % position)
                position += 1
            else:
                types.append(typemap[child.dtype.char])
        template = ','.join(types)
        dtype = template
        strings = position > 0

        # array initialization is costly, so we keep a cache here; this will
        # be inefficient only if there are many strings of different length
        cache = {}

        for record in var:
            yield START_OF_SEQUENCE

            if strings:
                out = []
                padded = []
                for value in record:
                    if isinstance(value, string_types):
                        # empty strings are encoded with length 1
                        length = len(value) or 1
                        out.append(length)
                        padded.append(length + (-length % 4))
                    out.append(value)
                record = out
                dtype = template.format(*padded)

            if dtype not in cache:
                cache[dtype] = np.zeros((1,), dtype=dtype)
            cache[dtype][:] = tuple(record)
            # ``tostring`` is a deprecated alias that recent NumPy releases
            # no longer provide; ``tobytes`` returns the same bytes.
            yield cache[dtype].tobytes()

        yield END_OF_SEQUENCE

    # nested array, need to process individually
    else:
        # create a template structure
        struct = StructureType(var.name)
        for name in var.keys():
            struct[name] = copy.copy(var[name])

        for record in var:
            yield START_OF_SEQUENCE
            struct.data = record
            for block in dods(struct):
                yield block
        yield END_OF_SEQUENCE
def test_conflict(self):
    """Shorthand resolution is expected to raise for an ambiguous name.

    The dataset holds both ``b.c`` and ``d.c``, and the projection asks for
    the bare name ``c``.
    """
    dataset = DatasetType("a")
    for parent in ("b", "d"):
        dataset[parent] = StructureType(parent)
        dataset[parent]["c"] = BaseType("c")

    projection = [[("c", ())]]

    with self.assertRaises(ConstraintExpressionError):
        fix_shorthand(projection, dataset)
def apply_projection(projection, dataset):
    """Build a new dataset holding only the variables named in a projection.

    ``projection`` is iterated as a collection of paths; each path is a
    sequence of ``(name, slice)`` pairs walked down from the dataset root.
    The returned ``DatasetType`` reuses the name and attributes of
    ``dataset``.

    Raises ``ConstraintExpressionError`` when a non-empty slice is attached
    to a variable that is not a ``BaseType``, ``SequenceType`` or
    ``GridType``.
    """
    out = DatasetType(name=dataset.name, attributes=dataset.attributes)

    # Pass 1: collect all the requested variables.
    for path in projection:
        target = out
        template = dataset
        for depth, (name, slice_) in enumerate(path):
            candidate = template[name]

            # Leaf variables are attached as they are.
            if not isinstance(candidate, StructureType):
                target[name] = candidate
                continue

            if name not in target.keys():
                if depth < len(path) - 1:
                    # More children follow on this path, so the container
                    # is emptied and only explicitly requested children get
                    # added back; Grids degenerate into plain Structures.
                    if isinstance(candidate, GridType):
                        candidate = StructureType(
                            candidate.name, candidate.attributes)
                    candidate._keys = []
                target[name] = candidate
            target, template = target[name], template[name]

    # Pass 2: restrict sequence data to the variables kept in each sequence.
    for seq in walk(out, SequenceType):
        kept = tuple(seq.keys())
        seq.data = get_var(dataset, seq.id)[kept].data

    # Pass 3: apply the slices.
    for path in projection:
        target = out
        for name, slice_ in path:
            parent = target
            target = parent[name]
            if not slice_:
                continue
            if isinstance(target, BaseType):
                target.data = target[slice_]
            elif isinstance(target, SequenceType):
                parent[name] = target[slice_[0]]
            elif isinstance(target, GridType):
                parent[name] = target[slice_]
            else:
                raise ConstraintExpressionError("Invalid projection!")

    return out
def apply_projection(projection, dataset):
    """Build a new dataset holding only the variables named in a projection.

    ``projection`` is iterated as a collection of paths; each path is a
    sequence of ``(name, slice)`` pairs walked down from the dataset root.
    The returned ``DatasetType`` reuses the name and attributes of
    ``dataset``.

    Raises ``ConstraintExpressionError`` when a non-empty slice is attached
    to a variable that is not a ``BaseType``, ``SequenceType`` or
    ``GridType``.
    """
    out = DatasetType(name=dataset.name, attributes=dataset.attributes)

    # Pass 1: collect all the requested variables.
    for path in projection:
        target = out
        template = dataset
        for depth, (name, slice_) in enumerate(path):
            candidate = template[name]

            # Leaf variables are attached as they are.
            if not isinstance(candidate, StructureType):
                target[name] = candidate
                continue

            if name not in target.keys():
                if depth < len(path) - 1:
                    # More children follow on this path, so the container
                    # is emptied and only explicitly requested children get
                    # added back; Grids degenerate into plain Structures.
                    if isinstance(candidate, GridType):
                        candidate = StructureType(
                            candidate.name, candidate.attributes)
                    candidate._keys = []
                target[name] = candidate
            target, template = target[name], template[name]

    # Pass 2: restrict sequence data to the variables kept in each sequence.
    for seq in walk(out, SequenceType):
        kept = tuple(seq.keys())
        seq.data = get_var(dataset, seq.id)[kept].data

    # Pass 3: apply the slices.
    for path in projection:
        target = out
        for name, slice_ in path:
            parent = target
            target = parent[name]
            if not slice_:
                continue
            if isinstance(target, BaseType):
                target.data = target[slice_]
            elif isinstance(target, SequenceType):
                parent[name] = target[slice_[0]]
            elif isinstance(target, GridType):
                parent[name] = target[slice_]
            else:
                raise ConstraintExpressionError("Invalid projection!")

    return out
def structure(self):
    """Parse a DAP structure declaration into a ``StructureType``.

    Consumes the ``structure`` keyword, the brace-delimited member
    declarations, the structure name (everything up to the next ``;``,
    passed through ``quote``) and the closing ``;``.
    """
    # The name is only known after the closing brace, so start with a
    # placeholder and rename once it has been read.
    parsed = StructureType('nameless')

    self.consume('structure')
    self.consume('{')
    while not self.peek('}'):
        member = self.declaration()
        parsed[member.name] = member
    self.consume('}')

    raw_name = self.consume('[^;]+')
    parsed.name = quote(raw_name)
    self.consume(';')

    return parsed
def test_get_var(self):
    """A dotted id passed to ``get_var`` resolves to the nested variable."""
    dataset = DatasetType("a")
    dataset["b"] = StructureType("b")
    dataset["b"]["c"] = BaseType("c")

    found = get_var(dataset, 'b.c')

    self.assertEqual(found, dataset['b']['c'])
def test_StructureType_repr():
    """``repr`` lists the children, both when empty and when populated."""
    struct = StructureType("var")
    assert repr(struct) == "<StructureType with children >"

    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)
    assert repr(struct) == "<StructureType with children 'one', 'two'>"
def test_StructureType_instance():
    """A structure is both a ``Mapping`` and a ``DapType``."""
    # ``Mapping`` lives in ``collections.abc`` on Python 3; fall back to the
    # old location when that module is unavailable.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    struct = StructureType("var")

    assert isinstance(struct, Mapping)
    assert isinstance(struct, DapType)
def test_repr(self):
    """``repr`` lists the children, both when empty and when populated."""
    struct = StructureType("var")
    self.assertEqual(repr(struct), "<StructureType with children >")

    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)
    self.assertEqual(
        repr(struct), "<StructureType with children 'one', 'two'>")
def test_copy(self):
    """``copy.copy`` gives a lightweight clone of a structure."""
    original = StructureType("var", value=42, one="1")
    original["one"] = BaseType("one")
    original["two"] = BaseType("two")
    original.data = [10, 20]

    clone = copy.copy(original)
    self.assertIsNot(original, clone)

    # Children are distinct objects, yet they share the same data.
    for key in ("one", "two"):
        self.assertIsNot(original[key], clone[key])
        self.assertIs(original[key].data, clone[key].data)

    # Identifying attributes carry over to the clone.
    self.assertEqual(original.id, clone.id)
    self.assertEqual(original.name, clone.name)
def test_StructureType_copy():
    """``copy.copy`` gives a lightweight clone of a structure."""
    original = StructureType("var", value=42, one="1")
    original["one"] = BaseType("one")
    original["two"] = BaseType("two")
    original.data = [10, 20]

    clone = copy.copy(original)
    assert original is not clone

    # Children are distinct objects, yet they share the same data.
    for key in ("one", "two"):
        assert original[key] is not clone[key]
        assert original[key].data is clone[key].data

    # Identifying attributes carry over to the clone.
    assert original.id == clone.id
    assert original.name == clone.name
def test_StructureType_setitem():
    """Check item assignment on a structure.

    The key must match the variable name, otherwise ``KeyError`` is raised.
    Re-inserting an existing child moves it to the end of the key order.
    """
    struct = StructureType("var")

    struct["foo.bar"] = BaseType("foo.bar")
    assert list(struct.keys()) == ['foo%2Ebar']

    # Key and variable name disagree.
    with pytest.raises(KeyError):
        struct["bar"] = BaseType("baz")

    # Reinsertion reorders the children.
    struct["bar"] = BaseType("bar")
    struct["foo.bar"] = BaseType("foo.bar")
    assert list(struct.keys()) == ['bar', 'foo%2Ebar']
def test_setitem(self):
    """Test item assignment.

    Assignment requires the key and the name of the variable to be
    identical. It also takes care of reordering children that are
    reinserted.

    ``keys()`` is materialised with ``list`` before comparison so the
    assertions hold whether the structure returns a plain list or a
    mapping-style keys view.
    """
    var = StructureType("var")
    var["foo.bar"] = BaseType("foo.bar")
    self.assertEqual(list(var.keys()), ['foo%2Ebar'])

    with self.assertRaises(KeyError):
        var["bar"] = BaseType("baz")

    # test reordering
    var["bar"] = BaseType("bar")
    var["foo.bar"] = BaseType("foo.bar")
    self.assertEqual(list(var.keys()), ['bar', 'foo%2Ebar'])
def test_fix_projection(self):
    """A bare child name is expanded to its full path by ``fix_shorthand``."""
    dataset = DatasetType("a")
    dataset["b"] = StructureType("b")
    dataset["b"]["c"] = BaseType("c")

    shorthand = [[("c", ())]]
    expanded = fix_shorthand(shorthand, dataset)

    self.assertEqual(expanded, [[('b', ()), ('c', ())]])
def test_StructureType_getitem_tuple():
    """Indexing with several names returns a subset of the structure."""
    struct = StructureType("var")
    for name in ('child1', 'child2', 'child3'):
        child = BaseType(name)
        struct[name] = child
        assert struct[name] is child

    # The subset exposes only the requested keys ...
    visible = list(struct['child1', 'child3'].keys())
    assert visible == ['child1', 'child3']

    # ... while ``_all_keys`` still reports every child.
    everything = list(struct['child1', 'child3']._all_keys())
    assert everything == ['child1', 'child2', 'child3']

    with pytest.raises(KeyError):
        struct['unloved child']
def test_StructureType_delitem():
    """Check deletion of children from a structure."""
    struct = StructureType("var")
    for child_name in ("one", "two", "three"):
        struct[child_name] = BaseType(child_name)
    assert list(struct.keys()) == ['one', 'two', 'three']

    del struct["one"]
    assert list(struct.keys()) == ['two', 'three']

    # Deleting a child that is not visible in a subset must be safe.
    subset = struct[("two",)]
    assert list(subset.keys()) == ['two']
    assert isinstance(subset, StructureType)
    subset.__delitem__("three")

    # A child that does not exist cannot be deleted.
    with pytest.raises(KeyError):
        del struct["inexistent"]
def test_StructureType_getitem():
    """Check single item retrieval from a structure."""
    struct = StructureType("var")
    child = BaseType("child")
    struct["child"] = child
    assert struct["child"] is child

    # Unknown names and slices are both rejected with ``KeyError``.
    with pytest.raises(KeyError):
        struct["unloved child"]
    with pytest.raises(KeyError):
        struct[:]

    # Dotted keys resolve to the same child.
    for dotted in ("parent.child", "grandparent.parent.child"):
        assert struct[dotted] is child
def add_variables(dataset, h5, level=0):
    """Recursively add the content of an HDF5 object to ``dataset``.

    Parameters:
        dataset: container that receives the new variable under the HDF5
            object's name (leading ``/`` stripped).
        h5: an ``h5py.File``, ``h5py.Group`` or ``h5py.Dataset`` (subclasses
            are accepted).
        level: recursion depth; it is only incremented and passed along.

    Files and groups become a ``StructureType`` whose members are added
    recursively.  Datasets become a ``BaseType``; when ``is_gridded``
    reports true (and the rank is not zero) a ``GridType`` holding the
    variable and its dimensions is created instead.
    """
    assert isinstance(h5, (h5py.File, h5py.Group, h5py.Dataset))

    name = h5.name.lstrip('/')
    attrs = process_attrs(h5.attrs)

    # struct
    if isinstance(h5, (h5py.File, h5py.Group)):
        struct = StructureType(name, attributes=attrs)
        # use the name as stored by the structure from here on
        name = struct.name
        dataset[name] = struct
        for member in h5.values():
            add_variables(dataset[name], member, level + 1)
        return

    # Recursion base cases
    rank = len(h5.shape)

    # basetype
    if rank == 0:
        dataset[name] = BaseType(name, data=Hdf5Data(h5), dimensions=(),
                                 attributes=attrs)
    # NOTE: rank-1 datasets are not mapped to SequenceType; that branch was
    # disabled in the original code.
    # grid
    elif is_gridded(h5):
        parent = dataset[name] = GridType(name, attributes=attrs)
        dims = tuple(d.values()[0].name.lstrip('/') for d in h5.dims)
        logger.debug("DIMENSIONS: %s", dims)
        # Add the main variable
        parent[name] = BaseType(name, data=Hdf5Data(h5), dimensions=dims,
                                attributes=attrs)
        # and all of the dimensions
        for dim in h5.dims:
            # Why would dims have more than one h5py.Dataset?
            add_variables(parent, dim[0], level + 1)
    # BaseType
    else:
        dataset[name] = BaseType(name, data=Hdf5Data(h5), attributes=attrs)
def simple_structure_dataset(metadata, data):
    """Create a simple DAP dataset object from dictionary content.

    The dataset is named after ``metadata['DataSet Name']`` and holds one
    ``StructureType`` called ``structure``.  Each entry of
    ``metadata['variables']`` becomes an ``Int32`` ``BaseType`` in that
    structure, with ``data[varname]`` as its data and the entry's value as
    its attributes.

    See test_daptools for the input structure.
    """
    ds = DatasetType(name=metadata['DataSet Name'])
    container = StructureType(name='structure')

    for varname, atts in metadata['variables'].items():
        values = data[varname]
        # NOTE: ``dimensions=(varname,)`` was left disabled in the original.
        container[varname] = BaseType(
            name=varname,
            data=values,
            shape=(len(values),),
            type=Int32,
            attributes=atts)

    ds[container.name] = container
    return ds
def test_StructureType_children():
    """``children()`` iterates over every child in insertion order."""
    struct = StructureType("var", value=42, one="1")
    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)

    found = list(struct.children())

    assert found == [struct["one"], struct["two"]]
zip([ u"This is a data test string (pass {0}).".format(1 + i * 2) for i in range(5) ], [ u"This is a data test string (pass {0}).".format(i * 2) for i in range(5) ], [1000.0, 999.95, 999.80, 999.55, 999.20], [999.95, 999.55, 998.75, 997.55, 995.95])), names=D1.Drifters.keys())) # testing structures SimpleStructure = DatasetType('SimpleStructure') SimpleStructure['types'] = StructureType(name='types', key="value", nested=OrderedDict([ ("string", "bar"), ("list", [42, 43]), ("array", np.array(1)), ("float", 1000.0), ])) SimpleStructure['types']['b'] = BaseType('b', np.array(0, np.byte)) SimpleStructure['types']['i32'] = BaseType('i32', np.array(1, np.int32)) SimpleStructure['types']['ui32'] = BaseType('ui32', np.array(0, np.uint32)) SimpleStructure['types']['i16'] = BaseType('i16', np.array(0, np.int16)) SimpleStructure['types']['ui16'] = BaseType('ui16', np.array(0, np.uint16)) SimpleStructure['types']['f32'] = BaseType('f32', np.array(0.0, np.float32)) SimpleStructure['types']['f64'] = BaseType('f64', np.array(1000., np.float64)) SimpleStructure['types']['s'] = BaseType( 's', np.array("This is a data test string (pass 0).")) SimpleStructure['types']['u'] = BaseType('u', np.array("http://www.dods.org")) # test grid
def test_get_data(self):
    """Reading ``data`` on a structure gathers the data of its children."""
    struct = StructureType("var", value=42, one="1")
    struct["one"] = BaseType("one", 1)
    struct["two"] = BaseType("two", 2)

    collected = struct.data

    self.assertEqual(collected, [1, 2])
def test_StructureType_get_data():
    """Reading ``data`` on a structure gathers the data of its children."""
    struct = StructureType("var", value=42, one="1")
    struct["one"] = BaseType("one", 1)
    struct["two"] = BaseType("two", 2)

    collected = struct.data

    assert collected == [1, 2]
def test_getitem(self):
    """Looking up a key returns the very object that was stored."""
    struct = StructureType("var")
    stored = BaseType("child")
    struct["child"] = stored

    self.assertIs(struct["child"], stored)
def test_init(self):
    """A new structure starts with empty ``_keys`` and ``_dict``."""
    struct = StructureType("var")

    self.assertEqual(struct._keys, [])
    self.assertEqual(struct._dict, {})
def test_instance(self):
    """Test that it is a Mapping and DapType."""
    # The ``collections.Mapping`` alias was removed in Python 3.10; prefer
    # ``collections.abc`` and fall back only where that module is missing.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    var = StructureType("var")
    assert isinstance(var, Mapping)
    assert isinstance(var, DapType)
def test_iter(self):
    """Iterating a structure is expected to yield its children in order."""
    struct = StructureType("var", value=42, one="1")
    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)

    iterated = list(iter(struct))

    self.assertEqual(iterated, [struct["one"], struct["two"]])
def test_lazy_attribute(self):
    """Attribute access resolves metadata and children.

    ``value`` comes from the keyword attributes, whereas ``one`` resolves
    to the child stored under that name rather than the ``one="1"``
    attribute.
    """
    struct = StructureType("var", value=42, one="1")
    struct["one"] = BaseType("one")

    self.assertEqual(struct.value, 42)
    self.assertIs(struct.one, struct["one"])
def test_contains(self):
    """The ``in`` operator reports a child that has been added."""
    struct = StructureType("var")
    struct["one"] = BaseType("one")

    self.assertIn("one", struct)
def test_len(self):
    """``len`` of a structure equals the number of children added."""
    struct = StructureType("var")
    for child_name in ("one", "two"):
        struct[child_name] = BaseType(child_name)

    self.assertEqual(len(struct), 2)