def test_descriptor_setitem(self):
    dd = HDF5DataDescriptor(self.hdf5_file, '/g/a2')
    self.assertEqual(dd.dshape, datashape.dshape('2, 3, int64'))
    dd[1, 2] = 10
    self.assertEqual(dd_as_py(dd[1, 2]), 10)
    dd[1] = [10, 11, 12]
    self.assertEqual(dd_as_py(dd[1]), [10, 11, 12])
def test_descriptor_getitem_types(self):
    dd = HDF5DataDescriptor(self.hdf5_file, '/g/a2')
    self.assertEqual(dd.dshape, datashape.dshape('2, 3, int64'))
    # Indexing should produce DyNDDataDescriptor instances
    self.assertTrue(isinstance(dd[0], DyNDDataDescriptor))
    self.assertEqual(dd_as_py(dd[0]), [1, 2, 3])
    self.assertTrue(isinstance(dd[1, 2], DyNDDataDescriptor))
    self.assertEqual(dd_as_py(dd[1, 2]), 6)
def test_descriptor_append(self):
    dd = HDF5DataDescriptor(self.hdf5_file, '/t1')
    tshape = '2, { f0 : int32; f1 : int64; f2 : float64 }'
    self.assertEqual(dd.dshape, datashape.dshape(tshape))
    dd.append([(10, 11, 12)])
    dvals = {'f0': 10, 'f1': 11, 'f2': 12.0}
    rvals = dd_as_py(dd[2])
    is_equal = [(rvals[k] == dvals[k]) for k in dvals]
    self.assertEqual(is_equal, [True] * 3)
def test_descriptor_iter_types(self):
    dd = HDF5DataDescriptor(self.hdf5_file, '/a1')
    self.assertEqual(dd.dshape, datashape.dshape('2, 3, int32'))
    # Iteration should produce DyNDDataDescriptor instances
    vals = []
    for el in dd:
        self.assertTrue(isinstance(el, DyNDDataDescriptor))
        self.assertTrue(isinstance(el, IDataDescriptor))
        vals.append(dd_as_py(el))
    self.assertEqual(vals, [[1, 2, 3], [4, 5, 6]])
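# The tests above (and test_basic_object_type below) assume a fixture that
# creates a temporary HDF5 file, exposed as self.hdf5_file, containing
# datasets /a1 (2x3 int32), /g/a2 (2x3 int64) and an appendable table /t1
# with fields f0/f1/f2. The following is a minimal sketch of such a setUp
# using PyTables; the class name is assumed and the project's actual
# fixture code may differ.

import os
import tempfile
import unittest

import numpy as np
import tables as tb


class TestHDF5DataDescriptor(unittest.TestCase):  # name assumed
    def setUp(self):
        handle, self.hdf5_file = tempfile.mkstemp('.h5')
        os.close(handle)  # PyTables reopens the file by name
        a1 = np.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
        a2 = np.array([[1, 2, 3], [4, 5, 6]], dtype='int64')
        t1 = np.array([(1, 2, 3.0), (4, 5, 6.0)], dtype='i4,i8,f8')
        with tb.open_file(self.hdf5_file, 'w') as f:
            f.create_array(f.root, 'a1', a1)
            g = f.create_group(f.root, 'g')
            f.create_array(g, 'a2', a2)
            # A Table is appendable, which test_descriptor_append relies on
            f.create_table(f.root, 't1', t1)

    def tearDown(self):
        os.unlink(self.hdf5_file)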
def load_blaze_subcarray(conf, cdir, subcarray):
    import tables as tb
    from blaze.datadescriptor import HDF5DataDescriptor
    with tb.open_file(cdir.fname, 'r') as f:
        try:
            # Validate that the datapath exists and is a Leaf node
            f.get_node(f.root, subcarray, 'Leaf')
        except tb.NoSuchNodeError:
            raise RuntimeError(
                'HDF5 file does not have a dataset in %r' % subcarray)
    dd = HDF5DataDescriptor(cdir.fname, subcarray)
    return blaze.array(dd)
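# A hedged usage sketch for load_blaze_subcarray. 'conf' is the catalog
# configuration and 'cdir' a catalog directory entry whose .fname names an
# HDF5 file; both come from the surrounding catalog machinery and are taken
# as given here. The wrapper name and the '/g/a2' dataset path are
# illustrative only.
def example_load_subcarray(conf, cdir):
    arr = load_blaze_subcarray(conf, cdir, '/g/a2')
    # A dataset stored as 2x3 int64 comes back with the matching datashape
    print(arr.dshape)
    return arr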
def test_basic_object_type(self):
    self.assertTrue(issubclass(HDF5DataDescriptor, IDataDescriptor))
    dd = HDF5DataDescriptor(self.hdf5_file, '/a1')
    # Make sure the right type is returned
    self.assertTrue(isinstance(dd, IDataDescriptor))
    self.assertEqual(dd_as_py(dd), [[1, 2, 3], [4, 5, 6]])
def load_blaze_array(conf, dir):
    """Loads a blaze array from the catalog configuration and catalog path"""
    # This is a temporary hack, need to transition to using the
    # deferred data descriptors for various formats.
    fsdir = conf.get_fsdir(dir)
    if not path.isfile(fsdir + '.array'):
        raise RuntimeError('Could not find blaze array description file %r'
                           % (fsdir + '.array'))
    with open(fsdir + '.array') as f:
        arrmeta = yaml.load(f)
    tp = arrmeta['type']
    imp = arrmeta['import']
    ds_str = arrmeta.get('datashape')  # optional; HDF5 does not need it
    if tp == 'csv':
        with open(fsdir + '.csv', 'r') as f:
            rd = csv.reader(f)
            if imp.get('headers', False):
                # Skip the header line
                next(rd)
            dat = list(rd)
        arr = nd.array(dat, ndt.type(ds_str))[:]
        return blaze.array(arr)
    elif tp == 'json':
        arr = nd.parse_json(ds_str, nd.memmap(fsdir + '.json'))
        return blaze.array(arr)
    elif tp == 'hdf5':
        import tables as tb
        from blaze.datadescriptor import HDF5DataDescriptor
        fname = fsdir + '.h5'  # XXX .h5 assumed for HDF5
        with tb.open_file(fname, 'r') as f:
            dp = imp.get('datapath')  # specifies a path in HDF5
            try:
                # Validate that the datapath exists and is a Leaf node
                f.get_node(f.root, dp, 'Leaf')
            except tb.NoSuchNodeError:
                raise RuntimeError(
                    'HDF5 file does not have a dataset in %r' % dp)
        dd = HDF5DataDescriptor(fname, dp)
        return blaze.array(dd)
    elif tp == 'npy':
        import numpy as np
        use_memmap = imp.get('memmap', False)
        if use_memmap:
            arr = np.load(fsdir + '.npy', 'r')
        else:
            arr = np.load(fsdir + '.npy')
        arr = nd.array(arr)
        arr = blaze.array(arr)
        ds = datashape.dshape(ds_str)
        if not compatible_array_dshape(arr, ds):
            raise RuntimeError(
                ('NPY file for blaze catalog path %r ' +
                 'has the wrong datashape (%r instead of ' +
                 '%r)') % (dir, arr.dshape, ds))
        return arr
    elif tp == 'py':
        ds = datashape.dshape(ds_str)
        # The script is run with the following globals,
        # and should put the loaded array in a global
        # called 'result'.
        gbl = {
            'catconf': conf,   # Catalog configuration object
            'impdata': imp,    # Import data from the .array file
            'catpath': dir,    # Catalog path
            'fspath': fsdir,   # Equivalent filesystem path
            'dshape': ds,      # Datashape the result should have
        }
        if py2help.PY2:
            execfile(fsdir + '.py', gbl, gbl)
        else:
            with open(fsdir + '.py') as f:
                code = compile(f.read(), fsdir + '.py', 'exec')
            exec(code, gbl, gbl)
        arr = gbl.get('result', None)
        if arr is None:
            raise RuntimeError(
                ('Script for blaze catalog path %r did not ' +
                 'return anything in "result" variable') % (dir,))
        elif not isinstance(arr, blaze.Array):
            raise RuntimeError(
                ('Script for blaze catalog path %r returned ' +
                 'wrong type of object (%r instead of ' +
                 'blaze.Array)') % (dir, type(arr)))
        if not compatible_array_dshape(arr, ds):
            raise RuntimeError(
                ('Script for blaze catalog path %r returned ' +
                 'array with wrong datashape (%r instead of ' +
                 '%r)') % (dir, arr.dshape, ds))
        return arr
    else:
        raise ValueError(
            ('Unsupported array type %r from ' +
             'blaze catalog entry %r') % (tp, dir))
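# For reference, a sketch of the '.array' description files this loader
# consumes. The YAML keys ('type', 'import', 'datashape', 'datapath',
# 'headers') are exactly the ones read by the code above; the file names,
# field names, and values are made up for illustration.
#
#   # mydata.array -- HDF5-backed entry; no 'datashape' needed, the
#   # descriptor gets it from the dataset itself
#   type: hdf5
#   import:
#       datapath: /g/a2
#
#   # points.array -- CSV-backed entry; 'datashape' types the parsed rows
#   # and 'headers' makes the loader skip the first line
#   type: csv
#   import:
#       headers: true
#   datashape: "3, { x : int32; y : float64 }"
#
# A 'py' entry instead runs a sibling <name>.py script with the globals
# listed in the code above (catconf, impdata, catpath, fspath, dshape) and
# expects it to leave a blaze.Array in a global named 'result', e.g.:
#
#   result = blaze.array([1, 2, 3])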