Example #1
0
    def test_descriptor_setitem(self):
        # Assign a single element and then a whole row of '/g/a2', reading
        # each one back through the descriptor after it has been written.
        desc = HDF5DataDescriptor(self.hdf5_file, '/g/a2')
        expected_shape = datashape.dshape('2, 3, int64')
        self.assertEqual(desc.dshape, expected_shape)

        # Scalar assignment
        desc[1, 2] = 10
        stored_scalar = dd_as_py(desc[1, 2])
        self.assertEqual(stored_scalar, 10)

        # Row assignment
        desc[1] = [10, 11, 12]
        stored_row = dd_as_py(desc[1])
        self.assertEqual(stored_row, [10, 11, 12])
Example #2
0
    def test_descriptor_getitem_types(self):
        # Indexing the '/g/a2' descriptor, by row or by single element,
        # should produce DyNDDataDescriptor instances holding the indexed
        # values.
        desc = HDF5DataDescriptor(self.hdf5_file, '/g/a2')
        self.assertEqual(desc.dshape, datashape.dshape('2, 3, int64'))

        # (index, expected python value) pairs, checked in order
        cases = [
            (0, [1, 2, 3]),
            ((1, 2), 1),
        ]
        for index, expected in cases:
            self.assertTrue(isinstance(desc[index], DyNDDataDescriptor))
            self.assertEqual(dd_as_py(desc[index]), expected)
Example #3
0
    def test_descriptor_append(self):
        # Append one record to the table at '/t1' (initially two records)
        # and check, field by field, what is read back at index 2.
        desc = HDF5DataDescriptor(self.hdf5_file, '/t1')
        table_shape = datashape.dshape(
            '2, { f0 : int32; f1 : int64; f2 : float64 }')
        self.assertEqual(desc.dshape, table_shape)

        desc.append([(10, 11, 12)])

        expected = {'f0': 10, 'f1': 11, 'f2': 12.}
        record = dd_as_py(desc[2])
        matches = [(record[field] == expected[field]) for field in expected]
        self.assertEqual(matches, [True, True, True])
Example #4
0
    def test_descriptor_iter_types(self):
        # Iterating over the '/a1' descriptor should yield elements that are
        # both DyNDDataDescriptor and IDataDescriptor instances, one per row.
        desc = HDF5DataDescriptor(self.hdf5_file, '/a1')
        self.assertEqual(desc.dshape, datashape.dshape('2, 3, int32'))

        rows = []
        for item in desc:
            for expected_type in (DyNDDataDescriptor, IDataDescriptor):
                self.assertTrue(isinstance(item, expected_type))
            rows.append(dd_as_py(item))

        self.assertEqual(rows, [[1, 2, 3], [4, 5, 6]])
Example #5
0
def load_blaze_subcarray(conf, cdir, subcarray):
    """Load a dataset stored inside an HDF5 catalog file as a blaze array.

    Parameters
    ----------
    conf
        Catalog configuration object. Not used by this function.
    cdir
        Object whose ``fname`` attribute is the path of the HDF5 file.
    subcarray
        Path of the dataset (leaf node) inside the HDF5 file.

    Returns
    -------
    The result of ``blaze.array`` applied to an ``HDF5DataDescriptor``
    built from ``cdir.fname`` and ``subcarray``.

    Raises
    ------
    RuntimeError
        If the HDF5 file has no leaf node at ``subcarray``.
    """
    import tables as tb
    from blaze.datadescriptor import HDF5DataDescriptor
    with tb.open_file(cdir.fname, 'r') as f:
        try:
            # Only probing that the leaf exists; the node object itself is
            # not needed because the descriptor reopens the file by name.
            f.get_node(f.root, subcarray, 'Leaf')
        except tb.NoSuchNodeError:
            # Report the requested path (the original referenced an
            # undefined name here, which raised NameError instead).
            raise RuntimeError(
                'HDF5 file does not have a dataset in %r' % (subcarray,))
        dd = HDF5DataDescriptor(cdir.fname, subcarray)
    return blaze.array(dd)
Example #6
0
 def test_basic_object_type(self):
     # HDF5DataDescriptor should be an IDataDescriptor both as a class and
     # as an instance, and should expose the contents of '/a1'.
     self.assertTrue(issubclass(HDF5DataDescriptor, IDataDescriptor))

     desc = HDF5DataDescriptor(self.hdf5_file, '/a1')
     # Make sure the right type is returned
     self.assertTrue(isinstance(desc, IDataDescriptor))

     expected = [[1, 2, 3], [4, 5, 6]]
     self.assertEqual(dd_as_py(desc), expected)
Example #7
0
def load_blaze_array(conf, dir):
    """Loads a blaze array from the catalog configuration and catalog path.

    The catalog path is mapped to a filesystem path with
    ``conf.get_fsdir(dir)``. A YAML description file ``<fspath>.array`` is
    read from there; its ``type`` entry selects the loader, its ``import``
    entry holds loader-specific options and its optional ``datashape``
    entry gives the expected datashape.

    Supported ``type`` values and the data file each one reads:

    * ``csv``  -- ``<fspath>.csv``; ``import['headers']`` skips the first row
    * ``json`` -- ``<fspath>.json``
    * ``hdf5`` -- ``<fspath>.h5``; ``import['datapath']`` names the dataset
    * ``npy``  -- ``<fspath>.npy``; ``import['memmap']`` enables memory mapping
    * ``py``   -- ``<fspath>.py``; a script that must leave a ``blaze.Array``
      in a global named ``result``

    Parameters
    ----------
    conf
        Catalog configuration object providing ``get_fsdir``.
    dir
        Catalog path of the array.

    Returns
    -------
    The loaded blaze array.

    Raises
    ------
    RuntimeError
        If the description file is missing, the HDF5 dataset does not
        exist, a ``py`` script yields no result or a non-``blaze.Array``
        result, or an ``npy``/``py`` array fails the datashape check.
    ValueError
        If the ``type`` entry is not one of the supported values.
    """
    # This is a temporary hack, need to transition to using the
    # deferred data descriptors for various formats.
    fsdir = conf.get_fsdir(dir)
    if not path.isfile(fsdir + '.array'):
        raise RuntimeError('Could not find blaze array description file %r' %
                           (fsdir + '.array'))
    with open(fsdir + '.array') as f:
        # NOTE(review): yaml.load without an explicit safe loader can build
        # arbitrary Python objects; consider yaml.safe_load if description
        # files may come from an untrusted source.
        arrmeta = yaml.load(f)
    tp = arrmeta['type']
    imp = arrmeta['import']
    ds_str = arrmeta.get('datashape')  # optional. HDF5 does not need that.

    if tp == 'csv':
        with open(fsdir + '.csv', 'r') as f:
            rd = csv.reader(f)
            if imp.get('headers', False):
                # Skip the header line
                next(rd)
            dat = list(rd)
        arr = nd.array(dat, ndt.type(ds_str))[:]
        return blaze.array(arr)
    elif tp == 'json':
        arr = nd.parse_json(ds_str, nd.memmap(fsdir + '.json'))
        return blaze.array(arr)
    elif tp == 'hdf5':
        import tables as tb
        from blaze.datadescriptor import HDF5DataDescriptor
        fname = fsdir + '.h5'  # XXX .h5 assumed for HDF5
        with tb.open_file(fname, 'r') as f:
            dp = imp.get('datapath')  # specifies a path in HDF5
            try:
                # Only probing that the leaf exists; the node is not used.
                f.get_node(f.root, dp, 'Leaf')
            except tb.NoSuchNodeError:
                raise RuntimeError('HDF5 file does not have a dataset in %r' %
                                   (dp,))
            dd = HDF5DataDescriptor(fname, dp)
        return blaze.array(dd)
    elif tp == 'npy':
        import numpy as np
        use_memmap = imp.get('memmap', False)
        if use_memmap:
            arr = np.load(fsdir + '.npy', 'r')
        else:
            arr = np.load(fsdir + '.npy')
        arr = nd.array(arr)
        arr = blaze.array(arr)
        ds = datashape.dshape(ds_str)
        if not compatible_array_dshape(arr, ds):
            # The message has three placeholders; the catalog path was
            # missing from the arguments, which raised TypeError.
            raise RuntimeError(
                ('NPY file for blaze catalog path %r ' +
                 'has the wrong datashape (%r instead of ' + '%r)') %
                (dir, arr.dshape, ds))
        return arr
    elif tp == 'py':
        ds = datashape.dshape(ds_str)
        # The script is run with the following globals,
        # and should put the loaded array in a global
        # called 'result'.
        gbl = {
            'catconf': conf,  # Catalog configuration object
            'impdata': imp,  # Import data from the .array file
            'catpath': dir,  # Catalog path
            'fspath': fsdir,  # Equivalent filesystem path
            'dshape': ds  # Datashape the result should have
        }
        if py2help.PY2:
            execfile(fsdir + '.py', gbl, gbl)
        else:
            with open(fsdir + '.py') as f:
                code = compile(f.read(), fsdir + '.py', 'exec')
                exec(code, gbl, gbl)
        arr = gbl.get('result', None)
        if arr is None:
            raise RuntimeError(
                ('Script for blaze catalog path %r did not ' +
                 'return anything in "result" variable') % (dir,))
        elif not isinstance(arr, blaze.Array):
            # Two placeholders: catalog path and the offending type.
            raise RuntimeError(
                ('Script for blaze catalog path %r returned ' +
                 'wrong type of object (%r instead of ' + 'blaze.Array)') %
                (dir, type(arr)))
        if not compatible_array_dshape(arr, ds):
            # Three placeholders: catalog path, actual and expected dshape.
            raise RuntimeError(
                ('Script for blaze catalog path %r returned ' +
                 'array with wrong datashape (%r instead of ' + '%r)') %
                (dir, arr.dshape, ds))
        return arr
    else:
        raise ValueError(
            ('Unsupported array type %r from ' + 'blaze catalog entry %r') %
            (tp, dir))