Ejemplo n.º 1
0
 def test_descriptor_setitem(self):
     """Values assigned through a mode='a' descriptor can be read back."""
     desc = HDF5_DDesc(self.hdf5_file, '/g/a2', mode='a')
     self.assertEqual(desc.dshape, datashape.dshape('2 * 3 * int64'))

     # Assign a single element, then read the same element back.
     desc[1, 2] = 10
     element = ddesc_as_py(desc[1, 2])
     self.assertEqual(element, 10)

     # Assign a whole row, then read the same row back.
     desc[1] = [10, 11, 12]
     row = ddesc_as_py(desc[1])
     self.assertEqual(row, [10, 11, 12])
Ejemplo n.º 2
0
 def test_descriptor_getitem_types(self):
     """Indexing '/g/a2' yields DyND_DDesc objects holding the stored data."""
     desc = HDF5_DDesc(self.hdf5_file, '/g/a2')
     self.assertEqual(desc.dshape, datashape.dshape('2 * 3 * int64'))
     # Each index is paired with the Python value expected at that position;
     # every lookup must come back wrapped in a DyND_DDesc.
     cases = (
         (0, [1, 2, 3]),
         ((1, 2), 1),
     )
     for index, expected in cases:
         self.assertTrue(isinstance(desc[index], DyND_DDesc))
         self.assertEqual(ddesc_as_py(desc[index]), expected)
Ejemplo n.º 3
0
 def test_descriptor_append(self):
     """A record appended to '/t1' is readable at index 2."""
     desc = HDF5_DDesc(self.hdf5_file, '/t1', mode='a')
     record_shape = datashape.dshape(
         '2 * { f0 : int32, f1 : int64, f2 : float64 }')
     self.assertEqual(desc.dshape, record_shape)

     desc.append([(10, 11, 12)])

     expected = {'f0': 10, 'f1': 11, 'f2': 12.}
     appended = ddesc_as_py(desc[2])
     # Compare field by field against the values that were appended.
     matches = []
     for field in expected:
         matches.append(appended[field] == expected[field])
     self.assertEqual(matches, [True, True, True])
Ejemplo n.º 4
0
 def test_descriptor_iter_types(self):
     """Iterating '/a1' yields DyND_DDesc (and DDesc) items, row by row."""
     desc = HDF5_DDesc(self.hdf5_file, '/a1')
     self.assertEqual(desc.dshape, datashape.dshape('2 * 3 * int32'))
     rows = []
     for item in desc:
         # Every item must be both the concrete and the base descriptor type.
         for expected_type in (DyND_DDesc, DDesc):
             self.assertTrue(isinstance(item, expected_type))
         rows.append(ddesc_as_py(item))
     self.assertEqual(rows, [[1, 2, 3], [4, 5, 6]])
def load_blaze_subcarray(conf, cdir, subcarray):
    """Load one dataset of an HDF5 file as a blaze array.

    Parameters
    ----------
    conf
        Not used by this function.
    cdir
        Object whose ``fname`` attribute is the path of the HDF5 file.
    subcarray
        Path of the dataset inside the HDF5 file, relative to its root.

    Returns
    -------
    The result of ``blaze.array`` applied to an ``HDF5_DDesc`` for the
    requested dataset.

    Raises
    ------
    RuntimeError
        If the file has no leaf node at ``subcarray``.
    """
    import tables as tb
    from blaze.datadescriptor import HDF5_DDesc
    with tb.open_file(cdir.fname, 'r') as f:
        try:
            # Only the existence of the leaf matters here; the node itself
            # is not needed because HDF5_DDesc reopens the file by name.
            f.get_node(f.root, subcarray, 'Leaf')
        except tb.NoSuchNodeError:
            # Report the path that was actually requested.
            raise RuntimeError(
                'HDF5 file does not have a dataset in %r' % (subcarray,))
        dd = HDF5_DDesc(cdir.fname, subcarray)
    return blaze.array(dd)
Ejemplo n.º 6
0
 def test_basic_object_type(self):
     """HDF5_DDesc is a DDesc subclass and exposes the '/a1' contents."""
     self.assertTrue(issubclass(HDF5_DDesc, DDesc))
     desc = HDF5_DDesc(self.hdf5_file, '/a1')
     # The instance must satisfy the base descriptor type as well.
     self.assertTrue(isinstance(desc, DDesc))
     contents = ddesc_as_py(desc)
     self.assertEqual(contents, [[1, 2, 3], [4, 5, 6]])
def load_blaze_array(conf, dir):
    """Loads a blaze array from the catalog configuration and catalog path

    The filesystem location ``fsdir`` is obtained from
    ``conf.get_fsdir(dir)``.  The file ``fsdir + '.array'`` is parsed as
    YAML and must provide the keys ``type`` and ``import``; ``datashape``
    is optional.  The ``type`` value selects the loader:

    * ``csv``  -- reads ``fsdir + '.csv'``, skipping the first row when
      ``import['headers']`` is true.
    * ``json`` -- parses ``fsdir + '.json'`` with the given datashape.
    * ``hdf5`` -- opens ``fsdir + '.h5'`` and wraps the dataset named by
      ``import['datapath']``.
    * ``npy``  -- loads ``fsdir + '.npy'`` (memory-mapped read-only when
      ``import['memmap']`` is true) and checks it against the datashape.
    * ``py``   -- executes ``fsdir + '.py'``, which must leave a
      ``blaze.Array`` in a global named ``result``.

    Raises
    ------
    RuntimeError
        If the ``.array`` file is missing, the HDF5 dataset does not
        exist, or a loaded array has the wrong type or datashape.
    ValueError
        If ``type`` is not one of the values listed above.
    """
    # This is a temporary hack, need to transition to using the
    # deferred data descriptors for various formats.
    fsdir = conf.get_fsdir(dir)
    if not path.isfile(fsdir + '.array'):
        raise RuntimeError('Could not find blaze array description file %r' %
                           (fsdir + '.array'))
    with open(fsdir + '.array') as f:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects on older PyYAML releases, and newer
        # releases require the Loader argument.  Consider yaml.safe_load if
        # catalog files can come from an untrusted source.
        arrmeta = yaml.load(f)
    tp = arrmeta['type']
    imp = arrmeta['import']
    ds_str = arrmeta.get('datashape')  # optional. HDF5 does not need that.

    if tp == 'csv':
        with open(fsdir + '.csv', 'r') as f:
            rd = csv.reader(f)
            if imp.get('headers', False):
                # Skip the header line
                next(rd)
            dat = list(rd)
        arr = nd.array(dat, ndt.type(ds_str))[:]
        return blaze.array(arr)
    elif tp == 'json':
        arr = nd.parse_json(ds_str, nd.memmap(fsdir + '.json'))
        return blaze.array(arr)
    elif tp == 'hdf5':
        import tables as tb
        from blaze.datadescriptor import HDF5_DDesc
        fname = fsdir + '.h5'  # XXX .h5 assumed for HDF5
        with tb.open_file(fname, 'r') as f:
            dp = imp.get('datapath')  # specifies a path in HDF5
            try:
                # Existence check only; HDF5_DDesc reopens the file by name.
                f.get_node(f.root, dp, 'Leaf')
            except tb.NoSuchNodeError:
                raise RuntimeError(
                    'HDF5 file does not have a dataset in %r' % (dp,))
            dd = HDF5_DDesc(fname, dp)
        return blaze.array(dd)
    elif tp == 'npy':
        import numpy as np
        use_memmap = imp.get('memmap', False)
        if use_memmap:
            # Second positional argument of np.load is mmap_mode.
            arr = np.load(fsdir + '.npy', 'r')
        else:
            arr = np.load(fsdir + '.npy')
        arr = nd.array(arr)
        arr = blaze.array(arr)
        ds = datashape.dshape(ds_str)
        if not matches_datashape_pattern(arr.dshape, ds):
            # The message has three placeholders: the catalog path, the
            # datashape found, and the datashape expected.
            raise RuntimeError(
                ('NPY file for blaze catalog path %r ' +
                 'has the wrong datashape (%r instead of ' + '%r)') %
                (dir, arr.dshape, ds))
        return arr
    elif tp == 'py':
        ds = datashape.dshape(ds_str)
        # The script is run with the following globals,
        # and should put the loaded array in a global
        # called 'result'.
        gbl = {
            'catconf': conf,  # Catalog configuration object
            'impdata': imp,  # Import data from the .array file
            'catpath': dir,  # Catalog path
            'fspath': fsdir,  # Equivalent filesystem path
            'dshape': ds  # Datashape the result should have
        }
        # NOTE(review): this executes the catalog's script with full
        # interpreter privileges; catalog contents must be trusted.
        if py2help.PY2:
            execfile(fsdir + '.py', gbl, gbl)
        else:
            with open(fsdir + '.py') as f:
                code = compile(f.read(), fsdir + '.py', 'exec')
                exec(code, gbl, gbl)
        arr = gbl.get('result', None)
        if arr is None:
            raise RuntimeError(
                ('Script for blaze catalog path %r did not ' +
                 'return anything in "result" variable') % (dir,))
        elif not isinstance(arr, blaze.Array):
            # Two placeholders: the catalog path and the offending type.
            raise RuntimeError(
                ('Script for blaze catalog path %r returned ' +
                 'wrong type of object (%r instead of ' + 'blaze.Array)') %
                (dir, type(arr)))
        if not matches_datashape_pattern(arr.dshape, ds):
            # Three placeholders: path, datashape found, datashape expected.
            raise RuntimeError(
                ('Script for blaze catalog path %r returned ' +
                 'array with wrong datashape (%r instead of ' + '%r)') %
                (dir, arr.dshape, ds))
        return arr
    else:
        raise ValueError(
            ('Unsupported array type %r from ' + 'blaze catalog entry %r') %
            (tp, dir))