def where(self, condition, user_dict=None):
    """Yield a descriptor for every element of the BLZ array matching `condition`.

    `user_dict` is accepted for interface compatibility but is not used by
    this implementation.
    """
    blz_array = self.blzarr
    # Elements are yielded one by one, so their type drops the leading
    # (iterated) dimension of the container.
    element_type = str(
        datashape.from_numpy(blz_array.shape[1:], blz_array.dtype))
    for match in blz_array.where(condition):
        yield DyND_DDesc(nd.array(match, type=element_type))
Example #2
0
 def where(self, condition, user_dict=None):
     """Generate descriptors for the BLZ elements that satisfy `condition`.

     `user_dict` is part of the signature but is ignored here.
     """
     source = self.blzarr
     # The iteration consumes the first dimension; describe what is left
     item_dshape = datashape.from_numpy(source.shape[1:], source.dtype)
     item_type = str(item_dshape)
     for item in source.where(condition):
         dynd_item = nd.array(item, type=item_type)
         yield DyND_DDesc(dynd_item)
Example #3
0
    def __init__(self, data, dshape=None, metadata=None, layout=None,
            params=None):
        """Build the table from a raw data source.

        Parameters
        ----------
        data : ByteProvider, dict, list or tuple
            The data source.  Any other type raises ``ValueError``.
        dshape : str or datashape, optional
            Datashape to overlay on ``data``.  A string is parsed with
            ``_dshape``.  When falsy, the datashape is inferred from
            ``data`` via ``CTableSource.infer_datashape``.
        metadata : list, optional
            Extra metadata appended to ``NDTable._metaheader``.
        layout : optional
            Explicit layout; when falsy ``self.data.default_layout()`` is used.
        params : optional
            Stored on ``self.params`` and forwarded to ``CTableSource``.

        Raises
        ------
        ValueError
            If ``data`` is not one of the supported source types.
        """

        # Datashape
        # ---------

        if isinstance(dshape, basestring):
            dshape = _dshape(dshape)

        if not dshape:
            # The user just passed in a raw data source, try
            # and infer how it should be laid out or fall
            # back on dynamic types.
            self._datashape = dshape = CTableSource.infer_datashape(data)
        else:
            # The user overlaid their custom dshape on this
            # data, check if it makes sense
            CTableSource.check_datashape(data, given_dshape=dshape)
            self._datashape = dshape

        # Source
        # ------

        # This must be a single if/elif chain: with a standalone ``if`` a
        # ByteProvider fell through to the final ``else`` and was rejected
        # with ValueError even though it had just been accepted.
        if isinstance(data, ByteProvider):
            # NOTE(review): ``self._axes`` is not set on this path -- confirm
            # whether ByteProvider sources are expected to provide axes.
            self.data = data
        elif isinstance(data, dict):
            ct = self.from_dict(data)
            self._axes = data.keys()

            # The datashape of the built container replaces the one
            # computed above.
            dshape = from_numpy(ct.shape, ct.dtype)
            self.data = CTableSource(ct, dshape=dshape, params=params)
            self._datashape = dshape
        elif isinstance(data, (list, tuple)):
            self.data = CTableSource(data, dshape=dshape, params=params)
            # Pull the labels from the datashape
            self._axes = self._datashape[-1].names
        else:
            raise ValueError(
                'Unsupported data source type: %r' % (type(data),))

        # children graph nodes
        self.children = []

        self.space = Space(self.data)

        # Layout
        # ------

        if layout:
            self._layout = layout
        else:
            self._layout = self.data.default_layout()

        # Metadata
        # --------

        self._metadata = NDTable._metaheader + (metadata or [])

        # Parameters
        # ----------
        self.params = params
Example #4
0
 def test_from_numpy_fields(self):
     """A record dtype with default field names maps to matching names/types."""
     import numpy as np
     record_dtype = np.dtype('i4,i8,f8')
     ds = datashape.from_numpy((), record_dtype)
     # NumPy auto-names unnamed fields f0, f1, ...
     self.assertEqual(ds.names, ['f0', 'f1', 'f2'])
     expected_types = [datashape.int32, datashape.int64, datashape.float64]
     self.assertEqual(ds.types, expected_types)
Example #5
0
 def test_from_numpy_fields(self):
     """Check the field names and types produced for a 3-field record dtype."""
     import numpy as np
     ds = datashape.from_numpy((), np.dtype('i4,i8,f8'))
     names = ds.names
     self.assertEqual(names, ['f0', 'f1', 'f2'])
     types = ds.types
     self.assertEqual(
         types,
         [datashape.int32, datashape.int64, datashape.float64])
 def where(self, condition):
     """Iterate over values fulfilling a condition.

     Opens ``self.path`` read-only, looks up ``self.datapath`` and yields one
     ``DyND_DDesc`` per row returned by the node's ``where(condition)``.

     The file is managed by a ``with`` block so that it is closed when the
     generator is exhausted, closed early, or an error is raised; previously
     it was closed only after a complete iteration.
     """
     with tb.open_file(self.path, mode='r') as f:
         dset = f.get_node(self.datapath)
         # Get rid of the leading dimension on which we iterate
         dshape = datashape.from_numpy(dset.shape[1:], dset.dtype)
         for el in dset.where(condition):
             # el[:] materialises the row before it is handed to dynd
             yield DyND_DDesc(nd.array(el[:], type=str(dshape)))
Example #7
0
 def where(self, condition):
     """Iterate over values fulfilling a condition.

     Yields a ``DyND_DDesc`` for each row that the node at ``self.datapath``
     returns from ``where(condition)``.

     The file is closed in a ``finally`` clause, so it is released even when
     the consumer stops iterating early or an exception is raised mid-loop
     (the original closed it only after the loop ran to completion).
     """
     f = tb.open_file(self.path, mode='r')
     try:
         dset = f.get_node(self.datapath)
         # Get rid of the leading dimension on which we iterate
         dshape = datashape.from_numpy(dset.shape[1:], dset.dtype)
         for el in dset.where(condition):
             yield DyND_DDesc(nd.array(el[:], type=str(dshape)))
     finally:
         f.close()
 def __iter__(self):
     """Iterate over the leading dimension of the on-disk node.

     Yields one ``DyND_DDesc`` per element.  Elements exposing an ``nrow``
     attribute are sliced with ``[:]`` before conversion; others are passed
     to dynd as-is.

     The file is held in a ``with`` block so it is closed even if iteration
     is abandoned or fails part-way (the original leaked it in those cases).
     """
     with tb.open_file(self.path, mode='r') as f:
         dset = f.get_node(self.datapath)
         # Get rid of the leading dimension on which we iterate
         dshape = datashape.from_numpy(dset.shape[1:], dset.dtype)
         for el in dset:
             if hasattr(el, "nrow"):
                 yield DyND_DDesc(nd.array(el[:], type=str(dshape)))
             else:
                 yield DyND_DDesc(nd.array(el, type=str(dshape)))
Example #9
0
 def __iter__(self):
     """Yield a ``DyND_DDesc`` for each element along the first dimension.

     The node at ``self.datapath`` is read from ``self.path`` opened
     read-only.  Closing happens in ``finally`` so the file handle is not
     leaked when the generator is closed early or an error occurs while
     iterating.
     """
     f = tb.open_file(self.path, mode='r')
     try:
         dset = f.get_node(self.datapath)
         # Get rid of the leading dimension on which we iterate
         dshape = datashape.from_numpy(dset.shape[1:], dset.dtype)
         for el in dset:
             # Objects with an ``nrow`` attribute are sliced to get their
             # values; anything else is converted directly.
             value = el[:] if hasattr(el, "nrow") else el
             yield DyND_DDesc(nd.array(value, type=str(dshape)))
     finally:
         f.close()
Example #10
0
 def test_ndarray_into_table(self, dt_tb, dt_data):
     """Round-trip `dt_data` through a PyTables target and compare per field."""
     table_dshape = ds.from_numpy(dt_data.shape, dt_data.dtype)
     t = PyTables(dt_tb, '/out', table_dshape)
     try:
         stored = into(t, dt_data, filename=dt_tb, datapath='/out')
         res = into(np.ndarray, stored)
         for field in res.dtype.fields:
             actual = res[field]
             expected = dt_data[field]
             both_datetime = (
                 issubclass(np.datetime64, actual.dtype.type) and
                 issubclass(np.datetime64, expected.dtype.type))
             if both_datetime:
                 # Bring both sides to the same datetime unit first
                 actual = actual.astype('M8[us]')
                 expected = expected.astype('M8[us]')
             assert np.array_equal(actual, expected)
     finally:
         t._v_file.close()
Example #11
0
File: h5py.py Project: MoherX/odo
def discover_h5py_dataset(d):
    """Return the datashape of `d`, reporting object measures as strings.

    Record measures have their ``object_`` fields replaced by ``string``
    through ``record_dshape_replace``.
    """
    full = datashape.from_numpy(d.shape, d.dtype)
    shape = full.shape
    measure = full.measure

    if isrecord(measure):
        fields = list(record_dshape_replace(measure, datashape.object_,
                                            datashape.string))
        return DataShape(*(shape + (datashape.Record(fields),)))

    # NOTE(review): this compares the whole datashape (dimensions included)
    # with ``object_`` rather than just the measure -- confirm that is intended.
    if full == datashape.object_:
        return DataShape(*(shape + (datashape.string,)))
    return full
Example #12
0
File: h5py.py Project: quasiben/odo
def discover_h5py_dataset(d):
    """Discover the datashape of `d`, substituting ``string`` for ``object_``."""
    discovered = datashape.from_numpy(d.shape, d.dtype)
    dims, measure = discovered.shape, discovered.measure

    if not isrecord(measure):
        # NOTE(review): equality is tested on the full datashape, not on
        # ``measure`` -- verify against the intended behaviour.
        if discovered != datashape.object_:
            return discovered
        new_measure = datashape.string
    else:
        replaced = record_dshape_replace(measure, datashape.object_,
                                         datashape.string)
        new_measure = datashape.Record(list(replaced))

    parts = dims + (new_measure, )
    return DataShape(*parts)
Example #13
0
    def __init__(self,
                 path,
                 datapath,
                 mode='r',
                 schema=None,
                 dshape=None,
                 **kwargs):
        """Bind to the dataset at `datapath` in the HDF5 file `path`.

        If the dataset does not exist it is created from `dshape` (or from
        ``'var * ' + str(schema)`` when only `schema` is given), forwarding
        `kwargs` to ``create_dataset``.  If it exists, its datashape is read
        from the file and replaces any `dshape` passed in.

        Raises ``ValueError`` when the dataset is missing and no datashape
        is available to create it.
        """
        self.path = path
        self.datapath = datapath
        self.mode = mode

        if schema and not dshape:
            dshape = 'var * ' + str(schema)

        # TODO: provide sane defaults for kwargs
        # Notably chunks and maxshape
        if dshape:
            dshape = datashape.dshape(dshape)
            shape = dshape.shape
            dtype = datashape.to_numpy_dtype(dshape[-1])
            if shape[0] == datashape.Var():
                # Variable leading dimension: start empty and let it grow
                trailing = shape[1:]
                kwargs['chunks'] = True
                kwargs['maxshape'] = kwargs.get('maxshape',
                                                (None, ) + trailing)
                shape = (0, ) + tuple(map(int, trailing))

        with h5py.File(path, mode) as f:
            existing = f.get(datapath)
            if existing is not None:
                # TODO: test provided dshape against the one found on disk
                # and raise ValueError('Inconsistent datashapes.') on mismatch
                dshape = datashape.from_numpy(existing.shape, existing.dtype)
            elif dshape is None:
                raise ValueError('No dataset or dshape provided')
            else:
                f.create_dataset(datapath, shape, dtype=dtype, **kwargs)

        if self.attributes()['chunks']:
            # is there a better way to do this?
            # Swap the leading dimension of the textual datashape for 'var'
            tail = str(dshape).split(' * ')[1:]
            dshape = datashape.dshape('var * ' + ' * '.join(tail))

        self._dshape = dshape
        self._schema = schema
Example #14
0
 def discover_events(event, **kwargs):
     """Return the datashape of a one-row DataFrame built from `event`.

     The row holds the values of ``event._asdict()`` followed by the event's
     ``startdate``, ``enddate`` and ``duration``.  `kwargs` is unused.
     """
     row = list(event._asdict().values())
     row += [event.startdate, event.enddate, event.duration]
     df = pandas.DataFrame([row], columns=columns)
     n_rows = len(df)
     return from_numpy((n_rows,), df.values.dtype)
Example #15
0
 def discover_events(event, **kwargs):
     """Return the datashape of a single-row DataFrame describing `event`.

     Column names are the Event schema property names followed by
     ``startdate``, ``enddate`` and ``duration``.  `kwargs` is unused.
     """
     # NOTE(review): ``self`` is not a parameter here; presumably it comes
     # from an enclosing scope -- confirm.
     values = list(event._asdict().values())
     extras = [
         event.startdate,
         event.enddate,
         event.duration,
     ]
     schema_columns = list(self.api["schemas"]["Event"]["properties"].keys())
     extra_columns = [
         "startdate",
         "enddate",
         "duration",
     ]
     df = pandas.DataFrame(
         [values + extras],
         columns=schema_columns + extra_columns,
     )
     shape = (len(df),)
     return from_numpy(shape, df.values.dtype)
    def __init__(self, path, datapath, mode='r', schema=None, dshape=None, **kwargs):
        """Attach to (or create) the HDF5 dataset `datapath` stored in `path`.

        A missing dataset is created from `dshape`, which defaults to
        ``'var * ' + str(schema)`` when only `schema` is supplied; `kwargs`
        are forwarded to ``create_dataset``.  For an existing dataset the
        datashape found in the file takes precedence over `dshape`.

        Raises ``ValueError`` if there is neither a dataset nor a datashape.
        """
        self.path, self.datapath, self.mode = path, datapath, mode

        if schema and not dshape:
            dshape = 'var * ' + str(schema)

        # TODO: provide sane defaults for kwargs
        # Notably chunks and maxshape
        if dshape:
            dshape = datashape.dshape(dshape)
            shape = dshape.shape
            dtype = datashape.to_numpy_dtype(dshape[-1])
            if shape[0] == datashape.Var():
                # A 'var' leading dimension is stored as a chunked,
                # initially empty dataset.
                kwargs['chunks'] = True
                default_maxshape = (None,) + shape[1:]
                kwargs['maxshape'] = kwargs.get('maxshape', default_maxshape)
                fixed_dims = tuple(map(int, shape[1:]))
                shape = (0,) + fixed_dims

        with h5py.File(path, mode) as f:
            dset = f.get(datapath)
            if dset is None and dshape is None:
                raise ValueError('No dataset or dshape provided')
            if dset is None:
                f.create_dataset(datapath, shape, dtype=dtype, **kwargs)
            else:
                # TODO: test provided dshape against the dshape on disk and
                # raise ValueError('Inconsistent datashapes.') when they differ
                dshape = datashape.from_numpy(dset.shape, dset.dtype)

        attrs = self.attributes()
        if attrs['chunks']:
            # is there a better way to do this?
            pieces = str(dshape).split(' * ')
            pieces = pieces[1:]
            var_text = 'var * ' + ' * '.join(pieces)
            dshape = datashape.dshape(var_text)

        self._dshape = dshape
        self._schema = schema
Example #17
0
def promote(lhs, rhs):
    """Promote two scalar dshapes to a possibly larger, but compatible type.

    Each argument is first replaced by its ``ty`` attribute when it has one;
    the original arguments are still what is handed to ``optionify``.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    ?int64

    Notes
    -----
    This uses ``numpy.promote_types`` for type promotion logic.  See the numpy
    documentation at
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.promote_types.html
    """
    left = getattr(lhs, 'ty', lhs)
    right = getattr(rhs, 'ty', rhs)
    promoted_dtype = np.promote_types(datashape.to_numpy_dtype(left),
                                      datashape.to_numpy_dtype(right))
    promoted = datashape.from_numpy((), promoted_dtype)
    return optionify(lhs, rhs, promoted)
 def test_ascii_string(self):
     """An 'S7' dtype becomes a 7-character ascii string datashape."""
     actual = from_numpy((2,), np.dtype('S7'))
     expected = dshape('2 * string[7, "ascii"]')
     assert actual == expected
 def test_date(self):
     """datetime64 dtypes with day-or-coarser units map to ``date``."""
     for unit in ('D', 'M', 'Y', 'W'):
         numpy_dtype = np.dtype('M8[%s]' % unit)
         actual = from_numpy((2, ), numpy_dtype)
         assert actual == dshape('2 * date')
Example #20
0
def discover(data):
    """Return the datashape built from `data`'s ``shape`` and ``dtype``."""
    shape = data.shape
    dtype = data.dtype
    return datashape.from_numpy(shape, dtype)
 def test_date(self):
     """Each of the D/M/Y/W datetime64 units is reported as ``date``."""
     units = ('D', 'M', 'Y', 'W')
     for unit in units:
         result = from_numpy((2,), np.dtype('M8[%s]' % unit))
         assert result == dshape('2 * date')
Example #22
0
 def __getitem__(self, mask):
     """Return a new ``Table`` wrapping ``self.data.ca[mask]``."""
     selected = self.data.ca[mask]
     selected_dshape = from_numpy(selected.shape, selected.dtype)
     return Table(CTableSource(selected, dshape=selected_dshape),
                  dshape=selected_dshape)
Example #23
0
def discover(data):
    """Return the datashape of ``data.value``, from its shape and dtype."""
    payload = data.value
    shape, dtype = payload.shape, payload.dtype
    return datashape.from_numpy(shape, dtype)
Example #24
0
File: hdf5.py Project: vitan/blaze
def discover(d):
    """Return `d`'s datashape with 'object' rewritten to 'string'."""
    rendered = str(datashape.from_numpy(d.shape, d.dtype))
    # Plain text substitution: every occurrence of 'object' in the rendered
    # datashape is replaced before it is parsed again.
    rewritten = rendered.replace('object', 'string')
    return dshape(rewritten)
Example #25
0
def discover_dask_array(a, **kwargs):
    """Return the datashape for `a` from its shape and dtype (`kwargs` unused)."""
    shape, dtype = a.shape, a.dtype
    return from_numpy(shape, dtype)
 def __iter__(self):
     """Yield a ``DyND_DDesc`` for each element of the BLZ array."""
     blz_array = self.blzarr
     # Elements lose the leading dimension, which is the one iterated over
     element_type = str(
         datashape.from_numpy(blz_array.shape[1:], blz_array.dtype))
     for element in self.blzarr:
         yield DyND_DDesc(nd.array(element, type=element_type))
Example #27
0
 def __iter__(self):
     """Iterate over the first dimension, wrapping each item in ``DyND_DDesc``."""
     source = self.blzarr
     # Describe a single item: same dtype, shape minus the first dimension
     item_dshape = datashape.from_numpy(source.shape[1:], source.dtype)
     item_type = str(item_dshape)
     for item in self.blzarr:
         dynd_item = nd.array(item, type=item_type)
         yield DyND_DDesc(dynd_item)
 def test_string(self):
     """A 'U7' dtype becomes a 7-character U32 string datashape."""
     result = from_numpy((2, ), np.dtype('U7'))
     assert result == dshape('2 * string[7, "U32"]')
 def test_ascii_string(self):
     """Fixed-width byte strings are reported with the ascii encoding."""
     byte_dtype = np.dtype('S7')
     result = from_numpy((2, ), byte_dtype)
     assert result == dshape('2 * string[7, "ascii"]')
 def test_timedelta(self):
     """Every unit in ``_units`` round-trips into a timedelta datashape."""
     for unit in _units:
         actual = from_numpy((2,), np.dtype('m8[%s]' % unit))
         expected = dshape('2 * timedelta[unit=%r]' % unit)
         assert actual == expected
Example #31
0
def handle(conn, arrname):
    """Obtain an array handle to an existing SciDB array.

    The array is wrapped through `conn` to read its shape and dtype; the
    returned descriptor carries a ``Query`` on `arrname` with no arguments.
    """
    wrapped = conn.wrap_array(arrname)
    array_dshape = from_numpy(wrapped.shape, wrapped.dtype)
    query = Query(arrname, ())
    return SciDBDataDescriptor(array_dshape, query, conn)
Example #32
0
def discover_tables_node(n):
    """Return the datashape derived from node `n`'s shape and dtype."""
    shape, dtype = n.shape, n.dtype
    return datashape.from_numpy(shape, dtype)
Example #33
0
 def dshape(self):
     """Datashape of the BLZ array, recomputed on every call."""
     # Not cached: the BLZ object may change shape between calls
     blz_array = self.blzarr
     shape, dtype = blz_array.shape, blz_array.dtype
     return datashape.from_numpy(shape, dtype)
Example #34
0
def discover(c):
    """Return the datashape for `c`, special-casing its time types.

    ``c.type`` values ``'time64'`` and ``'time32'`` map to ``datetime_`` and
    ``date_``; anything else gets ``subshape[1]`` of the datashape computed
    from `c`'s shape and dtype.
    """
    dshape = from_numpy(c.shape, c.dtype)
    overrides = {
        'time64': datetime_,
        'time32': date_,
    }
    kind = c.type
    # Computed unconditionally, even when an override applies
    fallback = dshape.subshape[1]
    return overrides.get(kind, fallback)
Example #35
0
 def append(self, data):
     """Append `data` to ``self.data.ca`` and refresh the stored datashape."""
     self.data.ca.append(data)
     # The container grew, so recompute the datashape from its new shape
     container = self.data.ca
     self._datashape = from_numpy(container.shape, container.dtype)
Example #36
0
def discover_dask_array(a, **kwargs):
    """Build a datashape from `a`'s ``shape`` and ``dtype``; `kwargs` is ignored."""
    result = from_numpy(a.shape, a.dtype)
    return result
Example #37
0
 def dshape(self):
     """Datashape of the node at ``self.datapath``, read from disk each call."""
     # Not cached: the Array can change its dshape between calls
     with tb.open_file(self.path, mode='r') as h5file:
         node = h5file.get_node(self.datapath)
         return datashape.from_numpy(node.shape, node.dtype)
Example #38
0
 def dshape(self):
     """Return the current datashape of ``self.blzarr``."""
     # Recomputed every time because the BLZ can change the dshape
     return datashape.from_numpy(self.blzarr.shape, self.blzarr.dtype)
Example #39
0
def discover(t):
    """Return the datashape corresponding to `t`'s shape and dtype."""
    shape, dtype = t.shape, t.dtype
    return datashape.from_numpy(shape, dtype)
Example #40
0
def discover_bcolz(c, **kwargs):
    """Return the datashape for `c` from its shape and dtype (`kwargs` unused)."""
    shape = c.shape
    dtype = c.dtype
    return datashape.from_numpy(shape, dtype)
 def test_datetime(self):
     """datetime64 dtypes with sub-day units map to ``datetime``."""
     units = ('h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as')
     for unit in units:
         actual = from_numpy((2,), np.dtype('M8[%s]' % unit))
         assert actual == dshape('2 * datetime')
 def test_int32(self):
     """Both spellings of the 32-bit integer dtype give ``int32``."""
     for spelling in ('int32', 'i4'):
         assert from_numpy((2,), spelling) == dshape('2 * int32')
 def test_timedelta(self):
     """timedelta64 dtypes keep their unit in the resulting datashape."""
     for unit in _units:
         numpy_dtype = np.dtype('m8[%s]' % unit)
         result = from_numpy((2,), numpy_dtype)
         assert result == dshape('2 * timedelta[unit=%r]' % unit)
 def test_struct(self):
     """A two-field structured dtype becomes a record datashape."""
     fields = [('x', '<i4'), ('y', '<i4')]
     actual = from_numpy((2, ), np.dtype(fields))
     expected = dshape('2 * {x: int32, y: int32}')
     assert actual == expected
 def test_string(self):
     """Fixed-width unicode strings are reported with the U32 encoding."""
     unicode_dtype = np.dtype('U7')
     actual = from_numpy((2,), unicode_dtype)
     expected = dshape('2 * string[7, "U32"]')
     assert actual == expected
 def test_datetime(self):
     """Each hour-or-finer datetime64 unit is reported as ``datetime``."""
     for unit in ('h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as'):
         numpy_dtype = np.dtype('M8[%s]' % unit)
         result = from_numpy((2, ), numpy_dtype)
         assert result == dshape('2 * datetime')
 def test_struct(self):
     """Structured dtypes map to a record with the same field names."""
     record_dtype = np.dtype([('x', '<i4'), ('y', '<i4')])
     assert from_numpy((2,), record_dtype) == \
         dshape('2 * {x: int32, y: int32}')
 def test_int32(self):
     """'int32' and its 'i4' alias both yield an int32 datashape."""
     from_name = from_numpy((2, ), 'int32')
     assert from_name == dshape('2 * int32')
     from_code = from_numpy((2, ), 'i4')
     assert from_code == dshape('2 * int32')
Example #49
0
File: hdf5.py Project: B-Rich/blaze
def discover(d):
    """Discover `d`'s datashape, rendering 'object' as 'string'."""
    as_text = str(datashape.from_numpy(d.shape, d.dtype))
    # The substitution is done on the textual form, then parsed back
    return dshape(as_text.replace('object', 'string'))
 def dshape(self):
     """Open the file read-only and return the node's current datashape."""
     # Looked up on each call since the Array can change the dshape
     with tb.open_file(self.path, mode='r') as handle:
         node = handle.get_node(self.datapath)
         shape, dtype = node.shape, node.dtype
         result = datashape.from_numpy(shape, dtype)
     return result
Example #51
0
 def var_dshape(v):
     """Return the datashape built from `v`'s shape and dtype."""
     shape, dtype = v.shape, v.dtype
     return datashape.from_numpy(shape, dtype)
def _eval_blocks(expression, vars, vlen, rowsize, vm, **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression : str
        Expression handed to ``eval`` (when ``vm == "python"``) or to
        ``numexpr.evaluate`` (any other `vm`).  ``eval`` runs arbitrary
        Python, so `expression` must come from a trusted source.
    vars : dict
        Operand name -> operand.  Operands without a ``dshape`` attribute
        are converted with ``array`` and stored back into this dict.
    vlen : int
        Number of leading-dimension elements to evaluate over.
    rowsize : int
        Divisor applied to the byte budget to get the block size in
        elements.
    vm : str
        ``"numexpr"`` selects the larger block budget; ``"python"`` selects
        ``eval`` for the evaluation itself.
    kwargs
        Forwarded to ``empty``/``array`` when building the result.  A
        non-None ``ddesc`` entry makes the result grow by appending blocks
        instead of being preallocated with length `vlen`.

    Returns
    -------
    The assembled result: a Blaze array, built either from preallocated
    storage, by appending blocks, or from an accumulated dynd array for
    scalar / dimension-reducing expressions.
    """
    # NOTE(review): with vlen == 0 the block loop never runs, so ``result``
    # is unbound when it is inspected at the end -- confirm callers never
    # pass an empty length.

    # Compute the optimal block size (in elements)
    # The next is based on experiments, but YMMV
    if vm == "numexpr":
        # If numexpr, make sure that operands fit in L3 cache
        bsize = 2**20  # 1 MB is common for L3
    else:
        # If python, make sure that operands fit in L2 cache
        bsize = 2**17  # 128 KB budget (256 KB is common for L2)
    # Clamp to one element *before* taking the logarithm: when rowsize is
    # larger than the byte budget the quotient is 0 and math.log(0, 2)
    # raises ValueError.
    bsize = max(bsize // rowsize, 1)
    # Evaluation seems more efficient if block size is a power of 2
    bsize = 2 ** (int(math.log(bsize, 2)))
    if vlen < 100*1000:
        bsize //= 8
    elif vlen < 1000*1000:
        bsize //= 4
    elif vlen < 10*1000*1000:
        bsize //= 2
    # Protection against too large rowsizes (the reductions above can
    # bring a small block size down to zero)
    if bsize == 0:
        bsize = 1

    vars_ = {}
    # Convert operands into Blaze arrays and get temporaries for vars
    maxndims = 0
    for name in dict_viewkeys(vars):
        var = vars[name]
        if not hasattr(var, "dshape"):
            # Convert sequences into regular Blaze arrays
            vars[name] = var = array(var)
        if hasattr(var, "__len__"):
            ndims = len(var.dshape.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > bsize:
                # Variable is too large; get a container for a chunk
                res_shape, res_dtype = datashape.to_numpy(var.dshape)
                res_shape = list(res_shape)
                res_shape[0] = bsize
                dshape = datashape.from_numpy(res_shape, res_dtype)
                vars_[name] = empty(dshape)

    if 'ddesc' in kwargs and kwargs['ddesc'] is not None:
        res_ddesc = True
    else:
        res_ddesc = False

    for i in xrange(0, vlen, bsize):
        # Correction for the block size
        if i+bsize > vlen:
            bsize = vlen - i
        # Get buffers for vars
        for name in dict_viewkeys(vars):
            var = vars[name]
            if hasattr(var, "__len__") and len(var) > bsize:
                vars_[name] = var[i:i+bsize]
            else:
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        # We need array evals
        if vm == "python":
            res_block = eval(expression, vars_)
            dynd_block = blaze_eval(res_block).ddesc.dynd_arr()
        else:
            res_block = numexpr.evaluate(expression, local_dict=vars_)
            # numexpr returns a numpy array, and we need dynd/blaze ones
            dynd_block = nd.array(res_block)
            res_block = array(res_block)

        if i == 0:
            # The first block decides how the result is accumulated
            scalar = False
            dim_reduction = False
            # Detection of reduction operations
            if res_block.dshape.shape == ():
                scalar = True
                result = dynd_block
                continue
            elif len(res_block.dshape.shape) < maxndims:
                dim_reduction = True
                result = dynd_block
                continue
            block_shape, block_dtype = datashape.to_numpy(res_block.dshape)
            out_shape = list(block_shape)
            if res_ddesc:
                # Start empty and grow by appending each block
                out_shape[0] = 0
                dshape = datashape.from_numpy(out_shape, block_dtype)
                result = empty(dshape, **kwargs)
                append(result, dynd_block)
            else:
                # Preallocate the full output and fill it slice by slice
                out_shape[0] = vlen
                dshape = datashape.from_numpy(out_shape, block_dtype)
                result = empty(dshape, **kwargs)
                # The next is a workaround for bug #183
                #result[:bsize] = res_block
                result[:bsize] = dynd_block
        else:
            if scalar:
                result += dynd_block
                result = result.eval()
            elif dim_reduction:
                if len(res_block) < len(result):
                    result[:bsize] += dynd_block
                else:
                    result += dynd_block
                result = result.eval()
            elif res_ddesc:
                append(result, dynd_block)
            else:
                # The next is a workaround for bug #183
                #result[i:i+bsize] = res_block
                result[i:i+bsize] = dynd_block

    # Scalars and dim reductions generate dynd array for workaround
    # different issues in Blaze array operations (see #197)
    if isinstance(result, nd.array):
        if scalar:
            return array(result)
        else:
            # If not a scalar pass the arguments (persistency, etc.)
            return array(result, **kwargs)
    return result
def handle(conn, arrname):
    """Obtain an array handle to an existing SciDB array.

    The shape and dtype are read by wrapping the array through `conn`; the
    descriptor returned holds a ``Query`` on `arrname` with no arguments.
    """
    existing = conn.wrap_array(arrname)
    shape, dtype = existing.shape, existing.dtype
    return SciDB_DDesc(from_numpy(shape, dtype), Query(arrname, ()), conn)
Example #54
0
 def dshape(self):
     """Datashape of the node ``self.datapath`` under the file root."""
     # Re-read on every call because the Array can change the dshape
     with tb.open_file(self.filename, mode='r') as h5file:
         node = h5file.get_node(h5file.root, self.datapath)
         return datashape.from_numpy(node.shape, node.dtype)
Example #55
0
def discover(c):
    """Return the datashape for `c`, overriding the 'time64'/'time32' types.

    For any other ``c.type`` the result is ``subshape[1]`` of the datashape
    built from `c`'s shape and dtype.
    """
    dshape = from_numpy(c.shape, c.dtype)
    special_cases = {'time64': datetime_, 'time32': date_}
    type_name = c.type
    # Evaluated in every case, including when a special case matches
    default = dshape.subshape[1]
    return special_cases.get(type_name, default)
 def dshape(self):
     """Datashape of the netCDF4 node at ``self.datapath``, read on each call."""
     # Not cached: the Array can change the dshape
     with netCDF4.Dataset(self.path, mode='r') as dataset:
         node = get_node(dataset, self.datapath)
         return datashape.from_numpy(node.shape, node.dtype)