Example #1
def ones(dshape, caps={'efficient-write': True}, storage=None):
    """Create an array and fill it with ones.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.

    storage : Storage instance
        A Storage object with the necessary info for data storage.

    Returns
    -------
    out : a concrete blaze array.

    """
    dshape = _normalize_dshape(dshape)
    storage = _storage_convert(storage)

    if storage is not None:
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.ones(shape, dt,
                                        rootdir=storage.path))
    elif 'efficient-write' in caps:
        # TODO: Handle var dimension properly (raise exception?)
        dyndarr = nd.empty(str(dshape))
        dyndarr[...] = True  # True broadcasts to 1 across numeric dtypes
        dd = DyNDDataDescriptor(dyndarr)
    elif 'compress' in caps:
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.ones(shape, dt))
    else:
        raise ValueError("unsupported capabilities: %r" % caps)
    return Array(dd)
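A minimal usage sketch for the constructor above (hypothetical calls; it assumes the enclosing module exposes ones and a Storage class, as the parameters suggest):

# In-memory array: the default caps take the DyND 'efficient-write' path
a = ones('10 * float64')
# Persistent array: a Storage instance routes the data through BLZ on disk
p = ones('10 * float64', storage=Storage('ones.blz'))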
Example #3
def dataset_from_dshape(file, datapath, ds, **kwargs):
    dtype = varlen_dtype(to_numpy_dtype(ds))
    if datashape.var not in list(ds):
        shape = to_numpy(ds)[0]
    elif datashape.var not in list(ds)[1:]:
        shape = (0,) + to_numpy(ds.subshape[0])[0]
    else:
        raise ValueError("Don't know how to handle varlen nd shapes")

    if shape:
        kwargs['chunks'] = kwargs.get('chunks', True)
        kwargs['maxshape'] = kwargs.get('maxshape', (None,) + shape[1:])

    kwargs2 = keyfilter(h5py_attributes.__contains__, kwargs)
    return file.require_dataset(datapath, shape=shape, dtype=dtype, **kwargs2)
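A quick sketch of the shape logic above (hypothetical datashapes; only a var in an inner dimension is rejected):

# '5 * 3 * int32'    -> shape (5, 3); chunks=True, maxshape=(None, 3)
# 'var * 3 * int32'  -> shape (0, 3); chunks=True, maxshape=(None, 3)
# '3 * var * int32'  -> ValueError (varlen in an inner dimension)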
Example #4
def ones(dshape, ddesc=None):
    """Create an array and fill it with ones.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    """
    dshape = _normalize_dshape(dshape)

    if ddesc is None:
        ddesc = DyND_DDesc(nd.ones(str(dshape), access='rw'))
        return Array(ddesc)
    if isinstance(ddesc, BLZ_DDesc):
        shape, dt = to_numpy(dshape)
        ddesc.blzarr = blz.ones(
            shape, dt, rootdir=ddesc.path, mode=ddesc.mode, **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        arr = nd.empty(str(dshape))
        arr[...] = True  # fill with ones; True broadcasts to 1
        obj = nd.as_numpy(arr)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations
    return Array(ddesc)
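A hedged usage sketch for the descriptor-based variant (the BLZ_DDesc constructor arguments are assumptions inferred from the attributes used above):

a = ones('10 * float64')                              # in-memory DyND descriptor
b = ones('10 * float64',
         ddesc=BLZ_DDesc(path='ones.blz', mode='w'))  # persistent BLZ container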
Example #5
def empty(dshape, caps={'efficient-write': True}, storage=None):
    """Create an array with uninitialized data.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.

    storage : Storage instance
        A Storage object with the necessary info for data storage.

    Returns
    -------
    out : a concrete blaze array.

    """
    dshape = _normalize_dshape(dshape)
    storage = _storage_convert(storage)

    if storage is not None:
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.zeros(shape, dt,
                                         rootdir=storage.path))
    elif 'efficient-write' in caps:
        dd = DyNDDataDescriptor(nd.empty(str(dshape)))
    elif 'compress' in caps:
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.zeros(shape, dt))
    else:
        raise ValueError("unsupported capabilities: %r" % caps)
    return Array(dd)
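Both persistent branches fall back to blz.zeros, presumably because BLZ offers no uninitialized constructor; only the DyND path returns truly uninitialized memory. A short sketch (same assumed names as in Example #1):

e = empty('5 * 2 * float32')                            # uninitialized DyND buffer
z = empty('5 * 2 * float32', storage=Storage('e.blz'))  # zero-filled BLZ container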
Example #8
def interpret(func, env, args, storage=None, **kwds):
    assert len(args) == len(func.args)

    # Make a copy, since we're going to mutate our IR!
    func, _ = copy_function(func)

    # If it's a BLZ output, we want an interpreter that streams
    # the processing through in chunks
    if storage is not None:
        if len(func.type.restype.shape) == 0:
            raise TypeError('Require an array, not a scalar, for outputting to BLZ')
        env['stream-outer'] = True
        result_ndim = env['result-ndim'] = len(func.type.restype.shape)
    else:
        # Convert any persistent inputs to memory
        # TODO: should stream the computation in this case
        for i, arg in enumerate(args):
            if isinstance(arg._data, BLZDataDescriptor):
                args[i] = arg[:]

    # Update environment with dynd type information
    dynd_types = dict((arg, get_dynd_type(array))
                      for arg, array in zip(func.args, args)
                      if isinstance(array._data, DyNDDataDescriptor))
    env['dynd-types'] = dynd_types

    # Lift ckernels
    func, env = run_pipeline(func, env, run_time_passes)

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result
    else:
        res_shape, res_dt = datashape.to_numpy(func.type.restype)
        dim_size = operator.index(res_shape[0])
        row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
        chunk_size = min(max(1, (1024*1024) // row_size), dim_size)
        # Evaluate by streaming the outermost dimension,
        # and using the BLZ data descriptor's append
        dst_dd = BLZDataDescriptor(blz.zeros((0,)+res_shape[1:], res_dt,
                                             rootdir=storage.path))
        # Loop through all the chunks
        for chunk_start in range(0, dim_size, chunk_size):
            # Tell the interpreter which chunk size to use (last
            # chunk might be smaller)
            chunk_size = min(chunk_size, dim_size - chunk_start)
            # Evaluate the chunk
            args_chunk = [arg[chunk_start:chunk_start+chunk_size]
                            if len(arg.dshape.shape) == result_ndim
                            else arg for arg in args]
            values = dict(zip(func.args, args_chunk))
            interp = CKernelChunkInterp(values, chunk_size, result_ndim)
            visit(interp, func)
            chunk = interp.result._data.dynd_arr()
            dst_dd.append(chunk)
        return blaze.Array(dst_dd)
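The chunking above targets roughly 1 MiB of result rows per chunk. A worked example of that arithmetic (plain Python, illustrative numbers):

row_size = 8 * 100                 # e.g. 100 float64 values per row
dim_size = 1000000                 # outermost dimension of the result
chunk_size = min(max(1, (1024 * 1024) // row_size), dim_size)
print(chunk_size)                  # 1310 rows, about 1 MiB per chunk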
Example #9
def compute_up(expr, data, **kwargs):
    shape, dtype = to_numpy(expr.dshape)
    if shape:
        result = np.empty(shape=shape, dtype=dtype)
        for n, v in zip(expr.names, expr.values):
            result[n] = compute(axify(v, expr.axis, expr.keepdims), data)
        return result
    else:
        return tuple(compute(axify(v, expr.axis), data) for v in expr.values)
Example #12
def test_summary_on_ndarray_with_axis():
    for axis in [0, 1, (1, 0)]:
        expr = summary(total=a.sum(), min=a.min(), axis=axis)
        result = compute(expr, ax)

        shape, dtype = to_numpy(expr.dshape)
        expected = np.empty(shape=shape, dtype=dtype)
        expected['total'] = ax.sum(axis=axis)
        expected['min'] = ax.min(axis=axis)

        assert eq(result, expected)
Example #13
def append(self, values):
    """Append a list of values."""
    shape, dtype = datashape.to_numpy(self.dshape)
    values_arr = np.array(values, dtype=dtype)
    shape_vals = values_arr.shape
    if len(shape_vals) < len(shape):
        shape_vals = (1,) + shape_vals
    if len(shape_vals) != len(shape):
        raise ValueError("shape of values is not compatible")
    # Now, do the actual append
    self.blzarr.append(values_arr.reshape(shape_vals))
    self.blzarr.flush()
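A hedged sketch of the reshape logic above (assuming a descriptor whose dshape maps to the numpy shape (n, 2); a single row gains a leading dimension of 1 before the BLZ append):

desc.append([1, 2])            # shape (2,) is promoted to (1, 2)
desc.append([[3, 4], [5, 6]])  # shape (2, 2) already matches and passes through
desc.append([[[1, 2]]])        # shape (1, 1, 2) raises ValueError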
Example #15
def append(self, values):
    """Append a list of values."""
    shape, dtype = datashape.to_numpy(self.dshape)
    values_arr = np.array(values, dtype=dtype)
    shape_vals = values_arr.shape
    if len(shape_vals) < len(shape):
        shape_vals = (1,) + shape_vals
    if len(shape_vals) != len(shape):
        raise ValueError("shape of values is not compatible")
    # Now, do the actual append
    with tb.open_file(self.filename, mode='a') as f:
        h5arr = f.get_node(f.root, self.datapath)
        h5arr.append(values_arr.reshape(shape_vals))
Example #16
def append(self, values):
    """Append a list of values."""
    shape, dtype = datashape.to_numpy(self.dshape)
    values_arr = np.array(values, dtype=dtype)
    shape_vals = values_arr.shape
    if len(shape_vals) < len(shape):
        shape_vals = (1,) + shape_vals
    if len(shape_vals) != len(shape):
        raise ValueError("shape of values is not compatible")
    # Now, do the actual append
    with tb.open_file(self.path, mode=self.mode) as f:
        dset = f.get_node(self.datapath)
        dset.append(values_arr.reshape(shape_vals))
Example #17
def compute_down(expr, data, **kwargs):
    """ Compute expressions on H5Py datasets by operating on chunks

    This uses blaze.expr.split to break a full-array-computation into a
    per-chunk computation and a on-aggregate computation.

    This uses blaze.partition to pick out chunks from the h5py dataset, uses
    compute(numpy) to compute on each chunk and then uses blaze.partition to
    aggregate these (hopefully smaller) intermediate results into a local
    numpy array.  It then performs a second operation (again given by
    blaze.expr.split) on this intermediate aggregate

    The expression must contain some sort of Reduction.  Both the intermediate
    result and the final result are assumed to fit into memory
    """
    leaf = expr._leaves()[0]
    if not any(isinstance(node, Reduction) for node in path(expr, leaf)):
        raise MDNotImplementedError()

    # Compute chunksize (this should be improved)
    chunksize = kwargs.get('chunksize', data.chunks)

    # Split expression into per-chunk and on-aggregate pieces
    chunk = Symbol('chunk', DataShape(*(chunksize + (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    # Create numpy array to hold intermediate aggregate
    shape, dtype = to_numpy(agg.dshape)
    intermediate = np.empty(shape=shape, dtype=dtype)

    # Compute partitions
    data_partitions = partitions(data, chunksize=chunksize)
    int_partitions = partitions(intermediate, chunksize=chunk_expr.shape)

    # For each partition, compute chunk->chunk_expr
    # Insert into intermediate
    # This could be parallelized
    for d, i in zip(data_partitions, int_partitions):
        chunk_data = partition_get(data, d, chunksize=chunksize)
        result = compute(chunk_expr, {chunk: chunk_data})
        partition_set(intermediate, i, result, chunksize=chunk_expr.shape)

    # Compute on the aggregate
    return compute(agg_expr, {agg: intermediate})
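A hedged end-to-end sketch of how this is reached (import paths and the dataset layout are assumptions; blaze.compute dispatches to the compute_down above when handed a chunked h5py dataset):

import h5py
from blaze import symbol, compute

with h5py.File('data.h5', 'a') as f:
    dset = f.require_dataset('x', shape=(1000000,), dtype='f8', chunks=(10000,))
    x = symbol('x', '1000000 * float64')
    total = compute(x.sum(), dset)  # reduced chunk by chunk, then aggregated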
Example #19
def compute_down(expr, data, map=None, **kwargs):
    """ Compute expressions on H5Py datasets by operating on chunks

    This uses blaze.expr.split to break a full-array-computation into a
    per-chunk computation and a on-aggregate computation.

    This uses blaze.partition to pick out chunks from the h5py dataset, uses
    compute(numpy) to compute on each chunk and then uses blaze.partition to
    aggregate these (hopefully smaller) intermediate results into a local
    numpy array.  It then performs a second operation (again given by
    blaze.expr.split) on this intermediate aggregate

    The expression must contain some sort of Reduction.  Both the intermediate
    result and the final result are assumed to fit into memory
    """
    map = _get_map(map)

    leaf = expr._leaves()[0]
    if not any(isinstance(node, Reduction) for node in path(expr, leaf)):
        raise MDNotImplementedError()

    # Compute chunksize (this should be improved)
    chunksize = kwargs.get('chunksize', data.chunks)

    # Split expression into per-chunk and on-aggregate pieces
    chunk = symbol('chunk', DataShape(*(chunksize + (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    # Create numpy array to hold intermediate aggregate
    shape, dtype = to_numpy(agg.dshape)
    intermediate = np.empty(shape=shape, dtype=dtype)

    # Compute partitions
    source_parts = list(partitions(data, chunksize=chunksize, keepdims=True))
    target_parts = list(partitions(intermediate, chunksize=chunk_expr.shape,
                                   keepdims=True))

    list(map(
        curry(compute_chunk, data, intermediate, chunk, chunk_expr),
        zip(source_parts, target_parts)
    ))

    # Compute on the aggregate
    return compute(agg_expr, {agg: intermediate}, return_type='native')
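Relative to Example #17, the new map hook lets the per-chunk computations run through any map-like callable, so a process pool can be dropped in. A hedged sketch (expr and dset stand for a reduction expression and a chunked h5py dataset; whether _get_map accepts a pool's bound map is an assumption):

from multiprocessing import Pool

with Pool(4) as pool:
    result = compute_down(expr, dset, map=pool.map)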
Example #20
def interpret(func, env, storage=None, **kwds):
    args = env['runtime.arglist']

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result
    else:
        result_ndim = env['result-ndim']

        res_shape, res_dt = datashape.to_numpy(func.type.restype)
        dim_size = operator.index(res_shape[0])
        row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
        chunk_size = min(max(1, (1024 * 1024) // row_size), dim_size)
        # Evaluate by streaming the outermost dimension,
        # and using the BLZ data descriptor's append
        dst_dd = BLZDataDescriptor(
            blz.zeros((0, ) + res_shape[1:], res_dt, rootdir=storage.path))
        # Loop through all the chunks
        for chunk_start in range(0, dim_size, chunk_size):
            # Tell the interpreter which chunk size to use (last
            # chunk might be smaller)
            chunk_size = min(chunk_size, dim_size - chunk_start)
            # Evaluate the chunk
            args_chunk = [
                arg[chunk_start:chunk_start + chunk_size]
                if len(arg.dshape.shape) == result_ndim else arg
                for arg in args
            ]
            values = dict(zip(func.args, args_chunk))
            interp = CKernelChunkInterp(values, chunk_size, result_ndim)
            visit(interp, func)
            chunk = interp.result._data.dynd_arr()
            dst_dd.append(chunk)

        return blaze.Array(dst_dd)
Example #21
def interpret(func, env, ddesc=None, **kwds):
    args = env['runtime.arglist']

    if ddesc is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result
    else:
        result_ndim = env['result-ndim']

        res_shape, res_dt = datashape.to_numpy(func.type.restype)
        dim_size = operator.index(res_shape[0])
        row_size = ndt.type(str(func.type.restype.subarray(1))).default_data_size
        chunk_size = min(max(1, (1024*1024) // row_size), dim_size)
        # Evaluate by streaming the outermost dimension,
        # and using the BLZ data descriptor's append
        ddesc.blzarr = blz.zeros((0,)+res_shape[1:], res_dt,
                                 rootdir=ddesc.path, mode=ddesc.mode)
        # Loop through all the chunks
        for chunk_start in range(0, dim_size, chunk_size):
            # Tell the interpreter which chunk size to use (last
            # chunk might be smaller)
            chunk_size = min(chunk_size, dim_size - chunk_start)
            # Evaluate the chunk
            args_chunk = [arg[chunk_start:chunk_start+chunk_size]
                            if len(arg.dshape.shape) == result_ndim
                            else arg for arg in args]
            values = dict(zip(func.args, args_chunk))
            interp = CKernelChunkInterp(values, chunk_size, result_ndim)
            visit(interp, func)
            chunk = interp.result.ddesc.dynd_arr()
            ddesc.append(chunk)

        return blaze.Array(ddesc)
Example #23
def create_np_ndarray(_, dshape=None, **kwargs):
    shape, dtype = datashape.to_numpy(dshape)
    return np.empty(shape=shape, dtype=dtype)
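A minimal sketch (assuming the datashape and numpy packages; the ignored first argument is presumably the resource being created against):

from datashape import dshape

arr = create_np_ndarray(None, dshape=dshape('10 * 2 * float64'))
print(arr.shape, arr.dtype)  # (10, 2) float64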
Example #25
def _to_numpy(ds):
    res = to_numpy(ds)
    res = res if type(res) is tuple else ((), to_numpy_dtype(ds))
    return res
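A short demonstration (the record case matches the test in Example #26 below; the fallback covers dshapes for which to_numpy returns a bare dtype rather than a tuple):

from datashape import dshape

print(_to_numpy(dshape('10 * int32')))              # ((10,), dtype('int32'))
print(_to_numpy(dshape('{x: int32, y: float32}')))  # ((), dtype([('x', '<i4'), ('y', '<f4')]))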
Example #26
def test_to_numpy_fields(self):
    import numpy as np
    ds = datashape.dshape('{x: int32, y: float32}')
    shape, dt = datashape.to_numpy(ds)
    self.assertEqual(shape, ())
    self.assertEqual(dt, np.dtype([('x', 'int32'), ('y', 'float32')]))
Example #27
def test_to_numpy_fails():
    ds = var * int32
    with pytest.raises(TypeError):
        to_numpy(ds)
    with pytest.raises(TypeError):
        to_numpy(Option(int32))
Example #28
def _eval_blocks(expression, vars, vlen, rowsize, vm, **kwargs):
    """Perform the evaluation in blocks."""

    # Compute the optimal block size (in elements)
    # The next is based on experiments, but YMMV
    if vm == "numexpr":
        # If numexpr, make sure that operands fit in L3 chache
        bsize = 2**20  # 1 MB is common for L3
    else:
        # If python, make sure that operands fit in L2 chache
        bsize = 2**17  # 256 KB is common for L2
    bsize //= rowsize
    # Evaluation seems more efficient if block size is a power of 2
    bsize = 2 ** int(math.log(bsize, 2))
    if vlen < 100*1000:
        bsize //= 8
    elif vlen < 1000*1000:
        bsize //= 4
    elif vlen < 10*1000*1000:
        bsize //= 2
    # Protection against too large rowsizes
    if bsize == 0:
        bsize = 1

    vars_ = {}
    # Convert operands into Blaze arrays and get temporaries for vars
    maxndims = 0
    for name in dict_viewkeys(vars):
        var = vars[name]
        if not hasattr(var, "dshape"):
            # Convert sequences into regular Blaze arrays
            vars[name] = var = array(var)
        if hasattr(var, "__len__"):
            ndims = len(var.dshape.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > bsize:
                # Variable is too large; get a container for a chunk
                res_shape, res_dtype = datashape.to_numpy(var.dshape)
                res_shape = list(res_shape)
                res_shape[0] = bsize
                dshape = datashape.from_numpy(res_shape, res_dtype)
                vars_[name] = empty(dshape)

    res_ddesc = kwargs.get('ddesc') is not None

    for i in xrange(0, vlen, bsize):
        # Correction for the block size
        if i+bsize > vlen:
            bsize = vlen - i
        # Get buffers for vars
        for name in dict_viewkeys(vars):
            var = vars[name]
            if hasattr(var, "__len__") and len(var) > bsize:
                vars_[name] = var[i:i+bsize]
            else:
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        # We need array evals
        if vm == "python":
            res_block = eval(expression, vars_)
            dynd_block = blaze_eval(res_block).ddesc.dynd_arr()
        else:
            res_block = numexpr.evaluate(expression, local_dict=vars_)
            # numexpr returns a numpy array, and we need dynd/blaze ones
            dynd_block = nd.array(res_block)
            res_block = array(res_block)

        if i == 0:
            scalar = False
            dim_reduction = False
            # Detection of reduction operations
            if res_block.dshape.shape == ():
                scalar = True
                result = dynd_block
                continue
            elif len(res_block.dshape.shape) < maxndims:
                dim_reduction = True
                result = dynd_block
                continue
            block_shape, block_dtype = datashape.to_numpy(res_block.dshape)
            out_shape = list(block_shape)
            if res_ddesc:
                out_shape[0] = 0
                dshape = datashape.from_numpy(out_shape, block_dtype)
                result = empty(dshape, **kwargs)
                append(result, dynd_block)
            else:
                out_shape[0] = vlen
                dshape = datashape.from_numpy(out_shape, block_dtype)
                result = empty(dshape, **kwargs)
                # The next is a workaround for bug #183
                #result[:bsize] = res_block
                result[:bsize] = dynd_block
        else:
            if scalar:
                result += dynd_block
                result = result.eval()
            elif dim_reduction:
                if len(res_block) < len(result):
                    result[:bsize] += dynd_block
                else:
                    result += dynd_block
                result = result.eval()
            elif res_ddesc:
                append(result, dynd_block)
            else:
                # The next is a workaround for bug #183
                #result[i:i+bsize] = res_block
                result[i:i+bsize] = dynd_block

    # Scalars and dim reductions return a dynd array to work around
    # different issues in Blaze array operations (see #197)
    if isinstance(result, nd.array):
        if scalar:
            return array(result)
        else:
            # If not a scalar, pass the arguments (persistence, etc.)
            return array(result, **kwargs)
    return result
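A worked example of the block-size selection at the top of _eval_blocks (illustrative numbers, numexpr path):

import math

rowsize, vlen = 24, 500 * 1000        # e.g. three float64 fields per row
bsize = (2 ** 20) // rowsize          # 1 MB L3 budget -> 43690 elements
bsize = 2 ** int(math.log(bsize, 2))  # round down to a power of two: 32768
bsize //= 4                           # 100*1000 <= vlen < 1000*1000 -> 8192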