Beispiel #1
0
 def test_array(self):
     """Build an array with ``fromiter`` and check its dtype and 1-d shape.

     The dtype must equal the one derived from ``self.ds`` and the single
     dimension must have ``self.count`` elements.
     """
     built = fromiter(self.gen(), self.ds, self.p)
     got_shape, got_dtype = to_numpy(built.datashape)
     _, want_dtype = to_numpy(self.ds)
     self.assertEqual(got_dtype, want_dtype)
     self.assertEqual(len(got_shape), 1)
     self.assertEqual(got_shape[0], self.count)
Beispiel #2
0
 def test_array(self):
     """Verify that ``fromiter`` produces a 1-d array of the expected dtype.

     Compares the dtype of the result against the dtype obtained from
     ``self.ds`` and checks that the only dimension equals ``self.count``.
     """
     result = fromiter(self.gen(), self.ds, self.p)
     result_shape, result_dtype = to_numpy(result.datashape)
     _unused_shape, expected_dtype = to_numpy(self.ds)
     self.assertEqual(result_dtype, expected_dtype)
     self.assertEqual(len(result_shape), 1)
     self.assertEqual(result_shape[0], self.count)
Beispiel #3
0
def run_test(in_memory, args):
    """Time a chunked blir dot-product expression against plain numpy.

    Opens the first four arrays named in ``_persistent_array_names``,
    evaluates ``(x + y) . (2*z + 2*w)`` once through ``chunked_eval`` and
    once through ``np.dot``, and prints both results and their timings.

    Parameters
    ----------
    in_memory : bool
        When true, each opened array is first rebuilt with
        ``blaze.array(..., params=blaze.params(clevel=9))``.
    args : container of str
        If it contains ``"print_expr"``, ``expr.gen_blir()[1]`` is printed.
    """
    T = Terminal

    print("opening blaze arrays...")
    x = blaze.open(_persistent_array_names[0])
    y = blaze.open(_persistent_array_names[1])
    z = blaze.open(_persistent_array_names[2])
    w = blaze.open(_persistent_array_names[3])
    shape, _ = blaze.to_numpy(x.datashape)
    nelements = shape[0]
    print("***nelements:" + " " + str(nelements))

    if in_memory:
        print("getting an in-memory version of blaze arrays...")
        params = blaze.params(clevel=9)
        convert_start = time()
        x = blaze.array(x[:], params=params)
        y = blaze.array(y[:], params=params)
        z = blaze.array(z[:], params=params)
        w = blaze.array(w[:], params=params)
        print("conversion to blaze in-memory: %.3f"
              % (time() - convert_start))

    print("datashape is:" + " " + str(x.datashape))

    print("evaluating expression with blir...")
    expr = (T(x) + T(y)).dot(
        T(2.0) * T(z) + T(2.0) * T(w))

    if "print_expr" in args:
        print(expr.gen_blir()[1])

    blir_start = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    blir_elapsed = time() - blir_start
    print("blir chunked result is : %s in %f s" % (result_ce, blir_elapsed))
    print("***blir time: %.3f" % blir_elapsed)

    # Same computation with plain numpy: materialise the operands first.
    numpy_convert_start = time()
    x = x[:]
    y = y[:]
    z = z[:]
    w = w[:]
    print("conversion to numpy in-memory: %.3f"
          % (time() - numpy_convert_start))

    print("evaluating expression with numpy...")
    numpy_start = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    numpy_elapsed = time() - numpy_start

    print("numpy result is : %s in %f s" % (result_np, numpy_elapsed))
    print("***numpy time: %.3f" % numpy_elapsed)

    print("**** %d, %.5f, %.5f" % (nelements, blir_elapsed, numpy_elapsed))
Beispiel #4
0
def run_test(in_memory, args):
    """Benchmark ``chunked_eval`` on a blir expression versus ``np.dot``.

    The four operands are opened from the names at indices 0-3 of
    ``_persistent_array_names``.  The expression ``(x + y) . (2*z + 2*w)``
    is evaluated with blir in chunks of 50000 and then with numpy; results
    and elapsed times are printed.

    Parameters
    ----------
    in_memory : bool
        If true, convert the opened arrays with ``blaze.array`` using
        ``blaze.params(clevel=9)`` before evaluating.
    args : container of str
        ``'print_expr'`` in `args` makes the function print
        ``expr.gen_blir()[1]``.
    """
    T = Terminal

    print('opening blaze arrays...')
    names = _persistent_array_names
    x = blaze.open(names[0])
    y = blaze.open(names[1])
    z = blaze.open(names[2])
    w = blaze.open(names[3])
    shape, _dtype = blaze.to_numpy(x.datashape)
    print("***nelements:" + " " + str(shape[0]))

    if in_memory:
        print('getting an in-memory version of blaze arrays...')
        params = blaze.params(clevel=9)
        tic = time()
        x = blaze.array(x[:], params=params)
        y = blaze.array(y[:], params=params)
        z = blaze.array(z[:], params=params)
        w = blaze.array(w[:], params=params)
        toc = time()
        print("conversion to blaze in-memory: %.3f" % (toc - tic))

    print('datashape is:' + ' ' + str(x.datashape))

    print('evaluating expression with blir...')
    expr = (T(x) + T(y)).dot(T(2.0) * T(z)
                             + T(2.0) * T(w))

    if 'print_expr' in args:
        print(expr.gen_blir()[1])

    tic = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_blir = time() - tic
    print('blir chunked result is : %s in %f s' % (result_ce, t_blir))
    print('***blir time: %.3f' % t_blir)

    # Pull everything into numpy before timing the reference computation.
    tic = time()
    x = x[:]
    y = y[:]
    z = z[:]
    w = w[:]
    toc = time()
    print("conversion to numpy in-memory: %.3f" % (toc - tic))

    print('evaluating expression with numpy...')
    tic = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    t_numpy = time() - tic

    print('numpy result is : %s in %f s' % (result_np, t_numpy))
    print('***numpy time: %.3f' % t_numpy)

    print('**** %d, %.5f, %.5f' % (shape[0], t_blir, t_numpy))
def chunked_dot(a, b, chunk_size=1024):
    """Accumulate ``np.dot`` over matching slices of `a` and `b`.

    Parameters
    ----------
    a : object exposing ``datashape`` and slice indexing
        The first entry of the shape returned by
        ``blaze.to_numpy(a.datashape)`` is the number of elements walked.
    b : sliceable
        Sliced with exactly the same ``[offset:offset+chunk_size]`` ranges
        as `a`.
    chunk_size : int
        Number of leading elements per slice.  Must be at least 1.

    Returns
    -------
    The sum of the per-slice ``np.dot`` results; ``0.0`` if `a` has a
    leading length of zero.

    Raises
    ------
    ValueError
        If `chunk_size` is smaller than 1.
    """
    # A non-positive step would never move `offset` past `total_size`,
    # so the loop below would spin forever; reject it up front.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    a_shape, _ = blaze.to_numpy(a.datashape)
    total_size = a_shape[0]
    accum = 0.0
    offset = 0

    while offset < total_size:
        stop = offset + chunk_size
        accum += np.dot(a[offset:stop], b[offset:stop])
        offset = stop

    return accum
Beispiel #6
0
def create_persistent_arrays(elements, clevel):
    """Create one persistent array per name in ``_persistent_array_names``.

    Each array holds ``np.sin`` sampled at `elements` evenly spaced points
    between 0 and ``10 * math.pi`` and is written through
    ``_create_persistent_array(name, data, clevel)``.

    Parameters
    ----------
    elements : int or str
        Number of elements; used to build the datashape string
        ``"<elements>, float64"``.
    clevel
        Passed through unchanged to ``_create_persistent_array``.

    Returns
    -------
    None.  If the datashape cannot be built, a message is printed and
    nothing is created.
    """
    dshape = str(elements) + ", float64"

    try:
        dshape = blaze.dshape(dshape)
    except Exception:
        # `elements` may be an int, so convert before concatenating;
        # otherwise reporting the error would itself raise TypeError.
        print(str(elements) + " is not a valid size for the arrays")
        return

    # The shape does not depend on the array name: compute it once.
    shape, _ = blaze.to_numpy(dshape)
    for name in _persistent_array_names:
        # First create a numpy container (a fresh one for every array)
        n = np.sin(np.linspace(0, 10 * math.pi, shape[0]))
        _create_persistent_array(name, n, clevel)
Beispiel #7
0
def create_persistent_arrays(elements, clevel):
    """Write a sine-wave array for every name in ``_persistent_array_names``.

    The data for each array is ``np.sin`` evaluated on `elements` evenly
    spaced points from 0 to ``10 * math.pi``; it is handed to
    ``_create_persistent_array(name, data, clevel)``.

    Parameters
    ----------
    elements : int or str
        Array length, used to build the datashape ``'<elements>, float64'``.
    clevel
        Forwarded as-is to ``_create_persistent_array``.

    Returns
    -------
    None.  When ``blaze.dshape`` rejects the datashape string, a message is
    printed and the function returns without creating anything.
    """
    dshape = str(elements) + ', float64'

    try:
        dshape = blaze.dshape(dshape)
    except Exception:
        # str() keeps the error report itself from failing with TypeError
        # when `elements` is not a string.
        print(str(elements) + ' is not a valid size for the arrays')
        return

    # Loop-invariant: every array shares the same shape.
    shape, _ = blaze.to_numpy(dshape)
    for name in _persistent_array_names:
        # First create a numpy container (a fresh one for every array)
        n = np.sin(np.linspace(0, 10 * math.pi, shape[0]))
        _create_persistent_array(name, n, clevel)
Beispiel #8
0
def evaluate(expression, chunk_size, vm="python", out_flavor="blaze", user_dict=None, **kwargs):
    """
    evaluate(expression,
             chunk_size,
             vm='python',
             out_flavor='blaze',
             user_dict=None,
             **kwargs)

    Evaluate an `expression` and return the result.

    Parameters
    ----------
    expression : string
        A string forming an expression, like '2*a+3*b'.  The names it uses
        ('a' and 'b' here) are looked up in `user_dict`.
    chunk_size
        Forwarded unchanged to `_eval_blocks`.  Not used when every
        operand is a scalar.
    vm : string
        The virtual machine to be used in computations.  It can be
        'numexpr' or 'python'.  The default is 'python'.
    out_flavor : string
        The flavor for the `out` object.  It can be 'blaze' (the default)
        or 'numpy'.
    user_dict : dict, optional
        A user-provided dictionary where the variables in expression can
        be found by name.  Values exposing a ``datashape`` attribute are
        treated as blaze arrays, anything else as a scalar.  Defaults to a
        fresh empty dict.  With ``vm='python'`` and scalar-only operands
        the dict is used as the globals of ``eval``, which may add a
        ``__builtins__`` entry to it.
    kwargs : list of parameters or dictionary
        Forwarded to `_eval_blocks`.

    Returns
    -------
    out
        With scalar-only operands, whatever ``eval`` or
        ``numexpr.evaluate`` returns; otherwise the result of
        `_eval_blocks`.

    Raises
    ------
    ValueError
        If `vm` or `out_flavor` is not one of the accepted values, or if
        two array operands differ in length.

    """

    if vm not in ("numexpr", "python"):
        raise ValueError("`vm` must be either 'numexpr' or 'python'")

    if out_flavor not in ("blaze", "numpy"):
        raise ValueError("`out_flavor` must be either 'blaze' or 'numpy'")

    # A fresh dict per call: eval() below writes into its globals mapping,
    # so a shared mutable default would leak state between calls.
    operands = {} if user_dict is None else user_dict

    # Gather info about sizes and lengths
    typesize, vlen = 0, 1
    for var in operands.values():
        if not hasattr(var, "datashape"):
            # scalar detection
            continue
        # blaze arrays
        shape, dtype = blaze.to_numpy(var.datashape)
        typesize += dtype.itemsize
        lvar = shape[0]
        if vlen > 1 and vlen != lvar:
            raise ValueError("arrays must have the same length")
        vlen = lvar

    if typesize == 0:
        # All scalars
        if vm == "python":
            # NOTE: eval() runs arbitrary code; `expression` must come
            # from a trusted source.
            return eval(expression, operands)
        import numexpr

        return numexpr.evaluate(expression, local_dict=operands)

    # NOTE(review): chunk_size is passed positionally as the second
    # argument; confirm this matches the _eval_blocks signature in use.
    return _eval_blocks(expression, chunk_size, operands, vlen, typesize, vm, out_flavor, **kwargs)
Beispiel #9
0
def evaluate(expression,
             chunk_size,
             vm='python',
             out_flavor='blaze',
             user_dict=None,
             **kwargs):
    """
    evaluate(expression,
             chunk_size,
             vm='python',
             out_flavor='blaze',
             user_dict=None,
             **kwargs)

    Evaluate an `expression` and return the result.

    Parameters
    ----------
    expression : string
        A string forming an expression, like '2*a+3*b'.  The names it uses
        ('a' and 'b' here) are looked up in `user_dict`.
    chunk_size
        Forwarded unchanged to `_eval_blocks`.  Not used when every
        operand is a scalar.
    vm : string
        The virtual machine to be used in computations.  It can be
        'numexpr' or 'python'.  The default is 'python'.
    out_flavor : string
        The flavor for the `out` object.  It can be 'blaze' (the default)
        or 'numpy'.
    user_dict : dict, optional
        A user-provided dictionary where the variables in expression can
        be found by name.  Values exposing a ``datashape`` attribute are
        treated as blaze arrays, anything else as a scalar.  Defaults to a
        fresh empty dict.  With ``vm='python'`` and scalar-only operands
        the dict is used as the globals of ``eval``, which may add a
        ``__builtins__`` entry to it.
    kwargs : list of parameters or dictionary
        Forwarded to `_eval_blocks`.

    Returns
    -------
    out
        With scalar-only operands, whatever ``eval`` or
        ``numexpr.evaluate`` returns; otherwise the result of
        `_eval_blocks`.

    Raises
    ------
    ValueError
        If `vm` or `out_flavor` is not one of the accepted values, or if
        two array operands differ in length.

    """

    if vm not in ('numexpr', 'python'):
        raise ValueError("`vm` must be either 'numexpr' or 'python'")

    if out_flavor not in ('blaze', 'numpy'):
        raise ValueError("`out_flavor` must be either 'blaze' or 'numpy'")

    # Never share one dict between calls: eval() below writes into the
    # mapping it receives as globals.
    if user_dict is None:
        user_dict = {}
    operands = user_dict

    # Gather info about sizes and lengths
    typesize, vlen = 0, 1
    for var in operands.values():
        if not hasattr(var, "datashape"):
            # scalar detection
            continue
        # blaze arrays
        shape, dtype = blaze.to_numpy(var.datashape)
        typesize += dtype.itemsize
        lvar = shape[0]
        if vlen > 1 and vlen != lvar:
            raise ValueError("arrays must have the same length")
        vlen = lvar

    if typesize == 0:
        # All scalars
        if vm == "python":
            # NOTE: eval() runs arbitrary code; `expression` must come
            # from a trusted source.
            return eval(expression, operands)
        import numexpr
        return numexpr.evaluate(expression, local_dict=operands)

    # NOTE(review): chunk_size is passed positionally as the second
    # argument; confirm this matches the _eval_blocks signature in use.
    return _eval_blocks(expression, chunk_size, operands, vlen, typesize, vm,
                        out_flavor, **kwargs)
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor,
                 **kwargs):
    """Perform the evaluation in blocks."""

    # Compute the optimal block size (in elements)
    # The next is based on experiments with bench/ctable-query.py
    if vm == "numexpr":
        # If numexpr, make sure that operands fits in L3 chache
        bsize = 2**20  # 1 MB is common for L3
    else:
        # If python, make sure that operands fits in L2 chache
        bsize = 2**17  # 256 KB is common for L2
    bsize //= typesize
    # Evaluation seems more efficient if block size is a power of 2
    bsize = 2 ** (int(math.log(bsize, 2)))
    if vlen < 100*1000:
        bsize //= 8
    elif vlen < 1000*1000:
        bsize //= 4
    elif vlen < 10*1000*1000:
        bsize //= 2
    # Protection against too large atomsizes
    if bsize == 0:
        bsize = 1

    vars_ = {}
    # Get temporaries for vars
    maxndims = 0
    for name in vars.iterkeys():
        var = vars[name]
        if hasattr(var, "datashape"):
            shape, dtype = blaze.to_numpy(var.datashape)
            ndims = len(shape) + len(dtype.shape)
            if ndims > maxndims:
                maxndims = ndims

    for i in xrange(0, vlen, bsize):
        # Get buffers for vars
        for name in vars.iterkeys():
            var = vars[name]
            if hasattr(var, "datashape"):
                shape, dtype = blaze.to_numpy(var.datashape)
                vars_[name] = var[i:i+bsize]
            else:
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = eval(expression, None, vars_)
        else:
            import numexpr
            res_block = numexpr.evaluate(expression, local_dict=vars_)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor == "blaze":
                nrows = kwargs.pop('expectedlen', vlen)
                result = blaze.array(res_block, **kwargs)
            else:
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:bsize] = res_block
        else:
            if scalar or dim_reduction:
                result += res_block
            elif out_flavor == "blaze":
                result.append(res_block)
            else:
                result[i:i+bsize] = res_block

    # if isinstance(result, blaze.Array):
    #     result.flush()
    if scalar:
        return result[()]
    return result