Ejemplo n.º 1
0
 def time_concatenate(self):
     """Run the concatenation selected by `Suite.style`, `Suite.T` times.

     The outcome of every run is stored in `Suite.r`, so the last one is
     what remains there.  Nothing happens when `Suite.style` is not one of
     'numpy', 'concat' or 'bcolz'.
     """
     flavor = Suite.style
     if flavor == 'numpy':
         for _ in xrange(Suite.T):
             Suite.r = numpy.concatenate(Suite.a, 0)
         return
     if flavor == 'concat':
         for _ in xrange(Suite.T):
             Suite.r = concat(Suite.a)
         return
     if flavor == 'bcolz':
         for _ in xrange(Suite.T):
             Suite.r = append(Suite.a, Suite.clevel)
Ejemplo n.º 2
0
def test_numpy():
    """Benchmark the query on a plain NumPy structured array.

    Builds an array from `NR` generated rows, selects the rows matching
    `nquery` and returns their 'f1' and 'f3' fields as an "f8,f8" array.
    """
    enter()
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(int(NR)))
    # `nquery` is evaluated with `eval` in this scope; it presumably refers
    # to the array by the name `t`, so that name must not change.
    t = np.fromiter(generated, dtype=dt)
    after_create()
    matching = t[eval(nquery)]
    pairs = ((rec['f1'], rec['f3']) for rec in matching)
    out = np.fromiter(pairs, dtype="f8,f8")
    after_query()
    return out
Ejemplo n.º 3
0
def test_numpy():
    """Benchmark the query on a plain NumPy structured array.

    Builds an array from `NR` generated rows, selects the rows matching
    `nquery` and returns their 'f1' and 'f3' fields as an "f8,f8" array.
    """
    enter()
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(int(NR)))
    # `nquery` is evaluated with `eval` in this scope; it presumably refers
    # to the array by the name `t`, so that name must not change.
    t = np.fromiter(generated, dtype=dt)
    after_create()
    matching = t[eval(nquery)]
    pairs = ((rec['f1'], rec['f3']) for rec in matching)
    out = np.fromiter(pairs, dtype="f8,f8")
    after_query()
    return out
Ejemplo n.º 4
0
 def test02(self):
     """Check `fetchwhere` when `outcols` selects a single field."""
     nrows = self.N
     records = ((i, i, i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     hits = table.fetchwhere('f1 < f2', outcols=('f1',))
     self.assertEqual(hits.names, ['f1'])
     count, total = len(hits), hits['f1'].sum()
     # Only the first row (i == 0) fails the `f1 < f2` condition
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 5
0
 def test00(self):
     """Check `fetchwhere` when only an expression is given."""
     nrows = self.N
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     hits = table.fetchwhere('f1 < f2')
     count, total = len(hits), hits['f0'].sum()
     # Only the first row (i == 0) fails the `f1 < f2` condition
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 6
0
 def test04(self):
     """Check `fetchwhere` when `out_flavor` asks for a NumPy result."""
     nrows = self.N
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     hits = table.fetchwhere('f1 < f2', out_flavor="numpy")
     self.assertEqual(type(hits), np.ndarray)
     count, total = len(hits), hits['f0'].sum()
     # Only the first row (i == 0) fails the `f1 < f2` condition
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 7
0
 def test03(self):
     """Check `fetchwhere` with both `limit` and `skip` given."""
     nrows, nskip = self.N, 101
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     wanted = nrows - nskip - 2
     hits = table.fetchwhere('f1 < f2', limit=wanted, skip=nskip)
     count, total = len(hits), hits['f0'].sum()
     self.assertEqual(count, wanted)
     # The expected hits are the rows nskip + 1 .. nrows - 2
     self.assertEqual(total, np.arange(nskip + 1, nrows - 1).sum())
Ejemplo n.º 8
0
 def test05(self):
     """Check `fetchwhere` with an expression using outside variables.

     The expression refers to the local `lvar` and the global `GVAR`.
     """
     nrows = self.N
     # Referenced by name from the expression string below; do not rename
     lvar = GVAR
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     hits = table.fetchwhere('(f1 + lvar) < (f2 + GVAR)', out_flavor="numpy")
     self.assertEqual(type(hits), np.ndarray)
     count, total = len(hits), hits['f0'].sum()
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 9
0
 def test00(self):
     """Check `whereblocks` when only an expression is given."""
     nrows = self.N
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f1 < f2'):
         count += len(chunk)
         total += chunk['f0'].sum()
     # Only the first row (i == 0) fails the `f1 < f2` condition
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 10
0
 def test03(self):
     """Check `whereblocks` when `outfields` selects a single field."""
     nrows = self.N
     records = ((i, i, i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f1 < f2', outfields=('f1', )):
         # Every block must carry only the requested field
         self.assertEqual(chunk.dtype.names, ('f1', ))
         count += len(chunk)
         total += chunk['f1'].sum()
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 11
0
 def test07(self):
     """Check `whereblocks` with both `limit` and `skip` given."""
     nrows, nskip = self.N, 101
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     wanted = nrows - nskip - 2
     count = total = 0
     for chunk in table.whereblocks('f1 < f2', limit=wanted, skip=nskip):
         count += len(chunk)
         total += chunk['f0'].sum()
     self.assertEqual(count, wanted)
     # The expected hits are the rows nskip + 1 .. nrows - 2
     self.assertEqual(total, np.arange(nskip + 1, nrows - 1).sum())
Ejemplo n.º 12
0
 def test05(self):
     """Check `whereblocks` when a `limit` is given."""
     nrows, cap = self.N, 101
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f1 < f2', limit=cap):
         count += len(chunk)
         total += chunk['f0'].sum()
     self.assertEqual(count, cap)
     # Gauss summation formula
     self.assertEqual(total, cap * ((cap + 1) / 2))
Ejemplo n.º 13
0
 def test00(self):
     """Check `whereblocks` when only an expression is given."""
     nrows = self.N
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f1 < f2'):
         count += len(chunk)
         total += chunk['f0'].sum()
     # Only the first row (i == 0) fails the `f1 < f2` condition
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 14
0
 def test05(self):
     """Check `whereblocks` when a `limit` is given."""
     nrows, cap = self.N, 101
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f1 < f2', limit=cap):
         count += len(chunk)
         total += chunk['f0'].sum()
     self.assertEqual(count, cap)
     # Gauss summation formula
     self.assertEqual(total, cap * ((cap + 1) / 2))
Ejemplo n.º 15
0
 def test07(self):
     """Check `whereblocks` with both `limit` and `skip` given."""
     nrows, nskip = self.N, 101
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     wanted = nrows - nskip - 2
     count = total = 0
     for chunk in table.whereblocks('f1 < f2', limit=wanted, skip=nskip):
         count += len(chunk)
         total += chunk['f0'].sum()
     self.assertEqual(count, wanted)
     # The expected hits are the rows nskip + 1 .. nrows - 2
     self.assertEqual(total, np.arange(nskip + 1, nrows - 1).sum())
Ejemplo n.º 16
0
 def test03(self):
     """Check `whereblocks` when `outfields` selects a single field."""
     nrows = self.N
     records = ((i, i, i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f1 < f2', outfields=('f1',)):
         # Every block must carry only the requested field
         self.assertEqual(chunk.dtype.names, ('f1',))
         count += len(chunk)
         total += chunk['f1'].sum()
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 17
0
def test_ctable(clevel):
    """Benchmark the query on a bcolz container built with `clevel`.

    Builds the container from `NR` generated rows, runs `squery` asking
    for the 'f1' and 'f3' columns and returns the hits as an "f8,f8"
    NumPy array.
    """
    enter()
    nrows = int(NR)
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(nrows))
    params = bcolz.cparams(clevel, cname=cname)
    tc = bcolz.fromiter(generated, dtype=dt, cparams=params, count=nrows)
    after_create()

    matching = tc.where(squery, 'f1,f3')
    out = np.fromiter((rec for rec in matching), dtype="f8,f8")
    after_query()
    return out
Ejemplo n.º 18
0
 def test08(self):
     """Check `whereblocks` with an expression using outside variables.

     The expression refers to the local `lvar` and the global `GVAR`.
     """
     nrows = self.N
     # Referenced by name from the expression string below; do not rename
     lvar = GVAR
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('(f1 + lvar) < (f2 + GVAR)'):
         count += len(chunk)
         total += chunk['f0'].sum()
     self.assertEqual(count, nrows - 1)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 19
0
def test_ctable(clevel):
    """Benchmark the query on a bcolz container built with `clevel`.

    Builds the container from `NR` generated rows, runs `squery` asking
    for the 'f1' and 'f3' columns and returns the hits as an "f8,f8"
    NumPy array.
    """
    enter()
    nrows = int(NR)
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(nrows))
    params = bcolz.cparams(clevel, cname=cname)
    tc = bcolz.fromiter(generated, dtype=dt, cparams=params, count=nrows)
    after_create()

    matching = tc.where(squery, 'f1,f3')
    out = np.fromiter((rec for rec in matching), dtype="f8,f8")
    after_query()
    return out
Ejemplo n.º 20
0
def test_numexpr():
    """Benchmark the query on a NumPy array, filtering with numexpr.

    Builds an array from `NR` generated rows, lets numexpr evaluate
    `squery` over its columns and returns the 'f1' and 'f3' fields of the
    selected rows as an "f8,f8" array.
    """
    import numexpr as ne

    enter()
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(int(NR)))
    table = np.fromiter(generated, dtype=dt)
    after_create()

    # Expose every column under its field name for the query expression
    map_field = {"f%s" % col: table["f%s" % col] for col in range(NC)}
    selection = ne.evaluate(squery, map_field)
    pairs = ((rec['f1'], rec['f3']) for rec in table[selection])
    out = np.fromiter(pairs, dtype="f8,f8")
    after_query()
    return out
Ejemplo n.º 21
0
def test_numexpr():
    """Benchmark the query on a NumPy array, filtering with numexpr.

    Builds an array from `NR` generated rows, lets numexpr evaluate
    `squery` over its columns and returns the 'f1' and 'f3' fields of the
    selected rows as an "f8,f8" array.
    """
    import numexpr as ne

    enter()
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(int(NR)))
    table = np.fromiter(generated, dtype=dt)
    after_create()

    # Expose every column under its field name for the query expression
    map_field = {"f%s" % col: table["f%s" % col] for col in range(NC)}
    selection = ne.evaluate(squery, map_field)
    pairs = ((rec['f1'], rec['f3']) for rec in table[selection])
    out = np.fromiter(pairs, dtype="f8,f8")
    after_query()
    return out
Ejemplo n.º 22
0
 def test01(self):
     """Check `whereblocks` when a block length (`blen`) is given."""
     nrows = self.N
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f0 <= f1', blen=100):
         size = len(chunk)
         count += size
         # Full blocks hold 100 rows; only the trailing one may be
         # shorter, and then it should hold 0 or 20 rows
         self.assertTrue(size in (0, 20, 100))
         total += chunk['f0'].sum()
     # `f0 <= f1` holds for every row, so nothing is filtered out
     self.assertEqual(count, nrows)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 23
0
 def test01(self):
     """Check `whereblocks` when a block length (`blen`) is given."""
     nrows = self.N
     records = ((i, i * 2., i * 3) for i in xrange(nrows))
     table = bcolz.ctable(np.fromiter(records, dtype='i4,f8,i8'))
     count = total = 0
     for chunk in table.whereblocks('f0 <= f1', blen=100):
         size = len(chunk)
         count += size
         # Full blocks hold 100 rows; only the trailing one may be
         # shorter, and then it should hold 0 or 20 rows
         self.assertTrue(size in (0, 20, 100))
         total += chunk['f0'].sum()
     # `f0 <= f1` holds for every row, so nothing is filtered out
     self.assertEqual(count, nrows)
     # Gauss summation formula
     self.assertEqual(total, (nrows - 1) * (nrows / 2))
Ejemplo n.º 24
0
def test_sqlite():
    """Benchmark the query on an in-memory SQLite table.

    Creates a `bench` table with `NC` real columns, inserts `NR` generated
    rows, and runs the same select twice: once before and once after
    creating indexes.  The connection is always closed before returning,
    even when one of the steps fails.

    Returns
    -------
    out : ndarray
        The rows produced by the indexed query, as an "f8,f8" array.
    """
    enter()
    sqlquery = "(f2>.9) and ((f8>.3) and (f8<.4))"  # the query

    con = sqlite3.connect(":memory:")
    # `with con:` below only commits/rolls back the transaction; the
    # connection itself has to be closed explicitly.
    try:
        # Create table
        fields = "(%s)" % ",".join(["f%d real" % i for i in range(NC)])
        con.execute("create table bench %s" % fields)

        # Insert NR rows of data
        vals = "(%s)" % ",".join(["?" for i in range(NC)])
        with con:
            con.executemany("insert into bench values %s" % vals,
                            (mv + np.random.rand(NC) - mv
                             for i in xrange(int(NR))))
        after_create()

        out = np.fromiter(
            (row for row in con.execute("select f1, f3 from bench where %s" %
                                        sqlquery)),
            dtype="f8,f8")
        after_query("non-indexed")

        # Create indexes
        # NOTE(review): the index called `f2idx` is built on column f8, not
        # f2 -- confirm whether the name or the column is the intended one.
        con.execute("CREATE INDEX f1idx ON bench (f1)")
        con.execute("CREATE INDEX f2idx ON bench (f8)")
        after_create("index")

        out = np.fromiter(
            (row for row in con.execute("select f1, f3 from bench where %s" %
                                        sqlquery)),
            dtype="f8,f8")
        after_query("indexed")
    finally:
        con.close()

    return out
Ejemplo n.º 25
0
def test_sqlite():
    """Benchmark the query on an in-memory SQLite table.

    Creates a `bench` table with `NC` real columns, inserts `NR` generated
    rows, and runs the same select twice: once before and once after
    creating indexes.  The connection is always closed before returning,
    even when one of the steps fails.

    Returns
    -------
    out : ndarray
        The rows produced by the indexed query, as an "f8,f8" array.
    """
    enter()
    sqlquery = "(f2>.9) and ((f8>.3) and (f8<.4))"  # the query

    con = sqlite3.connect(":memory:")
    # `with con:` below only commits/rolls back the transaction; the
    # connection itself has to be closed explicitly.
    try:
        # Create table
        fields = "(%s)" % ",".join(["f%d real" % i for i in range(NC)])
        con.execute("create table bench %s" % fields)

        # Insert NR rows of data
        vals = "(%s)" % ",".join(["?" for i in range(NC)])
        with con:
            con.executemany("insert into bench values %s" % vals,
                            (mv + np.random.rand(NC) - mv for i in
                             xrange(int(NR))))
        after_create()

        out = np.fromiter(
            (row for row in con.execute(
                "select f1, f3 from bench where %s" % sqlquery)),
            dtype="f8,f8")
        after_query("non-indexed")

        # Create indexes
        # NOTE(review): the index called `f2idx` is built on column f8, not
        # f2 -- confirm whether the name or the column is the intended one.
        con.execute("CREATE INDEX f1idx ON bench (f1)")
        con.execute("CREATE INDEX f2idx ON bench (f8)")
        after_create("index")

        out = np.fromiter(
            (row for row in con.execute(
                "select f1, f3 from bench where %s" % sqlquery)),
            dtype="f8,f8")
        after_query("indexed")
    finally:
        con.close()

    return out
Ejemplo n.º 26
0
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor, blen,
                 **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression
        The expression to evaluate; handed to `_eval` (python and dask
        virtual machines) or to `bcolz.numexpr.evaluate`.
    vars : dict
        Maps the names used in `expression` to their operands.  Operands
        that are sequence-like and longer than a block are fed in slices;
        anything else is passed whole.  With the "dask" vm this dict is
        modified in place.
    vlen
        Number of leading-dimension elements to iterate over; also the
        length of the output.
    typesize
        Divisor used to turn the default block size into a block length
        when `blen` is not given.
    vm
        "numexpr", "dask", or anything else for the python machine.
    out_flavor
        "bcolz" or "carray" for a carray result; any other value gives a
        NumPy array.
    blen
        Block length in elements.  A falsy value selects a default derived
        from `vm` and `typesize`.
    **kwargs
        Forwarded to `bcolz.carray` / `bcolz.zeros` when a carray result is
        built ('expectedlen' is consumed here for the non-dask machines).

    Returns
    -------
    A carray, a NumPy array, or a scalar when the expression reduces every
    block to a 0-dim value.

    Notes
    -----
    Results of reductions are combined across blocks with ``+=``.

    NOTE(review): `result` and `scalar` are only bound inside the block
    loop, so `vlen == 0` with a non-dask vm would fail with an unbound
    name -- confirm callers never pass an empty length.
    """

    if not blen:
        # Compute the optimal block size (in elements)
        # The next is based on experiments with bench/ctable-query.py
        # and the 'movielens-bench' repository
        if vm == "numexpr":
            bsize = 2**23
        elif vm == "dask":
            bsize = 2**25
        else:  # python
            bsize = 2**21
        blen = int(bsize / typesize)
        # Protection against too large atomsizes
        if blen == 0:
            blen = 1

    if vm == "dask":
        # The dask machine does not use the block loop below: it wraps the
        # operands in dask arrays and evaluates the whole expression graph.
        if 'da' in vars:
            raise NameError("'da' is reserved as a prefix for dask.array. "
                            "Please use another prefix")
        for name in vars:
            var = vars[name]
            if is_sequence_like(var):
                vars[name] = da.from_array(var,
                                           chunks=(blen, ) + var.shape[1:])
        # Build the expression graph
        vars['da'] = da
        da_expr = _eval(expression, vars)
        if out_flavor in ("bcolz", "carray") and da_expr.shape:
            result = bcolz.zeros(da_expr.shape, da_expr.dtype, **kwargs)
            # Store while compute expression graph
            da.store(da_expr, result)
            return result
        else:
            # Store while compute
            return np.array(da_expr)

    # Check whether we have a re_evaluate() function in numexpr
    re_evaluate = bcolz.numexpr_here and hasattr(bcolz.numexpr, "re_evaluate")

    # Per-block operands, keyed like `vars`
    vars_ = {}
    # Get containers for vars
    maxndims = 0
    for name in vars:
        var = vars[name]
        if is_sequence_like(var):
            # Track the highest operand rank; used later to detect results
            # that lost a dimension
            ndims = len(var.shape) + len(var.dtype.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > blen and hasattr(var, "_getrange"):
                # Preallocate a block-sized buffer that `_getrange` is
                # given on every iteration
                shape = (blen, ) + var.shape[1:]
                vars_[name] = np.empty(shape, dtype=var.dtype)

    for i in xrange(0, vlen, blen):
        # Fill buffers for vars
        for name in vars:
            var = vars[name]
            if is_sequence_like(var) and len(var) > blen:
                if hasattr(var, "_getrange"):
                    if i + blen < vlen:
                        var._getrange(i, blen, vars_[name])
                    else:
                        # Trailing block: plain slicing replaces the buffer
                        vars_[name] = var[i:]
                else:
                    vars_[name] = var[i:i + blen]
            else:
                # Operand fits in one block (or is not a sequence): pass it
                # whole on every iteration
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = _eval(expression, vars_)
        else:
            # Full evaluate() on the first block (or always, when
            # re_evaluate() is unavailable); re_evaluate() afterwards
            if i == 0 or not re_evaluate:
                try:
                    res_block = bcolz.numexpr.evaluate(expression,
                                                       local_dict=vars_)
                except ValueError:
                    # numexpr cannot handle this, so fall back to "python" vm
                    warnings.warn(
                        "numexpr cannot handle this expression: falling back "
                        "to the 'python' virtual machine.  You can choose "
                        "another virtual machine by using the `vm` parameter.")
                    return _eval_blocks(expression, vars, vlen, typesize,
                                        "python", out_flavor, blen, **kwargs)
            else:
                res_block = bcolz.numexpr.re_evaluate(local_dict=vars_)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor in ("bcolz", "carray"):
                nrows = kwargs.pop('expectedlen', vlen)
                result = bcolz.carray(res_block, expectedlen=nrows, **kwargs)
            else:
                # Allocate the whole NumPy output up front and store the
                # first block in it
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:blen] = res_block
        else:
            if scalar or dim_reduction:
                # Reductions are accumulated by addition
                result += res_block
            elif out_flavor in ("bcolz", "carray"):
                result.append(res_block)
            else:
                result[i:i + blen] = res_block

    if isinstance(result, bcolz.carray):
        result.flush()
    if scalar:
        # Unwrap the 0-dim result into a scalar
        return result[()]
    return result
Ejemplo n.º 27
0
# Compression parameters used for the compressed containers below
cparams = bcolz.cparams(clevel)

# x = np.arange(N)
x = np.zeros(N, dtype="f8")
y = x.copy()
z = x.copy()
# Compressed counterparts of x, y and z, plus a table holding the three
cx = bcolz.carray(x, cparams=cparams)
cy = cx.copy()
cz = cx.copy()
ct = bcolz.ctable((cx, cy, cz), names=['x', 'y', 'z'])
# Whole-table read (presumably a structured ndarray, see the section
# header further down -- TODO confirm)
t = ct[:]

print("Starting benchmark now for getting %d elements..." % M)
# Retrieve from a ndarray
# Elements are fetched one at a time, every third index below M
t0 = time()
vals = [x[i] for i in xrange(0, M, 3)]
print("Time for array--> %.3f" % (time() - t0,))
print("vals-->", len(vals))

#bcolz.set_num_threads(bcolz.ncores//2)

# Retrieve from a carray
# Same access pattern as above, so both timings are comparable
t0 = time()
cvals = [cx[i] for i in xrange(0, M, 3)]
#cvals = cx[:M:3][:].tolist()
print("Time for carray--> %.3f" % (time() - t0,))
print("vals-->", len(cvals))
# Both containers must give back the same values
assert vals == cvals

# Retrieve from a structured ndarray
t0 = time()
Ejemplo n.º 28
0
# Problem parameters: defaults when exactly one argument was given,
# otherwise N, K and T are read from the 2nd..4th arguments and the
# optional 5th one is the compression level.
# NOTE(review): with 3 or 4 entries in sys.argv the unpacking below gets
# fewer than three values and raises ValueError -- confirm intended usage.
if len(sys.argv) == 2:
    N, K, T, clevel = (1000000, 10, 3, 1)
else:
    N, K, T = [int(arg) for arg in sys.argv[2:5]]
    if len(sys.argv) > 5:
        clevel = int(sys.argv[5])
    else:
        clevel = 0

# The next datasets allow for very high compression ratios
# (K arrays of N float64 values each)
a = [numpy.arange(N, dtype='f8') for _ in range(K)]
print("problem size: (%d) x %d = 10^%g" % (N, K, math.log10(N * K)))

# Time T repetitions of the concatenation selected by `style`
# (`style` is set outside this fragment; presumably from sys.argv[1])
t = time.time()
if style == 'numpy':
    for _ in xrange(T):
        r = numpy.concatenate(a, 0)
elif style == 'concat':
    for _ in xrange(T):
        r = concat(a)
elif style == 'bcolz':
    for _ in xrange(T):
        r = append(a, clevel)

# Report the average time per repetition
t = time.time() - t
print('time for concat: %.3fs' % (t / T))

# Size in bytes of the result: compressed size for bcolz, raw size otherwise
# NOTE(review): `r` is unbound here when `style` matched none of the
# branches above.
if style == 'bcolz':
    size = r.cbytes
else:
    size = r.size * r.dtype.itemsize
Ejemplo n.º 29
0
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor, blen,
                 **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression
        The expression to evaluate; handed to `_eval` (python and dask
        virtual machines) or to `bcolz.numexpr.evaluate`.
    vars : dict
        Maps the names used in `expression` to their operands.  Operands
        that are sequence-like and longer than a block are fed in slices;
        anything else is passed whole.  With the "dask" vm this dict is
        modified in place.
    vlen
        Number of leading-dimension elements to iterate over; also the
        length of the output.
    typesize
        Divisor used to turn the default block size into a block length
        when `blen` is not given.
    vm
        "numexpr", "dask", or anything else for the python machine.
    out_flavor
        "bcolz" or "carray" for a carray result; any other value gives a
        NumPy array.
    blen
        Block length in elements.  A falsy value selects a default derived
        from `vm` and `typesize`.
    **kwargs
        Forwarded to `bcolz.carray` / `bcolz.zeros` when a carray result is
        built ('expectedlen' is consumed here for the non-dask machines).

    Returns
    -------
    A carray, a NumPy array, or a scalar when the expression reduces every
    block to a 0-dim value.

    Notes
    -----
    Results of reductions are combined across blocks with ``+=``.

    NOTE(review): `result` and `scalar` are only bound inside the block
    loop, so `vlen == 0` with a non-dask vm would fail with an unbound
    name -- confirm callers never pass an empty length.
    """

    if not blen:
        # Compute the optimal block size (in elements)
        # The next is based on experiments with bench/ctable-query.py
        # and the 'movielens-bench' repository
        if vm == "numexpr":
            bsize = 2**23
        elif vm == "dask":
            bsize = 2**25
        else:  # python
            bsize = 2**21
        blen = int(bsize / typesize)
        # Protection against too large atomsizes
        if blen == 0:
            blen = 1

    if vm == "dask":
        # The dask machine does not use the block loop below: it wraps the
        # operands in dask arrays and evaluates the whole expression graph.
        if 'da' in vars:
            raise NameError(
                "'da' is reserved as a prefix for dask.array. "
                "Please use another prefix")
        for name in vars:
            var = vars[name]
            if is_sequence_like(var):
                vars[name] = da.from_array(var, chunks=(blen,) + var.shape[1:])
        # Build the expression graph
        vars['da'] = da
        da_expr = _eval(expression, vars)
        if out_flavor in ("bcolz", "carray") and da_expr.shape:
            result = bcolz.zeros(da_expr.shape, da_expr.dtype, **kwargs)
            # Store while compute expression graph
            da.store(da_expr, result)
            return result
        else:
            # Store while compute
            return np.array(da_expr)

    # Check whether we have a re_evaluate() function in numexpr
    re_evaluate = bcolz.numexpr_here and hasattr(bcolz.numexpr, "re_evaluate")

    # Per-block operands, keyed like `vars`
    vars_ = {}
    # Get containers for vars
    maxndims = 0
    for name in vars:
        var = vars[name]
        if is_sequence_like(var):
            # Track the highest operand rank; used later to detect results
            # that lost a dimension
            ndims = len(var.shape) + len(var.dtype.shape)
            if ndims > maxndims:
                maxndims = ndims
            # Preallocate a block-sized buffer that `_getrange` is given on
            # every iteration
            if len(var) > blen and hasattr(var, "_getrange"):
                    shape = (blen, ) + var.shape[1:]
                    vars_[name] = np.empty(shape, dtype=var.dtype)

    for i in xrange(0, vlen, blen):
        # Fill buffers for vars
        for name in vars:
            var = vars[name]
            if is_sequence_like(var) and len(var) > blen:
                if hasattr(var, "_getrange"):
                    if i+blen < vlen:
                        var._getrange(i, blen, vars_[name])
                    else:
                        # Trailing block: plain slicing replaces the buffer
                        vars_[name] = var[i:]
                else:
                    vars_[name] = var[i:i+blen]
            else:
                # Operand fits in one block (or is not a sequence): pass it
                # whole on every iteration
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = _eval(expression, vars_)
        else:
            # Full evaluate() on the first block (or always, when
            # re_evaluate() is unavailable); re_evaluate() afterwards
            if i == 0 or not re_evaluate:
                try:
                    res_block = bcolz.numexpr.evaluate(expression,
                                                       local_dict=vars_)
                except ValueError:
                    # numexpr cannot handle this, so fall back to "python" vm
                    warnings.warn(
                        "numexpr cannot handle this expression: falling back "
                        "to the 'python' virtual machine.  You can choose "
                        "another virtual machine by using the `vm` parameter.")
                    return _eval_blocks(
                        expression, vars, vlen, typesize, "python",
                        out_flavor, blen, **kwargs)
            else:
                res_block = bcolz.numexpr.re_evaluate(local_dict=vars_)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor in ("bcolz", "carray"):
                nrows = kwargs.pop('expectedlen', vlen)
                result = bcolz.carray(res_block, expectedlen=nrows, **kwargs)
            else:
                # Allocate the whole NumPy output up front and store the
                # first block in it
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:blen] = res_block
        else:
            if scalar or dim_reduction:
                # Reductions are accumulated by addition
                result += res_block
            elif out_flavor in ("bcolz", "carray"):
                result.append(res_block)
            else:
                result[i:i+blen] = res_block

    if isinstance(result, bcolz.carray):
        result.flush()
    if scalar:
        # Unwrap the 0-dim result into a scalar
        return result[()]
    return result
Ejemplo n.º 30
0
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor,
                 **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression
        The expression to evaluate; handed to `_eval` (python machine) or
        to `bcolz.numexpr.evaluate`.
    vars : dict
        Maps the names used in `expression` to their operands.  Operands
        that have a length greater than a block are fed in slices;
        anything else is passed whole.
    vlen
        Number of leading-dimension elements to iterate over; also the
        length of the output and an input to the block-size heuristic.
    typesize
        Divisor used to turn the default block size into a number of
        elements.
    vm
        "numexpr", or "python" for the pure Python machine.
    out_flavor
        "carray" for a carray result; any other value gives a NumPy array.
    **kwargs
        Forwarded to `bcolz.carray` when a carray result is built
        ('expectedlen' is consumed here).

    Returns
    -------
    A carray, a NumPy array, or a scalar when the expression reduces every
    block to a 0-dim value.

    Notes
    -----
    Results of reductions are combined across blocks with ``+=``.

    NOTE(review): `result` and `scalar` are only bound inside the block
    loop, so `vlen == 0` would fail with an unbound name -- confirm
    callers never pass an empty length.
    """

    # Compute the optimal block size (in elements)
    # The next is based on experiments with bench/ctable-query.py
    # and the 'movielens-bench' repository
    if vm == "numexpr":
        bsize = 2**24
    else:
        bsize = 2**22
    bsize //= typesize
    # Evaluation seems more efficient if block size is a power of 2
    bsize = 2 ** (int(math.log(bsize, 2)))
    # Use smaller blocks for shorter inputs
    if vlen < 100*1000:
        bsize //= 8
    elif vlen < 1000*1000:
        bsize //= 4
    elif vlen < 10*1000*1000:
        bsize //= 2
    # Protection against too large atomsizes
    if bsize == 0:
        bsize = 1

    # Per-block operands, keyed like `vars`
    vars_ = {}
    # Get temporaries for vars
    maxndims = 0
    for name in vars:
        var = vars[name]
        # NOTE(review): anything with `__len__` is assumed to also expose
        # `shape` and `dtype` -- confirm no plain lists reach this point.
        if hasattr(var, "__len__"):
            # Track the highest operand rank; used later to detect results
            # that lost a dimension
            ndims = len(var.shape) + len(var.dtype.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > bsize and hasattr(var, "_getrange"):
                # Preallocate a block-sized 1-d buffer that `_getrange` is
                # given on every iteration
                vars_[name] = np.empty(bsize, dtype=var.dtype)

    for i in xrange(0, vlen, bsize):
        # Get buffers for vars
        for name in vars:
            var = vars[name]
            if hasattr(var, "__len__") and len(var) > bsize:
                if hasattr(var, "_getrange"):
                    if i+bsize < vlen:
                        var._getrange(i, bsize, vars_[name])
                    else:
                        # Trailing block: plain slicing replaces the buffer
                        vars_[name] = var[i:]
                else:
                    vars_[name] = var[i:i+bsize]
            else:
                # Operand fits in one block (or has no length): pass it
                # whole on every iteration
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = _eval(expression, vars_)
        else:
            try:
                res_block = bcolz.numexpr.evaluate(expression,
                                                   local_dict=vars_)
            except ValueError:
                # numexpr cannot handle this. Fall back to a pure "python" VM.
                return _eval_blocks(
                    expression, vars, vlen, typesize, "python",
                    out_flavor, **kwargs)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor == "carray":
                nrows = kwargs.pop('expectedlen', vlen)
                result = bcolz.carray(res_block, expectedlen=nrows, **kwargs)
            else:
                # Allocate the whole NumPy output up front and store the
                # first block in it
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:bsize] = res_block
        else:
            if scalar or dim_reduction:
                # Reductions are accumulated by addition
                result += res_block
            elif out_flavor == "carray":
                result.append(res_block)
            else:
                result[i:i+bsize] = res_block

    if isinstance(result, bcolz.carray):
        result.flush()
    if scalar:
        # Unwrap the 0-dim result into a scalar
        return result[()]
    return result
Ejemplo n.º 31
0
from time import time

import numpy as np
from numpy.testing import assert_array_equal

import bcolz
from bcolz.py2help import xrange, izip


N = int(1e6)  # the number of elements in x
clevel = 2  # the compression level

print("Creating inputs with %d elements..." % N)

# Three ranges of N elements each, offset by 0, 1 and 2
# (y and z are not used in this fragment; presumably consumed further down)
x = xrange(N)  # a range object rather than a generator; see alternative below
y = xrange(1, N + 1)
z = xrange(2, N + 2)

print("Starting benchmark now for creating arrays...")
# Create a ndarray
# x = (i for i in xrange(N))    # true iterable
# Time only the construction of the float64 array from x
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print("Time for array--> %.3f" % (time() - t0,))
print("out-->", len(out))

#bcolz.set_num_threads(bcolz.ncores//2)

# Create a carray
#x = (i for i in xrange(N))    # true iterable
Ejemplo n.º 32
0
# Benchmark for assessing the `fromiter()` speed.

from time import time

import numpy as np
from numpy.testing import assert_array_equal

import bcolz
from bcolz.py2help import xrange, izip

N = int(1e6)  # the number of elements in x
clevel = 2  # the compression level

print("Creating inputs with %d elements..." % N)

# Three ranges of N elements each, offset by 0, 1 and 2
# (y and z are not used in this fragment; presumably consumed further down)
x = xrange(N)  # a range object rather than a generator; see alternative below
y = xrange(1, N + 1)
z = xrange(2, N + 2)

print("Starting benchmark now for creating arrays...")
# Create a ndarray
# x = (i for i in xrange(N))    # true iterable
# Time only the construction of the float64 array from x
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print("Time for array--> %.3f" % (time() - t0, ))
print("out-->", len(out))

#bcolz.set_num_threads(bcolz.ncores//2)

# Create a carray
#x = (i for i in xrange(N))    # true iterable