Example no. 1
def run_test(args):
    T = Terminal

    x = T('x')
    y = T('y')
    z = T('z')
    w = T('w')
    a = T('a')
    b = T('b')
    vm = "python" if "python" in args else "numexpr"
    print "evaluating expression with '%s' vm..." % vm
    expr = (x+y).dot(a*z + b*w)

    print 'opening blaze arrays...'
    x_ = blaze.open(_persistent_array_names[0])
    y_ = blaze.open(_persistent_array_names[1])
    z_ = blaze.open(_persistent_array_names[2])
    w_ = blaze.open(_persistent_array_names[3])
    a_ = 2.0
    b_ = 2.0

    if 'in_memory' in args:
        print 'getting an in-memory version of blaze arrays...'
        params = blaze.params(clevel=0)
        t0 = time()
        x_ = blaze.array(x_[:], params=params)
        y_ = blaze.array(y_[:], params=params)
        z_ = blaze.array(z_[:], params=params)
        w_ = blaze.array(w_[:], params=params)
        print "conversion to blaze in-memory: %.3f" % (time() - t0)

    print 'datashape is:', x_.datashape

    if 'print_expr' in args:
        print expr

    t_ce = time()
    expr_vars = {'x': x_, 'y': y_, 'z': z_, 'w': w_, 'a': a_, 'b': b_, }
    result_ce = expr.eval(expr_vars, params={'vm': vm})
    t_ce = time() - t_ce
    print "'%s' vm result is : %s in %.3f s" % (vm, result_ce, t_ce)
    
    # in numpy...
    print 'evaluating expression with numpy...'
    x_ = x_[:]
    y_ = y_[:]
    z_ = z_[:]
    w_ = w_[:]

    t_np = time()
    result_np = np.dot(x_+y_, a_*z_ + b_*w_)
    t_np = time() - t_np

    print 'numpy result is : %s in %.3f s' % (result_np, t_np)
Example no. 2
def run_test(in_memory, args):
    T = Terminal

    print 'opening blaze arrays...'
    x = blaze.open(_persistent_array_names[0])
    y = blaze.open(_persistent_array_names[1])
    z = blaze.open(_persistent_array_names[2])
    w = blaze.open(_persistent_array_names[3])
    shape, dtype = blaze.to_numpy(x.datashape)
    print "***nelements:", shape[0]

    if in_memory:
        print 'getting an in-memory version of blaze arrays...'
        params = blaze.params(clevel=9)
        t0 = time()
        x = blaze.array(x[:], params=params)
        y = blaze.array(y[:], params=params)
        z = blaze.array(z[:], params=params)
        w = blaze.array(w[:], params=params)
        print "conversion to blaze in-memory: %.3f" % (time() - t0)

    print 'datashape is:', x.datashape

    print 'evaluating expression with blir...'
    expr = (T(x) + T(y)).dot(T(2.0) * T(z) + T(2.0) * T(w))

    if 'print_expr' in args:
        print expr.gen_blir()[1]

    t_ce = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_ce = time() - t_ce
    print 'blir chunked result is : %s in %f s' % (result_ce, t_ce)
    print '***blir time: %.3f' % t_ce

    # in numpy...
    t0 = time()
    x = x[:]
    y = y[:]
    z = z[:]
    w = w[:]
    print "conversion to numpy in-memory: %.3f" % (time() - t0)

    print 'evaluating expression with numpy...'
    t_np = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    t_np = time() - t_np

    print 'numpy result is : %s in %f s' % (result_np, t_np)
    print '***numpy time: %.3f' % t_np

    print '**** %d, %.5f, %.5f' % (shape[0], t_ce, t_np)
Example no. 3
def run_test(in_memory, args):
    T = Terminal

    print "opening blaze arrays..."
    x = blaze.open(_persistent_array_names[0])
    y = blaze.open(_persistent_array_names[1])
    z = blaze.open(_persistent_array_names[2])
    w = blaze.open(_persistent_array_names[3])
    shape, dtype = blaze.to_numpy(x.datashape)
    print "***nelements:", shape[0]

    if in_memory:
        print "getting an in-memory version of blaze arrays..."
        params = blaze.params(clevel=9)
        t0 = time()
        x = blaze.array(x[:], params=params)
        y = blaze.array(y[:], params=params)
        z = blaze.array(z[:], params=params)
        w = blaze.array(w[:], params=params)
        print "conversion to blaze in-memory: %.3f" % (time() - t0)

    print "datashape is:", x.datashape

    print "evaluating expression with blir..."
    expr = (T(x) + T(y)).dot(T(2.0) * T(z) + T(2.0) * T(w))

    if "print_expr" in args:
        print expr.gen_blir()[1]

    t_ce = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_ce = time() - t_ce
    print "blir chunked result is : %s in %f s" % (result_ce, t_ce)
    print "***blir time: %.3f" % t_ce

    # in numpy...
    t0 = time()
    x = x[:]
    y = y[:]
    z = z[:]
    w = w[:]
    print "conversion to numpy in-memory: %.3f" % (time() - t0)

    print "evaluating expression with numpy..."
    t_np = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    t_np = time() - t_np

    print "numpy result is : %s in %f s" % (result_np, t_np)
    print "***numpy time: %.3f" % t_np

    print "**** %d, %.5f, %.5f" % (shape[0], t_ce, t_np)
Example no. 4
def chunkwise_kernel():
    ast, env = compile(source)

    #Array = ca.carray(xrange(25000), rootdir='example1', mode='w',
    #                  dtype='int32', cparams=ca.cparams(clevel=0))
    Array = open('example1', mode='w')
    c = Array.data.ca
    ctx = Context(env)

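    # Run the compiled kernel over each compressed chunk in turn, writing the results back in place.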
    for i in range(c.nchunks):
        chunk = c.chunks[i]
        # read only access
        #x = c.chunks[0][:]
        # write access
        x = view(chunk)

        size = x.strides[0]
        args = (x, size)
        execute(ctx, args, fname='main')

        # this does a _save() behind the scenes
        c.chunks[i] = chunk

    ctx.destroy()

    rts = Runtime(1,2,3)
    rts.join()

    print Array
Example no. 5
 def test_open(self):
     persist = blaze.Persist(self.rooturi)
     a = blaze.ones('0, float64', persist=persist)
     a.append(range(10))
     # Re-open the dataset in URI
     a2 = blaze.open(persist)
     self.assert_(isinstance(a2, blaze.Array))
     self.assertEqual(dd_as_py(a2._data), list(range(10)))
Example no. 6
def run_test(args):
    T = Terminal

    print 'opening blaze arrays...'
    x = blaze.open(_persistent_array_names[0])
    y = blaze.open(_persistent_array_names[1])
    z = blaze.open(_persistent_array_names[2])
    w = blaze.open(_persistent_array_names[3])

    if 'in_memory' in args:
        print 'getting an in-memory version of blaze arrays...'
        params = blaze.params(clevel=9)
        t0 = time()
        x = blaze.array(x, params=params)
        y = blaze.array(y, params=params)
        z = blaze.array(z, params=params)
        w = blaze.array(w, params=params)
        print "conversion to blaze in-memory: %.3f" % (time() - t0)

    print 'datashape is:', x.datashape

    print 'evaluating expression with blir...'
    expr = (T(x)+T(y)).dot(T(2.0)*T(z) + T(2.0)*T(w))

    if 'print_expr' in args:
        print expr.gen_blir()[1]

    t_ce = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_ce = time() - t_ce
    print 'blir chunked result is : %s in %f s' % (result_ce, t_ce)

    # in numpy...
    t0 = time()
    x = x[:]
    y = y[:]
    z = z[:]
    w = w[:]
    print "Conversion to numpy in-memory: %.3f" % (time() - t0)

    print 'evaluating expression with numpy...'
    t_np = time()
    result_np = np.dot(x+y, 2.0*z + 2.0*w)
    t_np = time() - t_np

    print 'numpy result is : %s in %f s' % (result_np, t_np)
Example no. 7
 def test_open(self):
     persist = blaze.Storage(self.rooturi, format="blz")
     a = blaze.ones('0 * float64', storage=persist)
     append(a,range(10))
     # Re-open the dataset in URI
     a2 = blaze.open(persist)
     self.assertTrue(isinstance(a2, blaze.Array))
     self.assertEqual(dd_as_py(a2._data), list(range(10)))
Example no. 8
def run_test(args):
    T = Terminal

    x = T('x')
    y = T('y')
    z = T('z')
    w = T('w')
    a = T('a')
    b = T('b')
    vm = "numexpr" if "numexpr" in args else "python"
    print "evaluating expression with '%s' vm..." % vm
    expr = (x+y).dot(a*z + b*w)

    print 'opening blaze arrays...'
    x_ = blaze.open(_persistent_array_names[0])
    y_ = blaze.open(_persistent_array_names[1])
    z_ = blaze.open(_persistent_array_names[2])
    w_ = blaze.open(_persistent_array_names[3])
    a_ = 2.0
    b_ = 2.0

    print 'datashape is:', x_.datashape

    if 'print_expr' in args:
        print expr

    t_ce = time()
    expr_vars = {'x': x_, 'y': y_, 'z': z_, 'w': w_, 'a': a_, 'b': b_, }
    result_ce = expr.eval(expr_vars, params={'vm': vm})
    t_ce = time() - t_ce
    print "'%s' vm result is : %s in %.3f s" % (vm, result_ce, t_ce)
    
    # in numpy...
    print 'evaluating expression with numpy...'
    x_ = x_[:]
    y_ = y_[:]
    z_ = z_[:]
    w_ = w_[:]

    t_np = time()
    result_np = np.dot(x_+y_, a_*z_ + b_*w_)
    t_np = time() - t_np

    print 'numpy result is : %s in %.3f s' % (result_np, t_np)
Example no. 9
def build_array(array_name, rows):
    if not os.path.exists(array_name):
        ds = 'x, float'

        p = params(clevel=5, storage=array_name)
        t = fromiter((0.1*i for i in xrange(rows)),
                     dshape=ds, params=p)
        t.commit()
    else:
        t = open(array_name)

    return t
Example no. 10
def sliding_window_blz(dirname, window_size):
    X = blz.open(dirname)
    if os.path.exists('result.blz'): shutil.rmtree('result.blz')
    filtered = blz.array([], dshape=X.datashape,
                         params=blz.params(storage='result.blz'))
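    # The first window_size rows use a window starting at row 0; after that the start index advances by one per row.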
    starts = window_size * [0] + range(1, NROWS - window_size + 1)
    for i in range(NROWS):
        start = starts[i]
        partial = (X[start:i + 1]).mean()
        filtered.append([partial])
    filtered.commit()
    return filtered
Example no. 11
def build_array(array_name, rows):
    if not os.path.exists(array_name):
        ds = 'x, float'

        p = params(clevel=5, storage=array_name)
        t = fromiter((0.1*i for i in xrange(rows)),
                     dshape=ds, params=p)
        t.commit()
    else:
        t = open(array_name)

    return t
Example no. 12
def sliding_window_blz(dirname, window_size):
    X = blz.open(dirname)
    if os.path.exists('result.blz'): shutil.rmtree('result.blz')
    filtered = blz.array([],
                         dshape=X.datashape,
                         params=blz.params(storage='result.blz'))
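    # The first window_size rows use a window starting at row 0; after that the start index advances by one per row.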
    starts = window_size * [0] + range(1, NROWS - window_size + 1)
    for i in range(NROWS):
        start = starts[i]
        partial = (X[start:i + 1]).mean()
        filtered.append([partial])
    filtered.commit()
    return filtered
Example no. 13
def build_table(table_name, rows):
    """build the table to use in our example.

    if already built just open it"""
    if not os.path.exists(table_name):
        ds = 'x, {i: int64; f: float64}'
        p = params(clevel=5, storage=table_name)
        t = Table([], dshape=ds, params=p)
        for i in xrange(rows):
            t.append((i, random()))

        t.commit()
    else:
        t = open(table_name)

    return t
Example no. 14
def test_object_persistent_blob_reopen():
    td = tempfile.mkdtemp()
    tmppath = os.path.join(td, 'c')

    ds = blaze.dshape('x, blob')
    c = blaze.Array([(i, "s"*i) for i in range(10)], ds,
                    params=blaze.params(storage=tmppath))

    c2 = blaze.open(tmppath)

    for i, v in enumerate(c2):
        assert v[0] == i
        assert v[1] == "s"*i

    # Remove everything under the temporary dir
    shutil.rmtree(td)
Example no. 15
def build_table(table_name, rows):
    """build the table to use in our example.

    if already built just open it"""
    if not os.path.exists(table_name):
        ds = 'x, {i: int64; f: float64}'
        p = params(clevel=5, storage=table_name)
        t = Table([], dshape=ds, params=p)
        for i in xrange(rows):
            t.append((i, random()))

        t.commit()
    else:
        t = open(table_name)

    return t
Example no. 16
def test_preserve():
    shape = (3, 4)
    arr = np.ones(shape)

    dshape = "%s,%s, float64" % (shape[0], shape[1])
    path = "p.blz"
    if os.path.exists(path):
        shutil.rmtree(path)
    bparams = blz.params(storage=path)
    barray = blz.Array(arr, dshape, params=bparams)
    print "barray:", repr(barray)

    barray2 = blz.open(path)
    print "barray2:", repr(barray2)

    assert (str(barray.datashape) == str(barray2.datashape))
Example no. 17
def test_preserve():
    shape = (3,4)
    arr = np.ones(shape)

    dshape = "%s,%s, float64" % (shape[0], shape[1])
    path = "p.blz"
    if os.path.exists(path):
        shutil.rmtree(path)
    bparams = blz.params(storage=path)
    barray = blz.Array(arr, dshape, params=bparams)
    print "barray:", repr(barray)

    barray2 = blz.open(path)
    print "barray2:", repr(barray2)

    assert(str(barray.datashape) == str(barray2.datashape))
Example no. 18
def test_object_persistent_blob_reopen():
    td = tempfile.mkdtemp()
    tmppath = os.path.join(td, 'c')

    ds = blaze.dshape('x, blob')
    c = blaze.Array([(i, "s" * i) for i in range(10)],
                    ds,
                    params=blaze.params(storage=tmppath))

    c2 = blaze.open(tmppath)

    for i, v in enumerate(c2):
        assert v[0] == i
        assert v[1] == "s" * i

    # Remove everything under the temporary dir
    shutil.rmtree(td)
Example no. 19
def test_getitem_nd_persistent():
    import tempfile, shutil, os.path

    td = tempfile.mkdtemp()
    path = os.path.join(td, 'test.blz')

    # write
    bparams = params(storage=path, clevel=6)
    nd = ndarr()
    barray = Array(nd, params=bparams)

    # read
    arr = open(path)
    data = arr[:]

    assert np.all(data == nd)

    shutil.rmtree(td)
Example no. 20
def test_getitem_nd_persistent():
    import tempfile, shutil, os.path

    td = tempfile.mkdtemp()
    path = os.path.join(td, 'test.blz')

    # write
    bparams = params(storage=path, clevel=6)
    nd = ndarr()
    barray = Array(nd, params=bparams)

    # read
    arr = open(path)
    data = arr[:]

    assert np.all(data == nd)

    shutil.rmtree(td)
Example no. 21
def test_simple():
    if not os.path.exists('./noaa_data'):
        p = params(clevel=5, storage='./noaa_data')

        t = Table([], dshape='{f0: int, f1:int, f2:int, f3:float}', params=p)

        # TODO: chunkwise copy
        t.append(adapter[:])
        t.commit()
    else:
        t = open('ctable://noaa_data')

    print '--------------------------------------'
    print 'mean', mean(t, 'f3')
    print 'std', std(t, 'f2')
    print '--------------------------------------'

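    # Build two boolean selections: rows where f0 > 80000, and rows where f0 > f1.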
    qs1 = select(t, lambda x: x > 80000, 'f0')
    qs2 = select2(t, lambda x, y: x > y, ['f0', 'f1'])

    result = t[qs1]
Example no. 22
def test_simple():
    if not os.path.exists('./noaa_data'):
        p = params(clevel=5, storage='./noaa_data')

        t = Table([], dshape='{f0: int, f1:int, f2:int, f3:float}', params=p)

        # TODO: chunkwise copy
        t.append(adapter[:])
        t.commit()
    else:
        t = open('ctable://noaa_data')

    print '--------------------------------------'
    print 'mean', mean(t, 'f3')
    print 'std', std(t, 'f2')
    print '--------------------------------------'

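    # Build two boolean selections: rows where f0 > 80000, and rows where f0 > f1.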
    qs1 = select(t, lambda x: x > 80000, 'f0')
    qs2 = select2(t, lambda x,y: x > y, ['f0', 'f1'])

    result = t[qs1]
Example no. 23
 def test_open(self):
     store = blaze.Storage(self.url, mode='r')
     a = blaze.open(store, schema=json_schema)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), [1, 2, 3, 4, 5])
Example no. 24
 def test_deprecated_open(self):
     url = "csv://" + self.fname
     store = blaze.Storage(url, mode='r')
     a = blaze.open(store, schema=csv_schema)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), csv_ldict)
Example no. 25
 def test_open(self):
     store = blaze.Storage(self.url, mode='r')
     a = blaze.open(store, schema=json_schema)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), [1, 2, 3, 4, 5])
Example no. 26
del describe_array

# --------------------------------------------------------------------

print_section('Persisted arrays')


def maybe_remove(persist):
    import os.path
    if os.path.exists(persist.path):
        # Remove every directory starting with rootdir
        blaze.drop(persist)


# Create an empty array on-disk
dname = 'blz://persisted.blz'
store = blaze.Storage(dname)
maybe_remove(store)
p = blaze.zeros('0, float64', storage=store)
# Feed it with some data
blaze.append(p, range(10))

print('Before re-opening:', p)

# Re-open the dataset in URI
p2 = blaze.open(store)

print('After re-opening:', p2)

blaze.drop(dname)
Example no. 27

del describe_array

# --------------------------------------------------------------------

print_section('Persisted arrays')

def maybe_remove(persist):
    import os.path
    if os.path.exists(persist.path):
        # Remove every directory starting with rootdir
        blaze.drop(persist)

# Create an empty array on-disk
dname = 'blz://persisted.blz'
store = blaze.Storage(dname)
maybe_remove(store)
p = blaze.zeros('0, float64', storage=store)
# Feed it with some data
blaze.append(p, range(10))

print('Before re-opening:', p)

# Re-open the dataset in URI
p2 = blaze.open(store)

print('After re-opening:', p2)

blaze.drop(dname)
Example no. 28
 def test_open_has_header(self):
     store = blaze.Storage(self.url, mode='r')
     a = blaze.open(store, schema=csv_schema, has_header=False)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), csv_ldict)
Example no. 29
N = 500

if os.path.exists('c'):
    shutil.rmtree('c')

t0 = time()
c = blaze.Array([], 'x, object', params=blaze.params(storage='c', clevel=5))

for i in xrange(N):
    c.append(u"s"*N*i)
c.commit()
print "time taken for writing in Blaze: %.3f" % (time() - t0)

t0 = time()
c2 = blaze.open('c')
#c2 = c
#print c2.datashape

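# Read every row back from disk and accumulate the total length of the stored strings.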
tlen = 0
for i in range(N):
    #print "i:", i, repr(c2[i]), type(c2[i])
    tlen += len(c2[i][()])
print "time taken for reading in Blaze: %.3f" % (time() - t0)
print "tlen", tlen


# Create a VLArray:
t0 = time()
f = tables.openFile('vlarray.h5', mode='w')
vlarray = f.createVLArray(f.root, 'vlarray',
Example no. 30
    x = np.linspace(i*math.pi, (i+1)*math.pi, CS)
    ts.append(x*np.sin(x))
ts.commit()

# Create a dataset to query
if os.path.exists('query.blz'): shutil.rmtree('query.blz')
xq = np.linspace(3*math.pi, 4*math.pi, CS)
query = blz.array(xq*np.sin(xq), params=blz.params(storage='query.blz'))
if os.path.exists('query2.blz'): shutil.rmtree('query2.blz')
n = np.random.randn(query.size)*.1  # introduce some noise
query2 = blz.array(xq*np.sin(xq)+n, params=blz.params(storage='query2.blz'))
if timing: print "Total Blaze arrays create time :", round(time()-t0, 4)

t0 = time()
# Open Blaze arrays on-disk (will not be loaded in memory)
ts = blz.open("ts.blz")
query = blz.open("query.blz")
query2 = blz.open("query2.blz")
if timing: print "Total Blaze arrays open time :", round(time()-t0, 4)
print "query size:", query.size

# Do the search for the exact pattern
print "   ***   Querying *exact* pattern   ***"
t0 = time()
loc, dist = ucr.dtw(ts, query, 0.1, query.size, verbose=False)
print "Location : ", loc
print "Distance : ", dist
print "Data Scanned : ", ts.size
if timing: print "Total Execution Time (exact):", round(time()-t0, 4)

# Do the search for the noisy pattern
Example no. 31
N = 500

if os.path.exists('c'):
    shutil.rmtree('c')

t0 = time()
c = blaze.Array([], 'x, object', params=blaze.params(storage='c', clevel=5))

for i in xrange(N):
    c.append(u"s" * N * i)
c.commit()
print "time taken for writing in Blaze: %.3f" % (time() - t0)

t0 = time()
c2 = blaze.open('c')
#c2 = c
#print c2.datashape

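# Read every row back from disk and accumulate the total length of the stored strings.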
tlen = 0
for i in range(N):
    #print "i:", i, repr(c2[i]), type(c2[i])
    tlen += len(c2[i][()])
print "time taken for reading in Blaze: %.3f" % (time() - t0)
print "tlen", tlen

# Create a VLArray:
t0 = time()
f = tables.openFile('vlarray.h5', mode='w')
vlarray = f.createVLArray(f.root,
                          'vlarray',
Example no. 32
 def test_open_has_header(self):
     store = blaze.Storage(self.url, mode='r')
     a = blaze.open(store, schema=csv_schema, has_header=False)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), csv_ldict)
Example no. 33
 def test_append(self):
     store = blaze.Storage(self.url, mode='r+')
     a = blaze.open(store, schema=csv_schema)
     blaze.append(a, ["k4", "v4", 4, True])
     self.assertEqual(dd_as_py(a._data), csv_ldict + \
         [{u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}])
Example no. 34
print_section('building basic hdf5 files')
# Create a simple HDF5 file
a1 = np.array([[1, 2, 3], [4, 5, 6]], dtype="int32")
a2 = np.array([[1, 2, 3], [3, 2, 1]], dtype="int64")
t1 = np.array([(1, 2, 3), (3, 2, 1)], dtype="i4,i8,f8")
with tb.open_file(fname, "w") as f:
    f.create_array(f.root, 'a1', a1)
    f.create_table(f.root, 't1', t1)
    f.create_group(f.root, 'g')
    f.create_array(f.root.g, 'a2', a2)
    print("Created HDF5 file with the next contents:\n%s" % str(f))

print_section('opening and handling datasets in hdf5 files')
# Open an homogeneous dataset there
store = blaze.Storage(fname, format='hdf5')
a = blaze.open(store, datapath="/a1")
# Print it
print("/a1 contents:", a)
# Print the datashape
print("datashape for /a1:", a.dshape)

# Open another homogeneous dataset there
store = blaze.Storage(fname, format='hdf5')
a = blaze.open(store, datapath="/g/a2")
# Print it
print("/g/a2 contents:", a)
# Print the datashape
print("datashape for /g/a2:", a.dshape)

# Now, get an heterogeneous dataset
store = blaze.Storage(fname, format='hdf5')
Example no. 35
 def test_deprecated_open(self):
     url = "csv://" + self.fname
     store = blaze.Storage(url, mode='r')
     a = blaze.open(store, schema=csv_schema)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), csv_ldict)
Example no. 36
 def test_deprecated_open(self):
     url = "json://" + self.fname
     store = blaze.Storage(url, mode='r')
     a = blaze.open(store, schema=json_schema)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), [1, 2, 3, 4, 5])
Example no. 37
 def test_open_dialect(self):
     store = blaze.Storage(self.url, mode='r')
     a = blaze.open(store, schema=csv_schema, dialect='excel')
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), csv_ldict)
Example no. 38
# Convert txt file into Blaze native format
def convert(filetxt, storage):
    import os.path
    if not os.path.exists(storage):
        blaze.Array(np.loadtxt(filetxt),
                    params=blaze.params(storage=storage))

# Make sure that data is converted into a persistent Blaze array
convert("Data.txt", "Data")
convert("Query.txt", "Query")
convert("Query2.txt", "Query2")

t0 = time()
# Open Blaze arrays on-disk (will not be loaded in memory)
data = blaze.open("Data")
query = blaze.open("Query")
query2 = blaze.open("Query2")
print "Total Blaze arrays open time :", round(time()-t0, 4)

t0 = time()
# Do different searches using ED/DTW with native Blaze arrays
#loc, dist = ucr.ed(data, query, 128)
loc, dist = ucr.dtw(data, query, 0.1, 128, verbose=False)
#loc, dist = ucr.dtw(data, query2, 0.1, 128)

print "Location : ", loc
print "Distance : ", dist
print "Data Scanned : ", data.size
print "Total Execution Time :", round(time()-t0, 4)
Example no. 39
'''Sample module showing how to read JSON files into blaze arrays'''

from __future__ import absolute_import, division, print_function

import blaze
from blaze.datadescriptor import dd_as_py

json_buf = u"[1, 2, 3, 4, 5]"
json_schema = "var, int8"

# Create a temporary JSON file in URI and open the dataset
dname = '/tmp/test.json'
store = blaze.Storage(dname)
print "store:", store
with file(store.path, "wb") as f:
    f.write(json_buf)
arr = blaze.open(store, json_schema)

#print('Blaze array:', arr)  # XXX This does not work yet
#print('Blaze array:', nd.array(arr))  # XXX idem
# Convert the data to a native Python object
print('Blaze array:', dd_as_py(arr._data))

# Remove the temporary JSON file
blaze.drop(store)
Example no. 40
'''Sample module showing how to read CSV files into blaze arrays'''

from __future__ import absolute_import, division, print_function

import blaze
from blaze.datadescriptor import dd_as_py

# A CSV toy example
csv_buf = u"""k1,v1,1,False
k2,v2,2,True
k3,v3,3,False
"""
csv_schema = "{ f0: string; f1: string; f2: int16; f3: bool }"

# Create a temporary CSV file in URI and open the dataset
dname = '/tmp/test.csv'
store = blaze.Storage(dname)
print "store:", store
with file(store.path, "wb") as f:
    f.write(csv_buf)
arr = blaze.open(store, csv_schema)

#print('Blaze array:', arr)  # XXX This does not work yet
#print('Blaze array:', nd.array(arr))  # XXX idem
# Convert the data to a native Python object
print('Blaze array:', dd_as_py(arr._data))

# Remove the temporary CSV file
blaze.drop(store)
Example no. 41
# Convert txt file into Blaze native format
def convert(filetxt, storage):
    import os.path
    if not os.path.exists(storage):
        blaze.Array(np.loadtxt(filetxt), params=blaze.params(storage=storage))


# Make sure that data is converted into a persistent Blaze array
convert("Data.txt", "Data")
convert("Query.txt", "Query")
convert("Query2.txt", "Query2")

t0 = time()
# Open Blaze arrays on-disk (will not be loaded in memory)
data = blaze.open("Data")
query = blaze.open("Query")
query2 = blaze.open("Query2")
print "Total Blaze arrays open time :", round(time() - t0, 4)

t0 = time()
# Do different searches using ED/DTW with native Blaze arrays
#loc, dist = ucr.ed(data, query, 128)
loc, dist = ucr.dtw(data, query, 0.1, 128, verbose=False)
#loc, dist = ucr.dtw(data, query2, 0.1, 128)

print "Location : ", loc
print "Distance : ", dist
print "Data Scanned : ", data.size
print "Total Execution Time :", round(time() - t0, 4)
Example no. 42
 def test_open_dialect(self):
     store = blaze.Storage(self.url, mode='r')
     a = blaze.open(store, schema=csv_schema, dialect='excel')
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), csv_ldict)
Example no. 43
'''Sample module showing how to read JSON files into blaze arrays'''

from __future__ import absolute_import, division, print_function

import blaze
from blaze.datadescriptor import dd_as_py
#from dynd import nd

json_buf = u"[1, 2, 3, 4, 5]"
json_schema = "var, int8"

# Create a temporary JSON file in URI and open the dataset
dname = 'json:///tmp/test.json'
store = blaze.Storage(dname)
print "store:", store
with file(store.path, "wb") as f:
    f.write(json_buf)
arr = blaze.open(store, json_schema)

#print('Blaze array:', arr)  # XXX This does not work yet
#print('Blaze array:', nd.array(arr))  # XXX idem
# Convert the data to a native Python object
print('Blaze array:', dd_as_py(arr._data))

# Remove the temporary JSON file
blaze.drop(store)
Example no. 44
 def test_append(self):
     store = blaze.Storage(self.url, mode='r+')
     a = blaze.open(store, schema=csv_schema)
     blaze.append(a, ["k4", "v4", 4, True])
     self.assertEqual(dd_as_py(a._data), csv_ldict + \
         [{u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}])
Example no. 45
def test_sqlite():
    from blaze import open
    a = open('sqlite://')
Example no. 46
'''Sample module showing how to read CSV files into blaze arrays'''

import blaze
from blaze.datadescriptor import dd_as_py
#from dynd import nd

# A CSV toy example
csv_buf = u"""k1,v1,1,False
k2,v2,2,True
k3,v3,3,False
"""
csv_schema = "{ f0: string; f1: string; f2: int16; f3: bool }"

# Create a temporary CSV file in URI and open the dataset
dname = 'csv:///tmp/test.csv'
store = blaze.Storage(dname)
print "store:", store
with file(store.path, "wb") as f:
    f.write(csv_buf)
arr = blaze.open(store, csv_schema)

#print('Blaze array:', arr)  # XXX This does not work yet
#print('Blaze array:', nd.array(arr))  # XXX idem
# Convert the data to a native Python object
print('Blaze array:', dd_as_py(arr._data))

# Remove the temporary CSV file
blaze.drop(store)
Example no. 47
#------------------------------------------------------------------------
if not os.path.exists(STORAGE):
    print 'Creating tables'
    N = 100000
    a = carray(np.arange(N, dtype='i4'))
    b = carray(np.arange(N, dtype='f8')+1)
    t = ctable((a, b), ('f0', 'f1'), rootdir='example1', mode='w')
    t.flush()
#------------------------------------------------------------------------

from time import time

print '-------------------'

t = blaze.open('ctable://example1')

# Using a chunked Blaze array we can optimize for I/O by doing the
# reduction chunkwise from disk.

t0 = time()
print blaze.mean(t, 'f0')
print "Chunked mean", round(time()-t0, 6)

# Using NumPy just goes through the iterator protocol on the carray,
# which isn't going to be efficient.

t0 = time()
print np.mean(t.data.ca['f0'])
print "NumPy mean", round(time()-t0, 6)
Example no. 48
print_section('building basic hdf5 files')
# Create a simple HDF5 file
a1 = np.array([[1, 2, 3], [4, 5, 6]], dtype="int32")
a2 = np.array([[1, 2, 3], [3, 2, 1]], dtype="int64")
t1 = np.array([(1, 2, 3), (3, 2, 1)], dtype="i4,i8,f8")
with tb.open_file(fname, "w") as f:
    f.create_array(f.root, 'a1', a1)
    f.create_table(f.root, 't1', t1)
    f.create_group(f.root, 'g')
    f.create_array(f.root.g, 'a2', a2)
    print("Created HDF5 file with the next contents:\n%s" % str(f))

print_section('opening and handling datasets in hdf5 files')
# Open an homogeneous dataset there
store = blaze.Storage(fname, format='hdf5')
a = blaze.open(store, datapath="/a1")
# Print it
print("/a1 contents:", a)
# Print the datashape
print("datashape for /a1:", a.dshape)

# Open another homogeneous dataset there
store = blaze.Storage(fname, format='hdf5')
a = blaze.open(store, datapath="/g/a2")
# Print it
print("/g/a2 contents:", a)
# Print the datashape
print("datashape for /g/a2:", a.dshape)

# Now, get an heterogeneous dataset
store = blaze.Storage(fname, format='hdf5')
Example no. 49
 def test_deprecated_open(self):
     url = "json://" + self.fname
     store = blaze.Storage(url, mode='r')
     a = blaze.open(store, schema=json_schema)
     self.assert_(isinstance(a, blaze.Array))
     self.assertEqual(dd_as_py(a._data), [1, 2, 3, 4, 5])