Esempio n. 1
0
 def test05(self):
     """Testing `where()` iterator using `skip`"""
     a = np.arange(1, 11)
     b = blz.barray(a)
     # Reference: the values <= 5 with the first two matches dropped.
     expected = [v for v in a if v <= 5][2:]
     obtained = [v for v in b.where(blz.barray(a <= 5), skip=2)]
     # assertEqual replaces the deprecated `assert_` alias and reports both
     # lists when they differ.
     self.assertEqual(expected, obtained, "where() does not work correctly")
Esempio n. 2
0
 def test06(self):
     """Testing `where()` iterator (using array bool in fancy indexing)"""
     data = np.arange(1, 110)
     packed = blz.barray(data, chunklen=10)
     # The same boolean condition selects from the NumPy array directly and,
     # wrapped in a barray, from the compressed container.
     expected = data[(data < 5) | (data > 9)]
     selected = packed[blz.barray((data < 5) | (data > 9))]
     assert_array_equal(expected, selected,
                        "where() does not work correctly")
Esempio n. 3
0
 def test03(self):
     """Testing `where()` iterator (using a boolean array)"""
     a = np.arange(1, 11)
     b = blz.barray(a)
     # Reference: plain Python filtering of the NumPy array.
     expected = [v for v in a if v <= 5]
     obtained = [v for v in b.where(blz.barray(a <= 5))]
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(expected, obtained, "where() does not work correctly")
Esempio n. 4
0
 def test06(self):
     """Testing `where()` iterator using `limit` and `skip`"""
     a = np.arange(1, 11)
     b = blz.barray(a)
     # Reference: skip one match, then keep the next three.
     expected = [v for v in a if v <= 5][1:4]
     obtained = [v for v in b.where(blz.barray(a <= 5), limit=3, skip=1)]
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(expected, obtained, "where() does not work correctly")
Esempio n. 5
0
 def test04(self):
     """Testing fancy indexing with __setitem__ (bool barray)"""
     reference = np.arange(1,1e2)
     packed = blz.barray(reference, chunklen=10)
     mask = (reference > 5) & (reference < 40)
     mask_barray = blz.barray(mask)
     # Assign through the boolean barray on the compressed side and through
     # the plain NumPy mask on the reference side.
     packed[mask_barray] = 3.
     reference[mask] = 3.
     assert_array_equal(packed[:], reference,
                        "fancy indexing does not work correctly")
Esempio n. 6
0
 def test07(self):
     """Testing `where()` iterator using `limit` and `skip` (zeros)"""
     a = np.arange(10000)
     b = blz.barray(a,)
     # Reference: skip 1010 matches, then keep the next 1010.
     expected = [v for v in a if v <= 5000][1010:2020]
     condition = blz.barray(a <= 5000, chunklen=100)
     obtained = [v for v in b.where(condition, limit=1010, skip=1010)]
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(expected, obtained, "where() does not work correctly")
Esempio n. 7
0
 def test01d(self):
     """Testing `__getitem()__` method with only a (large) start"""
     reference = np.arange(1e4)
     packed = blz.barray(reference, rootdir=self.rootdir)
     # Index -2 addresses the second-to-last element.
     position = -2
     assert_array_equal(reference[position], packed[position],
                        "Arrays are not equal")
Esempio n. 8
0
 def test04(self):
     """Testing `iter()` method with large zero arrays"""
     # The shape must be an integer: current NumPy rejects a float size.
     a = np.zeros(int(1e4), dtype='f8')
     b = blz.barray(a, chunklen=100, rootdir=self.rootdir)
     # Rebuild a container from a plain iteration over the barray.
     c = blz.fromiter((v for v in b), dtype='f8', count=len(a))
     assert_array_equal(a, c[:], "iterator fails on zeros")
Esempio n. 9
0
 def test03a(self):
     """Testing `iter()` method with only step"""
     a = np.arange(101)
     b = blz.barray(a, chunklen=2, rootdir=self.rootdir)
     # Compare the sum of every 4th element from both containers;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(sum(a[::4]), sum(b.iter(step=4)),
                      "Sums are not equal")
Esempio n. 10
0
 def test03d(self):
     """Testing `__getitem()__` method with ranges and steps (IV)"""
     reference = np.arange(1e3)
     packed = blz.barray(reference, chunklen=10, rootdir=self.rootdir)
     # The step (3000) is larger than the requested range (4 to 80).
     selection = slice(4, 80, 3000)
     assert_array_equal(reference[selection], packed[selection],
                        "Arrays are not equal")
Esempio n. 11
0
 def test02b(self):
     """Testing `__getitem()__` method with ranges (negative start)"""
     reference = np.arange(1e2)
     packed = blz.barray(reference, chunklen=10, rootdir=self.rootdir)
     # slice(-3) has no start and a stop of -3, i.e. it selects [:-3].
     selection = slice(-3)
     assert_array_equal(reference[selection], packed[selection],
                        "Arrays are not equal")
Esempio n. 12
0
def open(persist, **kwargs):
    """Open an existing persistent array.

    Parameters
    ----------
    persist : a Storage instance
        The Storage instance specifies, among other things, URI of
        where the array is stored.
    kwargs : a dictionary
        Put here different parameters depending on the format.

    Returns
    -------
    out: a concrete blaze array.

    Raises
    ------
    ValueError
        If the format of `persist` is not one of the supported ones.

    Notes
    -----
    Only BLZ, HDF5, CSV and JSON formats are supported currently.

    """
    persist = _persist_convert(persist)
    fmt = persist.format
    if fmt == 'blz':
        d = blz.barray(rootdir=persist.path, **kwargs)
        dd = BLZDataDescriptor(d)
    elif fmt == 'csv':
        dd = CSVDataDescriptor(persist.path, **kwargs)
    elif fmt == 'json':
        dd = JSONDataDescriptor(persist.path, **kwargs)
    elif fmt == 'hdf5':
        dd = HDF5DataDescriptor(persist.path, **kwargs)
    else:
        # Without this branch `dd` would be unbound and the function would
        # fail with an unrelated UnboundLocalError.
        raise ValueError('unsupported persistent format: %r' % (fmt,))
    return Array(dd)
Esempio n. 13
0
def open(persist, **kwargs):
    """Open an existing persistent array.

    Parameters
    ----------
    persist : a Storage instance
        The Storage instance specifies, among other things, URI of
        where the array is stored.
    kwargs : a dictionary
        Put here different parameters depending on the format.

    Returns
    -------
    out: a concrete blaze array.

    Raises
    ------
    ValueError
        If the format of `persist` is not one of the supported ones.

    Notes
    -----
    Only BLZ, HDF5, CSV and JSON formats are supported currently.

    """
    persist = _persist_convert(persist)
    fmt = persist.format
    if fmt == 'blz':
        d = blz.barray(rootdir=persist.path, **kwargs)
        dd = BLZDataDescriptor(d)
    elif fmt == 'csv':
        dd = CSVDataDescriptor(persist.path, **kwargs)
    elif fmt == 'json':
        dd = JSONDataDescriptor(persist.path, **kwargs)
    elif fmt == 'hdf5':
        dd = HDF5DataDescriptor(persist.path, **kwargs)
    else:
        # Without this branch `dd` would be unbound and the function would
        # fail with an unrelated UnboundLocalError.
        raise ValueError('unsupported persistent format: %r' % (fmt,))
    return Array(dd)
Esempio n. 14
0
 def select_and_apply(self, apply_func = lambda xx:np.dot(xx,np.ones(xx.shape[1]) / xx.shape[1]),
                      select_format = (None,1),
                      combine_fun = lambda xx:np.concatenate(xx,1),
                      limit_n_per_slice = 2000000):
     """Apply `apply_func` to each group of selected slices.

     The slice selectors come from
     `generate_axis_dividing_slice_selectors`; for each of them the data
     returned by `select_all_barrays` is passed to `apply_func` and the
     results are accumulated in a single barray.

     Parameters
     ----------
     apply_func : callable
         Called on each combined array. The default takes the dot product
         with uniform weights summing to one, i.e. the mean along axis 1.
     select_format : tuple
         Forwarded to `generate_axis_dividing_slice_selectors`.
     combine_fun : callable
         Forwarded to `select_all_barrays`. The default concatenates along
         axis 1.
     limit_n_per_slice : int
         Forwarded to `generate_axis_dividing_slice_selectors`.

     Returns
     -------
     A barray created from the first result, with the others appended.
     """
     selected_slices = self.generate_axis_dividing_slice_selectors(
         select_format=select_format,
         limit_n_per_slice=limit_n_per_slice)

     # A single pre-formatted argument makes print() produce the same
     # output on Python 2 and Python 3.
     print("selected_slices =  %s" % (selected_slices,))

     # The first group creates the output container ...
     one_selected_slices = selected_slices[0]
     combined_arr = self.select_all_barrays(one_selected_slices,
                                            combine_fun=combine_fun)
     output_barray = blz.barray(apply_func(combined_arr))

     # ... and every remaining group is appended to it.
     for one_selected_slices in selected_slices[1:]:
         print("applying one_selected_slices =  %s" % (one_selected_slices,))

         combined_arr = self.select_all_barrays(one_selected_slices,
                                                combine_fun=combine_fun)
         output_barray.append(apply_func(combined_arr))

     return output_barray
Esempio n. 15
0
 def test03(self):
     """Testing copy() with no shuffle"""
     # `num` must be an integer; recent NumPy raises TypeError for 1e4.
     a = np.linspace(-1., 1., int(1e4))
     b = blz.barray(a, rootdir=self.rootdir)
     c = b.copy(bparams=blz.bparams(shuffle=False))
     # The test expects the copy made without shuffle to take more
     # compressed bytes; assertTrue replaces the deprecated `assert_`.
     self.assertTrue(b.cbytes < c.cbytes, "shuffle not changed")
Esempio n. 16
0
 def test02c(self):
     """Testing `iter()` method with positive start, negative stop"""
     a = np.arange(101)
     b = blz.barray(a, chunklen=2, rootdir=self.rootdir)
     # Compare sums over the same [24:-3] range on both containers;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(sum(a[24:-3]), sum(b.iter(24, -3)),
                      "Sums are not equal")
Esempio n. 17
0
 def test04a(self):
     """Testing `__getitem()__` method with long ranges"""
     reference = np.arange(1e3)
     packed = blz.barray(reference, chunklen=100, rootdir=self.rootdir)
     # The stop (8000) lies beyond the 1000 available elements.
     selection = slice(1, 8000)
     assert_array_equal(reference[selection], packed[selection],
                        "Arrays are not equal")
Esempio n. 18
0
 def test03b(self):
     """Testing `iter()` method with start, stop, step"""
     a = np.arange(101)
     b = blz.barray(a, chunklen=2, rootdir=self.rootdir)
     # Compare sums over the same [3:24:4] range on both containers;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(sum(a[3:24:4]), sum(b.iter(3, 24, 4)),
                      "Sums are not equal")
Esempio n. 19
0
 def test04d(self):
     """Testing `__getitem()__` method with no start and no stop"""
     reference = np.arange(1e3)
     packed = blz.barray(reference, chunklen=100, rootdir=self.rootdir)
     # Only a step is given: every second element of the whole array.
     selection = slice(None, None, 2)
     assert_array_equal(reference[selection], packed[selection],
                        "Arrays are not equal")
Esempio n. 20
0
 def test05(self):
     """Testing `__getitem()__` method with negative steps"""
     reference = np.arange(1e3)
     packed = blz.barray(reference, chunklen=10, rootdir=self.rootdir)
     backwards = slice(None, None, -3)
     # A negative step is expected to be rejected by the barray.
     self.assertRaises(NotImplementedError, packed.__getitem__, backwards)
Esempio n. 21
0
 def test02(self):
     """Testing copy() with lesser compression"""
     # `num` must be an integer; recent NumPy raises TypeError for 1e4.
     a = np.linspace(-1., 1., int(1e4))
     b = blz.barray(a, rootdir=self.rootdir)
     c = b.copy(bparams=blz.bparams(clevel=1))
     # The test expects the copy made with clevel=1 to take more
     # compressed bytes; assertTrue replaces the deprecated `assert_`.
     self.assertTrue(b.cbytes < c.cbytes, "clevel not changed")
Esempio n. 22
0
 def test02(self):
     """Testing fancy indexing (empty list)"""
     reference = np.arange(101)
     packed = blz.barray(reference)
     # An empty index list is applied to both containers.
     picked = packed[[]]
     wanted = reference[[]]
     assert_array_equal(picked, wanted,
                        "fancy indexing does not work correctly")
Esempio n. 23
0
 def test00(self):
     """Testing fancy indexing (short list)"""
     reference = np.arange(1,111)
     packed = blz.barray(reference)
     # The index list is deliberately not sorted.
     picked = packed[[3,1]]
     wanted = reference[[3,1]]
     assert_array_equal(picked, wanted,
                        "fancy indexing does not work correctly")
Esempio n. 24
0
 def test03(self):
     """Testing fancy indexing (list of floats)"""
     reference = np.arange(1,101)
     packed = blz.barray(reference)
     picked = packed[[1.1, 3.3]]
     # NOTE(review): recent NumPy versions may reject float indices in the
     # reference expression below -- confirm against the targeted version.
     wanted = reference[[1.1, 3.3]]
     assert_array_equal(picked, wanted,
                        "fancy indexing does not work correctly")
Esempio n. 25
0
 def test01(self):
     """Testing fancy indexing (large list, numpy)"""
     reference = np.arange(1,1e4)
     packed = blz.barray(reference)
     # 1000 random positions drawn from [0, 1000).
     positions = np.random.randint(1000, size=1000)
     picked = packed[positions]
     wanted = reference[positions]
     assert_array_equal(picked, wanted,
                        "fancy indexing does not work correctly")
Esempio n. 26
0
 def test02c(self):
     """Testing `append()` method (large chunklen III)"""
     source = np.arange(1000*1000)
     packed = blz.barray(source, chunklen=100*1000-1, rootdir=self.rootdir)
     # Appending the source once more must yield the source twice in a row.
     packed.append(source)
     doubled = np.concatenate((source, source))
     assert_array_equal(doubled, packed[:], "Arrays are not equal")
Esempio n. 27
0
 def test01(self):
     """Testing __sizeof__() (big arrays)"""
     a = np.arange(2e5)
     b = blz.barray(a, rootdir=self.rootdir)
     # The reported object size must stay below the uncompressed size;
     # assertTrue replaces the deprecated `assert_` alias.
     self.assertTrue(sys.getsizeof(b) < b.nbytes,
                     "barray does not seem to compress at all")
Esempio n. 28
0
 def test02(self):
     """Testing __sizeof__() (small arrays)"""
     a = np.arange(111)
     b = blz.barray(a)
     # For this small input the reported object size is expected to exceed
     # the uncompressed size; assertTrue replaces the deprecated `assert_`.
     self.assertTrue(sys.getsizeof(b) > b.nbytes,
                     "barray compressed too much??")
Esempio n. 29
0
 def test07(self):
     """Testing `iter()` method with `limit` and `skip`"""
     source = np.arange(1e4, dtype='f8')
     packed = blz.barray(source, chunklen=100, rootdir=self.rootdir)
     # Skip the first 1010 items and take the following 1010 ones.
     window = (v for v in packed.iter(limit=1010, skip=1010))
     rebuilt = blz.fromiter(window, dtype='f8', count=1010)
     assert_array_equal(source[1010:2020], rebuilt,
                        "iterator fails on zeros")
Esempio n. 30
0
 def test00(self):
     """Testing unicode types (creation)"""
     a = np.array([[u"aŀle", u"eñe"], [u"açò", u"áèâë"]], dtype="U4")
     b = blz.barray(a)
     # The base dtype of the barray must equal the dtype of the source;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Esempio n. 31
0
 def test00(self):
     """Testing string types (creation)"""
     a = np.array([["ale", "ene"], ["aco", "ieie"]], dtype="S4")
     b = blz.barray(a)
     # The base dtype of the barray must equal the dtype of the source;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Esempio n. 32
0
 def test00(self):
     """Testing string types (creation)"""
     a = np.array([["ale", "ene"], ["aco", "ieie"]], dtype="S4")
     b = blz.barray(a)
     # The base dtype of the barray must equal the dtype of the source;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Esempio n. 33
0
 def test00(self):
     """Testing unicode types (creation)"""
     a = np.array([[u"aŀle", u"eñe"], [u"açò", u"áèâë"]], dtype="U4")
     b = blz.barray(a)
     # The base dtype of the barray must equal the dtype of the source;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Esempio n. 34
0
    def testImplicitDtype(self):
        """Testing barray construction keeping dimensions (implicit dtype)"""
        identity = np.eye(6)  # 2d
        stored = blz.barray(identity, rootdir=self.rootdir)
        if self.open:
            # Replace the fresh container by one re-opened from `rootdir`.
            stored = blz.open(rootdir=self.rootdir)

        # Equal arrays necessarily have equal shapes.
        assert_array_equal(identity, stored, "Arrays are not equal")
Esempio n. 35
0
 def test01(self):
     """Testing unicode types (append)"""
     a = np.ones((300, 4), dtype="U4")
     # Start from an empty (0, 4) container and append all rows at once.
     b = blz.barray([], dtype="U4").reshape((0, 4))
     b.append(a)
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a.dtype, b.dtype.base)
     assert_array_equal(a, b[:], "Arrays are not equal")
Esempio n. 36
0
 def test_barray_record_as_object(self):
     # Source: a structured ('u1,O') array holding (int, str) records.
     records = np.empty((10,), dtype=np.dtype('u1,O'))
     records[:] = [(i, 's'*i) for i in range(10)]
     # Store the records with an explicit object dtype.
     objs = blz.barray(records, dtype=np.dtype('O'))
     self.assertEqual(len(objs.shape), 1)
     self.assertEqual(len(records), objs.shape[0])
     # Both fields of every stored item must match the source record.
     for idx in range(len(objs)):
         for field in (0, 1):
             self.assertEqual(objs[idx][field], records[idx][field])
Esempio n. 37
0
    def test_barray_1d_source(self):
        """Testing barray of objects, 1d source"""
        src_data = ['s'*i for i in range(10)]
        carr = blz.barray(src_data, dtype=np.dtype('O'))

        self.assertEqual(len(carr.shape), 1)
        self.assertEqual(len(src_data), carr.shape[0])
        # Each stored object must equal its source item; the assertion was
        # duplicated verbatim in the original, once is enough.
        for i in range(len(carr)):
            self.assertEqual(carr[i], src_data[i])
Esempio n. 38
0
 def test00(self):
     """Testing sum()."""
     # reshape() needs integer dimensions; 1e4 is a float that current
     # NumPy rejects.
     a = np.arange(1e5).reshape(10, int(1e4))
     sa = a.sum()
     ac = blz.barray(a)
     sac = ac.sum()
     # Both the dtype and the value of the sum must agree with NumPy;
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(sa.dtype, sac.dtype, "sum() is not working correctly.")
     self.assertEqual(sa, sac, "sum() is not working correctly.")
Esempio n. 39
0
 def test04c(self):
     """Testing `__getitem()__` method with shape reduction (III)"""
     a = np.arange(6000).reshape((50, 40, 3))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         # Replace the fresh container by one re-opened from `rootdir`.
         b = blz.open(rootdir=self.rootdir)
     # Integer indices on the first and last axes drop those dimensions.
     sl = (1, slice(1, 4, 2), 2)
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
Esempio n. 40
0
    def testExplicitDtype(self):
        """Testing barray construction keeping dimensions (explicit dtype)"""
        float_type = np.dtype(np.float64)
        identity = np.eye(6, dtype=float_type)
        stored = blz.barray(identity, dtype=float_type, rootdir=self.rootdir)
        if self.open:
            # Replace the fresh container by one re-opened from `rootdir`.
            stored = blz.open(rootdir=self.rootdir)

        # Equal arrays necessarily have equal shapes.
        assert_array_equal(identity, stored, "Arrays are not equal")
Esempio n. 41
0
 def test03c(self):
     """Testing `__getitem()__` method with several slices (III)"""
     a = np.arange(120 * 1000).reshape((5 * 1000, 4, 3, 2))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         # Replace the fresh container by one re-opened from `rootdir`.
         b = blz.open(rootdir=self.rootdir)
     # Slices are given for the first three axes only.
     sl = (slice(None, None, 3), slice(1, 3, 2), slice(1, 4, 2))
     # assertEqual replaces the deprecated `assert_` alias.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
Esempio n. 42
0
 def test04c(self):
     """Testing `__setitem()__` method with shape reduction (III)"""
     reference = np.arange(24).reshape((4, 3, 2))
     packed = blz.barray(reference, rootdir=self.rootdir)
     key = (1, 2, slice(None, None, None))
     # Apply the same assignment to both containers.
     reference[key] = 2
     packed[key] = 2
     if self.open:
         # Flush the changes, then re-open the container from `rootdir`.
         packed.flush()
         packed = blz.open(rootdir=self.rootdir)
     assert_array_equal(reference[key], packed[key], "Arrays are not equal")
Esempio n. 43
0
 def test03d(self):
     """Testing `__setitem()__` method with several slices (IV)"""
     reference = np.arange(120).reshape((5, 4, 3, 2))
     packed = blz.barray(reference, rootdir=self.rootdir)
     key = (slice(1, 3), slice(1, 3, 1), slice(1, None, 2), slice(1))
     # Apply the same assignment to both containers.
     reference[key] = 2
     packed[key] = 2
     if self.open:
         # Flush the changes, then re-open the container from `rootdir`.
         packed.flush()
         packed = blz.open(rootdir=self.rootdir)
     # The whole contents are compared, not only the assigned region.
     assert_array_equal(reference[:], packed[:], "Arrays are not equal")
    def __setitem__(self, key, value):
        """Store `value` under `key`, compressing plain NumPy arrays first.

        The ``caching`` attribute of the object returned by ``_config()``
        selects where compressed data is kept: ``'disk'`` (a temporary
        folder is used as `rootdir`) or ``'memory'``.

        Raises
        ------
        AssertionError
            If the configured location is neither 'disk' nor 'memory'.
        """
        # Check config file (queried once instead of twice)
        location = getattr(_config(), 'caching', None)
        assert location in ('disk', 'memory')

        def compress(array):
            # Create the temporary folder only when something is actually
            # compressed; creating it up front left an empty directory
            # behind for every value that was not an ndarray.
            save_folder = tempfile.mkdtemp() if location == 'disk' else None
            return blz.barray(array, rootdir=save_folder)

        self.compress = compress

        # Only exact ndarray instances are compressed (subclasses are
        # stored untouched, as before).
        if type(value) is np.ndarray:
            value = self.compress(value)
        return super(CompressedCache, self).__setitem__(key, value)
Esempio n. 45
0
def modelPredictor(modelsPath_modelIndex_dataPath_colNames_tuple):
    """
    Predict with one pickled model over on-disk columns and store the result.

    Input: A tuple, with the following four attributes (in this order):
            modelsPath: string, the path to the trained models. (pickle file)
            modelIndex: integer, the index of the model to predict.
            dataPath: string, the path to the data.
            colNames: a list of strings, column names of the output table. It should be like ["Id", "V1", ...]
    Output: None. A btable with the columns "Id" and "Predict" is created
            under the directory described in the notes.

    Notes:
    modelPredictor will create following directories for you if they do not exist.
            1. Model_No{modelIndex}_predicted_array: it will be under the dataPath.
    An already existing directory with that name is removed first.
    """
    import shutil  # local import: only needed to drop a stale result directory

    # Set up necessary constants.
    divideN = 300000
    modelsPath, modelIndex, dataPath, colNames = modelsPath_modelIndex_dataPath_colNames_tuple
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only point modelsPath at trusted files.
    with open(modelsPath, "rb") as rf:
        models = pickle.load(rf)
    model = models[modelIndex]
    del models

    # Read in the Id column and split [0, totalN] into chunks of at most
    # divideN observations: 0, divideN, 2 * divideN, ..., totalN.
    Id = blz.open(os.path.join(dataPath, colNames[0]))
    totalN = len(Id)
    nodes_list = list(range(0, totalN, divideN)) + [totalN]
    nodes_pair_list = zip(nodes_list[:-1], nodes_list[1:])

    # Prediction, one chunk at a time.
    y_predict = np.zeros(totalN)
    print("[Model No.{modelIndex}] Prediction process begins.".format(modelIndex = modelIndex))
    for begin, end in nodes_pair_list:
        print("[Model No.{modelIndex}] Processing {begin} ~ {end} observations.".format(modelIndex=modelIndex, begin = begin + 1, end = end))
        columns = [blz.open(os.path.join(dataPath, colname))[begin:end] for colname in colNames[1:]]
        X = np.column_stack(columns)
        temp = model.predict(X)
        y_predict[begin:end] = temp

    columns = [Id, blz.barray(y_predict)]
    data_rootdir = os.path.join(dataPath, "Model_No{modelIndex}_predicted_array".format(modelIndex = modelIndex))
    # The previous check compared the joined path against the bare names
    # returned by os.listdir(), so it never matched; test the path itself
    # and remove the directory without going through a shell.
    if os.path.isdir(data_rootdir):
        print("Removing Old result_table directory for new btable.")
        shutil.rmtree(data_rootdir)
    final_table = blz.btable(columns = columns, names = ["Id", "Predict"], rootdir = data_rootdir)
    print("The result_table btable rootdir is under {path}".format(path=data_rootdir))
Esempio n. 46
0
    def test_barray_2d_source(self):
        """Testing barray of objects, 2d source

        Expected result will be a 1d barray whose elements are
        containers holding the inner dimension
        """
        pairs = [(i, 's'*i) for i in range(10)]
        objs = blz.barray(pairs, dtype=np.dtype('O'))
        # With an object dtype the barray is expected to stay one
        # dimensional, one element per source pair.
        self.assertEqual(len(objs.shape), 1)
        self.assertEqual(len(pairs), objs.shape[0])
        # Both members of every stored item must match the source pair.
        for idx in range(len(objs)):
            for member in (0, 1):
                self.assertEqual(objs[idx][member], pairs[idx][member])
Esempio n. 47
0
    def test_barray_tuple_source(self):
        """Testing a barray of objects that are tuples

        This uses a numpy container as source. Tuples should be
        preserved
        """
        tuples = np.empty((10,), dtype=np.dtype('O'))
        tuples[:] = [(i, 's'*i) for i in range(tuples.shape[0])]
        objs = blz.barray(tuples)
        self.assertEqual(len(objs.shape), 1)
        self.assertEqual(len(tuples), objs.shape[0])
        # Stored items must come back as tuples, like the source items.
        self.assertEqual(type(objs[0]), tuple)
        self.assertEqual(type(objs[0]), type(tuples[0]))
        for idx in range(len(objs)):
            for member in (0, 1):
                self.assertEqual(objs[idx][member], tuples[idx][member])
Esempio n. 48
0
    def _test_barray_record_inferred_opt2(self):
        """Testing barray handling of inferred record dtypes
        containing objects.  When there is no explicit dtype in the
        barray constructor, the dtype becomes 'O', and the barrays
        behaves accordingly (one dimensional)
        """
        records = np.empty((10,), dtype=np.dtype('u1,O'))
        records[:] = [(i, 's'*i) for i in range(10)]

        # No dtype is passed: the checks below are the same ones used for
        # a barray created with dtype='O'.
        objs = blz.barray(records)
        self.assertEqual(len(objs.shape), 1)
        self.assertEqual(len(records), objs.shape[0])
        for idx in range(len(objs)):
            for field in (0, 1):
                self.assertEqual(objs[idx][field], records[idx][field])
Esempio n. 49
0
## Benchmark to check the creation of an array of length > 2**32 (5e9)

import blz
from time import time

# Create an on-disk barray of 5e9 one-byte integers and time it.
# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3.
t0 = time()
#cn = blz.zeros(5e9, dtype="i1")
cn = blz.zeros(5e9, dtype="i1", rootdir='ondisk_barray', mode='w')
print("Creation time: %s" % round(time() - t0, 3))
assert len(cn) == int(5e9)

# Re-open the same container in append mode.
t0 = time()
cn = blz.barray(rootdir='ondisk_barray', mode='a')
print("Re-open time: %s" % round(time() - t0, 3))
print("len(cn) %s" % len(cn))
assert len(cn) == int(5e9)

# Now check some accesses
cn[1] = 1
assert cn[1] == 1
cn[int(2e9)] = 2
assert cn[int(2e9)] == 2
# int() replaces the Python 2-only long(); on Python 2 it still returns a
# long when the value does not fit in a plain int.
cn[int(3e9)] = 3
assert cn[int(3e9)] == 3
cn[-1] = 4
assert cn[-1] == 4

# The four values written above (1 + 2 + 3 + 4) are the only non-zeros.
t0 = time()
assert cn.sum() == 10
print("Sum time: %s" % round(time() - t0, 3))
def array(obj, dshape=None, ddesc=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If neither `obj` nor `ddesc` is given, if `ddesc` is given while
        `obj` is already a DDesc instance, if `obj` is given together
        with a DyND `ddesc`, or if no dynd array can be built from `obj`.

    """
    dshape = _normalize_dshape(dshape)

    # Coerce concrete (non-generator) input to the requested datashape.
    if ((obj is not None) and (not inspect.isgenerator(obj))
            and (dshape is not None)):
        dt = ndt.type(str(dshape))
        if dt.ndim > 0:
            obj = nd.array(obj, type=dt, access='rw')
        else:
            obj = nd.array(obj, dtype=dt, access='rw')

    if obj is None and ddesc is None:
        raise ValueError('you need to specify at least `obj` or `ddesc`')

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, DDesc):
        if ddesc is None:
            ddesc = obj
            return Array(ddesc)
        else:
            raise ValueError(('you cannot specify `ddesc` when `obj` '
                              'is already a DDesc instance'))

    if ddesc is None:
        # Use a dynd ddesc by default
        try:
            dynd_arr = nd.asarray(obj, access='rw')
        except Exception:
            # `except Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate unchanged.
            raise ValueError(('failed to construct a dynd array from '
                              'object %r') % obj)
        ddesc = DyND_DDesc(dynd_arr)
        return Array(ddesc)

    # The DDesc has been specified
    if isinstance(ddesc, DyND_DDesc):
        if obj is not None:
            raise ValueError(('you cannot specify simultaneously '
                              '`obj` and a DyND `ddesc`'))
        return Array(ddesc)
    elif isinstance(ddesc, BLZ_DDesc):
        if inspect.isgenerator(obj):
            dt = None if dshape is None else to_numpy_dtype(dshape)
            # TODO: Generator logic could go inside barray
            ddesc.blzarr = blz.fromiter(obj,
                                        dtype=dt,
                                        count=-1,
                                        rootdir=ddesc.path,
                                        mode=ddesc.mode,
                                        **ddesc.kwargs)
        else:
            if isinstance(obj, nd.array):
                obj = nd.as_numpy(obj)
            # Record measures are stored as a btable, anything else as a
            # barray.
            if dshape and isinstance(dshape.measure, datashape.Record):
                ddesc.blzarr = blz.btable(obj,
                                          rootdir=ddesc.path,
                                          mode=ddesc.mode,
                                          **ddesc.kwargs)
            else:
                ddesc.blzarr = blz.barray(obj,
                                          rootdir=ddesc.path,
                                          mode=ddesc.mode,
                                          **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        if isinstance(obj, nd.array):
            obj = nd.as_numpy(obj)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            if dshape and isinstance(dshape.measure, datashape.Record):
                # Convert the structured array to unaligned dtype
                # We need that because PyTables only accepts unaligned types,
                # which are the default in NumPy
                obj = np.array(obj, datashape.to_numpy_dtype(dshape.measure))
                f.create_table(where, name, filters=ddesc.filters, obj=obj)
            else:
                f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations

    return Array(ddesc)
Esempio n. 51
0
import numpy as np
import blz
from time import time

N = 1e8  # the number of elements in x
clevel = 5  # the compression level
sexpr = "(x-1) < 10."  # the expression to compute
#sexpr = "((x-1) % 1000) == 0."  # the expression to compute
#sexpr = "(2*x**3+.3*y**2+z+1)<0"  # the expression to compute

bparams = blz.bparams(clevel)

# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3.
print("Creating inputs with %d elements..." % N)

x = np.arange(N)
cx = blz.barray(x, bparams=bparams)
# Only build the y and z columns when the expression refers to them.
if 'y' not in sexpr:
    ct = blz.btable((cx, ), names=['x'])
else:
    y = np.arange(N)
    z = np.arange(N)
    cy = blz.barray(y, bparams=bparams)
    cz = blz.barray(z, bparams=bparams)
    ct = blz.btable((cx, cy, cz), names=['x', 'y', 'z'])

print("Evaluating... %s" % sexpr)
t0 = time()
cbout = ct.eval(sexpr)
print("Time for evaluation--> %.3f" % (time() - t0, ))
print("Converting to numy arrays")
bout = cbout[:]
Esempio n. 52
0
import numpy as np
import blz
from time import time

N = 100 * 1000 * 1000
CLEVEL = 5

a = np.linspace(0, 1, N)

# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3.

# In-memory barray.
t0 = time()
ac = blz.barray(a, bparams=blz.bparams(clevel=CLEVEL))
print("time creation (memory) -> %s" % round(time()-t0, 3))
print("data (memory): %s" % repr(ac))

# Barray created under the 'myarray' rootdir; the flush is part of the
# timed section.
t0 = time()
b = blz.barray(a, bparams=blz.bparams(clevel=CLEVEL),
               rootdir='myarray', mode='w')
b.flush()
print("time creation (disk) -> %s" % round(time()-t0, 3))
#print "meta (disk):", b.read_meta()

# Plain NumPy copy, for comparison.
t0 = time()
an = np.array(a)
print("time creation (numpy) -> %s" % round(time()-t0, 3))

# Re-open the container stored under 'myarray'.
t0 = time()
c = blz.barray(rootdir='myarray')
print("time open (disk) -> %s" % round(time()-t0, 3))
#print "meta (disk):", c.read_meta()
print("data (disk): %s" % repr(c))
Esempio n. 53
0
    #print "cout-->", repr(cout)


if __name__=="__main__":

    N = 1e8       # the number of elements in x
    clevel = 5    # the compression level
    # Alternative expressions; the first one used to be assigned and then
    # immediately overwritten.
    #sexpr = "(x+1)<0"
    sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)<0"
    #sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)"
    doprofile = True

    print("Creating inputs...")
    x = np.arange(N)
    #x = np.linspace(0,100,N)
    cx = blz.barray(x, bparams=blz.bparams(clevel))

    print("Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N))))

    # Time the plain numexpr evaluation first.
    t0 = time()
    cout = ne.evaluate(sexpr)
    # Call form, consistent with the other prints in this block and valid
    # on both Python 2 and Python 3.
    print("Time for numexpr --> %.3f" % (time()-t0,))

    if doprofile:
        import pstats
        import cProfile as prof
        # Profile the barray-based evaluation and dump stats to 'eval.prof'.
        prof.run('compute_barray(sexpr, clevel=clevel, kernel="numexpr")',
        #prof.run('compute_barray(sexpr, clevel=clevel, kernel="python")',
                 'eval.prof')
        stats = pstats.Stats('eval.prof')
        stats.strip_dirs()
Esempio n. 54
0
def append(data, clevel, cname):
    """Build a barray from `data[0]` and append every remaining item.

    `clevel` and `cname` are handed to `blz.bparams` to configure the
    barray that is created; that barray is returned.
    """
    first = data[0]
    params = blz.bparams(clevel, cname=cname)
    merged = blz.barray(first, bparams=params)
    for chunk in data[1:]:
        merged.append(chunk)
    return merged
Esempio n. 55
0
File: sum.py Progetto: pombreda/blz
import numpy as np
import blz
from time import time

# `size` must be an integer for np.random.randint on current NumPy.
N = int(1e8)
#a = np.arange(N, dtype='f8')
a = np.random.randint(0, 10, N).astype('bool')

# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3.
t0 = time()
sa = a.sum()
print("Time sum() numpy --> %.3f" % (time() - t0))

t0 = time()
ac = blz.barray(a, bparams=blz.bparams(9))
print("Time barry conv --> %.3f" % (time() - t0))
# repr() replaces the Python 2-only backtick syntax.
print("ac--> %s" % repr(ac))

t0 = time()
sac = ac.sum()
#sac = ac.sum(dtype=np.dtype('i8'))
print("Time sum() barray --> %.3f" % (time() - t0))

# t0 = time()
# sac = sum(i for i in ac)
# print "Time sum() carray (iter) --> %.3f" % (time()-t0)

print("sa, sac--> %s %s %s %s" % (sa, sac, type(sa), type(sac)))
assert (sa == sac)
Esempio n. 56
0
from time import time

import numpy as np  # was used below without being imported
import blz

# The shape passed to np.zeros must be an integer on current NumPy.
N = int(1e7)  # the number of elements in x
M = 100000  # the elements to get
clevel = 1  # the compression level

# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3.
print("Creating inputs with %d elements..." % N)

bparams = blz.bparams(clevel)

#x = np.arange(N)
x = np.zeros(N, dtype="f8")
y = x.copy()
z = x.copy()
cx = blz.barray(x, bparams=bparams)
cy = cx.copy()
cz = cx.copy()
ct = blz.btable((cx, cy, cz), names=['x', 'y', 'z'])
t = ct[:]

print("Starting benchmark now for getting %d elements..." % M)
# Retrieve from a ndarray
t0 = time()
# range() replaces the Python 2-only xrange().
vals = [x[i] for i in range(0, M, 3)]
print("Time for array--> %.3f" % (time() - t0, ))
print("vals--> %s" % len(vals))

#blz.set_num_threads(blz.ncores//2)

# Retrieve from a barray
Esempio n. 57
0
def array(obj, dshape=None, caps={'efficient-write': True}, storage=None):
    """Build a concrete Blaze array from `obj`.

    The backing data descriptor is picked by the first rule that matches:

    1. `obj` is already an ``Array``: it is returned unchanged.
    2. `obj` is an ``IDataDescriptor``: it is wrapped as is.
    3. `storage` is given: a BLZ barray rooted at ``storage.path``.
    4. ``caps['efficient-write'] is True``: an in-memory, read-write DyND
       array.
    5. ``caps['compress'] is True``: an in-memory BLZ barray.
    6. Otherwise by the type of `obj`: numpy arrays are viewed through
       DyND, DyND arrays and barrays are wrapped directly.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape, optional
        Requested datashape.  It is only consulted in rules 3-5 above; in
        every other case it has no effect on the result.

    caps : capabilities dictionary
        Desired capabilities of the array.  It is only read here, never
        modified.

    storage : Storage instance, optional
        Describes where the data should be stored.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If none of the rules above can handle `obj`.

    Known limitations
    -----------------
    The explicit `dshape` is not applied on every path (see above), and
    nothing here raises explicitly when the data cannot be coerced to the
    requested dshape; the original authors flag both as things to fix.

    """
    dshape = _normalize_dshape(dshape)
    storage = _storage_convert(storage)

    def _requested(capability):
        # A capability counts only when present and set to exactly True.
        return capability in caps and caps[capability] is True

    if isinstance(obj, Array):
        return obj

    if isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: if it is already an IDataDescriptor, should
        # `caps` perhaps be ignored?  Probably not...
        #
        # Note by Oscar: maybe a data descriptor should not be accepted at
        #   this level at all.  Whoever holds one is already working with
        #   internal data structures and can call the Array constructor
        #   directly, converting to another descriptor kind themselves
        #   if that is what they need.
        return Array(obj)

    if storage is not None:
        np_dtype = to_numpy_dtype(dshape) if dshape is not None else None
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            stored = blz.barray(obj, dtype=np_dtype, count=-1,
                                rootdir=storage.path)
        else:
            stored = blz.barray(obj, dtype=np_dtype, rootdir=storage.path)
        return Array(BLZDataDescriptor(stored))

    if _requested('efficient-write'):
        # In-Memory array
        if dshape is None:
            in_memory = nd.asarray(obj, access='rw')
        else:
            # DyND takes the type as `type=` when it has array dimensions
            # (ndim > 0) and as `dtype=` otherwise.
            dynd_type = ndt.type(str(dshape))
            if dynd_type.ndim > 0:
                in_memory = nd.array(obj, type=dynd_type, access='rw')
            else:
                in_memory = nd.array(obj, dtype=dynd_type, access='rw')
        return Array(DyNDDataDescriptor(in_memory))

    if _requested('compress'):
        # BLZ provides compression
        np_dtype = to_numpy_dtype(dshape) if dshape is not None else None
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            compressed = blz.fromiter(obj, dtype=np_dtype, count=-1)
        else:
            compressed = blz.barray(obj, dtype=np_dtype)
        return Array(BLZDataDescriptor(compressed))

    # No capability asked for: choose the descriptor from the input type.
    if isinstance(obj, np.ndarray):
        return Array(DyNDDataDescriptor(nd.view(obj)))
    if isinstance(obj, nd.array):
        return Array(DyNDDataDescriptor(obj))
    if isinstance(obj, blz.barray):
        return Array(BLZDataDescriptor(obj))

    raise TypeError(('Failed to construct blaze array from '
                     'object of type %r') % type(obj))
Esempio n. 58
0
# Benchmark to compare times for iterators in generator contexts by
# using barrays vs plain numpy arrays.

import numpy as np
import blz
from time import time

# Problem size and the two containers under comparison: a plain numpy
# array and a barray built from it.
N = 1e7

plain = np.arange(N)
packed = blz.barray(plain)

# Filtered generator sum over every third element (from index 2) of the
# numpy array.
started = time()
#sum1 = sum(a)
numpy_total = sum(v for v in plain[2::3] if v < 10)
numpy_secs = time() - started
print("Summing using numpy iterator: %.3f" % numpy_secs)

# Same reduction, this time driven by the barray iterator.
started = time()
#sum2 = sum(b)
barray_total = sum(v for v in packed.iter(2, None, 3) if v < 10)
barray_secs = time() - started
print("Summing using barray iterator: %.3f  speedup: %.2f"
      % (barray_secs, numpy_secs / barray_secs))

# Both code paths must produce the same total.
assert numpy_total == barray_total, "Summations are not equal!"
Esempio n. 59
0
File: eval.py Progetto: pombreda/blz
        kernel,
        time() - t0,
    ),
    print ", cratio (out): %.1f" % (cout.nbytes / float(cout.cbytes))
    #print "cout-->", repr(cout)


if __name__ == "__main__":

    print "Creating inputs..."

    bparams = blz.bparams(clevel)

    y = x.copy()
    z = x.copy()
    cx = blz.barray(x, bparams=bparams)
    cy = blz.barray(y, bparams=bparams)
    cz = blz.barray(z, bparams=bparams)

    for sexpr in sexprs:
        print "Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N)))
        compute_ref(sexpr)
        for kernel in "python", "numexpr":
            compute_blz(sexpr, clevel=0, kernel=kernel)
        if doprofile:
            import pstats
            import cProfile as prof
            #prof.run('compute_blz(sexpr, clevel=clevel, kernel="numexpr")',
            prof.run(
                'compute_blz(sexpr, clevel=0, kernel="numexpr")',
                #prof.run('compute_blz(sexpr, clevel=clevel, kernel="python")',