Exemple #1
0
    def tohdf5(self,
               filepath,
               nodepath='/ctable',
               mode='w',
               cparams=None,
               cname=None):
        """Write this object into an HDF5 file.

        Parameters
        ----------
        filepath : string
            The path of the HDF5 file.  It must not exist yet.
        nodepath : string
            The path of the node inside the HDF5 file.  Its first character
            is dropped and the remainder is used as the name of a table
            created directly under the root group.
        mode : string
            The mode to open the PyTables file.  Default is 'w'rite mode.
        cparams : cparams object
            The compression parameters.  The defaults are the same as for
            the current bcolz environment.  The 'clevel' and 'shuffle'
            entries are always read; 'cname' is read only when `cname` is
            not given.
        cname : string
            Any of the compressors supported by PyTables (e.g. 'zlib').  The
            default is to use 'blosc' as meta-compressor in combination with
            one of its compressors (see `cparams` parameter above).

        Raises
        ------
        ValueError
            If PyTables is not available.
        IOError
            If `filepath` already exists.

        See Also
        --------
        ctable.fromhdf5

        """
        if bcolz.tables_here:
            import tables as tb
        else:
            raise ValueError("you need PyTables to use this functionality")

        if os.path.exists(filepath):
            raise IOError("path '%s' already exists" % filepath)

        # Build the filters before touching the filesystem, so that a bad
        # `cparams` does not leave an empty HDF5 file behind.
        cparams = cparams if cparams is not None else bcolz.defaults.cparams
        cname = cname if cname is not None else "blosc:" + cparams['cname']
        # The shuffle flag must come from the 'shuffle' entry, not from the
        # compression level.  Any non-zero shuffle mode enables the PyTables
        # byte-shuffle filter.
        filters = tb.Filters(complevel=cparams['clevel'],
                             shuffle=bool(cparams['shuffle']),
                             complib=cname)

        f = tb.open_file(filepath, mode=mode)
        try:
            t = f.create_table(f.root,
                               nodepath[1:],
                               self.dtype,
                               filters=filters,
                               expectedrows=len(self))
            # Set the attributes
            for key, val in self.attrs:
                t.attrs[key] = val
            # Copy the data
            for block in bcolz.iterblocks(self):
                t.append(block)
        finally:
            # Always release the file handle, even if a write fails midway.
            f.close()
Exemple #2
0
	def _create_id_index_map(ctable):
		'''
		Build a dictionary mapping ids to indices (source).

		Every item of the 'id' column is converted with str() and mapped to
		its running position, counted from 0 across all blocks. If the same
		id appears more than once, the last position wins.
		'''

		id_strings = (
			str(value)
			for chunk in bcolz.iterblocks(ctable['id'])
			for value in chunk
		)
		return {key: position for position, key in enumerate(id_strings)}
Exemple #3
0
	def _create_index_id_map(ctable):
		'''
		Build a dictionary mapping an index to an id (target).

		Positions are counted from 0 across all blocks of the 'id' column,
		and every id is stored as its str() representation.
		'''

		id_strings = (
			str(value)
			for chunk in bcolz.iterblocks(ctable['id'])
			for value in chunk
		)
		return dict(enumerate(id_strings))
Exemple #4
0
 def test01(self):
     """Testing `iterblocks()` (w/ start, stop)"""
     # Every block of two rows taken from an all-ones array must equal this.
     expected_block = np.ones((2, 3), dtype="i4")
     source = bcolz.ones((1000, 3), dtype="i4")
     rows_seen = 0
     grand_total = 0
     blocks = bcolz.iterblocks(source, blen=2, start=10, stop=100)
     for chunk in blocks:
         assert_array_equal(expected_block, chunk, "Arrays are not equal")
         rows_seen += len(chunk)
         grand_total += chunk.sum()
     # Rows 10..99 are visited: 90 rows in total ...
     self.assertEqual(rows_seen, 90)
     # ... each of them holding three ones.
     self.assertEqual(grand_total, 90 * 3)
Exemple #5
0
 def test01(self):
     """Testing `iterblocks()` (w/ start, stop)"""
     # Reference block: two rows of three ones.
     reference = np.ones((2, 3), dtype="i4")
     ones_table = bcolz.ones((1000, 3), dtype="i4")
     row_count, element_sum = 0, 0
     block_iter = bcolz.iterblocks(ones_table, blen=2, start=10, stop=100)
     for piece in block_iter:
         assert_array_equal(reference, piece, "Arrays are not equal")
         row_count += len(piece)
         element_sum += piece.sum()
     # The [10, 100) range spans 90 rows.
     self.assertEqual(row_count, 90)
     # All elements are ones, so the sum is rows * columns.
     self.assertEqual(element_sum, 90 * 3)
Exemple #6
0
    def tohdf5(self, filepath, nodepath='/ctable', mode='w',
               cparams=None, cname=None):
        """
        tohdf5(filepath, nodepath='/ctable', mode='w',
               cparams=None, cname=None)

        Write this object into an HDF5 file.

        Parameters
        ----------
        filepath : string
            The path of the HDF5 file.  It must not exist yet.
        nodepath : string
            The path of the node inside the HDF5 file.  Its first character
            is dropped and the remainder is used as the name of a table
            created directly under the root group.
        mode : string
            The mode to open the PyTables file.  Default is 'w'rite mode.
        cparams : cparams object
            The compression parameters.  The defaults are the same as for
            the current bcolz environment.  The 'clevel' and 'shuffle'
            entries are always read; 'cname' is read only when `cname` is
            not given.
        cname : string
            Any of the compressors supported by PyTables (e.g. 'zlib').  The
            default is to use 'blosc' as meta-compressor in combination with
            one of its compressors (see `cparams` parameter above).

        Raises
        ------
        ValueError
            If PyTables is not available.
        IOError
            If `filepath` already exists.

        See Also
        --------
        ctable.fromhdf5

        """
        if bcolz.tables_here:
            import tables as tb
        else:
            raise ValueError("you need PyTables to use this functionality")

        if os.path.exists(filepath):
            raise IOError("path '%s' already exists" % filepath)

        # Build the filters before touching the filesystem, so that a bad
        # `cparams` does not leave an empty HDF5 file behind.
        cparams = cparams if cparams is not None else bcolz.defaults.cparams
        cname = cname if cname is not None else "blosc:"+cparams['cname']
        # The shuffle flag must come from the 'shuffle' entry, not from the
        # compression level.  Any non-zero shuffle mode enables the PyTables
        # byte-shuffle filter.
        filters = tb.Filters(complevel=cparams['clevel'],
                             shuffle=bool(cparams['shuffle']),
                             complib=cname)

        f = tb.open_file(filepath, mode=mode)
        try:
            # Passing the final row count lets PyTables size its chunks.
            t = f.create_table(f.root, nodepath[1:], self.dtype,
                               filters=filters, expectedrows=len(self))
            # Set the attributes
            for key, val in self.attrs:
                t.attrs[key] = val
            # Copy the data
            for block in bcolz.iterblocks(self):
                t.append(block)
        finally:
            # Always release the file handle, even if a write fails midway.
            f.close()
Exemple #7
0
 def test00(self):
     """Testing `iterblocks()` (no start, stop, step)"""
     nrows = 1000
     # Every block of two rows taken from an all-ones array must equal this.
     expected_block = np.ones((2, 3), dtype="i4")
     source = bcolz.ones((nrows, 3), dtype="i4")
     rows_seen = 0
     grand_total = 0
     blocks = bcolz.iterblocks(source, blen=2)
     for chunk in blocks:
         assert_array_equal(expected_block, chunk, "Arrays are not equal")
         rows_seen += len(chunk)
         grand_total += chunk.sum()
     # Without start/stop the whole array must be visited ...
     self.assertEqual(rows_seen, nrows)
     # ... and every row contributes three ones to the total.
     self.assertEqual(grand_total, nrows * 3)
Exemple #8
0
 def test00(self):
     """Testing `iterblocks()` (no start, stop, step)"""
     total_rows = 1000
     # Reference block: two rows of three ones.
     reference = np.ones((2, 3), dtype="i4")
     ones_table = bcolz.ones((total_rows, 3), dtype="i4")
     row_count, element_sum = 0, 0
     block_iter = bcolz.iterblocks(ones_table, blen=2)
     for piece in block_iter:
         assert_array_equal(reference, piece, "Arrays are not equal")
         row_count += len(piece)
         element_sum += piece.sum()
     # The iteration must cover every row of the array.
     self.assertEqual(row_count, total_rows)
     # All elements are ones, so the sum is rows * columns.
     self.assertEqual(element_sum, total_rows * 3)
# Benchmark variant 1 (disabled): row-by-row write using an iterator.
# Kept for reference only.
# t0 = time()
# f = tb.open_file(filepath, 'w')
# t = f.create_table(f.root, nodepath[1:], ct.dtype)
# for row in ct:
#     t.append([row])
# f.close()
# tt = time() - t0
# print("time with iterator: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Benchmark variant 2: blocked write.  The table is created by hand and the
# ctable is appended one block at a time, as yielded by bcolz.iterblocks().
# No filters are passed to create_table() here.
t0 = time()
f = tb.open_file(filepath, 'w')
t = f.create_table(f.root, nodepath[1:], ct.dtype)
for block in bcolz.iterblocks(ct):
    t.append(block)
f.close()
tt = time() - t0
# NOTE(review): the message reports GB/s, so dsize is presumably the data
# size in GB -- confirm where dsize is defined.
print("time with blocked write: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Benchmark variant 3: the generic ctable.tohdf5() implementation.
# The file written by the previous variant is removed first, so that
# tohdf5() starts from a non-existing path.
os.remove(filepath)
t0 = time()
# Alternative call relying on the default compressor:
#ct.tohdf5(filepath, nodepath)
ct.tohdf5(filepath, nodepath, cname="blosc:blosclz")
tt = time() - t0
print("time with tohdf5: %.2f (%.2f GB/s)" % (tt, dsize / tt))


# Debugging aid (disabled):
#print(repr(ct))
Exemple #10
0
def iterblocks2(arr):
    """Return the sum of all elements of `arr`, accumulated block by block.

    The array is traversed with `bcolz.iterblocks()` using blocks of
    `arr.chunklen` rows; the accumulator starts as the float 0.
    """
    total = 0.
    chunks = bcolz.iterblocks(arr, blen=arr.chunklen)
    for chunk in chunks:
        total += chunk.sum()
    return total
Exemple #11
0
def iterblocks2(arr):
    """Sum `arr` by adding up the partial sums of its blocks.

    Blocks are produced by `bcolz.iterblocks()` with a block length equal to
    `arr.chunklen`; the running total starts as the float 0.
    """
    running_total = 0.
    block_iter = bcolz.iterblocks(arr, blen=arr.chunklen)
    for piece in block_iter:
        running_total += piece.sum()
    return running_total