def test_partition_shape_4():
    """A 3D nav shape with a mid-sized target splits along the trailing nav dims."""
    ds_shape = Shape((128, 15, 15, 16, 16), sig_dims=2)
    result = get_partition_shape(ds_shape, target_size_items=15 * 512)
    assert result == (1, 2, 15)
def partition_shape(self, dtype, target_size, min_num_partitions=None):
    """
    Calculate partition shape for the given ``target_size``

    Parameters
    ----------
    dtype : numpy.dtype or str
        data type of the dataset

    target_size : int
        target size in bytes - how large should each partition be?

    min_num_partitions : int
        minimum number of partitions desired. Defaults to the number
        of workers in the cluster.

    Returns
    -------
    Tuple[int]
        the shape calculated from the given parameters
    """
    # fall back to the cluster's worker count when no minimum was given
    num_partitions = (
        self._cores if min_num_partitions is None else min_num_partitions
    )
    # convert the byte budget into an item budget for this dtype
    items_per_partition = target_size // np.dtype(dtype).itemsize
    return get_partition_shape(
        dataset_shape=self.shape,
        target_size_items=items_per_partition,
        min_num=num_partitions,
    )
def partition_shape(self, datashape, framesize, dtype, target_size,
                    min_num_partitions=None):
    """
    Calculate partition shape for the given ``target_size``

    Parameters
    ----------
    datashape : (int, int, int, int)
        size of the whole dataset

    framesize : int
        number of pixels per frame

    dtype : numpy.dtype or str
        data type of the dataset

    target_size : int
        target size in bytes - how large should each partition be?

    min_num_partitions : int
        minimum number of partitions desired. Defaults to the number
        of workers in the cluster.

    Returns
    -------
    (int, int, int, int)
        the shape calculated from the given parameters
    """
    # default to one partition per worker when no minimum was requested
    num = min_num_partitions if min_num_partitions is not None else self._cores
    return get_partition_shape(datashape, framesize, dtype, target_size, num)
def _get_tileshape(self, dest_dtype, target_size=None):
    """
    Determine the tile shape: an explicitly-set ``self.tileshape`` wins;
    otherwise derive a nav shape from the byte budget and append the full
    signature dimensions.
    """
    explicit = self.tileshape
    if explicit is not None:
        return explicit
    # default budget: 1 MiB per tile
    size_bytes = 1 * 1024 * 1024 if target_size is None else target_size
    item_size = np.dtype(dest_dtype).itemsize
    nav = get_partition_shape(
        dataset_shape=self.slice_nd.shape,
        target_size_items=size_bytes // item_size,
    )
    return nav + tuple(self.slice_nd.shape.sig)
def make_index(self, data, dtype, min_num_partitions=16, target_size=512 * 1024 * 1024):
    """
    create the json-serializable index structure. decides about the
    concrete partitioning, which will later be used to split the input data
    """
    pshape = get_partition_shape(
        datashape=data.shape,
        framesize=data[0][0].size,
        dtype=dtype,
        min_num_partitions=min_num_partitions,
        target_size=target_size,
    )
    parts = self.make_partitions(
        data=data,
        partition_shape=pshape,
    )
    fname_fmt = "partition-%(idx)08d.raw"
    # one entry per partition: where it starts, how big it is, which file holds it
    partition_entries = []
    for i, p in enumerate(parts):
        partition_entries.append({
            "origin": p['origin'],
            "shape": p['shape'],
            "filename": fname_fmt % {
                "idx": i
            },
        })
    return {
        "dtype": str(dtype),
        "mode": "rect",
        "shape": data.shape,
        "partitions": partition_entries,
    }
def test_partition_shape_1d():
    """A 1D nav shape with a generous budget still honors min_num."""
    shape = Shape((15, 16, 16), sig_dims=2)
    result = get_partition_shape(
        dataset_shape=shape,
        target_size_items=256 * 1024,
        min_num=2,
    )
    assert result == (7, )
def test_partition_shape_small():
    """A tiny item budget collapses to single-frame partitions."""
    shape = Shape((15, 16, 16), sig_dims=2)
    result = get_partition_shape(shape, target_size_items=4)
    assert result == (1, )
def test_partition_shape_5():
    """min_num larger than one nav dim still yields single-frame partitions."""
    shape = Shape((2, 16, 16), sig_dims=2)
    result = get_partition_shape(shape, target_size_items=512, min_num=3)
    assert result == (1, )
def test_partition_shape_1():
    """Budget of 512 items over 256-pixel frames gives 2-frame partitions."""
    shape = Shape((15, 16, 16), sig_dims=2)
    result = get_partition_shape(shape, target_size_items=512)
    assert result == (2, )